Fix color mapping to tag
By reading the option from the configuration file the same way papis' own getters do, we should now correctly obtain the values that map colors to tags. I wonder why they did not simply implement, e.g., a TOML reader?
This commit is contained in:
parent
ff84a28c4a
commit
e68b801ca1
1 changed files with 50 additions and 14 deletions
|
@ -1,4 +1,5 @@
|
|||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import Levenshtein
|
||||
import fitz_new as fitz
|
||||
|
@ -7,8 +8,6 @@ import papis.config
|
|||
|
||||
from papis_extract.annotation_data import Annotation
|
||||
|
||||
COLOR_MAPPING = {}
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
|
||||
|
@ -23,15 +22,17 @@ def start(filename: Path) -> list[Annotation]:
|
|||
for page in doc:
|
||||
for annot in page.annots():
|
||||
quote, note = _retrieve_annotation_content(page, annot)
|
||||
if not quote and not note:
|
||||
continue
|
||||
a = Annotation(
|
||||
file=str(filename),
|
||||
text=quote,
|
||||
content=note,
|
||||
text=quote or "",
|
||||
content=note or "",
|
||||
colors=annot.colors,
|
||||
type=annot.type[1],
|
||||
page=(page.number or 0) + 1,
|
||||
)
|
||||
a.tag = _tag_from_colorname(a.colorname)
|
||||
a.tag = _tag_from_colorname(a.colorname or "")
|
||||
annotations.append(a)
|
||||
logger.debug(
|
||||
f"Found {len(annotations)} "
|
||||
|
@ -40,11 +41,17 @@ def start(filename: Path) -> list[Annotation]:
|
|||
return annotations
|
||||
|
||||
|
||||
def _tag_from_colorname(colorname):
|
||||
return COLOR_MAPPING.get(colorname, "")
|
||||
def _tag_from_colorname(colorname: str) -> str:
    """Return the tag configured for *colorname*.

    The color-to-tag mapping is read through ``getdict`` from the ``tags``
    option of the ``plugins.extract`` section. An empty string is returned
    when the mapping is empty or has no entry for the given color name.
    """
    tags_by_color: dict[str, str] = getdict("tags", "plugins.extract")
    if tags_by_color:
        return tags_by_color.get(colorname, "")
    return ""
|
||||
|
||||
|
||||
def _retrieve_annotation_content(page, annotation):
|
||||
def _retrieve_annotation_content(
|
||||
page: fitz.Page, annotation: fitz.Annot
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Gets the text content of an annotation.
|
||||
|
||||
Returns the actual content of an annotation. Sometimes
|
||||
|
@ -62,12 +69,41 @@ def _retrieve_annotation_content(page, annotation):
|
|||
papis.config.getfloat("minimum_similarity_content", "plugins.extract") or 1.0
|
||||
)
|
||||
if Levenshtein.ratio(content, written) > minimum_similarity:
|
||||
return (content, "")
|
||||
# an independent note, not a highlight
|
||||
elif content and not written:
|
||||
return ("", content)
|
||||
return (content, None)
|
||||
# both a highlight and a note
|
||||
elif content:
|
||||
elif content and written:
|
||||
return (written, content)
|
||||
# an independent note, not a highlight
|
||||
elif content:
|
||||
return (None, content)
|
||||
# highlight with selection not in note
|
||||
return (written, "")
|
||||
elif written:
|
||||
return (written, None)
|
||||
# just a highlight without any text
|
||||
return (None, None)
|
||||
|
||||
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
|
||||
def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
    """Dict getter

    Fetches the raw option with ``papis.config.general_get``. A value that
    is already a dict is returned unchanged; any other value is evaluated
    as a Python expression, and the result must be a dict.

    :param key: Name of the option to read.
    :param section: Section handed through to ``general_get``.
    :returns: A python dict
    :raises SyntaxError: Whenever the parsed syntax is either not a valid
        python object or a valid python dict.
    """
    rawvalue: Any = papis.config.general_get(key, section=section)
    if isinstance(rawvalue, dict):
        return rawvalue

    try:
        # NOTE(review): eval() runs arbitrary code taken from the
        # configuration value. It is kept so existing configurations keep
        # working; if only literal dicts need to be supported,
        # ast.literal_eval would be the safer choice -- confirm before
        # switching.
        parsed: Any = eval(rawvalue)
    except Exception as err:
        # Chain the original error so the real cause stays visible in the
        # traceback instead of only the generic SyntaxError.
        raise SyntaxError(
            "The key '{}' must be a valid Python object: {}"
            .format(key, rawvalue)) from err

    if not isinstance(parsed, dict):
        raise SyntaxError(
            "The key '{}' must be a valid Python dict. Got: {} (type {!r})"
            .format(key, parsed, type(parsed).__name__))

    return parsed
|
||||
|
||||
|
|
Loading…
Reference in a new issue