Fix color mapping to tag

By reading the option from the options file the same way papis' own
getters do, we should now correctly get the values for mapping colors to tags.
Why did they not just implement, e.g., a TOML reader, I wonder?
This commit is contained in:
Marty Oehme 2023-08-28 16:41:18 +02:00
parent ff84a28c4a
commit e68b801ca1
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@ -1,4 +1,5 @@
from pathlib import Path
from typing import Any, Optional
import Levenshtein
import fitz_new as fitz
@ -7,8 +8,6 @@ import papis.config
from papis_extract.annotation_data import Annotation
COLOR_MAPPING = {}
logger = papis.logging.get_logger(__name__)
@ -23,15 +22,17 @@ def start(filename: Path) -> list[Annotation]:
for page in doc:
for annot in page.annots():
quote, note = _retrieve_annotation_content(page, annot)
if not quote and not note:
continue
a = Annotation(
file=str(filename),
text=quote,
content=note,
text=quote or "",
content=note or "",
colors=annot.colors,
type=annot.type[1],
page=(page.number or 0) + 1,
)
a.tag = _tag_from_colorname(a.colorname)
a.tag = _tag_from_colorname(a.colorname or "")
annotations.append(a)
logger.debug(
f"Found {len(annotations)} "
@ -40,11 +41,17 @@ def start(filename: Path) -> list[Annotation]:
return annotations
def _tag_from_colorname(colorname):
return COLOR_MAPPING.get(colorname, "")
def _tag_from_colorname(colorname: str) -> str:
    """Return the tag configured for an annotation color name.

    The color-to-tag mapping is read from the ``tags`` option of the
    ``plugins.extract`` section on every call.

    :param colorname: Name of the color to look up.
    :returns: The tag mapped to ``colorname``, or an empty string when the
        mapping is empty or has no entry for that color.
    """
    tags_by_color: dict[str, str] = getdict("tags", "plugins.extract")
    return tags_by_color.get(colorname, "") if tags_by_color else ""
def _retrieve_annotation_content(page, annotation):
def _retrieve_annotation_content(
page: fitz.Page, annotation: fitz.Annot
) -> tuple[str | None, str | None]:
"""Gets the text content of an annotation.
Returns the actual content of an annotation. Sometimes
@ -62,12 +69,41 @@ def _retrieve_annotation_content(page, annotation):
papis.config.getfloat("minimum_similarity_content", "plugins.extract") or 1.0
)
if Levenshtein.ratio(content, written) > minimum_similarity:
return (content, "")
# an independent note, not a highlight
elif content and not written:
return ("", content)
return (content, None)
# both a highlight and a note
elif content:
elif content and written:
return (written, content)
# an independent note, not a highlight
elif content:
return (None, content)
# highlight with selection not in note
return (written, "")
elif written:
return (written, None)
# just a highlight without any text
return (None, None)
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
    """Dict getter

    Fetches the raw option with ``papis.config.general_get``. A value that
    already is a dict is returned unchanged; any other value is evaluated as
    a Python expression, and the result must be a dict. The types of the
    keys and values inside the dict are not checked.

    :param key: Name of the configuration option to read.
    :param section: Configuration section, passed on to ``general_get``.
    :returns: A python dict
    :raises SyntaxError: Whenever the parsed syntax is either not a valid
        python object or a valid python dict.
    """
    rawvalue: Any = papis.config.general_get(key, section=section)
    if isinstance(rawvalue, dict):
        return rawvalue

    # NOTE(review): eval() executes whatever expression the configuration
    # value contains. It is kept so every value that parsed before still
    # parses; ast.literal_eval would be the safer choice if only literal
    # dicts need to be supported -- confirm before switching.
    try:
        parsed = eval(rawvalue)
    except Exception as err:
        # Chain explicitly so the underlying parse error stays visible as
        # the cause of the SyntaxError callers see.
        raise SyntaxError(
            "The key '{}' must be a valid Python object: {}"
            .format(key, rawvalue)
        ) from err

    if not isinstance(parsed, dict):
        raise SyntaxError(
            "The key '{}' must be a valid Python dict. Got: {} (type {!r})"
            .format(key, parsed, type(parsed).__name__)
        )
    return parsed