Fix color-to-tag mapping

By reading the value from the options file with a papis-style getter, we should now correctly get the values for mapping colors to tags. I wonder why they did not just implement, e.g., a TOML reader?
2023-08-28 16:41:18 +02:00 · 2023-08-28 16:41:18 +02:00 · e68b801ca1
commit e68b801ca1
parent ff84a28c4a
1 changed files with 50 additions and 14 deletions
--- a/papis_extract/extractor.py
+++ b/papis_extract/extractor.py
@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Any, Optional

 import Levenshtein
 import fitz_new as fitz
@ -7,8 +8,6 @@ import papis.config

 from papis_extract.annotation_data import Annotation

-COLOR_MAPPING = {}
-
 logger = papis.logging.get_logger(__name__)


@ -23,15 +22,17 @@ def start(filename: Path) -> list[Annotation]:
        for page in doc:
            for annot in page.annots():
                quote, note = _retrieve_annotation_content(page, annot)
+                if not quote and not note:
+                    continue
                a = Annotation(
                    file=str(filename),
-                    text=quote,
-                    content=note,
+                    text=quote or "",
+                    content=note or "",
                    colors=annot.colors,
                    type=annot.type[1],
                    page=(page.number or 0) + 1,
                )
-                a.tag = _tag_from_colorname(a.colorname)
+                a.tag = _tag_from_colorname(a.colorname or "")
                annotations.append(a)
    logger.debug(
        f"Found {len(annotations)} "
@ -40,11 +41,17 @@ def start(filename: Path) -> list[Annotation]:
    return annotations


-def _tag_from_colorname(colorname):
-    return COLOR_MAPPING.get(colorname, "")
+def _tag_from_colorname(colorname: str) -> str:
+    color_mapping: dict[str,str] = getdict("tags", "plugins.extract")
+    if not color_mapping:
+        return ""
+
+    return color_mapping.get(colorname, "")


-def _retrieve_annotation_content(page, annotation):
+def _retrieve_annotation_content(
+    page: fitz.Page, annotation: fitz.Annot
+) -> tuple[str | None, str | None]:
    """Gets the text content of an annotation.

    Returns the actual content of an annotation. Sometimes
@ -62,12 +69,41 @@ def _retrieve_annotation_content(page, annotation):
        papis.config.getfloat("minimum_similarity_content", "plugins.extract") or 1.0
    )
    if Levenshtein.ratio(content, written) > minimum_similarity:
-        return (content, "")
-    # an independent note, not a highlight
-    elif content and not written:
-        return ("", content)
+        return (content, None)
    # both a highlight and a note
-    elif content:
+    elif content and written:
        return (written, content)
+    # an independent note, not a highlight
+    elif content:
+        return (None, content)
    # highlight with selection not in note
-    return (written, "")
+    elif written:
+        return (written, None)
+    # just a highlight without any text
+    return (None, None)
+
+# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
+def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
+    """Dict getter
+
+    :returns: A python dict
+    :raises SyntaxError: Whenever the parsed syntax is either not a valid
+        python object or a valid python dict.
+    """
+    rawvalue: Any = papis.config.general_get(key, section=section)
+    if isinstance(rawvalue, dict):
+        return rawvalue
+    try:
+        rawvalue = eval(rawvalue)
+    except Exception:
+        raise SyntaxError(
+            "The key '{}' must be a valid Python object: {}"
+            .format(key, rawvalue))
+    else:
+        if not isinstance(rawvalue, dict):
+            raise SyntaxError(
+                "The key '{}' must be a valid Python dict. Got: {} (type {!r})"
+                .format(key, rawvalue, type(rawvalue).__name__))
+
+        return rawvalue
+