Fix color mapping to tag

By reading the option from the configuration file the same way papis' own typed getters do, we should now correctly obtain the values for mapping colors to tags. I wonder why they did not simply implement, e.g., a TOML reader?
2023-08-28 16:41:18 +02:00 · 2023-08-28 16:41:18 +02:00 · e68b801ca1
commit e68b801ca1
parent ff84a28c4a
1 changed files with 50 additions and 14 deletions
--- a/papis_extract/extractor.py
+++ b/papis_extract/extractor.py
@ -1,4 +1,5 @@
 from pathlib import Path
 from typing import Any, Optional
 import Levenshtein
 import fitz_new as fitz
@ -7,8 +8,6 @@ import papis.config
 from papis_extract.annotation_data import Annotation
 COLOR_MAPPING = {}
 logger = papis.logging.get_logger(__name__)
@ -23,15 +22,17 @@ def start(filename: Path) -> list[Annotation]:
        for page in doc:
            for annot in page.annots():
                quote, note = _retrieve_annotation_content(page, annot)
                if not quote and not note:
                    continue
                a = Annotation(
                    file=str(filename),
-                    text=quote,
+                    text=quote or "",
-                    content=note,
+                    content=note or "",
                    colors=annot.colors,
                    type=annot.type[1],
                    page=(page.number or 0) + 1,
                )
-                a.tag = _tag_from_colorname(a.colorname)
+                a.tag = _tag_from_colorname(a.colorname or "")
                annotations.append(a)
    logger.debug(
        f"Found {len(annotations)} "
@ -40,11 +41,17 @@ def start(filename: Path) -> list[Annotation]:
    return annotations
-def _tag_from_colorname(colorname):
+def _tag_from_colorname(colorname: str) -> str:
-    return COLOR_MAPPING.get(colorname, "")
+    color_mapping: dict[str,str] = getdict("tags", "plugins.extract")
    if not color_mapping:
        return ""
    return color_mapping.get(colorname, "")
-def _retrieve_annotation_content(page, annotation):
+def _retrieve_annotation_content(
    page: fitz.Page, annotation: fitz.Annot
 ) -> tuple[str | None, str | None]:
    """Gets the text content of an annotation.
    Returns the actual content of an annotation. Sometimes
@ -62,12 +69,41 @@ def _retrieve_annotation_content(page, annotation):
        papis.config.getfloat("minimum_similarity_content", "plugins.extract") or 1.0
    )
    if Levenshtein.ratio(content, written) > minimum_similarity:
-        return (content, "")
+        return (content, None)
    # an independent note, not a highlight
    elif content and not written:
        return ("", content)
    # both a highlight and a note
-    elif content:
+    elif content and written:
        return (written, content)
    # an independent note, not a highlight
    elif content:
        return (None, content)
    # highlight with selection not in note
-    return (written, "")
+    elif written:
        return (written, None)
    # just a highlight without any text
    return (None, None)
 # mimics the functions in papis.config.{getlist,getint,getfloat} etc.
 def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
    """Return the configuration option ``key`` as a python dict.

    The raw value is fetched with ``papis.config.general_get``. A value that
    already is a dict is returned unchanged; anything else is parsed as a
    Python literal and must evaluate to a dict.

    :param key: Name of the option to look up.
    :param section: Optional section name, forwarded to
        ``papis.config.general_get``.
    :returns: A python dict
    :raises SyntaxError: Whenever the parsed syntax is either not a valid
        python object or a valid python dict.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import ast

    rawvalue: Any = papis.config.general_get(key, section=section)
    if isinstance(rawvalue, dict):
        return rawvalue
    try:
        # NOTE: this used to be a plain ``eval``, which executes arbitrary
        # code found in the configuration value. ``literal_eval`` accepts
        # only Python literals, which is all a dict option needs.
        parsed: Any = ast.literal_eval(rawvalue)
    except (ValueError, TypeError, SyntaxError,
            MemoryError, RecursionError) as err:
        # Non-string input (e.g. ``None``) ends up here via TypeError.
        raise SyntaxError(
            "The key '{}' must be a valid Python object: {}"
            .format(key, rawvalue)) from err
    if not isinstance(parsed, dict):
        raise SyntaxError(
            "The key '{}' must be a valid Python dict. Got: {} (type {!r})"
            .format(key, parsed, type(parsed).__name__))
    return parsed