refactor: Move tagging by color to Annotation

This commit is contained in:
Marty Oehme 2024-01-23 23:45:49 +01:00
parent 3bd6247888
commit ddb34fca7b
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
2 changed files with 51 additions and 40 deletions

View file

@ -1,9 +1,10 @@
import math import math
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Optional
import chevron
import papis.config import papis.config
from papis.document import Document from papis.document import Document
import chevron
TEXT_SIMILARITY_MINIMUM = 0.75 TEXT_SIMILARITY_MINIMUM = 0.75
COLOR_SIMILARITY_MINIMUM = 0.833 COLOR_SIMILARITY_MINIMUM = 0.833
@ -17,7 +18,6 @@ COLORS = {
"orange": (1, 0.65, 0), "orange": (1, 0.65, 0),
} }
@dataclass @dataclass
class Annotation: class Annotation:
"""A PDF annotation object. """A PDF annotation object.
@ -26,7 +26,7 @@ class Annotation:
""" """
file: str file: str
colors: tuple[float, float, float] = field(default_factory=lambda: (0.0, 0.0, 0.0)) color: tuple[float, float, float]
content: str = "" content: str = ""
note: str = "" note: str = ""
page: int = 0 page: int = 0
@ -34,6 +34,10 @@ class Annotation:
type: str = "Highlight" type: str = "Highlight"
minimum_similarity_color: float = 1.0 minimum_similarity_color: float = 1.0
def __post_init__(self):
self._color = self.color or field(default_factory=lambda: (0.0, 0.0, 0.0))
self.tag = self.tag or self._tag_from_colorname(self.colorname or "")
def format(self, formatting: str, doc: Document = Document()): def format(self, formatting: str, doc: Document = Document()):
"""Return a formatted string of the annotation. """Return a formatted string of the annotation.
@ -52,6 +56,15 @@ class Annotation:
} }
return chevron.render(formatting, data) return chevron.render(formatting, data)
@property
def color(self):
    """Return the annotation color as an ``(r, g, b)`` tuple of floats."""
    return self._color

@color.setter
def color(self, value: tuple[float, float, float]):
    # When ``color`` is omitted at construction, the dataclass default is
    # this property object itself (the field name is shadowed by the
    # property). Treat that as "no color given" and fall back to black.
    if isinstance(value, property):
        value = (0.0, 0.0, 0.0)
    self._color = value
    # Keep the tag in sync with the color: every color change re-derives
    # the tag from the configured color-to-tag mapping.
    self.tag = self._tag_from_colorname(self.colorname or "")
@property @property
def colorname(self): def colorname(self):
"""Return the stringified version of the annotation color. """Return the stringified version of the annotation color.
@ -59,7 +72,7 @@ class Annotation:
Finds the closest named color to the annotation and returns it, Finds the closest named color to the annotation and returns it,
using euclidian distance between the two color vectors. using euclidian distance between the two color vectors.
""" """
annot_colors = self.colors or (0.0, 0.0, 0.0) annot_colors = self.color or (0.0, 0.0, 0.0)
nearest = None nearest = None
minimum_similarity = ( minimum_similarity = (
papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0 papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0
@ -81,3 +94,37 @@ class Annotation:
difference between full black and full white, as a float. difference between full black and full white, as a float.
""" """
return 1 - (abs(math.dist([*color_one], [*color_two])) / 3) return 1 - (abs(math.dist([*color_one], [*color_two])) / 3)
def _tag_from_colorname(self, colorname: str) -> str:
color_mapping: dict[str, str] = self._getdict("tags", "plugins.extract")
if not color_mapping:
return ""
return color_mapping.get(colorname, "")
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
def _getdict(self, key: str, section: Optional[str] = None) -> dict[str, str]:
    """Return a configuration value as a Python dict.

    The raw value is fetched with ``papis.config.general_get``. A value
    that already is a dict is returned unchanged; anything else is
    evaluated as a Python expression and must produce a dict.

    :param key: Name of the configuration option to read.
    :param section: Configuration section passed through to
        ``papis.config.general_get``.
    :returns: A python dict
    :raises SyntaxError: Whenever the parsed syntax is either not a valid
        python object or a valid python dict.
    """
    rawvalue: Any = papis.config.general_get(key, section=section)
    if isinstance(rawvalue, dict):
        return rawvalue

    # NOTE(security): eval() executes whatever expression the configuration
    # contains. Kept as-is to preserve the accepted config syntax; consider
    # ast.literal_eval if the value can ever come from an untrusted source.
    try:
        parsed: Any = eval(rawvalue)
    except Exception as err:
        # Chain the original error so the real parse failure stays visible.
        raise SyntaxError(
            "The key '{}' must be a valid Python object: {}".format(key, rawvalue)
        ) from err

    if not isinstance(parsed, dict):
        raise SyntaxError(
            "The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
                key, parsed, type(parsed).__name__
            )
        )
    return parsed

View file

@ -47,7 +47,6 @@ class PdfExtractor:
type=annot.type[1], type=annot.type[1],
page=(page.number or 0) + 1, page=(page.number or 0) + 1,
) )
a.tag = self._tag_from_colorname(a.colorname or "")
annotations.append(a) annotations.append(a)
logger.debug( logger.debug(
f"Found {len(annotations)} " f"Found {len(annotations)} "
@ -61,15 +60,6 @@ class PdfExtractor:
return magic.from_file(fname, mime=True) == "application/pdf" return magic.from_file(fname, mime=True) == "application/pdf"
def _tag_from_colorname(self, colorname: str) -> str:
color_mapping: dict[str, str] = self._getdict("tags", "plugins.extract")
if not color_mapping:
return ""
return color_mapping.get(colorname, "")
def _retrieve_annotation_content(self, def _retrieve_annotation_content(self,
page: fitz.Page, annotation: fitz.Annot page: fitz.Page, annotation: fitz.Annot
) -> tuple[str | None, str | None]: ) -> tuple[str | None, str | None]:
@ -104,29 +94,3 @@ class PdfExtractor:
return (None, None) return (None, None)
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
def _getdict(self, key: str, section: Optional[str] = None) -> dict[str, str]:
    """Dict getter

    Fetches the raw option with ``papis.config.general_get``; a dict is
    returned as-is, any other value is evaluated as a Python expression
    and must evaluate to a dict.

    :param key: Name of the configuration option to read.
    :param section: Configuration section passed through to
        ``papis.config.general_get``.
    :returns: A python dict
    :raises SyntaxError: Whenever the parsed syntax is either not a valid
        python object or a valid python dict.
    """
    rawvalue: Any = papis.config.general_get(key, section=section)
    if isinstance(rawvalue, dict):
        return rawvalue

    # NOTE(security): eval() runs arbitrary code found in the configuration
    # value. Left in place to keep the accepted syntax unchanged; review
    # whether ast.literal_eval would be sufficient.
    try:
        evaluated: Any = eval(rawvalue)
    except Exception as err:
        # Explicit chaining keeps the underlying parse error as the cause.
        raise SyntaxError(
            "The key '{}' must be a valid Python object: {}".format(key, rawvalue)
        ) from err

    if isinstance(evaluated, dict):
        return evaluated
    raise SyntaxError(
        "The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
            key, evaluated, type(evaluated).__name__
        )
    )