refactor: Move tagging by color to Annotation
This commit is contained in:
parent
3bd6247888
commit
ddb34fca7b
2 changed files with 51 additions and 40 deletions
|
@ -1,9 +1,10 @@
|
||||||
import math
|
import math
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import chevron
|
||||||
import papis.config
|
import papis.config
|
||||||
from papis.document import Document
|
from papis.document import Document
|
||||||
import chevron
|
|
||||||
|
|
||||||
TEXT_SIMILARITY_MINIMUM = 0.75
|
TEXT_SIMILARITY_MINIMUM = 0.75
|
||||||
COLOR_SIMILARITY_MINIMUM = 0.833
|
COLOR_SIMILARITY_MINIMUM = 0.833
|
||||||
|
@ -17,7 +18,6 @@ COLORS = {
|
||||||
"orange": (1, 0.65, 0),
|
"orange": (1, 0.65, 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Annotation:
|
class Annotation:
|
||||||
"""A PDF annotation object.
|
"""A PDF annotation object.
|
||||||
|
@ -26,7 +26,7 @@ class Annotation:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
file: str
|
file: str
|
||||||
colors: tuple[float, float, float] = field(default_factory=lambda: (0.0, 0.0, 0.0))
|
color: tuple[float, float, float]
|
||||||
content: str = ""
|
content: str = ""
|
||||||
note: str = ""
|
note: str = ""
|
||||||
page: int = 0
|
page: int = 0
|
||||||
|
@ -34,6 +34,10 @@ class Annotation:
|
||||||
type: str = "Highlight"
|
type: str = "Highlight"
|
||||||
minimum_similarity_color: float = 1.0
|
minimum_similarity_color: float = 1.0
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
self._color = self.color or field(default_factory=lambda: (0.0, 0.0, 0.0))
|
||||||
|
self.tag = self.tag or self._tag_from_colorname(self.colorname or "")
|
||||||
|
|
||||||
def format(self, formatting: str, doc: Document = Document()):
|
def format(self, formatting: str, doc: Document = Document()):
|
||||||
"""Return a formatted string of the annotation.
|
"""Return a formatted string of the annotation.
|
||||||
|
|
||||||
|
@ -52,6 +56,15 @@ class Annotation:
|
||||||
}
|
}
|
||||||
return chevron.render(formatting, data)
|
return chevron.render(formatting, data)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def color(self):
|
||||||
|
return self._color
|
||||||
|
|
||||||
|
@color.setter
|
||||||
|
def color(self, value: tuple[float, float, float]):
|
||||||
|
self._color = value
|
||||||
|
self.tag = self._tag_from_colorname(self.colorname or "")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def colorname(self):
|
def colorname(self):
|
||||||
"""Return the stringified version of the annotation color.
|
"""Return the stringified version of the annotation color.
|
||||||
|
@ -59,7 +72,7 @@ class Annotation:
|
||||||
Finds the closest named color to the annotation and returns it,
|
Finds the closest named color to the annotation and returns it,
|
||||||
using Euclidean distance between the two color vectors.
|
using Euclidean distance between the two color vectors.
|
||||||
"""
|
"""
|
||||||
annot_colors = self.colors or (0.0, 0.0, 0.0)
|
annot_colors = self.color or (0.0, 0.0, 0.0)
|
||||||
nearest = None
|
nearest = None
|
||||||
minimum_similarity = (
|
minimum_similarity = (
|
||||||
papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0
|
papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0
|
||||||
|
@ -81,3 +94,37 @@ class Annotation:
|
||||||
difference between full black and full white, as a float.
|
difference between full black and full white, as a float.
|
||||||
"""
|
"""
|
||||||
return 1 - (abs(math.dist([*color_one], [*color_two])) / 3)
|
return 1 - (abs(math.dist([*color_one], [*color_two])) / 3)
|
||||||
|
|
||||||
|
def _tag_from_colorname(self, colorname: str) -> str:
|
||||||
|
color_mapping: dict[str, str] = self._getdict("tags", "plugins.extract")
|
||||||
|
if not color_mapping:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
return color_mapping.get(colorname, "")
|
||||||
|
|
||||||
|
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
|
||||||
|
def _getdict(self, key: str, section: Optional[str] = None) -> dict[str, str]:
|
||||||
|
"""Dict getter
|
||||||
|
|
||||||
|
:returns: A python dict
|
||||||
|
:raises SyntaxError: Whenever the parsed syntax is either not a valid
|
||||||
|
python object or a valid python dict.
|
||||||
|
"""
|
||||||
|
rawvalue: Any = papis.config.general_get(key, section=section)
|
||||||
|
if isinstance(rawvalue, dict):
|
||||||
|
return rawvalue
|
||||||
|
try:
|
||||||
|
rawvalue = eval(rawvalue)
|
||||||
|
except Exception:
|
||||||
|
raise SyntaxError(
|
||||||
|
"The key '{}' must be a valid Python object: {}".format(key, rawvalue)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
if not isinstance(rawvalue, dict):
|
||||||
|
raise SyntaxError(
|
||||||
|
"The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
|
||||||
|
key, rawvalue, type(rawvalue).__name__
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return rawvalue
|
||||||
|
|
|
@ -47,7 +47,6 @@ class PdfExtractor:
|
||||||
type=annot.type[1],
|
type=annot.type[1],
|
||||||
page=(page.number or 0) + 1,
|
page=(page.number or 0) + 1,
|
||||||
)
|
)
|
||||||
a.tag = self._tag_from_colorname(a.colorname or "")
|
|
||||||
annotations.append(a)
|
annotations.append(a)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Found {len(annotations)} "
|
f"Found {len(annotations)} "
|
||||||
|
@ -61,15 +60,6 @@ class PdfExtractor:
|
||||||
return magic.from_file(fname, mime=True) == "application/pdf"
|
return magic.from_file(fname, mime=True) == "application/pdf"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _tag_from_colorname(self, colorname: str) -> str:
|
|
||||||
color_mapping: dict[str, str] = self._getdict("tags", "plugins.extract")
|
|
||||||
if not color_mapping:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
return color_mapping.get(colorname, "")
|
|
||||||
|
|
||||||
|
|
||||||
def _retrieve_annotation_content(self,
|
def _retrieve_annotation_content(self,
|
||||||
page: fitz.Page, annotation: fitz.Annot
|
page: fitz.Page, annotation: fitz.Annot
|
||||||
) -> tuple[str | None, str | None]:
|
) -> tuple[str | None, str | None]:
|
||||||
|
@ -104,29 +94,3 @@ class PdfExtractor:
|
||||||
return (None, None)
|
return (None, None)
|
||||||
|
|
||||||
|
|
||||||
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
|
|
||||||
def _getdict(self, key: str, section: Optional[str] = None) -> dict[str, str]:
|
|
||||||
"""Dict getter
|
|
||||||
|
|
||||||
:returns: A python dict
|
|
||||||
:raises SyntaxError: Whenever the parsed syntax is either not a valid
|
|
||||||
python object or a valid python dict.
|
|
||||||
"""
|
|
||||||
rawvalue: Any = papis.config.general_get(key, section=section)
|
|
||||||
if isinstance(rawvalue, dict):
|
|
||||||
return rawvalue
|
|
||||||
try:
|
|
||||||
rawvalue = eval(rawvalue)
|
|
||||||
except Exception:
|
|
||||||
raise SyntaxError(
|
|
||||||
"The key '{}' must be a valid Python object: {}".format(key, rawvalue)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if not isinstance(rawvalue, dict):
|
|
||||||
raise SyntaxError(
|
|
||||||
"The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
|
|
||||||
key, rawvalue, type(rawvalue).__name__
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return rawvalue
|
|
||||||
|
|
Loading…
Reference in a new issue