diff --git a/papis_extract/annotation_data.py b/papis_extract/annotation_data.py index 26e63cd..fa06e0d 100644 --- a/papis_extract/annotation_data.py +++ b/papis_extract/annotation_data.py @@ -23,7 +23,7 @@ class Annotation: """A PDF annotation object""" file: str - colors: dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)}) + colors: tuple[float, float, float] = field(default_factory=lambda: (0.0, 0.0, 0.0)) content: str = "" page: int = 0 tag: str = "" @@ -56,7 +56,7 @@ class Annotation: using euclidian distance between the two color vectors. """ annot_colors = ( - self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0) + self.colors or (0.0, 0.0, 0.0) ) nearest = None minimum_similarity = ( diff --git a/papis_extract/extractor.py b/papis_extract/extractor.py index 51d6ade..a4bc536 100644 --- a/papis_extract/extractor.py +++ b/papis_extract/extractor.py @@ -14,12 +14,13 @@ from papis_extract.annotation_data import Annotation, AnnotatedDocument logger = papis.logging.get_logger(__name__) + def start( documents: list[Document], ) -> list[AnnotatedDocument]: """Extract all annotations from passed documents. - Returns all annotations contained in the papis + Returns all annotations contained in the papis documents passed in. """ @@ -45,6 +46,7 @@ def start( output.append(AnnotatedDocument(doc, annotations)) return output + def extract(filename: Path) -> list[Annotation]: """Extract annotations from a file. @@ -58,11 +60,16 @@ def extract(filename: Path) -> list[Annotation]: quote, note = _retrieve_annotation_content(page, annot) if not quote and not note: continue + col = ( + annot.colors.get("fill") + or annot.colors.get("stroke") + or (0.0, 0.0, 0.0) + ) a = Annotation( file=str(filename), text=quote or "", content=note or "", - colors=annot.colors, + colors=col, type=annot.type[1], page=(page.number or 0) + 1, ) @@ -79,8 +86,6 @@ def is_pdf(fname: Path) -> bool: return magic.from_file(fname, mime=True) == "application/pdf" - - def _is_file_processable(fname: Path) -> bool: if not fname.is_file(): logger.error(f"File {str(fname)} not readable.") @@ -89,6 +94,7 @@ def _is_file_processable(fname: Path) -> bool: return False return True + def _tag_from_colorname(colorname: str) -> str: color_mapping: dict[str, str] = getdict("tags", "plugins.extract") if not color_mapping: diff --git a/tests/test_annotation.py b/tests/test_annotation.py index d9f6188..542c3a8 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -27,7 +27,7 @@ def test_formatting(fmt_string, expected): def test_colorname_matches_exact(): sut = Annotation( - "testfile", colors={"stroke": (1.0,0.0,0.0)}, minimum_similarity_color=1.0 + "testfile", colors=(1.0,0.0,0.0), minimum_similarity_color=1.0 ) c_name = sut.colorname assert c_name == "red" @@ -45,7 +45,7 @@ def test_colorname_matches_exact(): ) def test_matches_inexact_colorname(color_value): sut = Annotation( - "testfile", colors={"stroke": color_value}, minimum_similarity_color=0.833 + "testfile", colors=color_value, minimum_similarity_color=0.833 ) c_name = sut.colorname assert c_name == "red"