6 changed files with 84 additions and 148 deletions
--- a/papis_extract/init.py
+++ b/papis_extract/init.py
@ -1,14 +1,19 @@
+from pathlib import Path
+import re
+
 import click
+import fitz_new as fitz
+import magic
 import papis.cli
 import papis.config
 import papis.document
+from papis.document import Document
 import papis.logging
 import papis.notes
 import papis.strings
-from papis.document import Document

 from papis_extract import extractor, exporter
-from papis_extract.annotation_data import AnnotatedDocument
+from papis_extract.annotation_data import Annotation, AnnotatedDocument

 logger = papis.logging.get_logger(__name__)

@ -71,19 +76,50 @@ def main(
        logger.warning(papis.strings.no_documents_retrieved_message)
        return

-    run(documents, edit=manual, write=write, git=git)
-
-
-def run(
-    documents: list[Document],
-    edit: bool = False,
-    write: bool = False,
-    git: bool = False,
-) -> None:
-
-    doc_annotations: list[AnnotatedDocument] = extractor.start(documents)
+    doc_annotations: list[AnnotatedDocument] = _get_annotations_for_documents(documents)

    if write:
-        exporter.to_notes(doc_annotations, edit=edit, git=git)
+        exporter.to_notes(doc_annotations, edit=manual, git=git)
    else:
        exporter.to_stdout(doc_annotations)
+
+    # note_file: Path = Path(papis.notes.notes_path_ensured(documents[0]))
+
+
+def is_pdf(fname: Path) -> bool:
+    return magic.from_file(fname, mime=True) == "application/pdf"  # MIME check
+
+
+def _get_annotations_for_documents(
+    documents: list[Document],
+) -> list[AnnotatedDocument]:
+    """Collect the annotations of every processable PDF of each document."""
+    output: list[AnnotatedDocument] = []
+    for doc in documents:
+        annotations: list[Annotation] = []
+        found_pdf: bool = False
+        for file in doc.get_files():
+            fname = Path(file)
+            if not _is_file_processable(fname):
+                # skip only this file; later attachments may still be PDFs
+                continue
+            found_pdf = True
+            try:
+                annotations.extend(extractor.start(fname))
+            except fitz.FileDataError as e:
+                print(f"File structure errors for {file}.\n{e}")
+        if not found_pdf:
+            # strip curly braces, otherwise the papis logger gets upset
+            desc = re.sub("[{}]", "", papis.document.describe(doc))
+            logger.warning("Did not find suitable PDF file for document: " f"{desc}")
+        output.append(AnnotatedDocument(doc, annotations))
+    return output
+
+
+def _is_file_processable(fname: Path) -> bool:
+    """Return True only when fname is an existing file detected as a PDF."""
+    if fname.is_file():
+        # existing non-PDF files are rejected without logging
+        return is_pdf(fname)
+    logger.error(f"File {str(fname)} not readable.")
+    return False
--- a/papis_extract/annotation_data.py
+++ b/papis_extract/annotation_data.py
@ -1,9 +1,9 @@
+import re
 import math
 from dataclasses import dataclass, field

 import papis.config
 from papis.document import Document
-import chevron

 TEXT_SIMILARITY_MINIMUM = 0.75
 COLOR_SIMILARITY_MINIMUM = 0.833
@ -23,13 +23,12 @@ class Annotation:
    """A PDF annotation object"""

    file: str
-    colors: tuple[float, float, float] = field(default_factory=lambda: (0.0, 0.0, 0.0))
-    content: str = ""
-    page: int = 0
-    tag: str = ""
-    text: str = ""
    type: str = "Highlight"
-    minimum_similarity_color: float = 1.0
+    text: str = ""
+    content: str = ""
+    page: int = 1
+    colors: dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)})
+    tag: str = ""

    def format(self, formatting):
        """Return a formatted string of the annotation.
@ -38,15 +37,27 @@ class Annotation:
        formatted with the correct marker replacements and removals, ready
        for display or writing.
        """
-        data = {
-            "file": self.file,
-            "quote": self.text,
-            "note": self.content,
-            "page": self.page,
-            "tag": self.tag,
-            "type": self.type,
+        output = formatting
+        replacements = {
+            r"{quote}": self.text,
+            r"{note}": self.content,
+            r"{page}": str(self.page),
+            r"{newline}": "\n",
+            r"{tag}": self.tag,
        }
-        return chevron.render(formatting, data)
+        pattern = re.compile(
+            "|".join(
+                [re.escape(k) for k in sorted(replacements, key=len, reverse=True)]
+            ),
+            flags=re.DOTALL,
+        )
+        patt_quote_container = re.compile(r"{%quote_container(.*?)%}")
+        patt_note_container = re.compile(r"{%note_container(.*?)%}")
+        patt_tag_container = re.compile(r"{%tag_container(.*?)%}")
+        output = patt_quote_container.sub(r"\1" if self.text else "", output)
+        output = patt_note_container.sub(r"\1" if self.content else "", output)
+        output = patt_tag_container.sub(r"\1" if self.tag else "", output)
+        return pattern.sub(lambda x: replacements[x.group(0)], output)

    @property
    def colorname(self):
@ -56,16 +67,15 @@ class Annotation:
        using euclidian distance between the two color vectors.
        """
        annot_colors = (
-            self.colors or (0.0, 0.0, 0.0)
+            self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
        )
        nearest = None
        minimum_similarity = (
            papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0
        )
-        minimum_similarity = self.minimum_similarity_color
        for name, values in COLORS.items():
            similarity_ratio = self._color_similarity_ratio(values, annot_colors)
-            if similarity_ratio >= minimum_similarity:
+            if similarity_ratio > minimum_similarity:
                minimum_similarity = similarity_ratio
                nearest = name
        return nearest
--- a/papis_extract/extractor.py
+++ b/papis_extract/extractor.py
@ -1,53 +1,17 @@
-import re
 from pathlib import Path
 from typing import Any, Optional

 import Levenshtein
-import magic
 import fitz_new as fitz
 import papis.logging
 import papis.config
-import papis.document
-from papis.document import Document

-from papis_extract.annotation_data import Annotation, AnnotatedDocument
+from papis_extract.annotation_data import Annotation

 logger = papis.logging.get_logger(__name__)


-def start(
-    documents: list[Document],
-) -> list[AnnotatedDocument]:
-    """Extract all annotations from passed documents.
-
-    Returns all annotations contained in the papis
-    documents passed in.
-    """
-
-    output: list[AnnotatedDocument] = []
-    for doc in documents:
-        annotations: list[Annotation] = []
-        found_pdf: bool = False
-        for file in doc.get_files():
-            fname = Path(file)
-            if not _is_file_processable(fname):
-                break
-            found_pdf = True
-
-            try:
-                annotations.extend(extract(fname))
-            except fitz.FileDataError as e:
-                print(f"File structure errors for {file}.\n{e}")
-
-        if not found_pdf:
-            # have to remove curlys or papis logger gets upset
-            desc = re.sub("[{}]", "", papis.document.describe(doc))
-            logger.warning("Did not find suitable PDF file for document: " f"{desc}")
-        output.append(AnnotatedDocument(doc, annotations))
-    return output
-
-
-def extract(filename: Path) -> list[Annotation]:
+def start(filename: Path) -> list[Annotation]:
    """Extract annotations from a file.

    Returns all readable annotations contained in the file
@ -60,16 +24,11 @@ def extract(filename: Path) -> list[Annotation]:
                quote, note = _retrieve_annotation_content(page, annot)
                if not quote and not note:
                    continue
-                col = (
-                    annot.colors.get("fill")
-                    or annot.colors.get("stroke")
-                    or (0.0, 0.0, 0.0)
-                )
                a = Annotation(
                    file=str(filename),
                    text=quote or "",
                    content=note or "",
-                    colors=col,
+                    colors=annot.colors,
                    type=annot.type[1],
                    page=(page.number or 0) + 1,
                )
@ -82,19 +41,6 @@ def extract(filename: Path) -> list[Annotation]:
    return annotations


-def is_pdf(fname: Path) -> bool:
-    return magic.from_file(fname, mime=True) == "application/pdf"
-
-
-def _is_file_processable(fname: Path) -> bool:
-    if not fname.is_file():
-        logger.error(f"File {str(fname)} not readable.")
-        return False
-    if not is_pdf(fname):
-        return False
-    return True
-
-
 def _tag_from_colorname(colorname: str) -> str:
    color_mapping: dict[str, str] = getdict("tags", "plugins.extract")
    if not color_mapping:
--- a/poetry.lock
+++ b/poetry.lock
@ -147,17 +147,6 @@ files = [
    {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
 ]

-[[package]]
-name = "chevron"
-version = "0.14.0"
-description = "Mustache templating language renderer"
-optional = false
-python-versions = "*"
-files = [
-    {file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"},
-    {file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"},
-]
-
 [[package]]
 name = "click"
 version = "8.1.7"
@ -991,4 +980,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "a3af36ed2941235df158c20ba9b66bdf5a0af0554235fd004ec77c3e88def3c3"
+content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -14,7 +14,6 @@ papis = "^0.13"
 click = "^8.1.7"
 whoosh = "^2.7.4"
 python-magic = "^0.4.27"
-chevron = "^0.14.0"

 [tool.poetry.plugins."papis.command"]
 extract = "papis_extract:main"
--- a/tests/test_annotation.py
+++ b/tests/test_annotation.py
@ -1,51 +1,7 @@
-import pytest
 from papis_extract.annotation_data import Annotation


-@pytest.mark.parametrize(
-    "fmt_string,expected",
-    [
-        ("{{quote}}", "I am the text value"),
-        (
-            "> {{quote}}\n{{#note}}Note: {{note}}{{/note}}",
-            "> I am the text value\nNote: Whereas I represent the note",
-        ),
-        (
-            "{{#note}}Note: {{note}}{{/note}}{{#page}}, p. {{page}}{{/page}}",
-            "Note: Whereas I represent the note",
-        ),
-    ],
-)
-def test_formatting(fmt_string, expected):
-    sut = Annotation(
-        "myfile",
-        text="I am the text value",
-        content="Whereas I represent the note",
-    )
-
-    assert sut.format(fmt_string) == expected
-
-def test_colorname_matches_exact():
-    sut = Annotation(
-        "testfile", colors=(1.0,0.0,0.0), minimum_similarity_color=1.0
-    )
-    c_name = sut.colorname
-    assert c_name == "red"
-
-# TODO inject closeness value instead of relying on default
-@pytest.mark.parametrize(
-    "color_value",
-    [
-        (1.0, 0.0, 0.0),
-        (0.9, 0.0, 0.0),
-        (0.8, 0.0, 0.0),
-        (0.7, 0.0, 0.0),
-        (0.51, 0.0, 0.0),
-    ],
-)
-def test_matches_inexact_colorname(color_value):
-    sut = Annotation(
-        "testfile", colors=color_value, minimum_similarity_color=0.833
-    )
+def test_matches_colorname_exact():
+    sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)})
    c_name = sut.colorname
    assert c_name == "red"