From 256117d45175ded87c3887ec5ae74c05442f8780 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Tue, 29 Aug 2023 13:49:22 +0200 Subject: [PATCH] Add mustache templating Added mustache templating engine to be able to provide custom formatting strings. --- papis_extract/annotation_data.py | 42 +++++++++++----------------- poetry.lock | 13 ++++++++- pyproject.toml | 1 + tests/test_annotation.py | 48 ++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 29 deletions(-) diff --git a/papis_extract/annotation_data.py b/papis_extract/annotation_data.py index 298f964..26e63cd 100644 --- a/papis_extract/annotation_data.py +++ b/papis_extract/annotation_data.py @@ -1,9 +1,9 @@ -import re import math from dataclasses import dataclass, field import papis.config from papis.document import Document +import chevron TEXT_SIMILARITY_MINIMUM = 0.75 COLOR_SIMILARITY_MINIMUM = 0.833 @@ -23,12 +23,13 @@ class Annotation: """A PDF annotation object""" file: str - type: str = "Highlight" - text: str = "" - content: str = "" - page: int = 1 colors: dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)}) + content: str = "" + page: int = 0 tag: str = "" + text: str = "" + type: str = "Highlight" + minimum_similarity_color: float = 1.0 def format(self, formatting): """Return a formatted string of the annotation. @@ -37,27 +38,15 @@ class Annotation: formatted with the correct marker replacements and removals, ready for display or writing. """ - output = formatting - replacements = { - r"{quote}": self.text, - r"{note}": self.content, - r"{page}": str(self.page), - r"{newline}": "\n", - r"{tag}": self.tag, + data = { + "file": self.file, + "quote": self.text, + "note": self.content, + "page": self.page, + "tag": self.tag, + "type": self.type, } - pattern = re.compile( - "|".join( - [re.escape(k) for k in sorted(replacements, key=len, reverse=True)] - ), - flags=re.DOTALL, - ) - patt_quote_container = re.compile(r"{%quote_container(.*?)%}") - patt_note_container = re.compile(r"{%note_container(.*?)%}") - patt_tag_container = re.compile(r"{%tag_container(.*?)%}") - output = patt_quote_container.sub(r"\1" if self.text else "", output) - output = patt_note_container.sub(r"\1" if self.content else "", output) - output = patt_tag_container.sub(r"\1" if self.tag else "", output) - return pattern.sub(lambda x: replacements[x.group(0)], output) + return chevron.render(formatting, data) @property def colorname(self): @@ -73,9 +62,10 @@ class Annotation: minimum_similarity = ( papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0 ) + minimum_similarity = self.minimum_similarity_color for name, values in COLORS.items(): similarity_ratio = self._color_similarity_ratio(values, annot_colors) - if similarity_ratio > minimum_similarity: + if similarity_ratio >= minimum_similarity: minimum_similarity = similarity_ratio nearest = name return nearest diff --git a/poetry.lock b/poetry.lock index 8d6734d..1336bfc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -147,6 +147,17 @@ files = [ {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, ] +[[package]] +name = "chevron" +version = "0.14.0" +description = "Mustache templating language renderer" +optional = false +python-versions = "*" +files = [ + {file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"}, + {file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"}, +] + [[package]] name = "click" version = "8.1.7" @@ -980,4 +991,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f" +content-hash = "a3af36ed2941235df158c20ba9b66bdf5a0af0554235fd004ec77c3e88def3c3" diff --git a/pyproject.toml b/pyproject.toml index 4ca3257..8ee68a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ papis = "^0.13" click = "^8.1.7" whoosh = "^2.7.4" python-magic = "^0.4.27" +chevron = "^0.14.0" [tool.poetry.plugins."papis.command"] extract = "papis_extract:main" diff --git a/tests/test_annotation.py b/tests/test_annotation.py index 72c7a75..d9f6188 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -1,7 +1,51 @@ +import pytest from papis_extract.annotation_data import Annotation -def test_matches_colorname_exact(): - sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)}) +@pytest.mark.parametrize( + "fmt_string,expected", + [ + ("{{quote}}", "I am the text value"), + ( + "> {{quote}}\n{{#note}}Note: {{note}}{{/note}}", + "> I am the text value\nNote: Whereas I represent the note", + ), + ( + "{{#note}}Note: {{note}}{{/note}}{{#page}}, p. {{page}}{{/page}}", + "Note: Whereas I represent the note", + ), + ], +) +def test_formatting(fmt_string, expected): + sut = Annotation( + "myfile", + text="I am the text value", + content="Whereas I represent the note", + ) + + assert sut.format(fmt_string) == expected + +def test_colorname_matches_exact(): + sut = Annotation( + "testfile", colors={"stroke": (1.0,0.0,0.0)}, minimum_similarity_color=1.0 + ) + c_name = sut.colorname + assert c_name == "red" + +# TODO inject closeness value instead of relying on default +@pytest.mark.parametrize( + "color_value", + [ + (1.0, 0.0, 0.0), + (0.9, 0.0, 0.0), + (0.8, 0.0, 0.0), + (0.7, 0.0, 0.0), + (0.51, 0.0, 0.0), + ], +) +def test_matches_inexact_colorname(color_value): + sut = Annotation( + "testfile", colors={"stroke": color_value}, minimum_similarity_color=0.833 + ) c_name = sut.colorname assert c_name == "red"