Add mustache templating

Added the mustache templating engine (via the chevron library) so that
users can provide custom formatting strings.
This commit is contained in:
Marty Oehme 2023-08-29 13:49:22 +02:00
parent e325b89c9b
commit 256117d451
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
4 changed files with 75 additions and 29 deletions

View file

@ -1,9 +1,9 @@
import re
import math import math
from dataclasses import dataclass, field from dataclasses import dataclass, field
import papis.config import papis.config
from papis.document import Document from papis.document import Document
import chevron
TEXT_SIMILARITY_MINIMUM = 0.75 TEXT_SIMILARITY_MINIMUM = 0.75
COLOR_SIMILARITY_MINIMUM = 0.833 COLOR_SIMILARITY_MINIMUM = 0.833
@ -23,12 +23,13 @@ class Annotation:
"""A PDF annotation object""" """A PDF annotation object"""
file: str file: str
type: str = "Highlight"
text: str = ""
content: str = ""
page: int = 1
colors: dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)}) colors: dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)})
content: str = ""
page: int = 0
tag: str = "" tag: str = ""
text: str = ""
type: str = "Highlight"
minimum_similarity_color: float = 1.0
def format(self, formatting): def format(self, formatting):
"""Return a formatted string of the annotation. """Return a formatted string of the annotation.
@ -37,27 +38,15 @@ class Annotation:
formatted with the correct marker replacements and removals, ready formatted with the correct marker replacements and removals, ready
for display or writing. for display or writing.
""" """
output = formatting data = {
replacements = { "file": self.file,
r"{quote}": self.text, "quote": self.text,
r"{note}": self.content, "note": self.content,
r"{page}": str(self.page), "page": self.page,
r"{newline}": "\n", "tag": self.tag,
r"{tag}": self.tag, "type": self.type,
} }
pattern = re.compile( return chevron.render(formatting, data)
"|".join(
[re.escape(k) for k in sorted(replacements, key=len, reverse=True)]
),
flags=re.DOTALL,
)
patt_quote_container = re.compile(r"{%quote_container(.*?)%}")
patt_note_container = re.compile(r"{%note_container(.*?)%}")
patt_tag_container = re.compile(r"{%tag_container(.*?)%}")
output = patt_quote_container.sub(r"\1" if self.text else "", output)
output = patt_note_container.sub(r"\1" if self.content else "", output)
output = patt_tag_container.sub(r"\1" if self.tag else "", output)
return pattern.sub(lambda x: replacements[x.group(0)], output)
@property @property
def colorname(self): def colorname(self):
@ -73,9 +62,10 @@ class Annotation:
minimum_similarity = ( minimum_similarity = (
papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0 papis.config.getfloat("minimum_similarity_color", "plugins.extract") or 1.0
) )
minimum_similarity = self.minimum_similarity_color
for name, values in COLORS.items(): for name, values in COLORS.items():
similarity_ratio = self._color_similarity_ratio(values, annot_colors) similarity_ratio = self._color_similarity_ratio(values, annot_colors)
if similarity_ratio > minimum_similarity: if similarity_ratio >= minimum_similarity:
minimum_similarity = similarity_ratio minimum_similarity = similarity_ratio
nearest = name nearest = name
return nearest return nearest

13
poetry.lock generated
View file

@ -147,6 +147,17 @@ files = [
{file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
] ]
[[package]]
name = "chevron"
version = "0.14.0"
description = "Mustache templating language renderer"
optional = false
python-versions = "*"
files = [
{file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"},
{file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"},
]
[[package]] [[package]]
name = "click" name = "click"
version = "8.1.7" version = "8.1.7"
@ -980,4 +991,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f" content-hash = "a3af36ed2941235df158c20ba9b66bdf5a0af0554235fd004ec77c3e88def3c3"

View file

@ -14,6 +14,7 @@ papis = "^0.13"
click = "^8.1.7" click = "^8.1.7"
whoosh = "^2.7.4" whoosh = "^2.7.4"
python-magic = "^0.4.27" python-magic = "^0.4.27"
chevron = "^0.14.0"
[tool.poetry.plugins."papis.command"] [tool.poetry.plugins."papis.command"]
extract = "papis_extract:main" extract = "papis_extract:main"

View file

@ -1,7 +1,51 @@
import pytest
from papis_extract.annotation_data import Annotation from papis_extract.annotation_data import Annotation
def test_matches_colorname_exact(): @pytest.mark.parametrize(
sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)}) "fmt_string,expected",
[
("{{quote}}", "I am the text value"),
(
"> {{quote}}\n{{#note}}Note: {{note}}{{/note}}",
"> I am the text value\nNote: Whereas I represent the note",
),
(
"{{#note}}Note: {{note}}{{/note}}{{#page}}, p. {{page}}{{/page}}",
"Note: Whereas I represent the note",
),
],
)
def test_formatting(fmt_string, expected):
sut = Annotation(
"myfile",
text="I am the text value",
content="Whereas I represent the note",
)
assert sut.format(fmt_string) == expected
def test_colorname_matches_exact():
sut = Annotation(
"testfile", colors={"stroke": (1.0,0.0,0.0)}, minimum_similarity_color=1.0
)
c_name = sut.colorname
assert c_name == "red"
# TODO inject closeness value instead of relying on default
@pytest.mark.parametrize(
"color_value",
[
(1.0, 0.0, 0.0),
(0.9, 0.0, 0.0),
(0.8, 0.0, 0.0),
(0.7, 0.0, 0.0),
(0.51, 0.0, 0.0),
],
)
def test_matches_inexact_colorname(color_value):
sut = Annotation(
"testfile", colors={"stroke": color_value}, minimum_similarity_color=0.833
)
c_name = sut.colorname c_name = sut.colorname
assert c_name == "red" assert c_name == "red"