Add configurable minimum color similarity
This commit is contained in:
parent
2b476206a2
commit
d0027ca5d1
1 changed files with 37 additions and 11 deletions
|
@ -18,6 +18,8 @@ from pubs.content import check_file, read_text_file, write_file
|
||||||
from pubs.query import get_paper_filter
|
from pubs.query import get_paper_filter
|
||||||
|
|
||||||
CONFIRMATION_PAPER_THRESHOLD = 5
|
CONFIRMATION_PAPER_THRESHOLD = 5
|
||||||
|
TEXT_SIMILARITY_MINIMUM = 0.75
|
||||||
|
COLOR_SIMILARITY_MINIMUM = 0.833
|
||||||
|
|
||||||
COLORS = {
|
COLORS = {
|
||||||
"red": (1, 0, 0),
|
"red": (1, 0, 0),
|
||||||
|
@ -42,7 +44,13 @@ class Annotation:
|
||||||
colors: Dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)})
|
colors: Dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)})
|
||||||
tag: str = ""
|
tag: str = ""
|
||||||
|
|
||||||
def formatted(self, formatting):
|
def format(self, formatting):
|
||||||
|
"""Return a formatted string of the annotation.
|
||||||
|
|
||||||
|
Given a provided formatting pattern, this method returns the annotation
|
||||||
|
formatted with the correct marker replacements and removals, ready
|
||||||
|
for display or writing.
|
||||||
|
"""
|
||||||
output = formatting
|
output = formatting
|
||||||
replacements = {
|
replacements = {
|
||||||
r"{quote}": self.text,
|
r"{quote}": self.text,
|
||||||
|
@ -67,18 +75,31 @@ class Annotation:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def colorname(self):
|
def colorname(self):
|
||||||
|
"""Return the stringified version of the annotation color.
|
||||||
|
|
||||||
|
Finds the closest named color to the annotation and returns it.
|
||||||
|
"""
|
||||||
annot_colors = (
|
annot_colors = (
|
||||||
self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
|
self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
|
||||||
)
|
)
|
||||||
nearest = None
|
nearest = None
|
||||||
smallest_dist = 2.0
|
minimum_similarity = COLOR_SIMILARITY_THRESHOLD
|
||||||
for name, values in COLORS.items():
|
for name, values in COLORS.items():
|
||||||
dist = math.dist([*values], [*annot_colors])
|
similarity_ratio = self._color_similarity_ratio(values, annot_colors)
|
||||||
if dist < smallest_dist:
|
if similarity_ratio > minimum_similarity:
|
||||||
smallest_dist = dist
|
minimum_similarity = similarity_ratio
|
||||||
nearest = name
|
nearest = name
|
||||||
return nearest
|
return nearest
|
||||||
|
|
||||||
|
def _color_similarity_ratio(self, color_one, color_two):
|
||||||
|
"""Return the similarity of two colors between 0 and 1.
|
||||||
|
|
||||||
|
Takes two rgb color tuples made of floats between 0 and 1, e.g. (1, 0.65, 0) for orange,
|
||||||
|
and returns the similarity between them, with 1 being the same color and 0 being the
|
||||||
|
difference between full black and full white, as a float.
|
||||||
|
"""
|
||||||
|
return 1 - (abs(math.dist([*color_one], [*color_two])) / 3)
|
||||||
|
|
||||||
|
|
||||||
class ExtractPlugin(PapersPlugin):
|
class ExtractPlugin(PapersPlugin):
|
||||||
"""Extract annotations from any pdf document.
|
"""Extract annotations from any pdf document.
|
||||||
|
@ -105,7 +126,12 @@ class ExtractPlugin(PapersPlugin):
|
||||||
|
|
||||||
settings = conf["plugins"].get("extract", {})
|
settings = conf["plugins"].get("extract", {})
|
||||||
self.on_import = settings.get("on_import", False)
|
self.on_import = settings.get("on_import", False)
|
||||||
self.minimum_similarity = float(settings.get("minimum_similarity", 0.75))
|
self.minimum_similarity = float(
|
||||||
|
settings.get("minimum_similarity", TEXT_SIMILARITY_MINIMUM)
|
||||||
|
)
|
||||||
|
self.minimum_color_similarity = float(
|
||||||
|
settings.get("minimum_color_similarity", COLOR_SIMILARITY_MINIMUM)
|
||||||
|
)
|
||||||
self.formatting = settings.get(
|
self.formatting = settings.get(
|
||||||
"formatting",
|
"formatting",
|
||||||
"{%quote_container> {quote} %}[{page}]{%note_container{newline}Note: {note} %}{%tag_container #{tag}%}",
|
"{%quote_container> {quote} %}[{page}]{%note_container{newline}Note: {note} %}{%tag_container #{tag}%}",
|
||||||
|
@ -193,7 +219,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
return papers_annotated
|
return papers_annotated
|
||||||
|
|
||||||
def tag_from_colorname(self, colorname):
|
def tag_from_colorname(self, colorname):
|
||||||
return self.color_mapping.get(colorname)
|
return self.color_mapping.get(colorname, "")
|
||||||
|
|
||||||
def _gather_papers(self, conf, args):
|
def _gather_papers(self, conf, args):
|
||||||
"""Get all papers for citekeys.
|
"""Get all papers for citekeys.
|
||||||
|
@ -307,7 +333,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
output += f"------ {citekey} ------\n"
|
output += f"------ {citekey} ------\n"
|
||||||
for annotation in annotations:
|
for annotation in annotations:
|
||||||
# for annot in annotations:
|
# for annot in annotations:
|
||||||
output += f"{annotation.formatted(self.formatting)}\n"
|
output += f"{annotation.format(self.formatting)}\n"
|
||||||
output += "\n"
|
output += "\n"
|
||||||
self.ui.message(output)
|
self.ui.message(output)
|
||||||
|
|
||||||
|
@ -339,7 +365,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
"""
|
"""
|
||||||
output = "# Annotations\n\n"
|
output = "# Annotations\n\n"
|
||||||
for annotation in annotations:
|
for annotation in annotations:
|
||||||
output += f"{annotation.formatted(self.formatting)}\n\n"
|
output += f"{annotation.format(self.formatting)}\n\n"
|
||||||
write_file(notepath, output, "w")
|
write_file(notepath, output, "w")
|
||||||
|
|
||||||
def _append_to_note(self, notepath, annotations):
|
def _append_to_note(self, notepath, annotations):
|
||||||
|
@ -351,14 +377,14 @@ class ExtractPlugin(PapersPlugin):
|
||||||
existing = read_text_file(notepath)
|
existing = read_text_file(notepath)
|
||||||
# removed annotations already found in the note
|
# removed annotations already found in the note
|
||||||
existing_dropped = [
|
existing_dropped = [
|
||||||
x for x in annotations if x.formatted(self.formatting) not in existing
|
x for x in annotations if x.format(self.formatting) not in existing
|
||||||
]
|
]
|
||||||
if not existing_dropped:
|
if not existing_dropped:
|
||||||
return
|
return
|
||||||
|
|
||||||
output = ""
|
output = ""
|
||||||
for annotation in existing_dropped:
|
for annotation in existing_dropped:
|
||||||
output += f"{annotation.formatted(self.formatting)}\n\n"
|
output += f"{annotation.format(self.formatting)}\n\n"
|
||||||
write_file(notepath, output, "a")
|
write_file(notepath, output, "a")
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue