Change formatting configuration style for containers

This commit is contained in:
Marty Oehme 2022-12-24 23:32:34 +01:00
parent 5bddf97e58
commit 2b476206a2
Signed by: Marty
GPG Key ID: 73BA40D5AFAF49C9
1 changed file with 17 additions and 15 deletions

View File

@@ -2,8 +2,8 @@ import os
import re import re
import argparse import argparse
import math import math
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Tuple from typing import Dict
import fitz import fitz
import Levenshtein import Levenshtein
@@ -39,8 +39,8 @@ class Annotation:
text: str = "" text: str = ""
content: str = "" content: str = ""
page: int = 1 page: int = 1
colors: Tuple = (0.0, 0.0, 0.0) colors: Dict = field(default_factory=lambda: {"stroke": (0.0, 0.0, 0.0)})
tag: str = None tag: str = ""
def formatted(self, formatting): def formatted(self, formatting):
output = formatting output = formatting
@@ -51,25 +51,25 @@ class Annotation:
r"{newline}": "\n", r"{newline}": "\n",
r"{tag}": self.tag, r"{tag}": self.tag,
} }
if self.text == "":
output = re.sub(r"{quote_begin}.*{quote_end}", "", output)
if self.content == "":
output = re.sub(r"{note_begin}.*{note_end}", "", output)
output = re.sub(r"{note_begin}", "", output)
output = re.sub(r"{note_end}", "", output)
output = re.sub(r"{quote_begin}", "", output)
output = re.sub(r"{quote_end}", "", output)
pattern = re.compile( pattern = re.compile(
"|".join( "|".join(
[re.escape(k) for k in sorted(replacements, key=len, reverse=True)] [re.escape(k) for k in sorted(replacements, key=len, reverse=True)]
), ),
flags=re.DOTALL, flags=re.DOTALL,
) )
patt_quote_container = re.compile(r"{%quote_container(.*?)%}")
patt_note_container = re.compile(r"{%note_container(.*?)%}")
patt_tag_container = re.compile(r"{%tag_container(.*?)%}")
output = patt_quote_container.sub(r"\1" if self.text else "", output)
output = patt_note_container.sub(r"\1" if self.content else "", output)
output = patt_tag_container.sub(r"\1" if self.tag else "", output)
return pattern.sub(lambda x: replacements[x.group(0)], output) return pattern.sub(lambda x: replacements[x.group(0)], output)
@property @property
def colorname(self): def colorname(self):
annot_colors = self.colors.get("stroke") or self.colors.get("fill") annot_colors = (
self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
)
nearest = None nearest = None
smallest_dist = 2.0 smallest_dist = 2.0
for name, values in COLORS.items(): for name, values in COLORS.items():
@@ -108,7 +108,7 @@ class ExtractPlugin(PapersPlugin):
self.minimum_similarity = float(settings.get("minimum_similarity", 0.75)) self.minimum_similarity = float(settings.get("minimum_similarity", 0.75))
self.formatting = settings.get( self.formatting = settings.get(
"formatting", "formatting",
"{newline}{quote_begin}> {quote} {quote_end}[{page}]{note_begin}{newline}Note: {note} {note_end} #{tag}", "{%quote_container> {quote} %}[{page}]{%note_container{newline}Note: {note} %}{%tag_container #{tag}%}",
) )
self.color_mapping = settings.get("color_mapping", {}) self.color_mapping = settings.get("color_mapping", {})
@@ -350,7 +350,9 @@ class ExtractPlugin(PapersPlugin):
""" """
existing = read_text_file(notepath) existing = read_text_file(notepath)
# removed annotations already found in the note # removed annotations already found in the note
existing_dropped = [x for x in annotations if x.formatted(self.formatting) not in existing] existing_dropped = [
x for x in annotations if x.formatted(self.formatting) not in existing
]
if not existing_dropped: if not existing_dropped:
return return