Add formatting configuration

This commit is contained in:
Marty Oehme 2022-12-22 23:47:50 +01:00
parent e3aacc4b15
commit a3e2d8693d
Signed by: Marty
GPG key ID: 73BA40D5AFAF49C9

View file

@ -43,11 +43,10 @@ class ExtractPlugin(PapersPlugin):
self.minimum_similarity = float( self.minimum_similarity = float(
conf["plugins"].get("extract", {}).get("minimum_similarity", 0.75) conf["plugins"].get("extract", {}).get("minimum_similarity", 0.75)
) )
self.highlight_prefix = ( self.formatting = (
conf["plugins"].get("extract", {}).get("quote_prefix", "> ") conf["plugins"]
) .get("extract", {})
self.note_prefix = ( .get("formatting", "> {quote} [{page}]\nNote: {note}")
conf["plugins"].get("extract", {}).get("note_prefix", "Note: ")
) )
def update_parser(self, subparsers, conf): def update_parser(self, subparsers, conf):
@ -139,16 +138,21 @@ class ExtractPlugin(PapersPlugin):
with fitz.Document(filename) as doc: with fitz.Document(filename) as doc:
for page in doc: for page in doc:
for annot in page.annots(): for annot in page.annots():
content = self._retrieve_annotation_content( quote, note = self._retrieve_annotation_content(page, annot)
page, annot, self.highlight_prefix, self.note_prefix
) replacements = [
if content: ("{quote}", quote),
annotations.append(f"[{(page.number or 0) + 1}] {content}") ("{note}", note),
("{page}", str(page.number)),
]
output = self.formatting
for rep in replacements:
output = output.replace(rep[0], rep[1])
annotations.append(output)
return annotations return annotations
def _retrieve_annotation_content( def _retrieve_annotation_content(self, page, annotation):
self, page, annotation, highlight_prefix="> ", note_prefix="Note: "
):
"""Gets the text content of an annotation. """Gets the text content of an annotation.
Returns the actual content of an annotation. Sometimes Returns the actual content of an annotation. Sometimes
@ -162,15 +166,15 @@ class ExtractPlugin(PapersPlugin):
# highlight with selection in note # highlight with selection in note
if Levenshtein.ratio(content, written) > self.minimum_similarity: if Levenshtein.ratio(content, written) > self.minimum_similarity:
return f"{highlight_prefix}{content}" return (content, "")
# an independent note, not a highlight # an independent note, not a highlight
elif content and not written: elif content and not written:
return f"{note_prefix}{content}" return ("", content)
# both a highlight and a note # both a highlight and a note
elif content: elif content:
return f"{highlight_prefix}{written}\n{note_prefix}{content}" return (written, content)
# highlight with selection not in note # highlight with selection not in note
return f"{highlight_prefix}{written}" return (written, "")
def _to_stdout(self, annotated_papers): def _to_stdout(self, annotated_papers):
"""Write annotations to stdout. """Write annotations to stdout.