papis-extract/papis_extract/exporter.py
Marty Oehme 5a6d672c76
refactor: Move formatting logic to formatters
Formatters (previously templates) were pure data containers before,
containing the 'template' for how things should be formatted using
mustache. The formatting would be done a) in the exporters and b) in the
annotations.

This spread of formatting has now been consolidated into the Formatter,
which fixes the overall spread of formatting code and now can coherently
format a whole output instead of just individual annotations.

A formatter contains references to all documents and contained
annotations and will format everything at once by default, but the
formatting function can be invoked with reference to a specific
annotated document to only format that.

This commit should put more separation between the concerns of exporter
and formatter, and make formatting a concern purely of the formatters and
annotation objects.
2023-09-20 09:14:58 +02:00

121 lines
3.6 KiB
Python

import papis.logging
import papis.document
import papis.notes
import papis.commands.edit
import papis.api
import papis.git
import papis.config
import Levenshtein
from papis_extract.formatter import Formatter
# Module-level logger, named after this module.
logger = papis.logging.get_logger(__name__)
def to_stdout(template: Formatter) -> None:
    """Print the formatted annotations to stdout.

    Runs the formatter without a specific document and prints the
    resulting text with any trailing newlines removed. The output is
    meant to be read by humans, not parsed by machines.

    Args:
        template: Formatter whose ``execute()`` result is printed.
    """
    print(template.execute().rstrip("\n"))
def to_notes(template: Formatter, edit: bool, git: bool) -> None:
    """Write annotations into document notes.

    Formats each annotated document known to the formatter on its own
    and appends the resulting lines to the note belonging to that
    document. Documents whose formatted output is empty are left alone.

    Args:
        template: Formatter providing ``annotated_docs`` and the
            per-document ``execute(entry)`` formatting.
        edit: If true, open the notes of each document for editing
            after its annotations were processed.
        git: If true, commit the changed notes; forwarded both to the
            note writer and to the edit command.
    """
    for entry in template.annotated_docs:
        output = template.execute(entry)
        # Emptiness has to be checked on the string itself:
        # "".split("\n") yields [""], which is truthy, so testing the
        # split list would never skip a document without output.
        if output.strip():
            _add_annots_to_note(entry.document, output.split("\n"), git=git)

        if edit:
            papis.commands.edit.edit_notes(entry.document, git=git)
def _add_annots_to_note(
    document: papis.document.Document,
    formatted_annotations: list[str],
    git: bool = False,
) -> None:
    """Append annotations not yet present to the end of a document's note.

    Reads the current note, filters out annotations which already
    appear in it and appends the remaining ones to the note file.
    Nothing is written if no new annotations remain.

    Args:
        document: The papis document whose note is extended.
        formatted_annotations: Already formatted annotation strings.
        git: If true, add and commit the note and the document info
            file after writing.
    """
    logger.debug("Adding annotations to note.")
    notes_path = papis.notes.notes_path_ensured(document)

    with open(notes_path, "r") as note_file:
        note_lines: list[str] = note_file.readlines()

    new_annotations: list[str] = _drop_existing_annotations(
        formatted_annotations, note_lines
    )
    if not new_annotations:
        return

    # Only put a newline in front when the note has content and its
    # last line is not blank already.
    needs_separator = len(note_lines) > 0 and note_lines[-1].strip() != ""
    with open(notes_path, "a") as note_file:
        if needs_separator:
            note_file.write("\n")
        note_file.write("\n".join(new_annotations))
        note_file.write("\n")

    logger.info(
        f"Wrote {len(new_annotations)} "
        f"{'annotation' if len(new_annotations) == 1 else 'annotations'} "
        f"to {papis.document.describe(document)}"
    )

    if not git:
        return

    msg = "Update notes for '{0}'".format(papis.document.describe(document))
    folder = document.get_main_folder()
    if folder:
        papis.git.add_and_commit_resources(
            folder, [notes_path, document.get_info_file()], msg
        )
def _drop_existing_annotations(
    formatted_annotations: list[str], file_lines: list[str]
) -> list[str]:
    """Return the input annotations without those that already exist.

    Takes a list of formatted annotations and a list of strings
    (most probably existing lines in a file). The first line of each
    annotation is compared against every given line; if it matches one
    closely enough, the annotation is dropped. Empty annotations are
    dropped as well.

    The similarity threshold is read from the ``minimum_similarity``
    option in the ``plugins.extract`` config section and falls back to
    1.0 if the option yields no value.

    Args:
        formatted_annotations: Formatted annotation strings to filter.
        file_lines: Lines to check the annotations against.

    Returns:
        The annotations which were not found in ``file_lines``, in
        their original order.
    """
    configured = papis.config.getfloat("minimum_similarity", "plugins.extract")
    # Test for None explicitly: a configured threshold of 0.0 is falsy
    # and would be silently replaced by the fallback with `or`.
    minimum_similarity = 1.0 if configured is None else configured

    remaining: list[str] = []
    for annotation in formatted_annotations:
        annotation_lines = annotation.splitlines()
        if not annotation_lines:
            # Nothing to compare or write for an empty annotation.
            continue
        if _test_similarity(annotation_lines[0], file_lines, minimum_similarity):
            continue
        remaining.append(annotation)
    return remaining
def _test_similarity(
    string: str, lines: list[str], minimum_similarity: float = 1.0
) -> bool:
    """Return whether any line is similar enough to the given string.

    Similarity is measured with ``Levenshtein.ratio``. A line counts as
    a match if its ratio reaches ``minimum_similarity``; the comparison
    is inclusive so that the default threshold of 1.0 matches identical
    strings instead of never matching anything.

    Args:
        string: The text to look for.
        lines: Candidate lines, e.g. as returned by ``readlines()``.
        minimum_similarity: Threshold a ratio has to reach to match.

    Returns:
        True if at least one line matches, False otherwise (including
        for an empty list of lines).
    """
    return any(
        # Lines read from a file keep their line terminator, which would
        # lower the ratio even for otherwise identical text.
        Levenshtein.ratio(string, line.rstrip("\r\n")) >= minimum_similarity
        for line in lines
    )