diff --git a/papis_extract/__init__.py b/papis_extract/__init__.py index 986ed99..d0a6e45 100644 --- a/papis_extract/__init__.py +++ b/papis_extract/__init__.py @@ -7,9 +7,11 @@ import papis.notes import papis.strings from papis.document import Document -from papis_extract import exporter, extraction -from papis_extract.extractors import all_extractors from papis_extract.annotation import Annotation +from papis_extract import extraction +from papis_extract.exporter import Exporter +from papis_extract.extractors import all_extractors +from papis_extract.exporters import all_exporters from papis_extract.formatter import Formatter, formatters logger = papis.logging.get_logger(__name__) @@ -126,24 +128,25 @@ def run( git: bool = False, force: bool = False, ) -> None: + if write: + exporter: Exporter = all_exporters["notes"]( + formatter=formatter or formatters["markdown"], + edit=edit, + git=git, + force=force, + ) + else: + exporter: Exporter = all_exporters["stdout"]( + formatter=formatter or formatters["markdown"] + ) + + doc_annots: list[tuple[Document, list[Annotation]]] = [] for doc in documents: + annotations: list[Annotation] = [] for ext in extractors: if not ext: continue + annotations.extend(extraction.start(ext, doc)) + doc_annots.append((doc, annotations)) - annotations: list[Annotation] = extraction.start(ext, doc) - if write: - exporter.to_notes( - formatter=formatter or formatters["markdown-atx"], - document=doc, - annotations=annotations, - edit=edit, - git=git, - force=force, - ) - else: - exporter.to_stdout( - formatter=formatter or formatters["markdown"], - document=doc, - annotations=annotations, - ) + exporter.run(doc_annots) diff --git a/papis_extract/exporter.py b/papis_extract/exporter.py index 144042c..de1fe6c 100644 --- a/papis_extract/exporter.py +++ b/papis_extract/exporter.py @@ -1,146 +1,28 @@ -import papis.logging -import papis.document -import papis.notes -import papis.commands.edit -import papis.api -import papis.git -import papis.config -import Levenshtein -from papis_extract.annotation import Annotation +from dataclasses import dataclass +from typing import Protocol +import papis.api +import papis.commands.edit +import papis.config +import papis.document +import papis.git +import papis.logging +import papis.notes + +from papis_extract.annotation import Annotation from papis_extract.formatter import Formatter logger = papis.logging.get_logger(__name__) -def to_stdout( - formatter: Formatter, - document: papis.document.Document, - annotations: list[Annotation], -) -> None: - """Pretty print annotations to stdout. +@dataclass +class Exporter(Protocol): + formatter: Formatter + edit: bool = False + git: bool = False + force: bool = False - Gives a nice human-readable representations of - the annotations in somewhat of a list form. - Not intended for machine-readability. - """ - output: str = formatter(document, annotations) - if output: - print("{output}\n".format(output=output.rstrip("\n"))) - - -def to_notes( - formatter: Formatter, - document: papis.document.Document, - annotations: list[Annotation], - edit: bool, - git: bool, - force: bool, -) -> None: - """Write annotations into document notes. - - Permanently writes the given annotations into notes - belonging to papis documents. Creates new notes for - documents missing a note field or appends to existing. - """ - formatted_annotations = formatter(document, annotations).split("\n") - if formatted_annotations: - _add_annots_to_note(document, formatted_annotations, force=force) - - if edit: - papis.commands.edit.edit_notes(document, git=git) - - -def _add_annots_to_note( - document: papis.document.Document, - formatted_annotations: list[str], - git: bool = False, - force: bool = False, -) -> None: - """ - Append new annotations to the end of a note. - - This function appends new annotations to the end of a note file. It takes in a - document object containing the note, a list of formatted annotations to be - added, and optional flags git and force. If git is True, the changes will be - committed to git. If force is True, the annotations will be added even if they - already exist in the note. - - :param document: The document object representing the note - :type document: class:`papis.document.Document` - :param formatted_annotations: A list of already formatted annotations to be added - :type formatted_annotations: list[str] - :param git: Flag indicating whether to commit changes to git, defaults to False. - :type git: bool, optional - :param force: Flag indicating whether to force adding annotations even if they - already exist, defaults to False. - :type force: bool, optional - """ - logger.debug("Adding annotations to note.") - notes_path = papis.notes.notes_path_ensured(document) - - existing: list[str] = [] - with open(notes_path, "r") as file_read: - existing = file_read.readlines() - - new_annotations: list[str] = [] - if not force: - new_annotations = _drop_existing_annotations(formatted_annotations, existing) - if not new_annotations: - return - - with open(notes_path, "a") as f: - # add newline if theres no empty space at file end - if len(existing) > 0 and existing[-1].strip() != "": - f.write("\n") - f.write("\n\n".join(new_annotations)) - f.write("\n") - logger.info( - f"Wrote {len(new_annotations)} " - f"{'line' if len(new_annotations) == 1 else 'lines'} " - f"to {papis.document.describe(document)}" - ) - - if git: - msg = "Update notes for '{0}'".format(papis.document.describe(document)) - folder = document.get_main_folder() - if folder: - papis.git.add_and_commit_resources( - folder, [notes_path, document.get_info_file()], msg - ) - - -def _drop_existing_annotations( - formatted_annotations: list[str], file_lines: list[str] -) -> list[str]: - """Returns the input annotations dropping any existing. - - Takes a list of formatted annotations and a list of strings - (most probably existing lines in a file). If anny annotations - match an existing line closely enough, they will be dropped. - - Returns list of annotations without duplicates. - """ - minimum_similarity = ( - papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0 - ) - - remaining: list[str] = [] - for an in formatted_annotations: - an_split = an.splitlines() - if an_split and not _test_similarity( - an_split[0], file_lines, minimum_similarity - ): - remaining.append(an) - - return remaining - - -def _test_similarity( - string: str, lines: list[str], minimum_similarity: float = 1.0 -) -> bool: - for line in lines: - ratio = Levenshtein.ratio(string, line) - if ratio > minimum_similarity: - return True - return False + def run( + self, annot_docs: list[tuple[papis.document.Document, list[Annotation]]] + ) -> None: + ... diff --git a/papis_extract/exporters/__init__.py b/papis_extract/exporters/__init__.py new file mode 100644 index 0000000..6b5208b --- /dev/null +++ b/papis_extract/exporters/__init__.py @@ -0,0 +1,14 @@ + +import papis.logging + +from papis_extract.exporter import Exporter +from papis_extract.exporters.notes import NotesExporter +from papis_extract.exporters.stdout import StdoutExporter + + +logger = papis.logging.get_logger(__name__) + +all_exporters: dict[str, type[Exporter]] = {} + +all_exporters["stdout"] = StdoutExporter +all_exporters["notes"] = NotesExporter diff --git a/papis_extract/exporters/notes.py b/papis_extract/exporters/notes.py new file mode 100644 index 0000000..a218db7 --- /dev/null +++ b/papis_extract/exporters/notes.py @@ -0,0 +1,131 @@ +from dataclasses import dataclass +import Levenshtein +from papis.document import Document +from papis_extract.annotation import Annotation +from papis_extract.formatter import Formatter +from papis.logging import get_logger +import papis.notes +import papis.document +import papis.git +import papis.config +import papis.commands.edit + +logger = get_logger(__name__) + + +@dataclass +class NotesExporter: + formatter: Formatter + edit: bool = False + git: bool = False + force: bool = False + + def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None: + """Write annotations into document notes. + + Permanently writes the given annotations into notes + belonging to papis documents. Creates new notes for + documents missing a note field or appends to existing. + """ + for doc, annots in annot_docs: + formatted_annotations = self.formatter(doc, annots).split("\n") + if formatted_annotations: + self._add_annots_to_note(doc, formatted_annotations, force=self.force) + + if self.edit: + papis.commands.edit.edit_notes(doc, git=self.git) + + def _add_annots_to_note( + self, + document: Document, + formatted_annotations: list[str], + git: bool = False, + force: bool = False, + ) -> None: + """ + Append new annotations to the end of a note. + + This function appends new annotations to the end of a note file. It takes in a + document object containing the note, a list of formatted annotations to be + added, and optional flags git and force. If git is True, the changes will be + committed to git. If force is True, the annotations will be added even if they + already exist in the note. + + :param document: The document object representing the note + :type document: class:`papis.document.Document` + :param formatted_annotations: A list of already formatted annotations to be added + :type formatted_annotations: list[str] + :param git: Flag indicating whether to commit changes to git, defaults to False. + :type git: bool, optional + :param force: Flag indicating whether to force adding annotations even if they + already exist, defaults to False. + :type force: bool, optional + """ + logger.debug("Adding annotations to note.") + notes_path = papis.notes.notes_path_ensured(document) + + existing: list[str] = [] + with open(notes_path, "r") as file_read: + existing = file_read.readlines() + + new_annotations: list[str] = [] + if not force: + new_annotations = self._drop_existing_annotations( + formatted_annotations, existing + ) + if not new_annotations: + return + + with open(notes_path, "a") as f: + # add newline if theres no empty space at file end + if len(existing) > 0 and existing[-1].strip() != "": + f.write("\n") + f.write("\n\n".join(new_annotations)) + f.write("\n") + logger.info( + f"Wrote {len(new_annotations)} " + f"{'line' if len(new_annotations) == 1 else 'lines'} " + f"to {papis.document.describe(document)}" + ) + + if git: + msg = "Update notes for '{0}'".format(papis.document.describe(document)) + folder = document.get_main_folder() + if folder: + papis.git.add_and_commit_resources( + folder, [notes_path, document.get_info_file()], msg + ) + + def _drop_existing_annotations( + self, formatted_annotations: list[str], file_lines: list[str] + ) -> list[str]: + """Returns the input annotations dropping any existing. + + Takes a list of formatted annotations and a list of strings + (most probably existing lines in a file). If anny annotations + match an existing line closely enough, they will be dropped. + + Returns list of annotations without duplicates. + """ + minimum_similarity = ( + papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0 + ) + + remaining: list[str] = [] + for an in formatted_annotations: + an_split = an.splitlines() + if an_split and not self._test_similarity( + an_split[0], file_lines, minimum_similarity + ): + remaining.append(an) + + return remaining + + def _test_similarity( + self, string: str, lines: list[str], minimum_similarity: float = 1.0 + ) -> bool: + for line in lines: + ratio = Levenshtein.ratio(string, line) + if ratio > minimum_similarity: + return True + return False diff --git a/papis_extract/exporters/stdout.py b/papis_extract/exporters/stdout.py new file mode 100644 index 0000000..ed1cabf --- /dev/null +++ b/papis_extract/exporters/stdout.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass + +from papis.document import Document + +from papis_extract.annotation import Annotation +from papis_extract.formatter import Formatter + + +@dataclass +class StdoutExporter: + formatter: Formatter + edit: bool = False + git: bool = False + force: bool = False + + def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None: + """Pretty print annotations to stdout. + + Gives a nice human-readable representations of + the annotations in somewhat of a list form. + Not intended for machine-readability. + """ + for doc, annots in annot_docs: + output: str = self.formatter(doc, annots) + if output: + print("{output}\n".format(output=output.rstrip("\n")))