refactor: Extract exporters to separate module
This commit is contained in:
parent
c8e8453b68
commit
72ddaaf1bc
5 changed files with 213 additions and 157 deletions
|
@ -7,9 +7,11 @@ import papis.notes
|
||||||
import papis.strings
|
import papis.strings
|
||||||
from papis.document import Document
|
from papis.document import Document
|
||||||
|
|
||||||
from papis_extract import exporter, extraction
|
|
||||||
from papis_extract.extractors import all_extractors
|
|
||||||
from papis_extract.annotation import Annotation
|
from papis_extract.annotation import Annotation
|
||||||
|
from papis_extract import extraction
|
||||||
|
from papis_extract.exporter import Exporter
|
||||||
|
from papis_extract.extractors import all_extractors
|
||||||
|
from papis_extract.exporters import all_exporters
|
||||||
from papis_extract.formatter import Formatter, formatters
|
from papis_extract.formatter import Formatter, formatters
|
||||||
|
|
||||||
logger = papis.logging.get_logger(__name__)
|
logger = papis.logging.get_logger(__name__)
|
||||||
|
@ -126,24 +128,25 @@ def run(
|
||||||
git: bool = False,
|
git: bool = False,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
for doc in documents:
|
|
||||||
for ext in extractors:
|
|
||||||
if not ext:
|
|
||||||
continue
|
|
||||||
|
|
||||||
annotations: list[Annotation] = extraction.start(ext, doc)
|
|
||||||
if write:
|
if write:
|
||||||
exporter.to_notes(
|
exporter: Exporter = all_exporters["notes"](
|
||||||
formatter=formatter or formatters["markdown-atx"],
|
formatter=formatter or formatters["markdown"],
|
||||||
document=doc,
|
|
||||||
annotations=annotations,
|
|
||||||
edit=edit,
|
edit=edit,
|
||||||
git=git,
|
git=git,
|
||||||
force=force,
|
force=force,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
exporter.to_stdout(
|
exporter: Exporter = all_exporters["stdout"](
|
||||||
formatter=formatter or formatters["markdown"],
|
formatter=formatter or formatters["markdown"]
|
||||||
document=doc,
|
|
||||||
annotations=annotations,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
doc_annots: list[tuple[Document, list[Annotation]]] = []
|
||||||
|
for doc in documents:
|
||||||
|
annotations: list[Annotation] = []
|
||||||
|
for ext in extractors:
|
||||||
|
if not ext:
|
||||||
|
continue
|
||||||
|
annotations.extend(extraction.start(ext, doc))
|
||||||
|
doc_annots.append((doc, annotations))
|
||||||
|
|
||||||
|
exporter.run(doc_annots)
|
||||||
|
|
|
@ -1,146 +1,28 @@
|
||||||
import papis.logging
|
from dataclasses import dataclass
|
||||||
import papis.document
|
from typing import Protocol
|
||||||
import papis.notes
|
|
||||||
import papis.commands.edit
|
|
||||||
import papis.api
|
|
||||||
import papis.git
|
|
||||||
import papis.config
|
|
||||||
import Levenshtein
|
|
||||||
from papis_extract.annotation import Annotation
|
|
||||||
|
|
||||||
|
import papis.api
|
||||||
|
import papis.commands.edit
|
||||||
|
import papis.config
|
||||||
|
import papis.document
|
||||||
|
import papis.git
|
||||||
|
import papis.logging
|
||||||
|
import papis.notes
|
||||||
|
|
||||||
|
from papis_extract.annotation import Annotation
|
||||||
from papis_extract.formatter import Formatter
|
from papis_extract.formatter import Formatter
|
||||||
|
|
||||||
logger = papis.logging.get_logger(__name__)
|
logger = papis.logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def to_stdout(
|
@dataclass
|
||||||
formatter: Formatter,
|
class Exporter(Protocol):
|
||||||
document: papis.document.Document,
|
formatter: Formatter
|
||||||
annotations: list[Annotation],
|
edit: bool = False
|
||||||
) -> None:
|
git: bool = False
|
||||||
"""Pretty print annotations to stdout.
|
force: bool = False
|
||||||
|
|
||||||
Gives a nice human-readable representations of
|
def run(
|
||||||
the annotations in somewhat of a list form.
|
self, annot_docs: list[tuple[papis.document.Document, list[Annotation]]]
|
||||||
Not intended for machine-readability.
|
) -> None:
|
||||||
"""
|
...
|
||||||
output: str = formatter(document, annotations)
|
|
||||||
if output:
|
|
||||||
print("{output}\n".format(output=output.rstrip("\n")))
|
|
||||||
|
|
||||||
|
|
||||||
def to_notes(
|
|
||||||
formatter: Formatter,
|
|
||||||
document: papis.document.Document,
|
|
||||||
annotations: list[Annotation],
|
|
||||||
edit: bool,
|
|
||||||
git: bool,
|
|
||||||
force: bool,
|
|
||||||
) -> None:
|
|
||||||
"""Write annotations into document notes.
|
|
||||||
|
|
||||||
Permanently writes the given annotations into notes
|
|
||||||
belonging to papis documents. Creates new notes for
|
|
||||||
documents missing a note field or appends to existing.
|
|
||||||
"""
|
|
||||||
formatted_annotations = formatter(document, annotations).split("\n")
|
|
||||||
if formatted_annotations:
|
|
||||||
_add_annots_to_note(document, formatted_annotations, force=force)
|
|
||||||
|
|
||||||
if edit:
|
|
||||||
papis.commands.edit.edit_notes(document, git=git)
|
|
||||||
|
|
||||||
|
|
||||||
def _add_annots_to_note(
|
|
||||||
document: papis.document.Document,
|
|
||||||
formatted_annotations: list[str],
|
|
||||||
git: bool = False,
|
|
||||||
force: bool = False,
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Append new annotations to the end of a note.
|
|
||||||
|
|
||||||
This function appends new annotations to the end of a note file. It takes in a
|
|
||||||
document object containing the note, a list of formatted annotations to be
|
|
||||||
added, and optional flags git and force. If git is True, the changes will be
|
|
||||||
committed to git. If force is True, the annotations will be added even if they
|
|
||||||
already exist in the note.
|
|
||||||
|
|
||||||
:param document: The document object representing the note
|
|
||||||
:type document: class:`papis.document.Document`
|
|
||||||
:param formatted_annotations: A list of already formatted annotations to be added
|
|
||||||
:type formatted_annotations: list[str]
|
|
||||||
:param git: Flag indicating whether to commit changes to git, defaults to False.
|
|
||||||
:type git: bool, optional
|
|
||||||
:param force: Flag indicating whether to force adding annotations even if they
|
|
||||||
already exist, defaults to False.
|
|
||||||
:type force: bool, optional
|
|
||||||
"""
|
|
||||||
logger.debug("Adding annotations to note.")
|
|
||||||
notes_path = papis.notes.notes_path_ensured(document)
|
|
||||||
|
|
||||||
existing: list[str] = []
|
|
||||||
with open(notes_path, "r") as file_read:
|
|
||||||
existing = file_read.readlines()
|
|
||||||
|
|
||||||
new_annotations: list[str] = []
|
|
||||||
if not force:
|
|
||||||
new_annotations = _drop_existing_annotations(formatted_annotations, existing)
|
|
||||||
if not new_annotations:
|
|
||||||
return
|
|
||||||
|
|
||||||
with open(notes_path, "a") as f:
|
|
||||||
# add newline if theres no empty space at file end
|
|
||||||
if len(existing) > 0 and existing[-1].strip() != "":
|
|
||||||
f.write("\n")
|
|
||||||
f.write("\n\n".join(new_annotations))
|
|
||||||
f.write("\n")
|
|
||||||
logger.info(
|
|
||||||
f"Wrote {len(new_annotations)} "
|
|
||||||
f"{'line' if len(new_annotations) == 1 else 'lines'} "
|
|
||||||
f"to {papis.document.describe(document)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if git:
|
|
||||||
msg = "Update notes for '{0}'".format(papis.document.describe(document))
|
|
||||||
folder = document.get_main_folder()
|
|
||||||
if folder:
|
|
||||||
papis.git.add_and_commit_resources(
|
|
||||||
folder, [notes_path, document.get_info_file()], msg
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _drop_existing_annotations(
|
|
||||||
formatted_annotations: list[str], file_lines: list[str]
|
|
||||||
) -> list[str]:
|
|
||||||
"""Returns the input annotations dropping any existing.
|
|
||||||
|
|
||||||
Takes a list of formatted annotations and a list of strings
|
|
||||||
(most probably existing lines in a file). If anny annotations
|
|
||||||
match an existing line closely enough, they will be dropped.
|
|
||||||
|
|
||||||
Returns list of annotations without duplicates.
|
|
||||||
"""
|
|
||||||
minimum_similarity = (
|
|
||||||
papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0
|
|
||||||
)
|
|
||||||
|
|
||||||
remaining: list[str] = []
|
|
||||||
for an in formatted_annotations:
|
|
||||||
an_split = an.splitlines()
|
|
||||||
if an_split and not _test_similarity(
|
|
||||||
an_split[0], file_lines, minimum_similarity
|
|
||||||
):
|
|
||||||
remaining.append(an)
|
|
||||||
|
|
||||||
return remaining
|
|
||||||
|
|
||||||
|
|
||||||
def _test_similarity(
|
|
||||||
string: str, lines: list[str], minimum_similarity: float = 1.0
|
|
||||||
) -> bool:
|
|
||||||
for line in lines:
|
|
||||||
ratio = Levenshtein.ratio(string, line)
|
|
||||||
if ratio > minimum_similarity:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
14
papis_extract/exporters/__init__.py
Normal file
14
papis_extract/exporters/__init__.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
import papis.logging
|
||||||
|
|
||||||
|
from papis_extract.exporter import Exporter
|
||||||
|
from papis_extract.exporters.notes import NotesExporter
|
||||||
|
from papis_extract.exporters.stdout import StdoutExporter
|
||||||
|
|
||||||
|
|
||||||
|
logger = papis.logging.get_logger(__name__)
|
||||||
|
|
||||||
|
# Registry of the available exporter classes, keyed by the name callers use
# to look them up.
all_exporters: dict[str, type[Exporter]] = {
    "stdout": StdoutExporter,
    "notes": NotesExporter,
}
|
131
papis_extract/exporters/notes.py
Normal file
131
papis_extract/exporters/notes.py
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import Levenshtein
|
||||||
|
from papis.document import Document
|
||||||
|
from papis_extract.annotation import Annotation
|
||||||
|
from papis_extract.formatter import Formatter
|
||||||
|
from papis.logging import get_logger
|
||||||
|
import papis.notes
|
||||||
|
import papis.document
|
||||||
|
import papis.git
|
||||||
|
import papis.config
|
||||||
|
import papis.commands.edit
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class NotesExporter:
    """Exporter that appends formatted annotations to each document's notes file."""

    # Callable that renders a document and its annotations into a single string.
    formatter: Formatter
    # Open the notes with papis' edit command after writing.
    edit: bool = False
    # Commit the changed notes to git.
    git: bool = False
    # Write annotations even if similar lines already exist in the note.
    force: bool = False

    def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
        """Write annotations into document notes.

        Permanently writes the given annotations into notes
        belonging to papis documents. Creates new notes for
        documents missing a note field or appends to existing.

        :param annot_docs: Pairs of a document and the annotations extracted from it.
        :type annot_docs: list[tuple[Document, list[Annotation]]]
        """
        for doc, annots in annot_docs:
            formatted_annotations = self.formatter(doc, annots).split("\n")
            # NOTE(review): str.split always yields at least one element, so
            # this guard is always true; empty entries are filtered out inside
            # _add_annots_to_note instead.
            if formatted_annotations:
                self._add_annots_to_note(
                    doc,
                    formatted_annotations,
                    # Previously the git flag was never forwarded, so the
                    # helper's commit branch could not run. When the notes are
                    # opened for editing below, edit_notes is handed the git
                    # flag itself, so avoid committing twice.
                    git=self.git and not self.edit,
                    force=self.force,
                )

            if self.edit:
                papis.commands.edit.edit_notes(doc, git=self.git)

    def _add_annots_to_note(
        self,
        document: Document,
        formatted_annotations: list[str],
        git: bool = False,
        force: bool = False,
    ) -> None:
        """
        Append new annotations to the end of a note.

        This function appends new annotations to the end of a note file. It takes in a
        document object containing the note, a list of formatted annotations to be
        added, and optional flags git and force. If git is True, the changes will be
        committed to git. If force is True, the annotations will be added even if they
        already exist in the note.

        :param document: The document object representing the note
        :type document: class:`papis.document.Document`
        :param formatted_annotations: A list of already formatted annotations to be added
        :type formatted_annotations: list[str]
        :param git: Flag indicating whether to commit changes to git, defaults to False.
        :type git: bool, optional
        :param force: Flag indicating whether to force adding annotations even if they
            already exist, defaults to False.
        :type force: bool, optional
        """
        logger.debug("Adding annotations to note.")
        notes_path = papis.notes.notes_path_ensured(document)

        with open(notes_path, "r") as file_read:
            existing: list[str] = file_read.readlines()

        if force:
            # Forcing skips the duplicate check entirely. Empty entries are
            # still dropped, mirroring what the duplicate filter does.
            new_annotations = [an for an in formatted_annotations if an]
        else:
            new_annotations = self._drop_existing_annotations(
                formatted_annotations, existing
            )
        if not new_annotations:
            return

        with open(notes_path, "a") as f:
            # add a newline if there is no empty space at the end of the file
            if existing and existing[-1].strip() != "":
                f.write("\n")
            f.write("\n\n".join(new_annotations))
            f.write("\n")
        logger.info(
            f"Wrote {len(new_annotations)} "
            f"{'line' if len(new_annotations) == 1 else 'lines'} "
            f"to {papis.document.describe(document)}"
        )

        if git:
            msg = "Update notes for '{0}'".format(papis.document.describe(document))
            folder = document.get_main_folder()
            if folder:
                papis.git.add_and_commit_resources(
                    folder, [notes_path, document.get_info_file()], msg
                )

    def _drop_existing_annotations(
        self, formatted_annotations: list[str], file_lines: list[str]
    ) -> list[str]:
        """Returns the input annotations dropping any existing.

        Takes a list of formatted annotations and a list of strings
        (most probably existing lines in a file). If any annotations
        match an existing line closely enough, they will be dropped.
        Empty annotations are dropped as well.

        Returns list of annotations without duplicates.
        """
        # A missing (or zero) setting falls back to 1.0, i.e. exact matches only.
        minimum_similarity = (
            papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0
        )

        remaining: list[str] = []
        for an in formatted_annotations:
            an_split = an.splitlines()
            # Only the first line of an annotation is compared against the file.
            if an_split and not self._test_similarity(
                an_split[0], file_lines, minimum_similarity
            ):
                remaining.append(an)

        return remaining

    def _test_similarity(
        self, string: str, lines: list[str], minimum_similarity: float = 1.0
    ) -> bool:
        """Return True if any of ``lines`` is similar enough to ``string``.

        Similarity is the Levenshtein ratio, which is at most 1.0. The
        comparison is inclusive so that the default threshold of 1.0 matches
        identical lines; a strict ``>`` could never succeed at 1.0.

        :param string: The text to look for.
        :param lines: Candidate lines, e.g. as returned by ``readlines()``.
        :param minimum_similarity: Ratio at or above which a line counts as a match.
        """
        return any(
            # Lines read from a file keep their trailing newline, which would
            # otherwise stop identical text from ever reaching a ratio of 1.0.
            Levenshtein.ratio(string, line.rstrip("\n")) >= minimum_similarity
            for line in lines
        )
|
26
papis_extract/exporters/stdout.py
Normal file
26
papis_extract/exporters/stdout.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from papis.document import Document
|
||||||
|
|
||||||
|
from papis_extract.annotation import Annotation
|
||||||
|
from papis_extract.formatter import Formatter
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class StdoutExporter:
    """Exporter that prints the formatted annotations of each document."""

    # Callable that renders a document and its annotations into a single string.
    formatter: Formatter
    # The flags below are not read by run(); the fields mirror those of the
    # notes exporter so both classes accept the same constructor arguments.
    edit: bool = False
    git: bool = False
    force: bool = False

    def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
        """Print the annotations of every document to stdout.

        Each document's formatter output is printed with its trailing
        newlines replaced by exactly one blank line. Documents whose
        formatter output is empty print nothing. The result is meant
        for human readers, not for machine consumption.
        """
        for document, annotations in annot_docs:
            rendered: str = self.formatter(document, annotations)
            if not rendered:
                continue
            # Same bytes as printing "<text>\n" with print's default line end.
            print(rendered.rstrip("\n"), end="\n\n")
|
Loading…
Reference in a new issue