refactor: Extract exporters to separate module
This commit is contained in:
parent
c8e8453b68
commit
72ddaaf1bc
5 changed files with 213 additions and 157 deletions
|
@ -7,9 +7,11 @@ import papis.notes
|
|||
import papis.strings
|
||||
from papis.document import Document
|
||||
|
||||
from papis_extract import exporter, extraction
|
||||
from papis_extract.extractors import all_extractors
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract import extraction
|
||||
from papis_extract.exporter import Exporter
|
||||
from papis_extract.extractors import all_extractors
|
||||
from papis_extract.exporters import all_exporters
|
||||
from papis_extract.formatter import Formatter, formatters
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
@ -126,24 +128,25 @@ def run(
|
|||
git: bool = False,
|
||||
force: bool = False,
|
||||
) -> None:
|
||||
if write:
|
||||
exporter: Exporter = all_exporters["notes"](
|
||||
formatter=formatter or formatters["markdown"],
|
||||
edit=edit,
|
||||
git=git,
|
||||
force=force,
|
||||
)
|
||||
else:
|
||||
exporter: Exporter = all_exporters["stdout"](
|
||||
formatter=formatter or formatters["markdown"]
|
||||
)
|
||||
|
||||
doc_annots: list[tuple[Document, list[Annotation]]] = []
|
||||
for doc in documents:
|
||||
annotations: list[Annotation] = []
|
||||
for ext in extractors:
|
||||
if not ext:
|
||||
continue
|
||||
annotations.extend(extraction.start(ext, doc))
|
||||
doc_annots.append((doc, annotations))
|
||||
|
||||
annotations: list[Annotation] = extraction.start(ext, doc)
|
||||
if write:
|
||||
exporter.to_notes(
|
||||
formatter=formatter or formatters["markdown-atx"],
|
||||
document=doc,
|
||||
annotations=annotations,
|
||||
edit=edit,
|
||||
git=git,
|
||||
force=force,
|
||||
)
|
||||
else:
|
||||
exporter.to_stdout(
|
||||
formatter=formatter or formatters["markdown"],
|
||||
document=doc,
|
||||
annotations=annotations,
|
||||
)
|
||||
exporter.run(doc_annots)
|
||||
|
|
|
@ -1,146 +1,28 @@
|
|||
import papis.logging
|
||||
import papis.document
|
||||
import papis.notes
|
||||
import papis.commands.edit
|
||||
import papis.api
|
||||
import papis.git
|
||||
import papis.config
|
||||
import Levenshtein
|
||||
from papis_extract.annotation import Annotation
|
||||
from dataclasses import dataclass
|
||||
from typing import Protocol
|
||||
|
||||
import papis.api
|
||||
import papis.commands.edit
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.git
|
||||
import papis.logging
|
||||
import papis.notes
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
|
||||
def to_stdout(
|
||||
formatter: Formatter,
|
||||
document: papis.document.Document,
|
||||
annotations: list[Annotation],
|
||||
) -> None:
|
||||
"""Pretty print annotations to stdout.
|
||||
@dataclass
|
||||
class Exporter(Protocol):
|
||||
formatter: Formatter
|
||||
edit: bool = False
|
||||
git: bool = False
|
||||
force: bool = False
|
||||
|
||||
Gives a nice human-readable representations of
|
||||
the annotations in somewhat of a list form.
|
||||
Not intended for machine-readability.
|
||||
"""
|
||||
output: str = formatter(document, annotations)
|
||||
if output:
|
||||
print("{output}\n".format(output=output.rstrip("\n")))
|
||||
|
||||
|
||||
def to_notes(
    formatter: Formatter,
    document: papis.document.Document,
    annotations: list[Annotation],
    edit: bool,
    git: bool,
    force: bool,
) -> None:
    """Write annotations into document notes.

    Formats the annotations with ``formatter``, splits the result on
    newlines and hands the pieces to ``_add_annots_to_note`` so they are
    appended to the note file of ``document``.

    :param formatter: Callable invoked as ``formatter(document, annotations)``
        that returns the text to store.
    :param document: The papis document whose note receives the annotations.
    :param annotations: The annotations extracted for ``document``.
    :param edit: If True, pass the document to
        ``papis.commands.edit.edit_notes`` afterwards.
    :param git: If True, changes are committed to git, both when the
        annotations are appended and when the note is edited.
    :param force: If True, annotations are written even if they already
        exist in the note.
    """
    formatted_annotations = formatter(document, annotations).split("\n")
    if formatted_annotations:
        # Forward ``git`` as well: previously the flag only reached
        # ``edit_notes``, so appended annotations were never committed
        # unless the note was also opened for editing.
        _add_annots_to_note(document, formatted_annotations, git=git, force=force)

    if edit:
        papis.commands.edit.edit_notes(document, git=git)
|
||||
|
||||
|
||||
def _add_annots_to_note(
    document: papis.document.Document,
    formatted_annotations: list[str],
    git: bool = False,
    force: bool = False,
) -> None:
    """Append new annotations to the end of a note.

    Reads the current note file of ``document``, decides which of the
    given annotations to write, and appends them separated by blank lines.
    Nothing is written when no annotation remains.

    :param document: The document object representing the note.
    :param formatted_annotations: Already formatted annotations to be added.
    :param git: If True, the note (and the document info file) are
        committed to git after writing, defaults to False.
    :param force: If True, annotations are added even if they already exist
        in the note, defaults to False.
    """
    logger.debug("Adding annotations to note.")
    notes_path = papis.notes.notes_path_ensured(document)

    with open(notes_path, "r") as file_read:
        existing: list[str] = file_read.readlines()

    if force:
        # Forced mode skips duplicate detection entirely. Previously the
        # list stayed empty in this branch, so forcing wrote nothing.
        # Empty entries are still dropped, as the non-forced path does.
        new_annotations = [an for an in formatted_annotations if an]
    else:
        new_annotations = _drop_existing_annotations(formatted_annotations, existing)
    if not new_annotations:
        return

    with open(notes_path, "a") as f:
        # add a newline if there is no empty space at the file end
        if existing and existing[-1].strip() != "":
            f.write("\n")
        f.write("\n\n".join(new_annotations))
        f.write("\n")

    # Log and commit only after the file is closed so that the commit
    # sees the flushed content.
    logger.info(
        f"Wrote {len(new_annotations)} "
        f"{'line' if len(new_annotations) == 1 else 'lines'} "
        f"to {papis.document.describe(document)}"
    )

    if git:
        msg = "Update notes for '{0}'".format(papis.document.describe(document))
        folder = document.get_main_folder()
        if folder:
            papis.git.add_and_commit_resources(
                folder, [notes_path, document.get_info_file()], msg
            )
|
||||
|
||||
|
||||
def _drop_existing_annotations(
    formatted_annotations: list[str], file_lines: list[str]
) -> list[str]:
    """Return the input annotations without those already present.

    Each annotation is represented by its first line, which is compared
    against ``file_lines`` (most probably the existing lines of a note
    file) through ``_test_similarity``. If any annotation matches an
    existing line closely enough, it is dropped. Annotations without any
    line (empty strings) are dropped as well.

    The threshold is read with
    ``papis.config.getfloat("minimum_similarity", "plugins.extract")`` and
    falls back to ``1.0`` when that value is falsy.

    Returns the list of annotations without duplicates, in input order.
    """
    threshold = papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0

    def _is_new(annotation: str) -> bool:
        # Only the first line of an annotation is used for the comparison.
        annotation_lines = annotation.splitlines()
        if not annotation_lines:
            return False
        return not _test_similarity(annotation_lines[0], file_lines, threshold)

    return [candidate for candidate in formatted_annotations if _is_new(candidate)]
|
||||
|
||||
|
||||
def _test_similarity(
    string: str, lines: list[str], minimum_similarity: float = 1.0
) -> bool:
    """Tell whether ``string`` is similar enough to any of ``lines``.

    :param string: The text to look for.
    :param lines: Candidate lines to compare against; a trailing newline
        (as left by ``readlines()``) is ignored for the comparison.
    :param minimum_similarity: Threshold for ``Levenshtein.ratio``; a line
        counts as a match when its ratio reaches this value.
    :returns: True if at least one line reaches the threshold.
    """
    # ``>=`` rather than ``>``: the ratio never exceeds 1.0, so with the
    # default threshold a strict comparison could never report a match.
    # The trailing newline is stripped for the same reason: otherwise an
    # identical line read from a file would score below 1.0.
    return any(
        Levenshtein.ratio(string, line.rstrip("\n")) >= minimum_similarity
        for line in lines
    )
|
||||
def run(
|
||||
self, annot_docs: list[tuple[papis.document.Document, list[Annotation]]]
|
||||
) -> None:
|
||||
...
|
||||
|
|
14
papis_extract/exporters/__init__.py
Normal file
14
papis_extract/exporters/__init__.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
|
||||
import papis.logging
|
||||
|
||||
from papis_extract.exporter import Exporter
|
||||
from papis_extract.exporters.notes import NotesExporter
|
||||
from papis_extract.exporters.stdout import StdoutExporter
|
||||
|
||||
|
||||
logger = papis.logging.get_logger(__name__)

# Registry of the available exporter classes, keyed by the name used to
# select them.
all_exporters: dict[str, type[Exporter]] = {
    "stdout": StdoutExporter,
    "notes": NotesExporter,
}
|
131
papis_extract/exporters/notes.py
Normal file
131
papis_extract/exporters/notes.py
Normal file
|
@ -0,0 +1,131 @@
|
|||
from dataclasses import dataclass
|
||||
import Levenshtein
|
||||
from papis.document import Document
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
from papis.logging import get_logger
|
||||
import papis.notes
|
||||
import papis.document
|
||||
import papis.git
|
||||
import papis.config
|
||||
import papis.commands.edit
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
class NotesExporter:
    """Exporter that appends formatted annotations to papis document notes."""

    # Callable invoked as ``formatter(document, annotations)``; returns text.
    formatter: Formatter
    # Pass each document to ``papis.commands.edit.edit_notes`` after writing.
    edit: bool = False
    # Commit note changes to git.
    git: bool = False
    # Write annotations even if they already exist in the note.
    force: bool = False

    def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
        """Write annotations into document notes.

        For every ``(document, annotations)`` pair the annotations are
        formatted, split on newlines and appended to the note of the
        document. If ``edit`` is set, the document is afterwards passed to
        ``papis.commands.edit.edit_notes``.

        :param annot_docs: Pairs of a document and its annotations.
        """
        for doc, annots in annot_docs:
            formatted_annotations = self.formatter(doc, annots).split("\n")
            if formatted_annotations:
                # Forward ``git`` too: previously the flag only reached
                # ``edit_notes``, so appended annotations were never
                # committed unless the note was also edited.
                self._add_annots_to_note(
                    doc, formatted_annotations, git=self.git, force=self.force
                )

            if self.edit:
                papis.commands.edit.edit_notes(doc, git=self.git)

    def _add_annots_to_note(
        self,
        document: Document,
        formatted_annotations: list[str],
        git: bool = False,
        force: bool = False,
    ) -> None:
        """Append new annotations to the end of a note.

        Reads the current note file of ``document``, decides which of the
        given annotations to write, and appends them separated by blank
        lines. Nothing is written when no annotation remains.

        :param document: The document object representing the note.
        :param formatted_annotations: Already formatted annotations to add.
        :param git: If True, the note (and the document info file) are
            committed to git after writing, defaults to False.
        :param force: If True, annotations are added even if they already
            exist in the note, defaults to False.
        """
        logger.debug("Adding annotations to note.")
        notes_path = papis.notes.notes_path_ensured(document)

        with open(notes_path, "r") as file_read:
            existing: list[str] = file_read.readlines()

        if force:
            # Forced mode skips duplicate detection entirely. Previously
            # the list stayed empty in this branch, so forcing wrote
            # nothing. Empty entries are dropped, matching the other path.
            new_annotations = [an for an in formatted_annotations if an]
        else:
            new_annotations = self._drop_existing_annotations(
                formatted_annotations, existing
            )
        if not new_annotations:
            return

        with open(notes_path, "a") as f:
            # add a newline if there is no empty space at the file end
            if existing and existing[-1].strip() != "":
                f.write("\n")
            f.write("\n\n".join(new_annotations))
            f.write("\n")

        # Log and commit only after the file is closed so that the commit
        # sees the flushed content.
        logger.info(
            f"Wrote {len(new_annotations)} "
            f"{'line' if len(new_annotations) == 1 else 'lines'} "
            f"to {papis.document.describe(document)}"
        )

        if git:
            msg = "Update notes for '{0}'".format(papis.document.describe(document))
            folder = document.get_main_folder()
            if folder:
                papis.git.add_and_commit_resources(
                    folder, [notes_path, document.get_info_file()], msg
                )

    def _drop_existing_annotations(
        self, formatted_annotations: list[str], file_lines: list[str]
    ) -> list[str]:
        """Return the input annotations without those already present.

        Takes a list of formatted annotations and a list of strings
        (most probably existing lines in a file). If the first line of an
        annotation matches an existing line closely enough, the annotation
        is dropped. Empty annotations are dropped as well.

        The threshold is read with
        ``papis.config.getfloat("minimum_similarity", "plugins.extract")``
        and falls back to ``1.0`` when that value is falsy.

        Returns the list of annotations without duplicates.
        """
        minimum_similarity = (
            papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0
        )

        remaining: list[str] = []
        for an in formatted_annotations:
            an_split = an.splitlines()
            if an_split and not self._test_similarity(
                an_split[0], file_lines, minimum_similarity
            ):
                remaining.append(an)

        return remaining

    def _test_similarity(
        self, string: str, lines: list[str], minimum_similarity: float = 1.0
    ) -> bool:
        """Tell whether ``string`` is similar enough to any of ``lines``.

        :param string: The text to look for.
        :param lines: Candidate lines; a trailing newline (as left by
            ``readlines()``) is ignored for the comparison.
        :param minimum_similarity: Threshold for ``Levenshtein.ratio``; a
            line counts as a match when its ratio reaches this value.
        :returns: True if at least one line reaches the threshold.
        """
        # ``>=`` rather than ``>``: the ratio never exceeds 1.0, so with
        # the default threshold a strict comparison could never match.
        # The newline is stripped so identical lines score exactly 1.0.
        return any(
            Levenshtein.ratio(string, line.rstrip("\n")) >= minimum_similarity
            for line in lines
        )
|
26
papis_extract/exporters/stdout.py
Normal file
26
papis_extract/exporters/stdout.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
from papis.document import Document
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
|
||||
|
||||
@dataclass
class StdoutExporter:
    """Exporter that prints formatted annotations to standard output."""

    # Callable invoked as ``formatter(document, annotations)``; returns text.
    formatter: Formatter
    # The remaining flags are not read by this exporter's ``run``.
    edit: bool = False
    git: bool = False
    force: bool = False

    def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
        """Pretty print annotations to stdout.

        Formats each document together with its annotations and prints the
        result followed by one blank line. Documents for which the
        formatter returns an empty string print nothing.

        Gives a human-readable representation in somewhat of a list form;
        not intended for machine-readability.
        """
        for document, annotations in annot_docs:
            rendered: str = self.formatter(document, annotations)
            if not rendered:
                continue
            # Normalise trailing newlines so every entry ends the same way.
            trimmed = rendered.rstrip("\n")
            print("{output}\n".format(output=trimmed))
|
Loading…
Reference in a new issue