diff --git a/papis_extract/__init__.py b/papis_extract/__init__.py index f8f6941..9f9ad17 100644 --- a/papis_extract/__init__.py +++ b/papis_extract/__init__.py @@ -8,12 +8,7 @@ import papis.strings from papis.document import Document from papis_extract import extractor, exporter -from papis_extract.formatter import ( - CountFormatter, - CsvFormatter, - MarkdownFormatter, - Formatter, -) +from papis_extract.formatter import Formatter, format_count, format_csv, format_markdown logger = papis.logging.get_logger(__name__) @@ -82,11 +77,11 @@ def main( return if template == "csv": - formatter = CsvFormatter() + formatter = format_csv elif template == "count": - formatter = CountFormatter() + formatter = format_count else: - formatter = MarkdownFormatter() + formatter = format_markdown run(documents, edit=manual, write=write, git=git, formatter=formatter) @@ -98,8 +93,10 @@ def run( write: bool = False, git: bool = False, ) -> None: - formatter.annotated_docs = extractor.start(documents) + annotated_docs = extractor.start(documents) if write: - exporter.to_notes(formatter, edit=edit, git=git) + exporter.to_notes( + formatter=formatter, annotated_docs=annotated_docs, edit=edit, git=git + ) else: - exporter.to_stdout(formatter) + exporter.to_stdout(formatter=formatter, annotated_docs=annotated_docs) diff --git a/papis_extract/exporter.py b/papis_extract/exporter.py index 8f03ebd..5d22270 100644 --- a/papis_extract/exporter.py +++ b/papis_extract/exporter.py @@ -6,33 +6,35 @@ import papis.api import papis.git import papis.config import Levenshtein +from papis_extract.annotation import AnnotatedDocument from papis_extract.formatter import Formatter logger = papis.logging.get_logger(__name__) -def to_stdout(formatter: Formatter) -> None: +def to_stdout(formatter: Formatter, annotated_docs: list[AnnotatedDocument]) -> None: """Pretty print annotations to stdout. Gives a nice human-readable representations of the annotations in somewhat of a list form. Not intended for machine-readability. """ - output:str = formatter.execute() - print(output.rstrip('\n')) + output: str = formatter(annotated_docs) + print(output.rstrip("\n")) -def to_notes(formatter: Formatter, edit: bool, git: bool) -> None: +def to_notes( + formatter: Formatter, annotated_docs: list[AnnotatedDocument], edit: bool, git: bool +) -> None: """Write annotations into document notes. Permanently writes the given annotations into notes belonging to papis documents. Creates new notes for documents missing a note field or appends to existing. """ - annotated_docs = formatter.annotated_docs for entry in annotated_docs: - formatted_annotations = formatter.execute(entry).split("\n") + formatted_annotations = formatter([entry]).split("\n") if formatted_annotations: _add_annots_to_note(entry.document, formatted_annotations) @@ -67,7 +69,8 @@ def _add_annots_to_note( # add newline if theres no empty space at file end if len(existing) > 0 and existing[-1].strip() != "": f.write("\n") - f.write("\n".join(new_annotations)) + print(new_annotations) + f.write("\n\n".join(new_annotations)) f.write("\n") logger.info( f"Wrote {len(new_annotations)} " diff --git a/papis_extract/formatter.py b/papis_extract/formatter.py index 4ba782d..75056ea 100644 --- a/papis_extract/formatter.py +++ b/papis_extract/formatter.py @@ -1,114 +1,74 @@ -from dataclasses import dataclass, field -from typing import Protocol +from collections.abc import Callable from papis_extract.annotation import AnnotatedDocument - -@dataclass -class Formatter(Protocol): - annotated_docs: list[AnnotatedDocument] - header: str - string: str - footer: str - - def execute(self, doc: AnnotatedDocument | None = None) -> str: - raise NotImplementedError +Formatter = Callable[[list[AnnotatedDocument]], str] -@dataclass -class MarkdownFormatter: - annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list()) - header: str = "" - string: str = ( +def format_markdown(docs: list[AnnotatedDocument] = []) -> str: + template = ( "{{#tag}}#{{tag}}\n{{/tag}}" - "{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n" - "{{#note}} NOTE: {{note}}{{/note}}" + "{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}" + "\n{{#note}} NOTE: {{note}}{{/note}}" ) - footer: str = "" + output = "" + for entry in docs: + if not entry.annotations: + continue - def execute(self, doc: AnnotatedDocument | None = None) -> str: - output = "" - documents = self.annotated_docs if doc is None else [doc] - last = documents[-1] - for entry in documents: - if not entry.annotations: - continue + title_decoration = ( + f"{'=' * len(entry.document.get('title', ''))} " + f"{'-' * len(entry.document.get('author', ''))}" + ) + output += ( + f"{title_decoration}\n" + f"{entry.document['title']} - {entry.document['author']}\n" + f"{title_decoration}\n\n" + ) + for a in entry.annotations: + output += a.format(template) + output += "\n" - title_decoration = ( - f"{'=' * len(entry.document.get('title', ''))} " - f"{'-' * len(entry.document.get('author', ''))}" - ) - output += ( - f"{title_decoration}\n" - f"{entry.document['title']} - {entry.document['author']}\n" - f"{title_decoration}\n\n" - ) - for a in entry.annotations: - output += a.format(self.string) + output += "\n\n\n" - if entry != last: - output += "\n\n\n" - - return output + return output -@dataclass -class CountFormatter: - annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list()) - header: str = "" - string: str = "" - footer: str = "" +def format_count(docs: list[AnnotatedDocument] = []) -> str: + output = "" + for entry in docs: + if not entry.annotations: + continue - def execute(self, doc: AnnotatedDocument | None = None) -> str: - documents = self.annotated_docs if doc is None else [doc] - output = "" - for entry in documents: - if not entry.annotations: - continue + count = 0 + for _ in entry.annotations: + count += 1 - count = 0 - for _ in entry.annotations: - count += 1 + d = entry.document + output += ( + f"{d['author'] if 'author' in d else ''}" + f"{' - ' if 'author' in d else ''}" # only put separator if author + f"{entry.document['title'] if 'title' in d else ''}: " + f"{count}\n" + ) - d = entry.document - output += ( - f"{d['author'] if 'author' in d else ''}" - f"{' - ' if 'author' in d else ''}" # only put separator if author - f"{entry.document['title'] if 'title' in d else ''}: " - f"{count}\n" - ) - - return output + return output -@dataclass -class CsvFormatter: - annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list()) +def format_csv(docs: list[AnnotatedDocument] = []) -> str: header: str = "type,tag,page,quote,note,author,title,ref,file" - string: str = ( + template: str = ( '{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",' '"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"' ) - footer: str = "" + output = f"{header}\n" + for entry in docs: + if not entry.annotations: + continue - def execute(self, doc: AnnotatedDocument | None = None) -> str: - documents = self.annotated_docs if doc is None else [doc] - output = f"{self.header}\n" - for entry in documents: - if not entry.annotations: - continue + d = entry.document + for a in entry.annotations: + output += a.format(template, doc=d) + output += "\n" - d = entry.document - for a in entry.annotations: - output += a.format(self.string, doc=d) - output += "\n" - - return output - - -@dataclass -class CustomFormatter: - def __init__(self, header: str = "", string: str = "", footer: str = "") -> None: - self.header = header - self.string = string - self.footer = footer + return output