refactor: Move formatting logic to formatters

Formatters (previously templates) were pure data containers before,
containing the 'template' for how things should be formatted using
mustache. The formatting would be done a) in the exporters and b) in the
annotations.

This spread of formatting has now been consolidated into the Formatter,
which fixes the overall spread of formatting code and now can coherently
format a whole output instead of just individual annotations.

A formatter contains references to all documents and contained
annotations and will format everything at once by default, but the
formatting function can be invoked with reference to a specific
annotated document to only format that.

This commit puts more separation between the concerns of exporter and
formatter and makes formatting purely a concern of the formatters and
annotation objects.
This commit is contained in:
Marty Oehme 2023-09-19 21:43:19 +02:00
parent 66f937e2a8
commit 5a6d672c76
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
6 changed files with 138 additions and 101 deletions

100
papis_extract/formatter.py Normal file
View file

@ -0,0 +1,100 @@
from dataclasses import dataclass, field
from typing import Protocol
from papis_extract.annotation_data import AnnotatedDocument
@dataclass
class Formatter(Protocol):
    """Structural interface shared by the output formatters in this module.

    A conforming object carries the annotated documents it can render plus
    three template strings, and exposes ``execute`` to produce the text.
    """

    # Documents, each bundled with its annotations, available for rendering.
    annotated_docs: list[AnnotatedDocument]
    # Template pieces; the concrete formatters hand ``string`` to each
    # annotation's ``format`` call.
    # NOTE(review): presumably header/footer wrap the rendered output --
    # confirm against the exporters.
    header: str
    string: str
    footer: str

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Return the formatted text.

        Args:
            doc: A single annotated document to render. Implementations in
                this module fall back to ``annotated_docs`` when it is
                ``None``.

        Raises:
            NotImplementedError: Always; the protocol supplies no
                implementation of its own.
        """
        raise NotImplementedError
@dataclass
class MarkdownFormatter:
    """Render annotated documents as Markdown-flavoured text.

    The default ``string`` template emits, per annotation, an optional
    ``#tag`` line, the quote as a ``>`` blockquote followed by an optional
    ``[p. N]`` page marker, and an optional ``NOTE:`` line.

    ``header`` and ``footer`` are stored on the instance but are not used
    by ``execute``.
    """

    annotated_docs: list[AnnotatedDocument] = field(default_factory=list)
    header: str = ""
    string: str = (
        "{{#tag}}#{{tag}}\n{{/tag}}"
        "{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n"
        "{{#note}}  NOTE: {{note}}{{/note}}"
    )
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Format all stored documents, or only ``doc`` when it is given.

        Each document that has annotations becomes one section: a heading
        framed by decoration lines, followed by every annotation rendered
        through the ``string`` template. Documents without annotations are
        skipped.

        Args:
            doc: A single annotated document to render instead of
                ``self.annotated_docs``.

        Returns:
            The sections joined by a ``"\\n\\n\\n"`` separator, or an empty
            string when nothing was rendered.
        """
        documents = self.annotated_docs if doc is None else [doc]

        sections = []
        for entry in documents:
            if not entry.annotations:
                continue

            # Read both fields through .get() so a document lacking a title
            # or author renders an empty field instead of raising KeyError.
            title = entry.document.get("title", "")
            author = entry.document.get("author", "")
            title_decoration = f"{'=' * len(title)} {'-' * len(author)}"

            heading = (
                f"{title_decoration}\n"
                f"{title} - {author}\n"
                f"{title_decoration}\n\n"
            )
            body = "".join(a.format(self.string) for a in entry.annotations)
            sections.append(heading + body)

        # Joining the rendered sections puts the separator strictly between
        # them: it works for an empty document list and leaves no trailing
        # separator when the final document has no annotations.
        return "\n\n\n".join(sections)
@dataclass
class CountFormatter:
    """Render a heading per annotated document using an empty template.

    NOTE(review): despite the name, nothing is counted here. Every
    annotation is rendered through ``string``, which defaults to an empty
    template, so with defaults only the document headings carry content.
    Confirm whether an annotation count was intended.

    ``header`` and ``footer`` are stored on the instance but are not used
    by ``execute``.
    """

    annotated_docs: list[AnnotatedDocument] = field(default_factory=list)
    header: str = ""
    string: str = ""
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Format all stored documents, or only ``doc`` when it is given.

        Each document that has annotations becomes one section: a heading
        framed by decoration lines, followed by every annotation rendered
        through the ``string`` template. Documents without annotations are
        skipped.

        Args:
            doc: A single annotated document to render instead of
                ``self.annotated_docs``.

        Returns:
            The sections joined by a ``"\\n\\n\\n"`` separator, or an empty
            string when nothing was rendered.
        """
        documents = self.annotated_docs if doc is None else [doc]

        sections = []
        for entry in documents:
            if not entry.annotations:
                continue

            # Read both fields through .get() so a document lacking a title
            # or author renders an empty field instead of raising KeyError.
            title = entry.document.get("title", "")
            author = entry.document.get("author", "")
            title_decoration = f"{'=' * len(title)} {'-' * len(author)}"

            heading = (
                f"{title_decoration}\n"
                f"{title} - {author}\n"
                f"{title_decoration}\n\n"
            )
            body = "".join(a.format(self.string) for a in entry.annotations)
            sections.append(heading + body)

        # Joining the rendered sections puts the separator strictly between
        # them: it works for an empty document list and leaves no trailing
        # separator when the final document has no annotations.
        return "\n\n\n".join(sections)
@dataclass
class CsvFormatter:
    """Template set for comma-separated output.

    Only the three template strings live here: this class declares neither
    an ``annotated_docs`` field nor an ``execute`` method.
    """

    # Column names, listed in the same order as the placeholders in `string`.
    header: str = "type, tag, page, quote, note, file"
    # Mustache-style row template with one placeholder per column.
    string: str = "{{type}}, {{tag}}, {{page}}, {{quote}}, {{note}}, {{file}}"
    # No closing text by default.
    footer: str = ""
@dataclass
class CustomFormatter:
    """Formatter templates supplied by the caller.

    The three templates are declared as dataclass fields rather than set in
    a hand-written ``__init__``. A dataclass only generates ``__eq__`` and
    ``__repr__`` from annotated fields; without them every instance
    compares equal and the repr hides the template contents. The
    constructor still accepts ``header``, ``string`` and ``footer``,
    positionally or by keyword, each defaulting to an empty string.
    """

    header: str = ""
    string: str = ""
    footer: str = ""