refactor: Move formatting logic to formatters
Formatters (previously templates) were pure data containers before, containing the 'template' for how things should be formatted using mustache. The formatting itself was done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which removes the scattered formatting code and makes it possible to coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and their contained annotations and will format everything at once by default, but the formatting function can be invoked with a reference to a specific annotated document to format only that one. This commit puts more separation between the concerns of exporter and formatter and makes formatting purely a concern of the formatters and annotation objects.
This commit is contained in:
parent
66f937e2a8
commit
5a6d672c76
6 changed files with 138 additions and 101 deletions
100
papis_extract/formatter.py
Normal file
100
papis_extract/formatter.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Protocol
|
||||
|
||||
from papis_extract.annotation_data import AnnotatedDocument
|
||||
|
||||
|
||||
@dataclass
|
||||
class Formatter(Protocol):
|
||||
annotated_docs: list[AnnotatedDocument]
|
||||
header: str
|
||||
string: str
|
||||
footer: str
|
||||
|
||||
def execute(self, doc: AnnotatedDocument | None = None) -> str:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass
|
||||
class MarkdownFormatter:
|
||||
annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
|
||||
header: str = ""
|
||||
string: str = (
|
||||
"{{#tag}}#{{tag}}\n{{/tag}}"
|
||||
"{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n"
|
||||
"{{#note}} NOTE: {{note}}{{/note}}"
|
||||
)
|
||||
footer: str = ""
|
||||
|
||||
def execute(self, doc: AnnotatedDocument | None = None) -> str:
|
||||
output = ""
|
||||
documents = self.annotated_docs if doc is None else [doc]
|
||||
last = documents[-1]
|
||||
for entry in documents:
|
||||
if not entry.annotations:
|
||||
continue
|
||||
|
||||
title_decoration = (
|
||||
f"{'=' * len(entry.document.get('title', ''))} "
|
||||
f"{'-' * len(entry.document.get('author', ''))}"
|
||||
)
|
||||
output += (
|
||||
f"{title_decoration}\n"
|
||||
f"{entry.document['title']} - {entry.document['author']}\n"
|
||||
f"{title_decoration}\n\n"
|
||||
)
|
||||
for a in entry.annotations:
|
||||
output += a.format(self.string)
|
||||
|
||||
if entry != last:
|
||||
print(f"entry: {entry}, last: {last}")
|
||||
output += "\n\n\n"
|
||||
|
||||
return output
|
||||
|
||||
@dataclass
|
||||
class CountFormatter:
|
||||
annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
|
||||
header: str = ""
|
||||
string: str = ""
|
||||
footer: str = ""
|
||||
|
||||
def execute(self, doc: AnnotatedDocument | None = None) -> str:
|
||||
output = ""
|
||||
documents = self.annotated_docs if doc is None else [doc]
|
||||
last = documents[-1]
|
||||
for entry in documents:
|
||||
if not entry.annotations:
|
||||
continue
|
||||
|
||||
title_decoration = (
|
||||
f"{'=' * len(entry.document.get('title', ''))} "
|
||||
f"{'-' * len(entry.document.get('author', ''))}"
|
||||
)
|
||||
output += (
|
||||
f"{title_decoration}\n"
|
||||
f"{entry.document['title']} - {entry.document['author']}\n"
|
||||
f"{title_decoration}\n\n"
|
||||
)
|
||||
for a in entry.annotations:
|
||||
output += a.format(self.string)
|
||||
|
||||
if entry != last:
|
||||
print(f"entry: {entry}, last: {last}")
|
||||
output += "\n\n\n"
|
||||
|
||||
return output
|
||||
|
||||
@dataclass
class CsvFormatter:
    """Hold the text pieces for a comma-separated rendering of annotations.

    This class only stores data: a column header line, a per-annotation
    template and a footer. It defines no ``execute`` method.
    """

    # Column names, in the same order as the placeholders in ``string``.
    header: str = "type, tag, page, quote, note, file"
    # One placeholder per column named in ``header``.
    string: str = "{{type}}, {{tag}}, {{page}}, {{quote}}, {{note}}, {{file}}"
    footer: str = ""
|
||||
|
||||
|
||||
@dataclass
class CustomFormatter:
    """Hold caller-supplied header, template string and footer.

    The three values are declared as dataclass fields so that the generated
    ``__eq__`` and ``__repr__`` actually take them into account. The
    constructor signature is ``CustomFormatter(header="", string="",
    footer="")``. This class defines no ``execute`` method.
    """

    header: str = ""
    string: str = ""
    footer: str = ""
|
||||
Loading…
Add table
Add a link
Reference in a new issue