# Added formatter which counts and outputs the number of annotations in each document.
# 96 lines
# 2.7 KiB
from dataclasses import dataclass, field
from typing import Protocol
from papis_extract.annotation_data import AnnotatedDocument
class Formatter(Protocol):
    """Structural type for objects that render annotated documents as text.

    An implementation exposes the four attributes below and an ``execute``
    method that returns the rendered string.
    """

    # Documents (with their annotations) rendered when ``execute`` is called
    # without an explicit document.
    annotated_docs: list[AnnotatedDocument]
    # NOTE(review): header/footer are not consumed by any ``execute`` in this
    # module; presumably the caller emits them around the body -- confirm.
    header: str
    # Template handed to each annotation's ``format`` method.
    string: str
    footer: str

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Render ``doc``, or all of ``annotated_docs`` when ``doc`` is None.

        Args:
            doc: A single document to render instead of ``annotated_docs``.

        Returns:
            The formatted output as one string.

        Raises:
            NotImplementedError: Always, in this protocol stub; concrete
                formatters provide the actual rendering.
        """
        raise NotImplementedError
@dataclass
class MarkdownFormatter:
    """Render annotated documents as Markdown-flavoured text.

    Every document that has at least one annotation becomes one section: a
    decorated title line followed by its annotations, each formatted with the
    ``string`` template. Sections are separated by three newlines.
    """

    # Documents rendered when ``execute`` is called without an argument.
    annotated_docs: list[AnnotatedDocument] = field(default_factory=list)
    header: str = ""
    # Template passed to every annotation's ``format`` method.
    string: str = (
        "{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n"
        "{{#note}} NOTE: {{note}}{{/note}}"
    )
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Return the Markdown rendering of ``doc`` or of ``annotated_docs``.

        Args:
            doc: A single document to render; when None, every document in
                ``annotated_docs`` is rendered.

        Returns:
            The concatenated sections, or an empty string when there is
            nothing to render (no documents, or none with annotations).
        """
        documents = self.annotated_docs if doc is None else [doc]

        sections = []
        for entry in documents:
            # Documents without annotations produce no section at all.
            if not entry.annotations:
                continue

            # Missing metadata is rendered as an empty string rather than
            # raising, matching the lookups used for the decoration line.
            title = entry.document.get("title", "")
            author = entry.document.get("author", "")
            title_decoration = f"{'=' * len(title)} {'-' * len(author)}"

            # NOTE(review): placing the decoration above and below the title
            # line is a reconstruction -- confirm the intended layout.
            heading = (
                f"{title_decoration}\n"
                f"{title} - {author}\n"
                f"{title_decoration}\n\n"
            )
            body = "".join(a.format(self.string) for a in entry.annotations)
            sections.append(heading + body)

        # Joining only the rendered sections avoids indexing an empty list
        # and never leaves a dangling separator after the final section.
        return "\n\n\n".join(sections)
@dataclass
class CountFormatter:
    """Report the number of annotations found in each document.

    Every document that has at least one annotation yields one line of the
    form ``author - title: count``; the ``author - `` part is omitted when
    the document has no ``author`` key.
    """

    # Documents counted when ``execute`` is called without an argument.
    annotated_docs: list[AnnotatedDocument] = field(default_factory=list)
    # header/string/footer are declared for parity with the other formatters
    # in this module; ``execute`` below does not read them.
    header: str = ""
    string: str = ""
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Return one count line per annotated document.

        Args:
            doc: A single document to count; when None, every document in
                ``annotated_docs`` is processed.

        Returns:
            The concatenated count lines, or an empty string when no document
            has annotations.
        """
        documents = self.annotated_docs if doc is None else [doc]

        lines = []
        for entry in documents:
            # Documents without annotations are left out of the report.
            if not entry.annotations:
                continue

            # Counting by iteration works for any iterable of annotations.
            count = sum(1 for _ in entry.annotations)

            d = entry.document
            # Only put the separator if there is an author to separate.
            author_part = f"{d['author']} - " if "author" in d else ""
            title = d["title"] if "title" in d else ""
            # NOTE(review): one line per document (trailing newline) is
            # assumed here -- confirm the intended line ending.
            lines.append(f"{author_part}{title}: {count}\n")

        return "".join(lines)
@dataclass
class CsvFormatter:
    """Header line and per-annotation template for comma-separated output.

    The defaults describe six columns; each may be overridden per instance
    through the constructor.
    """

    # Column titles, in the same order as the placeholders in ``string``.
    header: str = "type, tag, page, quote, note, file"
    # NOTE(review): the template inserts values without quoting; presumably a
    # value containing a comma would shift the columns -- confirm how the
    # template is rendered.
    string: str = "{{type}}, {{tag}}, {{page}}, {{quote}}, {{note}}, {{file}}"
    footer: str = ""
@dataclass
class CustomFormatter:
    """Formatter pieces supplied entirely by the caller.

    The generated constructor accepts ``header``, ``string`` and ``footer``
    positionally or by keyword, each defaulting to an empty string.
    """

    header: str = ""
    string: str = ""
    footer: str = ""