papis-extract/papis_extract/formatter.py

from dataclasses import dataclass, field
from typing import Protocol

from papis_extract.annotation import AnnotatedDocument


@dataclass
class Formatter(Protocol):
    """Structural interface shared by the formatter classes in this module.

    A formatter holds a list of annotated documents together with template
    strings and turns them into one output string through ``execute``.
    """

    # Documents (with their annotations) that ``execute`` renders when it is
    # called without an explicit ``doc``.
    annotated_docs: list[AnnotatedDocument]
    # Text meant for the top of the output; of the formatters in this module
    # only CsvFormatter emits it.
    header: str
    # Per-annotation template; implementations hand it to the annotation's
    # ``format`` method. The defaults look like mustache templates.
    string: str
    # Text meant for the end of the output.
    # NOTE(review): no formatter visible in this module emits it -- confirm
    # whether that is intended.
    footer: str

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Render annotations into a single string.

        Args:
            doc: When given, implementations in this module format only this
                document; when ``None`` they format every entry of
                ``annotated_docs``.

        Returns:
            The formatted output.

        Raises:
            NotImplementedError: Always, in this base definition; concrete
                formatters supply the real implementation.
        """
        raise NotImplementedError


@dataclass
class MarkdownFormatter:
    """Format annotated documents as plain-text/markdown sections.

    Each document that has annotations becomes one section: a heading made of
    the title and author framed by decoration lines, followed by every
    annotation rendered through the ``string`` template. Sections are
    separated by three newlines.
    """

    annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
    header: str = ""
    string: str = (
        "{{#tag}}#{{tag}}\n{{/tag}}"
        "{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n"
        "{{#note}}  NOTE: {{note}}{{/note}}"
    )
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Render the annotations of one or all documents.

        Args:
            doc: Format only this document when given; otherwise format every
                entry of ``annotated_docs``.

        Returns:
            The concatenated sections, or an empty string when there is
            nothing to format (no documents, or none with annotations).
        """
        documents = self.annotated_docs if doc is None else [doc]

        # Collect one chunk per document and join afterwards. This keeps the
        # separator strictly *between* emitted sections, so an empty document
        # list no longer raises IndexError and a trailing document without
        # annotations no longer leaves a dangling separator.
        sections: list[str] = []
        for entry in documents:
            if not entry.annotations:
                continue

            # Missing metadata degrades to an empty string instead of raising
            # KeyError, matching how the decoration widths are computed.
            title = entry.document.get("title", "")
            author = entry.document.get("author", "")

            title_decoration = f"{'=' * len(title)}   {'-' * len(author)}"
            heading = (
                f"{title_decoration}\n"
                f"{title} - {author}\n"
                f"{title_decoration}\n\n"
            )
            body = "".join(a.format(self.string) for a in entry.annotations)
            sections.append(heading + body)

        return "\n\n\n".join(sections)


@dataclass
class CountFormatter:
    """Report how many annotations each annotated document contains.

    Produces one ``<author> - <title>: <count>`` line per document that has
    at least one annotation. The template attributes are kept only so the
    class has the same shape as the other formatters; ``execute`` does not
    read them.
    """

    annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
    header: str = ""
    string: str = ""
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Build the per-document annotation count listing.

        Args:
            doc: Count only this document when given; otherwise count every
                entry of ``annotated_docs``.

        Returns:
            One newline-terminated line per document with annotations, or an
            empty string when there are none.
        """
        documents = self.annotated_docs if doc is None else [doc]

        lines: list[str] = []
        for entry in documents:
            if not entry.annotations:
                continue

            d = entry.document
            # Only emit the " - " separator when there is an author to separate.
            author_part = f"{d['author']} - " if "author" in d else ""
            title = d["title"] if "title" in d else ""
            lines.append(f"{author_part}{title}: {len(entry.annotations)}\n")

        return "".join(lines)


@dataclass
class CsvFormatter:
    """Emit annotations as comma-separated rows below a fixed header line.

    Every annotation of every document with annotations becomes one row,
    rendered from the ``string`` template with the owning document passed
    along as ``doc``.

    NOTE(review): this class performs no CSV escaping itself; text fields are
    only wrapped in double quotes by the template. Whether embedded quotes are
    handled depends on the annotation's ``format`` method -- confirm.
    """

    annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
    header: str = "type,tag,page,quote,note,author,title,ref,file"
    string: str = (
        '{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
        '"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
    )
    footer: str = ""

    def execute(self, doc: AnnotatedDocument | None = None) -> str:
        """Render the header followed by one row per annotation.

        Args:
            doc: Restrict the output to this document when given; otherwise
                all of ``annotated_docs`` are used.

        Returns:
            The header line and all rows, each terminated by a newline.
        """
        selected = [doc] if doc is not None else self.annotated_docs

        # The header is always the first line, even when no rows follow.
        rows = [self.header]
        for annotated in selected:
            if annotated.annotations:
                source_document = annotated.document
                rows.extend(
                    annotation.format(self.string, doc=source_document)
                    for annotation in annotated.annotations
                )

        # Every line, the header included, ends with a newline.
        return "\n".join(rows) + "\n"


@dataclass
class CustomFormatter:
    """Container for user-supplied header, annotation and footer templates.

    The three templates are declared as dataclass fields so that the
    generated ``__eq__`` and ``__repr__`` reflect their values. The generated
    constructor accepts ``header``, ``string`` and ``footer`` in that order,
    each defaulting to an empty string.

    NOTE(review): unlike the other formatters in this module, this class has
    no ``annotated_docs`` attribute and no ``execute`` method, so it does not
    satisfy the ``Formatter`` protocol -- confirm whether that is intended.
    """

    # Template for the top of the output.
    header: str = ""
    # Template applied to individual annotations.
    string: str = ""
    # Template for the end of the output.
    footer: str = ""
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`from dataclasses import dataclass, field`
			`from typing import Protocol`

refactor: Move Annotations into annotation module 2023-09-20 15:22:29 +00:00			`from papis_extract.annotation import AnnotatedDocument`
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, containing the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and make formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00

			`@dataclass`
			`class Formatter(Protocol):`
			`annotated_docs: list[AnnotatedDocument]`
			`header: str`
			`string: str`
			`footer: str`

			`def execute(self, doc: AnnotatedDocument \| None = None) -> str:`
			`raise NotImplementedError`


			`@dataclass`
			`class MarkdownFormatter:`
			`annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())`
			`header: str = ""`
			`string: str = (`
			`"{{#tag}}#{{tag}}\n{{/tag}}"`
			`"{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}\n"`
			`"{{#note}} NOTE: {{note}}{{/note}}"`
			`)`
			`footer: str = ""`

			`def execute(self, doc: AnnotatedDocument \| None = None) -> str:`
			`output = ""`
			`documents = self.annotated_docs if doc is None else [doc]`
			`last = documents[-1]`
			`for entry in documents:`
			`if not entry.annotations:`
			`continue`

			`title_decoration = (`
			`f"{'=' * len(entry.document.get('title', ''))} "`
			`f"{'-' * len(entry.document.get('author', ''))}"`
			`)`
			`output += (`
			`f"{title_decoration}\n"`
			`f"{entry.document['title']} - {entry.document['author']}\n"`
			`f"{title_decoration}\n\n"`
			`)`
			`for a in entry.annotations:`
			`output += a.format(self.string)`

			`if entry != last:`
			`output += "\n\n\n"`

			`return output`

feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`@dataclass`
			`class CountFormatter:`
			`annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())`
			`header: str = ""`
			`string: str = ""`
			`footer: str = ""`

			`def execute(self, doc: AnnotatedDocument \| None = None) -> str:`
			`documents = self.annotated_docs if doc is None else [doc]`
feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00			`output = ""`
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`for entry in documents:`
			`if not entry.annotations:`
			`continue`

feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00			`count = 0`
			`for _ in entry.annotations:`
			`count += 1`

			`d = entry.document`
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`output += (`
feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00			`f"{d['author'] if 'author' in d else ''}"`
feat: Add CSV formatter Added formatter for csv-compatible syntax. The formatting is quite basic with no escaping happening should that be necessary. However, for an initial csv output it suffices for me. 2023-09-20 06:49:55 +00:00			`f"{' - ' if 'author' in d else ''}" # only put separator if author`
feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00			`f"{entry.document['title'] if 'title' in d else ''}: "`
			`f"{count}\n"`
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`)`

			`return output`

feat: Add count formatter Added formatter which counts and outputs the number of annotations in each document. 2023-09-20 06:38:06 +00:00
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`@dataclass`
			`class CsvFormatter:`
feat: Add CSV formatter Added formatter for csv-compatible syntax. The formatting is quite basic with no escaping happening should that be necessary. However, for an initial csv output it suffices for me. 2023-09-20 06:49:55 +00:00			`annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())`
			`header: str = "type,tag,page,quote,note,author,title,ref,file"`
			`string: str = (`
			`'{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'`
			`'"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'`
			`)`
refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, continating the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and made formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00			`footer: str = ""`

feat: Add CSV formatter Added formatter for csv-compatible syntax. The formatting is quite basic with no escaping happening should that be necessary. However, for an initial csv output it suffices for me. 2023-09-20 06:49:55 +00:00			`def execute(self, doc: AnnotatedDocument \| None = None) -> str:`
			`documents = self.annotated_docs if doc is None else [doc]`
			`output = f"{self.header}\n"`
			`for entry in documents:`
			`if not entry.annotations:`
			`continue`

			`d = entry.document`
			`for a in entry.annotations:`
			`output += a.format(self.string, doc=d)`
			`output += "\n"`

			`return output`

refactor: Move formatting logic to formatters Formatters (previously templates) were pure data containers before, containing the 'template' for how things should be formatted using mustache. The formatting would be done a) in the exporters and b) in the annotations. This spread of formatting has now been consolidated into the Formatter, which fixes the overall spread of formatting code and now can coherently format a whole output instead of just individual annotations. A formatter contains references to all documents and contained annotations and will format everything at once by default, but the formatting function can be invoked with reference to a specific annotated document to only format that. This commit should put more separation into the concerns of exporter and formatter and make formatting a concern purely of the formatters and annotation objects. 2023-09-19 19:43:19 +00:00
			`@dataclass`
			`class CustomFormatter:`
			`def __init__(self, header: str = "", string: str = "", footer: str = "") -> None:`
			`self.header = header`
			`self.string = string`
			`self.footer = footer`