feat: Add CSV formatter

Added a formatter for CSV-compatible output. The formatting is quite basic:
no escaping is performed, even where it would be necessary. However, for an
initial CSV output it suffices for me.
This commit is contained in:
Marty Oehme 2023-09-20 08:49:55 +02:00
parent 5f0bc2ffad
commit e511ffa48d
Signed by: Marty
GPG Key ID: EDBF2ED917B2EF6A
4 changed files with 40 additions and 17 deletions

View File

@ -8,7 +8,12 @@ import papis.strings
from papis.document import Document
from papis_extract import extractor, exporter
from papis_extract.formatter import CountFormatter, MarkdownFormatter, Formatter
from papis_extract.formatter import (
CountFormatter,
CsvFormatter,
MarkdownFormatter,
Formatter,
)
logger = papis.logging.get_logger(__name__)
@ -80,24 +85,24 @@ def main(
return
if template == "csv":
raise NotImplementedError
formatter = CsvFormatter()
elif template == "count":
formatter = CountFormatter()
else:
formatter = MarkdownFormatter()
run(documents, edit=manual, write=write, git=git, template=formatter)
run(documents, edit=manual, write=write, git=git, formatter=formatter)
def run(
documents: list[Document],
template: Formatter,
formatter: Formatter,
edit: bool = False,
write: bool = False,
git: bool = False,
) -> None:
template.annotated_docs = extractor.start(documents)
formatter.annotated_docs = extractor.start(documents)
if write:
exporter.to_notes(template, edit=edit, git=git)
exporter.to_notes(formatter, edit=edit, git=git)
else:
exporter.to_stdout(template)
exporter.to_stdout(formatter)

View File

@ -34,7 +34,7 @@ class Annotation:
type: str = "Highlight"
minimum_similarity_color: float = 1.0
def format(self, template: str, doc: Document = Document()):
def format(self, formatting: str, doc: Document = Document()):
"""Return a formatted string of the annotation.
Given a provided formatting pattern, this method returns the annotation
@ -50,7 +50,7 @@ class Annotation:
"type": self.type,
"doc": doc,
}
return chevron.render(template, data)
return chevron.render(formatting, data)
@property
def colorname(self):

View File

@ -12,27 +12,27 @@ from papis_extract.formatter import Formatter
logger = papis.logging.get_logger(__name__)
def to_stdout(template: Formatter) -> None:
def to_stdout(formatter: Formatter) -> None:
"""Pretty print annotations to stdout.
Gives a nice human-readable representations of
the annotations in somewhat of a list form.
Not intended for machine-readability.
"""
output:str = template.execute()
output:str = formatter.execute()
print(output.rstrip('\n'))
def to_notes(template: Formatter, edit: bool, git: bool) -> None:
def to_notes(formatter: Formatter, edit: bool, git: bool) -> None:
"""Write annotations into document notes.
Permanently writes the given annotations into notes
belonging to papis documents. Creates new notes for
documents missing a note field or appends to existing.
"""
annotated_docs = template.annotated_docs
annotated_docs = formatter.annotated_docs
for entry in annotated_docs:
formatted_annotations = template.execute(entry).split("\n")
formatted_annotations = formatter.execute(entry).split("\n")
if formatted_annotations:
_add_annots_to_note(entry.document, formatted_annotations)

View File

@ -73,7 +73,7 @@ class CountFormatter:
d = entry.document
output += (
f"{d['author'] if 'author' in d else ''}"
f"{' - ' if 'author' in d else ''}" # only put separator if author
f"{' - ' if 'author' in d else ''}" # only put separator if author
f"{entry.document['title'] if 'title' in d else ''}: "
f"{count}\n"
)
@ -83,10 +83,28 @@ class CountFormatter:
@dataclass
class CsvFormatter:
header: str = "type, tag, page, quote, note, file"
string: str = "{{type}}, {{tag}}, {{page}}, {{quote}}, {{note}}, {{file}}"
annotated_docs: list[AnnotatedDocument] = field(default_factory=lambda: list())
header: str = "type,tag,page,quote,note,author,title,ref,file"
string: str = (
'{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
'"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
)
footer: str = ""
def execute(self, doc: AnnotatedDocument | None = None) -> str:
documents = self.annotated_docs if doc is None else [doc]
output = f"{self.header}\n"
for entry in documents:
if not entry.annotations:
continue
d = entry.document
for a in entry.annotations:
output += a.format(self.string, doc=d)
output += "\n"
return output
@dataclass
class CustomFormatter: