feat: Notify formatters if formatting first entry

This allows a formatter to create headers which are added *only*
to the very first entry created and not to every entry.
For example, this is currently used to emit the CSV header once
instead of repeating it for each document in turn.
This commit is contained in:
Marty Oehme 2024-06-12 11:45:35 +02:00
parent 9eb7399536
commit c2aec7add6
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
4 changed files with 39 additions and 7 deletions

View file

@ -28,7 +28,8 @@ class NotesExporter:
documents missing a note field or appends to existing.
"""
for doc, annots in annot_docs:
formatted_annotations = self.formatter(doc, annots).split("\n")
# first always true since we write single doc per note
formatted_annotations: list[str] = self.formatter(doc, annots, first=True).split("\n")
if formatted_annotations:
self._add_annots_to_note(doc, formatted_annotations, force=self.force)
@ -80,7 +81,9 @@ class NotesExporter:
# add newline if there's no empty space at file end
if len(existing) > 0 and existing[-1].strip() != "":
f.write("\n")
f.write("\n\n".join(new_annotations))
# FIXME this either joins them too close or moves them too far apart
# We need a better algorithm which knows what a full 'annotation' is.
f.write("\n".join(new_annotations))
f.write("\n")
logger.info(
f"Wrote {len(new_annotations)} "

View file

@ -20,7 +20,9 @@ class StdoutExporter:
the annotations in somewhat of a list form.
Not intended for machine-readability.
"""
first_entry = True
for doc, annots in annot_docs:
output: str = self.formatter(doc, annots)
output: str = self.formatter(doc, annots, first=first_entry)
if output:
print("{output}\n".format(output=output.rstrip("\n")))
first_entry = False

View file

@ -1,15 +1,29 @@
from collections.abc import Callable
from typing import Protocol
from papis.document import Document
from papis_extract.annotation import Annotation
Formatter = Callable[[Document, list[Annotation]], str]
class Formatter(Protocol):
    """Structural type shared by every annotation formatter.

    A formatter is a callable that takes a document together with the
    list of its annotations and returns them rendered as one string.

    The ``first`` flag tells the formatter whether the entry it is
    rendering is the very first one the exporter emits when several
    entries are printed. Formatters can use it to prepend output that
    must appear only once, such as a header required by the format.
    """

    def __call__(
        self,
        document: Document,
        annotations: list[Annotation],
        first: bool,
    ) -> str: ...
def format_markdown(
document: Document = Document(),
annotations: list[Annotation] = [],
first: bool = False,
headings: str = "setext", # setext | atx | None
) -> str:
if not annotations:
@ -43,6 +57,7 @@ def format_markdown(
def format_markdown_atx(
    document: Document = Document(),
    annotations: list[Annotation] = [],
    first: bool = False,
) -> str:
    """Format annotations as markdown using ``headings="atx"``.

    Thin wrapper around ``format_markdown`` that pins the heading style.

    Args:
        document: The document the annotations belong to.
        annotations: The annotations to render.
        first: Whether this is the first entry emitted by the exporter;
            forwarded unchanged to ``format_markdown``.

    Returns:
        The string produced by ``format_markdown`` for these arguments.
    """
    # Forward ``first`` rather than dropping it, so this wrapper behaves
    # exactly like calling ``format_markdown`` directly with the same flag.
    return format_markdown(document, annotations, first=first, headings="atx")
@ -50,6 +65,7 @@ def format_markdown_atx(
def format_markdown_setext(
    document: Document = Document(),
    annotations: list[Annotation] = [],
    first: bool = False,
) -> str:
    """Format annotations as markdown using ``headings="setext"``.

    Thin wrapper around ``format_markdown`` that pins the heading style.

    Args:
        document: The document the annotations belong to.
        annotations: The annotations to render.
        first: Whether this is the first entry emitted by the exporter;
            forwarded unchanged to ``format_markdown``.

    Returns:
        The string produced by ``format_markdown`` for these arguments.
    """
    # Forward ``first`` rather than dropping it, so this wrapper behaves
    # exactly like calling ``format_markdown`` directly with the same flag.
    return format_markdown(document, annotations, first=first, headings="setext")
@ -57,6 +73,7 @@ def format_markdown_setext(
def format_count(
document: Document = Document(),
annotations: list[Annotation] = [],
first: bool = False,
) -> str:
if not annotations:
return ""
@ -76,13 +93,14 @@ def format_count(
def format_csv(
document: Document = Document(),
annotations: list[Annotation] = [],
first: bool = False,
) -> str:
header: str = "type,tag,page,quote,note,author,title,ref,file"
template: str = (
'{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
'"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
)
output = f"{header}\n"
output = f"{header}\n" if first else ""
if not annotations:
return ""

View file

@ -54,6 +54,15 @@ def test_count_default():
def test_csv_default():
    """Without ``first``, ``format_csv`` returns only the data rows."""
    expected_rows = (
        'Highlight,,0,"my lovely text","","document-author",'
        '"document-title","","myfile.pdf"\n'
        'Highlight,,0,"my second text","with note","document-author",'
        '"document-title","","myfile.pdf"'
    )
    assert format_csv(document, annotations) == expected_rows
def test_csv_with_header():
fmt = format_csv
assert fmt(document, annotations, first=True) == (
"type,tag,page,quote,note,author,title,ref,file\n"
'Highlight,,0,"my lovely text","","document-author",'
'"document-title","","myfile.pdf"\n'