feat: Notify formatters if formatting first entry
This allows a formatter to create a header that is added *only* to the very first entry it formats, rather than to every entry. Exporters signal this by passing a `first` flag to the formatter. Currently, for example, the CSV formatter uses it to emit the CSV header row once instead of repeating it for each document in turn.
This commit is contained in:
parent
9eb7399536
commit
c2aec7add6
4 changed files with 39 additions and 7 deletions
|
@ -28,7 +28,8 @@ class NotesExporter:
|
||||||
documents missing a note field or appends to existing.
|
documents missing a note field or appends to existing.
|
||||||
"""
|
"""
|
||||||
for doc, annots in annot_docs:
|
for doc, annots in annot_docs:
|
||||||
formatted_annotations = self.formatter(doc, annots).split("\n")
|
# first always true since we write single doc per note
|
||||||
|
formatted_annotations: list[str] = self.formatter(doc, annots, first=True).split("\n")
|
||||||
if formatted_annotations:
|
if formatted_annotations:
|
||||||
self._add_annots_to_note(doc, formatted_annotations, force=self.force)
|
self._add_annots_to_note(doc, formatted_annotations, force=self.force)
|
||||||
|
|
||||||
|
@ -80,7 +81,9 @@ class NotesExporter:
|
||||||
# add newline if theres no empty space at file end
|
# add newline if theres no empty space at file end
|
||||||
if len(existing) > 0 and existing[-1].strip() != "":
|
if len(existing) > 0 and existing[-1].strip() != "":
|
||||||
f.write("\n")
|
f.write("\n")
|
||||||
f.write("\n\n".join(new_annotations))
|
# FIXME this either joins them too close or moves them too far apart
|
||||||
|
# We need a better algorithm which knows what a full 'annotation' is.
|
||||||
|
f.write("\n".join(new_annotations))
|
||||||
f.write("\n")
|
f.write("\n")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Wrote {len(new_annotations)} "
|
f"Wrote {len(new_annotations)} "
|
||||||
|
|
|
@ -20,7 +20,9 @@ class StdoutExporter:
|
||||||
the annotations in somewhat of a list form.
|
the annotations in somewhat of a list form.
|
||||||
Not intended for machine-readability.
|
Not intended for machine-readability.
|
||||||
"""
|
"""
|
||||||
|
first_entry = True
|
||||||
for doc, annots in annot_docs:
|
for doc, annots in annot_docs:
|
||||||
output: str = self.formatter(doc, annots)
|
output: str = self.formatter(doc, annots, first=first_entry)
|
||||||
if output:
|
if output:
|
||||||
print("{output}\n".format(output=output.rstrip("\n")))
|
print("{output}\n".format(output=output.rstrip("\n")))
|
||||||
|
first_entry = False
|
||||||
|
|
|
@ -1,15 +1,29 @@
|
||||||
from collections.abc import Callable
|
from typing import Protocol
|
||||||
|
|
||||||
from papis.document import Document
|
from papis.document import Document
|
||||||
|
|
||||||
from papis_extract.annotation import Annotation
|
from papis_extract.annotation import Annotation
|
||||||
|
|
||||||
Formatter = Callable[[Document, list[Annotation]], str]
|
class Formatter(Protocol):
|
||||||
|
"""Basic formatter protocol.
|
||||||
|
|
||||||
|
Every valid formatter must implement at least this protocol.
|
||||||
|
A formatter is a function which receives a document and a list
|
||||||
|
of annotations and spits them out in some formatted way.
|
||||||
|
|
||||||
|
Formatters additionally must take the (often optional) passed
|
||||||
|
parameter 'first' which signals to the formatter that the current
|
||||||
|
document entry is the very first one to be printed in whatever
|
||||||
|
exporter is used, if multiple entries are printed.
|
||||||
|
This can be useful for adding a header if necessary for the format.
|
||||||
|
"""
|
||||||
|
def __call__(self, document: Document, annotations: list[Annotation], first: bool) -> str:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
def format_markdown(
|
def format_markdown(
|
||||||
document: Document = Document(),
|
document: Document = Document(),
|
||||||
annotations: list[Annotation] = [],
|
annotations: list[Annotation] = [],
|
||||||
|
first: bool = False,
|
||||||
headings: str = "setext", # setext | atx | None
|
headings: str = "setext", # setext | atx | None
|
||||||
) -> str:
|
) -> str:
|
||||||
if not annotations:
|
if not annotations:
|
||||||
|
@ -43,6 +57,7 @@ def format_markdown(
|
||||||
def format_markdown_atx(
|
def format_markdown_atx(
|
||||||
document: Document = Document(),
|
document: Document = Document(),
|
||||||
annotations: list[Annotation] = [],
|
annotations: list[Annotation] = [],
|
||||||
|
first: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
return format_markdown(document, annotations, headings="atx")
|
return format_markdown(document, annotations, headings="atx")
|
||||||
|
|
||||||
|
@ -50,6 +65,7 @@ def format_markdown_atx(
|
||||||
def format_markdown_setext(
|
def format_markdown_setext(
|
||||||
document: Document = Document(),
|
document: Document = Document(),
|
||||||
annotations: list[Annotation] = [],
|
annotations: list[Annotation] = [],
|
||||||
|
first: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
return format_markdown(document, annotations, headings="setext")
|
return format_markdown(document, annotations, headings="setext")
|
||||||
|
|
||||||
|
@ -57,6 +73,7 @@ def format_markdown_setext(
|
||||||
def format_count(
|
def format_count(
|
||||||
document: Document = Document(),
|
document: Document = Document(),
|
||||||
annotations: list[Annotation] = [],
|
annotations: list[Annotation] = [],
|
||||||
|
first: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
if not annotations:
|
if not annotations:
|
||||||
return ""
|
return ""
|
||||||
|
@ -76,13 +93,14 @@ def format_count(
|
||||||
def format_csv(
|
def format_csv(
|
||||||
document: Document = Document(),
|
document: Document = Document(),
|
||||||
annotations: list[Annotation] = [],
|
annotations: list[Annotation] = [],
|
||||||
|
first: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
header: str = "type,tag,page,quote,note,author,title,ref,file"
|
header: str = "type,tag,page,quote,note,author,title,ref,file"
|
||||||
template: str = (
|
template: str = (
|
||||||
'{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
|
'{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
|
||||||
'"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
|
'"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
|
||||||
)
|
)
|
||||||
output = f"{header}\n"
|
output = f"{header}\n" if first else ""
|
||||||
if not annotations:
|
if not annotations:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,15 @@ def test_count_default():
|
||||||
def test_csv_default():
|
def test_csv_default():
|
||||||
fmt = format_csv
|
fmt = format_csv
|
||||||
assert fmt(document, annotations) == (
|
assert fmt(document, annotations) == (
|
||||||
|
'Highlight,,0,"my lovely text","","document-author",'
|
||||||
|
'"document-title","","myfile.pdf"\n'
|
||||||
|
'Highlight,,0,"my second text","with note","document-author",'
|
||||||
|
'"document-title","","myfile.pdf"'
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_csv_with_header():
|
||||||
|
fmt = format_csv
|
||||||
|
assert fmt(document, annotations, first=True) == (
|
||||||
"type,tag,page,quote,note,author,title,ref,file\n"
|
"type,tag,page,quote,note,author,title,ref,file\n"
|
||||||
'Highlight,,0,"my lovely text","","document-author",'
|
'Highlight,,0,"my lovely text","","document-author",'
|
||||||
'"document-title","","myfile.pdf"\n'
|
'"document-title","","myfile.pdf"\n'
|
||||||
|
|
Loading…
Reference in a new issue