2024-06-12 09:45:35 +00:00
|
|
|
from typing import Protocol
|
2024-01-20 15:34:10 +00:00
|
|
|
from papis.document import Document
|
2023-09-19 19:43:19 +00:00
|
|
|
|
2024-01-20 15:34:10 +00:00
|
|
|
from papis_extract.annotation import Annotation
|
|
|
|
|
2024-06-12 09:46:39 +00:00
|
|
|
|
2024-06-12 09:45:35 +00:00
|
|
|
class Formatter(Protocol):
    """Structural type that every formatter callable has to satisfy.

    A formatter is called with one document and the list of annotations
    belonging to it, and returns their formatted representation as a string.

    The ``first`` argument tells the formatter that the document it is given
    is the very first entry the exporter in use is going to print, should
    several entries be printed. Formats that need a header can use it to
    emit that header.
    """

    def __call__(
        self,
        document: Document,
        annotations: list[Annotation],
        first: bool,
    ) -> str:
        ...
|
2023-09-19 19:43:19 +00:00
|
|
|
|
|
|
|
|
2023-09-21 20:01:51 +00:00
|
|
|
def format_markdown(
    document: "Document | None" = None,
    annotations: "list[Annotation] | None" = None,
    first: bool = False,
    headings: str = "setext",  # setext | atx | None
) -> str:
    """Render the annotations of one document as a Markdown snippet.

    Args:
        document: Document the annotations belong to; its ``title`` and
            ``author`` entries make up the heading. When omitted, an empty
            ``Document()`` is used.
        annotations: Annotations to render. ``None`` or an empty list yields
            an empty string.
        first: Accepted so the function satisfies the ``Formatter``
            protocol; this function does not use it.
        headings: ``"atx"`` for a ``# heading`` line, ``"setext"`` for the
            heading framed by a decoration line above and below. Any other
            value (e.g. ``None``) leaves the heading out.

    Returns:
        The heading (if any) followed by the formatted annotations, separated
        by blank lines and without trailing whitespace.
    """
    # Defaults are resolved per call instead of in the signature, so no
    # Document or list instance is shared between calls. The annotations are
    # quoted so the ``|`` unions are never evaluated at runtime.
    if not annotations:
        return ""
    if document is None:
        document = Document()

    template = (
        "{{#tag}}#{{tag}}\n{{/tag}}"
        "{{#quote}}> {{quote}}{{/quote}}{{#page}} [p. {{page}}]{{/page}}"
        "{{#note}}\n NOTE: {{note}}{{/note}}"
    )

    # Stringify once so the decoration length always matches what the heading
    # actually shows, even for non-string metadata values.
    title = str(document.get("title", ""))
    author = str(document.get("author", ""))
    heading = f"{title} - {author}"

    header = ""
    if headings == "atx":
        header = f"# {heading}\n\n"
    elif headings == "setext":
        # Three spaces mirror the " - " separator of the heading, so both
        # decoration segments line up with the title and the author.
        title_decoration = f"{'=' * len(title)}   {'-' * len(author)}"
        header = f"{title_decoration}\n{heading}\n{title_decoration}\n\n"

    body = "\n\n".join(a.format(template) for a in annotations)
    return (header + body).rstrip()
|
2023-09-21 19:54:24 +00:00
|
|
|
|
|
|
|
|
2024-01-20 15:34:10 +00:00
|
|
|
def format_markdown_atx(
    document: "Document | None" = None,
    annotations: "list[Annotation] | None" = None,
    first: bool = False,
) -> str:
    """Render annotations as Markdown with an ATX (``# ...``) heading.

    Thin wrapper around ``format_markdown`` with ``headings="atx"``.

    Args:
        document: Document the annotations belong to. When omitted, an empty
            ``Document()`` is used.
        annotations: Annotations to render. When omitted, an empty list is
            used.
        first: Passed through unchanged to ``format_markdown``.

    Returns:
        Whatever ``format_markdown`` returns for these arguments.
    """
    # Defaults are resolved here instead of in the signature, so no Document
    # or list instance is shared between calls. The annotations are quoted so
    # the ``|`` unions are never evaluated at runtime.
    return format_markdown(
        Document() if document is None else document,
        [] if annotations is None else annotations,
        first=first,
        headings="atx",
    )
|
2023-09-21 20:01:51 +00:00
|
|
|
|
|
|
|
|
2024-01-20 15:34:10 +00:00
|
|
|
def format_markdown_setext(
    document: "Document | None" = None,
    annotations: "list[Annotation] | None" = None,
    first: bool = False,
) -> str:
    """Render annotations as Markdown with a setext-style heading.

    Thin wrapper around ``format_markdown`` with ``headings="setext"``.

    Args:
        document: Document the annotations belong to. When omitted, an empty
            ``Document()`` is used.
        annotations: Annotations to render. When omitted, an empty list is
            used.
        first: Passed through unchanged to ``format_markdown``.

    Returns:
        Whatever ``format_markdown`` returns for these arguments.
    """
    # Defaults are resolved here instead of in the signature, so no Document
    # or list instance is shared between calls. The annotations are quoted so
    # the ``|`` unions are never evaluated at runtime.
    return format_markdown(
        Document() if document is None else document,
        [] if annotations is None else annotations,
        first=first,
        headings="setext",
    )
|
2023-09-21 20:01:51 +00:00
|
|
|
|
|
|
|
|
2024-01-20 15:34:10 +00:00
|
|
|
def format_count(
    document: "Document | None" = None,
    annotations: "list[Annotation] | None" = None,
    first: bool = False,
) -> str:
    """Summarise a document as ``"<count> <author>: <title>"``.

    Args:
        document: Document the annotations belong to; its ``author`` and
            ``title`` entries are printed after the count. When omitted, an
            empty ``Document()`` is used.
        annotations: Annotations to count. ``None`` or an empty list yields
            an empty string.
        first: Accepted so the function satisfies the ``Formatter``
            protocol; this function does not use it.

    Returns:
        The number of annotations followed by author and title, without
        trailing whitespace, or ``""`` when there are no annotations.
    """
    # Defaults are resolved per call instead of in the signature, so no
    # Document or list instance is shared between calls. The annotations are
    # quoted so the ``|`` unions are never evaluated at runtime.
    if not annotations:
        return ""
    if document is None:
        document = Document()

    # Only put the separator if the document has an author entry.
    separator = ": " if "author" in document else ""
    return (
        f"{len(annotations)} "
        f"{document.get('author', '')}"
        f"{separator}"
        f"{document.get('title', '')}"
    ).rstrip()
|
2023-09-21 19:54:24 +00:00
|
|
|
|
|
|
|
|
2024-01-20 15:34:10 +00:00
|
|
|
def format_csv(
    document: "Document | None" = None,
    annotations: "list[Annotation] | None" = None,
    first: bool = False,
) -> str:
    """Render the annotations of one document as CSV rows.

    Each annotation becomes one line, produced by the annotation's ``format``
    method from a row template that is given the document as ``doc``.

    Args:
        document: Document the annotations belong to. When omitted, an empty
            ``Document()`` is used.
        annotations: Annotations to render. ``None`` or an empty list yields
            an empty string.
        first: When true, the column header line is put in front of the rows.

    Returns:
        The optional header line and one line per annotation, without
        trailing whitespace, or ``""`` when there are no annotations.
    """
    # Defaults are resolved per call instead of in the signature, so no
    # Document or list instance is shared between calls. The annotations are
    # quoted so the ``|`` unions are never evaluated at runtime.
    #
    # NOTE(review): without annotations nothing is returned, not even the
    # header when ``first`` is true. This keeps the existing behaviour;
    # confirm the exporters do not rely on the first call to emit the header.
    if not annotations:
        return ""
    if document is None:
        document = Document()

    header: str = "type,tag,page,quote,note,author,title,ref,file"
    template: str = (
        '{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
        '"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
    )

    lines = [header] if first else []
    lines.extend(a.format(template, doc=document) for a in annotations)
    return "\n".join(lines).rstrip()
|
2023-09-21 20:01:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Maps each formatter name to the function implementing it.
# "markdown" points at format_markdown, whose default heading style is
# "setext"; the "-atx" and "-setext" entries pin the heading style explicitly.
formatters: dict[str, Formatter] = {
    "count": format_count,
    "csv": format_csv,
    "markdown": format_markdown,
    "markdown-atx": format_markdown_atx,
    "markdown-setext": format_markdown_setext,
}
|