2023-09-21 19:54:24 +00:00
|
|
|
from collections.abc import Callable
|
2023-09-19 19:43:19 +00:00
|
|
|
|
2023-09-20 15:22:29 +00:00
|
|
|
from papis_extract.annotation import AnnotatedDocument
|
2023-09-19 19:43:19 +00:00
|
|
|
|
2023-09-21 19:54:24 +00:00
|
|
|
# Call signature of a formatter: it receives a list of annotated documents
# and returns the rendered text as a single string.
Formatter = Callable[[list[AnnotatedDocument]], str]
|
2023-09-19 19:43:19 +00:00
|
|
|
|
|
|
|
|
2023-09-21 20:01:51 +00:00
|
|
|
def format_markdown(
    docs: list[AnnotatedDocument] = [], atx_headings: bool = False
) -> str:
    """Render the annotations of each document as markdown-style text.

    Documents without annotations are skipped. Every other document
    contributes a ``<title> - <author>`` heading followed by its annotations,
    each rendered by calling the annotation's ``format`` method with a fixed
    template string.

    Args:
        docs: Documents to render. The default list is only read, never
            mutated, so sharing it between calls is harmless.
        atx_headings: If true, emit the heading as a single ``# ...`` line;
            otherwise frame the heading with a decoration line above and
            below it.

    Returns:
        The concatenated text with trailing whitespace stripped; an empty
        string when no document has annotations.
    """
    template = (
        "{{#tag}}#{{tag}}\n{{/tag}}"
        "{{#quote}}> {{quote}}{{/quote}}{{#page}} [p. {{page}}]{{/page}}"
        "{{#note}}\n NOTE: {{note}}{{/note}}"
    )
    # Collect fragments and join once instead of growing a string in a loop.
    parts: list[str] = []
    for entry in docs:
        if not entry.annotations:
            continue

        # Look both fields up with a default so a document lacking a title or
        # an author produces a sparser heading instead of raising KeyError;
        # the decoration below is sized from the very same values.
        title = entry.document.get("title", "")
        author = entry.document.get("author", "")
        heading = f"{title} - {author}\n"
        if atx_headings:
            parts.append(f"# {heading}\n")
        else:
            # NOTE(review): the two runs are separated by one space while the
            # heading separates title and author with " - ", so the runs do
            # not line up with the words they frame - confirm this is intended.
            title_decoration = f"{'=' * len(title)} {'-' * len(author)}"
            parts.append(f"{title_decoration}\n{heading}{title_decoration}\n\n")

        for annotation in entry.annotations:
            parts.append(annotation.format(template))
            parts.append("\n\n")

        # Extra blank lines separate one document's block from the next.
        parts.append("\n\n\n")

    # The separators after the last document are trailing whitespace.
    return "".join(parts).rstrip()
|
2023-09-21 19:54:24 +00:00
|
|
|
|
|
|
|
|
2023-09-21 20:01:51 +00:00
|
|
|
def format_markdown_atx(docs: list[AnnotatedDocument] = []) -> str:
    """Format ``docs`` via ``format_markdown`` with ATX headings enabled."""
    return format_markdown(docs=docs, atx_headings=True)
|
|
|
|
|
|
|
|
|
|
|
|
def format_markdown_setext(docs: list[AnnotatedDocument] = []) -> str:
    """Format ``docs`` via ``format_markdown`` with ATX headings disabled."""
    return format_markdown(docs=docs, atx_headings=False)
|
|
|
|
|
|
|
|
|
2023-09-21 19:54:24 +00:00
|
|
|
def format_count(docs: list[AnnotatedDocument] = []) -> str:
    """Summarise how many annotations each document carries.

    Documents without annotations are left out. Every other document yields
    one line of the form ``<author> - <title>: <count>``; the author and its
    separator are omitted when the document has no ``author`` key, and the
    title is left empty when it has no ``title`` key.

    Returns:
        All lines joined by newlines, with trailing whitespace stripped.
    """
    lines = []
    for entry in docs:
        if not entry.annotations:
            continue

        total = sum(1 for _ in entry.annotations)
        document = entry.document

        # The separator only makes sense when there is an author before it.
        if "author" in document:
            prefix = f"{document['author']} - "
        else:
            prefix = ""
        title = document["title"] if "title" in document else ""

        lines.append(f"{prefix}{title}: {total}\n")

    return "".join(lines).rstrip()
|
2023-09-21 19:54:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
def format_csv(docs: list[AnnotatedDocument] = []) -> str:
    """Render every annotation as one comma-separated line below a header.

    The first line is a fixed column header. Each annotation of each document
    that has annotations then adds one line, produced by calling the
    annotation's ``format`` method with the row template and the owning
    document passed as ``doc``.

    NOTE(review): the template places values between literal double quotes;
    whether quotes or commas inside a value get escaped depends on the
    annotation's ``format`` implementation, which is not visible here.

    Returns:
        Header and rows joined by newlines, with trailing whitespace stripped
        (just the header when there is nothing to render).
    """
    columns = "type,tag,page,quote,note,author,title,ref,file"
    row_template = (
        '{{type}},{{tag}},{{page}},"{{quote}}","{{note}}",'
        '"{{doc.author}}","{{doc.title}}","{{doc.ref}}","{{file}}"'
    )

    rows = [columns]
    for entry in docs:
        if entry.annotations:
            document = entry.document
            rows.extend(
                annotation.format(row_template, doc=document)
                for annotation in entry.annotations
            )

    return "\n".join(rows).rstrip()
|
2023-09-21 20:01:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Lookup table from a formatter's name to the function implementing it.
formatters: dict[str, Formatter] = {
    "count": format_count,
    "csv": format_csv,
    "markdown": format_markdown,
    "markdown_atx": format_markdown_atx,
    "markdown_setext": format_markdown_setext,
}
|