feat: Add atx-style markdown

Added a markdown formatter with atx-style headers, which can be chosen as
an alternative markdown template on the CLI.
The existing 'markdown' template will still default to
setext-style headers.
This commit is contained in:
Marty Oehme 2023-09-21 22:01:51 +02:00
parent 7ee8d4911e
commit ee4690f52b
Signed by: Marty
GPG Key ID: EDBF2ED917B2EF6A
3 changed files with 44 additions and 19 deletions

View File

@ -94,6 +94,9 @@ To output annotations in a markdown-compatible syntax (the default), do:
papis extract --template markdown
```
The markdown formatter has two sub-variants: `--template markdown-atx` produces atx-style headers (`# Headings`),
while `--template markdown-setext` produces setext-style headers (the default style).
To instead see them in a csv syntax simply invoke:
```bash
@ -227,7 +230,8 @@ features to be implemented:
- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)})
- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note
- [ ] `--force` mode where we simply do not drop anything
- [ ] `--format` option to choose from default or set up a custom formatter
- [x] `--format` option to choose from default or set up a custom formatter
- called `--template` in current implementation
- [ ] on_add hook to extract annotations as files are added
- needs upstream help, 'on_add' hook, and pass-through of affected documents

View File

@ -8,7 +8,10 @@ import papis.strings
from papis.document import Document
from papis_extract import extractor, exporter
from papis_extract.formatter import Formatter, format_count, format_csv, format_markdown
from papis_extract.formatter import (
Formatter,
formatters
)
logger = papis.logging.get_logger(__name__)
@ -43,7 +46,10 @@ papis.config.register_default_settings(DEFAULT_OPTIONS)
@click.option(
"--template",
"-t",
type=click.Choice(["markdown", "count", "csv"], case_sensitive=False),
type=click.Choice(
["markdown", "markdown-setext", "markdown-atx", "count", "csv"],
case_sensitive=False,
),
help="Choose an output template to format annotations with.",
)
def main(
@ -76,12 +82,7 @@ def main(
logger.warning(papis.strings.no_documents_retrieved_message)
return
if template == "csv":
formatter = format_csv
elif template == "count":
formatter = format_count
else:
formatter = format_markdown
formatter = formatters[template]
run(documents, edit=manual, write=write, git=git, formatter=formatter)

View File

@ -5,7 +5,9 @@ from papis_extract.annotation import AnnotatedDocument
Formatter = Callable[[list[AnnotatedDocument]], str]
def format_markdown(docs: list[AnnotatedDocument] = []) -> str:
def format_markdown(
docs: list[AnnotatedDocument] = [], atx_headings: bool = False
) -> str:
template = (
"{{#tag}}#{{tag}}\n{{/tag}}"
"{{#quote}}> {{quote}}{{/quote}} {{#page}}[p. {{page}}]{{/page}}"
@ -16,15 +18,16 @@ def format_markdown(docs: list[AnnotatedDocument] = []) -> str:
if not entry.annotations:
continue
title_decoration = (
f"{'=' * len(entry.document.get('title', ''))} "
f"{'-' * len(entry.document.get('author', ''))}"
)
output += (
f"{title_decoration}\n"
f"{entry.document['title']} - {entry.document['author']}\n"
f"{title_decoration}\n\n"
)
heading = f"{entry.document['title']} - {entry.document['author']}\n"
if atx_headings:
output += f"# {heading}\n"
else:
title_decoration = (
f"{'=' * len(entry.document.get('title', ''))} "
f"{'-' * len(entry.document.get('author', ''))}"
)
output += f"{title_decoration}\n" f"{heading}" f"{title_decoration}\n\n"
for a in entry.annotations:
output += a.format(template)
output += "\n"
@ -34,6 +37,14 @@ def format_markdown(docs: list[AnnotatedDocument] = []) -> str:
return output
def format_markdown_atx(docs: list[AnnotatedDocument] = []) -> str:
    """Format annotations as markdown with atx-style (`# Heading`) headings.

    Thin wrapper that delegates to `format_markdown` with
    `atx_headings` switched on, so it can be used wherever a plain
    single-argument formatter is expected.
    """
    return format_markdown(docs=docs, atx_headings=True)
def format_markdown_setext(docs: list[AnnotatedDocument] = []) -> str:
    """Format annotations as markdown with the setext-style headings.

    Thin wrapper that delegates to `format_markdown` with
    `atx_headings` switched off, so it can be used wherever a plain
    single-argument formatter is expected.
    """
    return format_markdown(docs=docs, atx_headings=False)
def format_count(docs: list[AnnotatedDocument] = []) -> str:
output = ""
for entry in docs:
@ -72,3 +83,12 @@ def format_csv(docs: list[AnnotatedDocument] = []) -> str:
output += "\n"
return output
# Registry mapping each `--template` command line choice to the formatter
# function that renders it. The command looks the chosen template name up
# directly in this dict, so the keys must match the strings offered on the
# command line, which are hyphenated ("markdown-atx", "markdown-setext").
# With only underscore keys those two choices raised a KeyError.
formatters: dict[str, Formatter] = {
    "count": format_count,
    "csv": format_csv,
    "markdown": format_markdown,
    "markdown-atx": format_markdown_atx,
    "markdown-setext": format_markdown_setext,
    # Underscore spellings kept as aliases so existing lookups keep working.
    "markdown_atx": format_markdown_atx,
    "markdown_setext": format_markdown_setext,
}