papis-extract/tests/test_formatting.py

81 lines
2.1 KiB
Python
Raw Normal View History

from papis.document import Document
from papis_extract.annotation import AnnotatedDocument, Annotation
2023-08-31 19:32:24 +00:00
from papis_extract.formatter import (
format_count,
format_csv,
format_markdown,
format_markdown_atx,
format_markdown_setext,
)
# Shared fixture: one document carrying author/title metadata together with
# two annotations on the same file. Only the second annotation has a note,
# which is supplied through the ``content`` argument.
an_doc: AnnotatedDocument = AnnotatedDocument(
Document(data={"author": "document-author", "title": "document-title"}),
[
Annotation("myfile.pdf", text="my lovely text"),
Annotation("myfile.pdf", text="my second text", content="with note"),
],
)
# Expected rendering of ``an_doc`` that both the default markdown test and the
# setext markdown test compare against.
md_default_output: str = """============== ---------------
document-title - document-author
============== ---------------
> my lovely text
> my second text
NOTE: with note"""
def test_markdown_default():
    """Check that ``format_markdown`` renders the shared document as expected."""
    rendered = format_markdown([an_doc])
    assert rendered == md_default_output
def test_markdown_atx():
    """Check that ``format_markdown_atx`` emits a ``#``-prefixed heading line."""
    # The heading is a single "# title - author" line, followed by the quotes.
    expected = """# document-title - document-author
> my lovely text
> my second text
NOTE: with note"""
    rendered = format_markdown_atx([an_doc])
    assert rendered == expected
def test_markdown_setext():
    """Check that ``format_markdown_setext`` matches the default markdown output."""
    rendered = format_markdown_setext([an_doc])
    assert rendered == md_default_output
def test_count_default():
    """Check that ``format_count`` reports author, title and annotation count."""
    expected = """document-author - document-title: 2"""
    summary = format_count([an_doc])
    assert summary == expected
def test_csv_default():
    """Check that ``format_csv`` emits a header row plus one row per annotation."""
    # Header first, then one record per annotation; no newline after the
    # final record.
    expected = (
        "type,tag,page,quote,note,author,title,ref,file\n"
        'Highlight,,0,"my lovely text","","document-author",'
        '"document-title","","myfile.pdf"\n'
        'Highlight,,0,"my second text","with note","document-author",'
        '"document-title","","myfile.pdf"'
    )
    rendered = format_csv([an_doc])
    assert rendered == expected
# Sad path: for each format, a document that contains no annotations.
def test_markdown_no_annotations():
    """Check that ``format_markdown`` yields an empty string without annotations."""
    empty_doc: AnnotatedDocument = AnnotatedDocument(Document(data={}), [])
    rendered = format_markdown([empty_doc])
    assert rendered == ""
def test_count_no_annotations():
    """Check that ``format_count`` yields an empty string without annotations."""
    empty_doc: AnnotatedDocument = AnnotatedDocument(Document(data={}), [])
    summary = format_count([empty_doc])
    assert summary == ""
def test_csv_no_annotations():
    """Check that ``format_csv`` yields only the header row without annotations."""
    empty_doc: AnnotatedDocument = AnnotatedDocument(Document(data={}), [])
    header_only = "type,tag,page,quote,note,author,title,ref,file"
    rendered = format_csv([empty_doc])
    assert rendered == header_only