From 07d4de9a4663af692a51ddd87483291e6f3c059b Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Tue, 19 Sep 2023 17:52:45 +0200 Subject: [PATCH] docs: Add docstrings --- papis_extract/annotation_data.py | 9 ++++++++- papis_extract/exporter.py | 8 ++++++++ papis_extract/extractor.py | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/papis_extract/annotation_data.py b/papis_extract/annotation_data.py index c94244f..5fd5546 100644 --- a/papis_extract/annotation_data.py +++ b/papis_extract/annotation_data.py @@ -22,7 +22,10 @@ COLORS = { @dataclass class Annotation: - """A PDF annotation object""" + """A PDF annotation object. + + Contains all information necessary for the annotation itself, content and metadata. + """ file: str colors: tuple[float, float, float] = field(default_factory=lambda: (0.0, 0.0, 0.0)) @@ -83,6 +86,10 @@ class Annotation: @dataclass class AnnotatedDocument: + """Contains all annotations belonging to a single papis document. + + Combines a document with a list of annotations which belong to it.""" document: Document annotations: list[Annotation] + # TODO could implement a from_doc() static method to generate annotation list? diff --git a/papis_extract/exporter.py b/papis_extract/exporter.py index 0fca9d7..8e05c6c 100644 --- a/papis_extract/exporter.py +++ b/papis_extract/exporter.py @@ -115,6 +115,14 @@ def _add_annots_to_note( def _drop_existing_annotations( formatted_annotations: list[str], file_lines: list[str] ) -> list[str]: + """Returns the input annotations dropping any existing. + + Takes a list of formatted annotations and a list of strings + (most probably existing lines in a file). If any annotations + match an existing line closely enough, they will be dropped. + + Returns list of annotations without duplicates. 
+ """ minimum_similarity = ( papis.config.getfloat("minimum_similarity", "plugins.extract") or 1.0 ) diff --git a/papis_extract/extractor.py b/papis_extract/extractor.py index a4bc536..f6802f0 100644 --- a/papis_extract/extractor.py +++ b/papis_extract/extractor.py @@ -83,6 +83,7 @@ def extract(filename: Path) -> list[Annotation]: def is_pdf(fname: Path) -> bool: + """Check if file is a pdf, using mime type.""" return magic.from_file(fname, mime=True) == "application/pdf"