diff --git a/papis_extract/__init__.py b/papis_extract/__init__.py index 3e4792d..043d3b5 100644 --- a/papis_extract/__init__.py +++ b/papis_extract/__init__.py @@ -1,3 +1,4 @@ +import re import click import papis.cli import papis.config @@ -145,10 +146,18 @@ def run( doc_annots: list[tuple[Document, list[Annotation]]] = [] for doc in documents: annotations: list[Annotation] = [] + valid_files: int = 0 for ext in extractors: if not ext: continue - annotations.extend(extraction.start(ext, doc)) + added = extraction.start(ext, doc) + if added is not None: + valid_files += 1 + annotations.extend(added) doc_annots.append((doc, annotations)) + if valid_files == 0: + # have to remove curlys or papis logger gets upset + desc = re.sub("[{}]", "", papis.document.describe(doc)) + logger.info(f"Document {desc} has no valid extractors for any of its files.") exporter.run(doc_annots) diff --git a/papis_extract/extraction.py b/papis_extract/extraction.py index 2054323..0fa8b31 100644 --- a/papis_extract/extraction.py +++ b/papis_extract/extraction.py @@ -22,11 +22,13 @@ class Extractor(Protocol): def start( extractor: Extractor, document: Document, -) -> list[Annotation]: +) -> list[Annotation] | None: """Extract all annotations from passed documents. Returns all annotations contained in the papis - documents passed in. + documents passed in (empty list if no annotations). + If there are no files that the extractor can process, + returns None instead. """ annotations: list[Annotation] = [] file_available: bool = False @@ -40,11 +42,9 @@ def start( try: annotations.extend(extractor.run(fname)) except ExtractionError as e: - print(f"File extraction errors for {file}.\n{e}") + logger.error(f"File extraction errors for {file}. File may be damaged.\n{e}") if not file_available: - # have to remove curlys or papis logger gets upset - desc = re.sub("[{}]", "", papis.document.describe(document)) - logger.info(f"No {type(extractor)} file for document: {desc}") + return None return annotations