fix: Only inform if no extractor finds valid files
Some checks failed
ci/woodpecker/push/lint Pipeline failed
ci/woodpecker/push/static_analysis Pipeline was successful
ci/woodpecker/push/test Pipeline was successful

Until now, whenever an extractor could not find any valid files for a
document, it would inform the user of this case. However, this is not
very useful: if you have a pdf and an epub extractor running, it would
inform you about each document which only had one of the two formats, as
well as about those which actually did not have any valid files for *any*
of the extractors running.

This commit changes the behavior to only inform the user when none of
the running extractors find a valid file, since that is the actual case
a user might want to be informed about.
This commit is contained in:
Marty Oehme 2024-06-14 20:02:52 +02:00
parent 97b7ec0dc9
commit 779519f580
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
2 changed files with 16 additions and 8 deletions

View file

@ -1,3 +1,4 @@
import re
import click import click
import papis.cli import papis.cli
import papis.config import papis.config
@ -145,10 +146,18 @@ def run(
doc_annots: list[tuple[Document, list[Annotation]]] = [] doc_annots: list[tuple[Document, list[Annotation]]] = []
for doc in documents: for doc in documents:
annotations: list[Annotation] = [] annotations: list[Annotation] = []
valid_files: int = 0
for ext in extractors: for ext in extractors:
if not ext: if not ext:
continue continue
annotations.extend(extraction.start(ext, doc)) added = extraction.start(ext, doc)
if added is not None:
valid_files += 1
annotations.extend(added)
doc_annots.append((doc, annotations)) doc_annots.append((doc, annotations))
if valid_files == 0:
# have to remove curlys or papis logger gets upset
desc = re.sub("[{}]", "", papis.document.describe(doc))
logger.info(f"Document {desc} has no valid extractors for any of its files.")
exporter.run(doc_annots) exporter.run(doc_annots)

View file

@ -1,4 +1,3 @@
import re
from pathlib import Path from pathlib import Path
from typing import Protocol from typing import Protocol
@ -22,11 +21,13 @@ class Extractor(Protocol):
def start( def start(
extractor: Extractor, extractor: Extractor,
document: Document, document: Document,
) -> list[Annotation]: ) -> list[Annotation] | None:
"""Extract all annotations from passed documents. """Extract all annotations from passed documents.
Returns all annotations contained in the papis Returns all annotations contained in the papis
documents passed in. documents passed in (empty list if no annotations).
If there are no files that the extractor can process,
returns None instead.
""" """
annotations: list[Annotation] = [] annotations: list[Annotation] = []
file_available: bool = False file_available: bool = False
@ -40,11 +41,9 @@ def start(
try: try:
annotations.extend(extractor.run(fname)) annotations.extend(extractor.run(fname))
except ExtractionError as e: except ExtractionError as e:
print(f"File extraction errors for {file}.\n{e}") logger.error(f"File extraction errors for {file}. File may be damaged.\n{e}")
if not file_available: if not file_available:
# have to remove curlys or papis logger gets upset return None
desc = re.sub("[{}]", "", papis.document.describe(document))
logger.info(f"No {type(extractor)} file for document: {desc}")
return annotations return annotations