Some checks failed
ci/woodpecker/push/lint Pipeline was successful
ci/woodpecker/push/static_analysis Pipeline failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/manual/lint Pipeline was successful
ci/woodpecker/manual/static_analysis Pipeline failed
ci/woodpecker/manual/test Pipeline failed
51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
from pathlib import Path
|
|
from typing import Protocol
|
|
|
|
import papis.config
|
|
import papis.document
|
|
import papis.logging
|
|
from papis.document import Document
|
|
|
|
from papis_extract.annotation import Annotation
|
|
from papis_extract.exceptions import ExtractionError
|
|
|
|
# Module-level logger, obtained through papis' logging helper and named
# after this module.
logger = papis.logging.get_logger(__name__)
|
|
|
|
|
|
class Extractor(Protocol):
    """Structural interface for annotation extractors.

    Any object providing these two methods satisfies the protocol; no
    explicit subclassing is required.
    """

    def can_process(self, filename: Path) -> bool:
        """Report whether this extractor is able to handle ``filename``."""
        ...

    def run(self, filename: Path) -> list[Annotation]:
        """Extract and return the annotations found in ``filename``."""
        ...
|
|
|
|
|
|
def start(
    extractor: Extractor,
    document: Document,
) -> list[Annotation] | None:
    """Collect the annotations of every processable file of a document.

    Each file reported by ``document.get_files()`` is offered to the
    extractor. Files the extractor declines are skipped. A file whose
    extraction raises ``ExtractionError`` is logged as an error and
    contributes no annotations, but still counts as a processable file.

    Returns:
        The combined annotations of all processed files (an empty list
        if none were found), or ``None`` when the extractor could not
        process any of the document's files.
    """
    collected: list[Annotation] = []
    found_processable = False

    for file in document.get_files():
        path = Path(file)
        if extractor.can_process(path):
            # Flag is raised before extraction, so a failing file still
            # distinguishes "nothing extracted" from "nothing to process".
            found_processable = True
            try:
                collected += extractor.run(path)
            except ExtractionError as e:
                logger.error(
                    f"File extraction errors for {file}. File may be damaged.\n{e}"
                )

    return collected if found_processable else None
|