refactor: Remove pymupdf coupling in extraction

The library is only needed for PDF extraction, which is handled by
its own extractor plugin. The overall extraction routine does not
need to know that pymupdf exists.
This commit is contained in:
Marty Oehme 2024-06-14 14:59:39 +02:00
parent 7261e7d80c
commit 8093259551
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
3 changed files with 52 additions and 29 deletions

View file

@@ -2,13 +2,13 @@ import re
from pathlib import Path
from typing import Protocol
import fitz
import papis.config
import papis.document
import papis.logging
from papis.document import Document
from papis_extract.annotation import Annotation
from papis_extract.extractors import ExtractionError
logger = papis.logging.get_logger(__name__)
@@ -39,8 +39,8 @@ def start(
try:
annotations.extend(extractor.run(fname))
except fitz.FileDataError as e:
print(f"File structure errors for {file}.\n{e}")
except ExtractionError as e:
print(f"File extraction errors for {file}.\n{e}")
if not file_available:
# have to remove curlys or papis logger gets upset