refactor: Remove pymupdf coupling in extraction
The library is only needed for PDF extraction, which is handled by its own extractor plugin. The overall extraction routine therefore does not need any knowledge of the existence of pymupdf.
This commit is contained in:
parent
7261e7d80c
commit
8093259551
3 changed files with 52 additions and 29 deletions
|
|
@ -2,13 +2,13 @@ import re
|
|||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
|
||||
import fitz
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.logging
|
||||
from papis.document import Document
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.extractors import ExtractionError
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
|
|
@ -39,8 +39,8 @@ def start(
|
|||
|
||||
try:
|
||||
annotations.extend(extractor.run(fname))
|
||||
except fitz.FileDataError as e:
|
||||
print(f"File structure errors for {file}.\n{e}")
|
||||
except ExtractionError as e:
|
||||
print(f"File extraction errors for {file}.\n{e}")
|
||||
|
||||
if not file_available:
|
||||
# have to remove curlys or papis logger gets upset
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue