chore: Remove python-magic dependency

python-magic relies on the libmagic system library, which is not
necessarily installed everywhere. Most of the functionality that we need
for our purposes can be recreated with lighter-weight methods.
This commit is contained in:
Marty Oehme 2025-09-12 09:47:41 +02:00
parent 30bc8452fa
commit 7459fbeb0b
Signed by: Marty
GPG key ID: 4E535BC19C61886E
5 changed files with 33 additions and 12 deletions

View file

@ -1,8 +1,8 @@
# pyright: strict, reportUnknownMemberType=false
import mimetypes
import re
from pathlib import Path
import magic
import papis.logging
from papis_extract.annotation import Annotation
@ -17,7 +17,7 @@ class ReadEraExtractor:
"""
def can_process(self, filename: Path) -> bool:
if magic.from_file(filename, mime=True) != "text/plain":
if not self._is_txt(filename):
return False
content = self._read_file(filename)
@ -36,11 +36,12 @@ class ReadEraExtractor:
if not re.search(r"\n\*\*\*\*\*\n\n$", "".join(content)):
return False
logger.debug(
f"Found processable annotation file: {filename}"
)
logger.debug(f"Found processable annotation file: {filename}")
return True
def _is_txt(self, filename: Path) -> bool:
return mimetypes.guess_type(filename)[0] == "text/plain"
def run(self, filename: Path) -> list[Annotation]:
"""Extract annotations from readera txt file.