chore: Remove python-magic dependency

It relies on the libmagic library, which is not necessarily installed
everywhere. Most of the functionality that we need for our purposes can
be recreated with lighter-weight methods.
This commit is contained in:
Marty Oehme 2025-09-12 09:47:41 +02:00
parent 30bc8452fa
commit 7459fbeb0b
Signed by: Marty
GPG key ID: 4E535BC19C61886E
5 changed files with 33 additions and 12 deletions

View file

@ -1,7 +1,7 @@
# pyright: strict, reportUnknownMemberType=false
import mimetypes
from pathlib import Path
import magic
import papis.logging
from bs4 import BeautifulSoup
@ -12,7 +12,7 @@ logger = papis.logging.get_logger(__name__)
class PocketBookExtractor:
def can_process(self, filename: Path) -> bool:
if magic.from_file(filename, mime=True) != "text/xml":
if not self._is_html(filename):
return False
content = self._read_file(filename)
@ -28,6 +28,9 @@ class PocketBookExtractor:
logger.debug(f"Found processable annotation file: {filename}")
return True
def _is_html(self, filename: Path) -> bool:
    """Report whether *filename* maps to the HTML MIME type.

    The decision comes from ``mimetypes.guess_type``, which looks only at
    the file name; the file's contents are never opened here.
    """
    guessed_type, _encoding = mimetypes.guess_type(filename)
    return guessed_type == "text/html"
def run(self, filename: Path) -> list[Annotation]:
"""Extract annotations from pocketbook html file.