fix: Stop pocketbook extractor from reading every file into memory

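Previously, the complete read routine ran before the extractor checked whether the file has an XML mimetype. This meant that the very first thing it did was try to read any file into memory, including PDFs and even binaries. Of course, doing so crashed the program. The mimetype is now checked on the file first, and the content is only read if it is `text/xml`.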
2024-01-25 21:40:31 +01:00 · 2024-01-25 21:40:31 +01:00 · 163fd63038
commit 163fd63038
parent 72ddaaf1bc
1 changed files with 3 additions and 3 deletions
--- a/papis_extract/extractors/pocketbook.py
+++ b/papis_extract/extractors/pocketbook.py
@ -12,11 +12,11 @@ logger = papis.logging.get_logger(__name__)

 class PocketBookExtractor:
    def can_process(self, filename: Path) -> bool:
-        content = self._read_file(filename)
-        if not content:
+        if not magic.from_file(filename, mime=True) == "text/xml":
            return False

-        if not magic.from_buffer(content, mime=True) == "text/xml":
+        content = self._read_file(filename)
+        if not content:
            return False

        html = BeautifulSoup(content, features="xml")