feat: Add advanced pocketbook detection heuristic
Added a heuristic that checks for a specific meta tag (name "generator", content "PocketBook Bookmarks Export") which is written to the PocketBook XHTML export file.
This commit is contained in:
parent
6a8f8a03bc
commit
c8e8453b68
1 changed file with 13 additions and 3 deletions
|
@@ -12,7 +12,19 @@ logger = papis.logging.get_logger(__name__)
|
||||||
|
|
||||||
class PocketBookExtractor:
|
class PocketBookExtractor:
|
||||||
def can_process(self, filename: Path) -> bool:
|
def can_process(self, filename: Path) -> bool:
|
||||||
return magic.from_file(filename, mime=True) == "text/xml"
|
content = self._read_file(filename)
|
||||||
|
if not content:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not magic.from_buffer(content, mime=True) == "text/xml":
|
||||||
|
return False
|
||||||
|
|
||||||
|
html = BeautifulSoup(content, features="xml")
|
||||||
|
if not html.find(
|
||||||
|
"meta", {"name": "generator", "content": "PocketBook Bookmarks Export"}
|
||||||
|
):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
def run(self, filename: Path) -> list[Annotation]:
|
def run(self, filename: Path) -> list[Annotation]:
|
||||||
"""Extract annotations from pocketbook html file.
|
"""Extract annotations from pocketbook html file.
|
||||||
|
@@ -64,5 +76,3 @@ class PocketBookExtractor:
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.error(f"Could not open file {filename} for extraction.")
|
logger.error(f"Could not open file {filename} for extraction.")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue