feat: Add advanced pocketbook detection heuristic
Added a heuristic that checks for the `generator` meta tag (content "PocketBook Bookmarks Export") which PocketBook writes to its exported XHTML file.
This commit is contained in:
parent
6a8f8a03bc
commit
c8e8453b68
1 changed file with 13 additions and 3 deletions
|
@@ -12,7 +12,19 @@ logger = papis.logging.get_logger(__name__)
|
|||
|
||||
class PocketBookExtractor:
|
||||
def can_process(self, filename: Path) -> bool:
|
||||
return magic.from_file(filename, mime=True) == "text/xml"
|
||||
content = self._read_file(filename)
|
||||
if not content:
|
||||
return False
|
||||
|
||||
if not magic.from_buffer(content, mime=True) == "text/xml":
|
||||
return False
|
||||
|
||||
html = BeautifulSoup(content, features="xml")
|
||||
if not html.find(
|
||||
"meta", {"name": "generator", "content": "PocketBook Bookmarks Export"}
|
||||
):
|
||||
return False
|
||||
return True
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
"""Extract annotations from pocketbook html file.
|
||||
|
@@ -64,5 +76,3 @@ class PocketBookExtractor:
|
|||
except FileNotFoundError:
|
||||
logger.error(f"Could not open file {filename} for extraction.")
|
||||
return ""
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue