feat: Add Readest extractor
This commit is contained in:
parent
fd71482526
commit
3eb7f3f1c7
2 changed files with 80 additions and 1 deletions
|
|
@ -3,7 +3,7 @@ from importlib.util import find_spec
|
||||||
import papis.logging
|
import papis.logging
|
||||||
|
|
||||||
from papis_extract.extraction import Extractor
|
from papis_extract.extraction import Extractor
|
||||||
from papis_extract.extractors import pdf, readera
|
from papis_extract.extractors import pdf, readera, readest
|
||||||
from papis_extract.extractors.pocketbook import PocketBookExtractor
|
from papis_extract.extractors.pocketbook import PocketBookExtractor
|
||||||
|
|
||||||
logger = papis.logging.get_logger(__name__)
|
logger = papis.logging.get_logger(__name__)
|
||||||
|
|
@ -12,6 +12,7 @@ all_extractors: dict[str, Extractor] = {}
|
||||||
|
|
||||||
all_extractors["pdf"] = pdf.PdfExtractor()
|
all_extractors["pdf"] = pdf.PdfExtractor()
|
||||||
all_extractors["readera"] = readera.ReadEraExtractor()
|
all_extractors["readera"] = readera.ReadEraExtractor()
|
||||||
|
all_extractors["readest"] = readest.ReadestExtractor()
|
||||||
|
|
||||||
if find_spec("bs4") and find_spec("magic"):
|
if find_spec("bs4") and find_spec("magic"):
|
||||||
all_extractors["pocketbook"] = PocketBookExtractor()
|
all_extractors["pocketbook"] = PocketBookExtractor()
|
||||||
|
|
|
||||||
78
papis_extract/extractors/readest.py
Normal file
78
papis_extract/extractors/readest.py
Normal file
|
|
@ -0,0 +1,78 @@
|
||||||
|
# pyright: strict, reportUnknownMemberType=false
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import magic
|
||||||
|
import papis.logging
|
||||||
|
|
||||||
|
from papis_extract.annotation import Annotation
|
||||||
|
|
||||||
|
logger = papis.logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ReadestExtractor:
    """Extracts exported annotations from the FOSS Readest book reading app.

    https://readest.com/

    The parser works on the plain-text export: every line starting with
    ``> `` is treated as a highlight (optionally wrapped in double quotes),
    and a line starting with ``**Note**:: `` directly below it is treated as
    the note attached to that highlight.
    """

    # Line written by the exporter, e.g. "**Exported from Readest**: 2025-01-31".
    _EXPORT_MARKER = re.compile(
        r"\n\*\*Exported from Readest\*\*: \d{4}-\d{2}-\d{2}\n"
    )
    _HIGHLIGHT_PREFIX = "> "
    _NOTE_PREFIX = "**Note**:: "

    def can_process(self, filename: Path) -> bool:
        """Return whether ``filename`` looks like a Readest annotation export.

        The file has to be detected as ``text/plain`` and has to contain the
        ``**Exported from Readest**: YYYY-MM-DD`` line.
        """
        if magic.from_file(filename, mime=True) != "text/plain":
            return False

        content = self._read_file(filename)
        if not content:
            return False

        # The export-date line is the marker that identifies a Readest export.
        if not self._EXPORT_MARKER.search("".join(content)):
            return False

        logger.debug(f"Found processable annotation file: {filename}")
        return True

    def run(self, filename: Path) -> list[Annotation]:
        """Extract annotations from readest txt file.

        Returns all readable annotations contained in the file passed in, with
        highlights and notes if available. An unreadable or empty file yields
        an empty list.
        """
        # The first two lines are skipped before parsing
        # (presumably the title header of the export -- confirm against a sample).
        content = self._read_file(filename)[2:]
        if not content:
            return []

        annotations: list[Annotation] = [
            Annotation(
                file=str(filename),
                content=entry_content,
                note=entry_note,
                # NOTE: Unfortunately Readest currently does not export color information
                # color=color,
            )
            for entry_content, entry_note in self._parse_entries(content)
        ]

        logger.debug(
            f"Found {len(annotations)} "
            f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
        )
        return annotations

    @classmethod
    def _parse_entries(cls, lines: list[str]) -> list[tuple[str, str]]:
        """Collect ``(highlight, note)`` pairs from the lines of an export.

        ``note`` is the empty string when the line following a highlight is
        not a note line, or when the highlight is the last line.
        """
        entries: list[tuple[str, str]] = []
        for index, line in enumerate(lines):
            if not line.startswith(cls._HIGHLIGHT_PREFIX):
                continue

            # Remove exactly one marker and one pair of wrapping quotes, so
            # quotes that belong to the highlighted text itself survive.
            text = line.removeprefix(cls._HIGHLIGHT_PREFIX).strip()
            text = text.removeprefix('"').removesuffix('"').strip()

            note = ""
            # A highlight on the very last line has no following line to inspect.
            if index + 1 < len(lines):
                following = lines[index + 1]
                if following.startswith(cls._NOTE_PREFIX):
                    note = following.removeprefix(cls._NOTE_PREFIX).strip()

            entries.append((text, note))
        return entries

    def _read_file(self, filename: Path) -> list[str]:
        """Return the lines of ``filename``, or an empty list if it is unreadable.

        The file is decoded as UTF-8 regardless of the platform locale.
        Read and decode failures are logged instead of raised, so that probing
        an unrelated file does not abort extraction.
        """
        try:
            with filename.open("r", encoding="utf-8") as fr:
                return fr.readlines()
        except (OSError, UnicodeDecodeError):
            logger.error(f"Could not open file {filename} for extraction.")
            return []
|
||||||
Loading…
Add table
Add a link
Reference in a new issue