feat: Add Readest extractor

This commit is contained in:
Marty Oehme 2025-09-11 21:37:18 +02:00
parent fd71482526
commit 3eb7f3f1c7
Signed by: Marty
GPG key ID: 4E535BC19C61886E
2 changed files with 80 additions and 1 deletions

View file

@ -3,7 +3,7 @@ from importlib.util import find_spec
import papis.logging
from papis_extract.extraction import Extractor
from papis_extract.extractors import pdf, readera
from papis_extract.extractors import pdf, readera, readest
from papis_extract.extractors.pocketbook import PocketBookExtractor
logger = papis.logging.get_logger(__name__)
@ -12,6 +12,7 @@ all_extractors: dict[str, Extractor] = {}
# Register the extractors that are always available, keyed by a short string name.
all_extractors["pdf"] = pdf.PdfExtractor()
all_extractors["readera"] = readera.ReadEraExtractor()
all_extractors["readest"] = readest.ReadestExtractor()
# The PocketBook extractor is only registered when import specs for both
# bs4 and magic can be found.
# NOTE(review): the readest module imports magic at module level but is
# imported and registered without this guard - confirm magic is always installed.
if find_spec("bs4") and find_spec("magic"):
all_extractors["pocketbook"] = PocketBookExtractor()

View file

@ -0,0 +1,78 @@
# pyright: strict, reportUnknownMemberType=false
import re
from pathlib import Path
import magic
import papis.logging
from papis_extract.annotation import Annotation
logger = papis.logging.get_logger(__name__)
class ReadestExtractor:
    """Extracts exported annotations from the FOSS Readest book reading app.

    https://readest.com/

    A file is treated as a Readest export when it is plain text and contains
    a ``**Exported from Readest**: YYYY-MM-DD`` line. Within such a file every
    line starting with ``> `` is read as a highlight, and a directly following
    line starting with ``**Note**:: `` is read as the note for that highlight.
    """

    def can_process(self, filename: Path) -> bool:
        """Return whether ``filename`` looks like a Readest annotation export.

        The file has to be detected as ``text/plain``, be readable, and contain
        the "Exported from Readest" marker line followed by a date.
        """
        if magic.from_file(filename, mime=True) != "text/plain":
            return False

        content = self._read_file(filename)
        if not content:
            return False

        # look for the export marker line (with its date) in the file
        if not re.search(
            r"\n\*\*Exported from Readest\*\*: \d{4}-\d{2}-\d{2}\n", "".join(content)
        ):
            return False

        logger.debug(f"Found processable annotation file: {filename}")
        return True

    def run(self, filename: Path) -> list[Annotation]:
        """Extract annotations from readest txt file.

        Returns all readable annotations contained in the file passed in, with
        highlights and notes if available. An unreadable file, or one with
        nothing after its first two lines, yields an empty list.
        """
        # The first two lines are never scanned for highlights
        # (presumably the title header of the export - TODO confirm).
        content = self._read_file(filename)[2:]
        if not content:
            return []

        annotations: list[Annotation] = []
        for i, line in enumerate(content):
            if not line.startswith("> "):
                continue

            entry_content = self._clean_highlight(line)

            # A note, if any, sits on the line directly below its highlight.
            # The highlight may be the very last line, so guard the lookahead.
            entry_note = ""
            nextline = content[i + 1] if i + 1 < len(content) else ""
            if nextline.startswith("**Note**:: "):
                entry_note = nextline.removeprefix("**Note**:: ").strip()

            a = Annotation(
                file=str(filename),
                content=entry_content,
                note=entry_note,
                # NOTE: Unfortunately Readest currently does not export color information
                # color=color,
            )
            annotations.append(a)

        logger.debug(
            f"Found {len(annotations)} "
            f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
        )
        return annotations

    @staticmethod
    def _clean_highlight(line: str) -> str:
        """Return the highlight text of a ``> "..."`` line.

        Only the leading ``> `` marker and at most one enclosing double quote
        on each side are removed, so quotation marks that are part of the
        highlighted text itself are preserved.
        """
        text = line.removeprefix("> ").strip()
        text = text.removeprefix('"').removesuffix('"')
        return text.strip()

    def _read_file(self, filename: Path) -> list[str]:
        """Return the lines of ``filename``, or an empty list if it cannot be read.

        Missing, inaccessible or undecodable files are logged as an error and
        reported as empty instead of raising, so that probing an unrelated
        file does not abort extraction.
        """
        try:
            # Decode explicitly instead of relying on the platform locale;
            # exports are expected to be UTF-8.
            with filename.open("r", encoding="utf-8") as fr:
                return fr.readlines()
        except (OSError, UnicodeDecodeError):
            logger.error(f"Could not open file {filename} for extraction.")
            return []