feat: Add EOF heuristic for ReadEra extractor

Every exported ReadEra annotation file also _ends_ with the ubiquitous
`*****` pattern, so we additionally check for that pattern at the end of
the file when detecting whether a file is a ReadEra export.
This commit is contained in:
Marty Oehme 2025-09-11 10:14:10 +02:00
parent 3ef45e24f7
commit 5f01aa1f2b
Signed by: Marty
GPG key ID: 4E535BC19C61886E
2 changed files with 32 additions and 2 deletions

View file

@ -24,11 +24,16 @@ class ReadEraExtractor:
if not content:
return False
# look for title and author lines up top
if not content[0] or not content[1]:
return False
patt = re.compile(r"\n\*\*\*\*\*\n")
if not patt.search("".join(content)):
# look for star-shaped divider pattern
if not re.search(r"\n\*\*\*\*\*\n", "".join(content)):
return False
# look for star-shaped pattern at end of file
if not re.search(r"\n\*\*\*\*\*\n\n$", "".join(content)):
return False
logger.debug(