feat: Add EOF heuristic for ReadEra extractor
Every exported ReadEra annotation file also _ends_ with the ubiquitous `*****` divider pattern, so we additionally check for it at the end of the file to detect the format.
This commit is contained in:
parent
3ef45e24f7
commit
5f01aa1f2b
2 changed files with 32 additions and 2 deletions
|
|
@ -24,11 +24,16 @@ class ReadEraExtractor:
|
|||
if not content:
|
||||
return False
|
||||
|
||||
# look for title and author lines up top
|
||||
if not content[0] or not content[1]:
|
||||
return False
|
||||
|
||||
patt = re.compile(r"\n\*\*\*\*\*\n")
|
||||
if not patt.search("".join(content)):
|
||||
# look for star-shaped divider pattern
|
||||
if not re.search(r"\n\*\*\*\*\*\n", "".join(content)):
|
||||
return False
|
||||
|
||||
# look for star-shaped pattern at end of file
|
||||
if not re.search(r"\n\*\*\*\*\*\n\n$", "".join(content)):
|
||||
return False
|
||||
|
||||
logger.debug(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue