Format file
This commit is contained in:
parent
8f01b93de2
commit
e3aacc4b15
1 changed file with 17 additions and 7 deletions
|
@ -40,9 +40,15 @@ class ExtractPlugin(PapersPlugin):
|
||||||
# or `:: {annotation} :: {page} ::`
|
# or `:: {annotation} :: {page} ::`
|
||||||
# and so on
|
# and so on
|
||||||
self.on_import = conf["plugins"].get("extract", {}).get("on_import", False)
|
self.on_import = conf["plugins"].get("extract", {}).get("on_import", False)
|
||||||
self.minimum_similarity = float(conf["plugins"].get("extract", {}).get("minimum_similarity", 0.75))
|
self.minimum_similarity = float(
|
||||||
self.highlight_prefix = conf["plugins"].get("extract", {}).get("quote_prefix", "> ")
|
conf["plugins"].get("extract", {}).get("minimum_similarity", 0.75)
|
||||||
self.note_prefix = conf["plugins"].get("extract", {}).get("note_prefix", "Note: ")
|
)
|
||||||
|
self.highlight_prefix = (
|
||||||
|
conf["plugins"].get("extract", {}).get("quote_prefix", "> ")
|
||||||
|
)
|
||||||
|
self.note_prefix = (
|
||||||
|
conf["plugins"].get("extract", {}).get("note_prefix", "Note: ")
|
||||||
|
)
|
||||||
|
|
||||||
def update_parser(self, subparsers, conf):
|
def update_parser(self, subparsers, conf):
|
||||||
"""Allow the usage of the pubs extract subcommand"""
|
"""Allow the usage of the pubs extract subcommand"""
|
||||||
|
@ -133,12 +139,16 @@ class ExtractPlugin(PapersPlugin):
|
||||||
with fitz.Document(filename) as doc:
|
with fitz.Document(filename) as doc:
|
||||||
for page in doc:
|
for page in doc:
|
||||||
for annot in page.annots():
|
for annot in page.annots():
|
||||||
content = self._retrieve_annotation_content(page, annot, self.highlight_prefix, self.note_prefix)
|
content = self._retrieve_annotation_content(
|
||||||
|
page, annot, self.highlight_prefix, self.note_prefix
|
||||||
|
)
|
||||||
if content:
|
if content:
|
||||||
annotations.append(f"[{(page.number or 0) + 1}] {content}")
|
annotations.append(f"[{(page.number or 0) + 1}] {content}")
|
||||||
return annotations
|
return annotations
|
||||||
|
|
||||||
def _retrieve_annotation_content(self, page, annotation, highlight_prefix = "> ", note_prefix = "Note: "):
|
def _retrieve_annotation_content(
|
||||||
|
self, page, annotation, highlight_prefix="> ", note_prefix="Note: "
|
||||||
|
):
|
||||||
"""Gets the text content of an annotation.
|
"""Gets the text content of an annotation.
|
||||||
|
|
||||||
Returns the actual content of an annotation. Sometimes
|
Returns the actual content of an annotation. Sometimes
|
||||||
|
@ -151,7 +161,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
written = page.get_textbox(annotation.rect).replace("\n", " ")
|
written = page.get_textbox(annotation.rect).replace("\n", " ")
|
||||||
|
|
||||||
# highlight with selection in note
|
# highlight with selection in note
|
||||||
if Levenshtein.ratio(content,written) > self.minimum_similarity:
|
if Levenshtein.ratio(content, written) > self.minimum_similarity:
|
||||||
return f"{highlight_prefix}{content}"
|
return f"{highlight_prefix}{content}"
|
||||||
# an independent note, not a highlight
|
# an independent note, not a highlight
|
||||||
elif content and not written:
|
elif content and not written:
|
||||||
|
@ -175,7 +185,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
if annotations:
|
if annotations:
|
||||||
output += f"------ {paper.citekey} ------\n\n"
|
output += f"------ {paper.citekey} ------\n\n"
|
||||||
for annot in annotations:
|
for annot in annotations:
|
||||||
output += f'{annot}\n\n'
|
output += f"{annot}\n\n"
|
||||||
output += "\n"
|
output += "\n"
|
||||||
print(output)
|
print(output)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue