Add simple docstrings
This commit is contained in:
parent
1bb3502709
commit
7b27f3291d
1 changed files with 48 additions and 13 deletions
|
@ -1,9 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
# from subprocess import Popen, PIPE, STDOUT
|
|
||||||
# from pipes import quote as shell_quote
|
|
||||||
|
|
||||||
import fitz
|
import fitz
|
||||||
|
|
||||||
from pubs.plugins import PapersPlugin
|
from pubs.plugins import PapersPlugin
|
||||||
|
@ -15,13 +12,16 @@ from pubs.content import check_file, read_text_file, write_file
|
||||||
|
|
||||||
|
|
||||||
class ExtractPlugin(PapersPlugin):
|
class ExtractPlugin(PapersPlugin):
|
||||||
"""Make the pubs repository also a git repository.
|
"""Extract annotations from any pdf document.
|
||||||
|
|
||||||
The git plugin creates a git repository in the pubs directory
|
The extract plugin allows manual or automatic extraction of all annotations
|
||||||
and commit the changes to the pubs repository.
|
contained in the pdf documents belonging to entries of the pubs library.
|
||||||
|
|
||||||
It also add the `pubs git` subcommand, so git commands can be executed
|
It can write those changes to stdout or directly create and update notes
|
||||||
in the git repository from the command line.
|
for the pubs entries.
|
||||||
|
|
||||||
|
It adds a `pubs extract` subcommand through which it is invoked, but can
|
||||||
|
optionally run whenever a new document is imported for a pubs entry.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name = "extract"
|
name = "extract"
|
||||||
|
@ -39,12 +39,9 @@ class ExtractPlugin(PapersPlugin):
|
||||||
# or `:: {annotation} :: {page} ::`
|
# or `:: {annotation} :: {page} ::`
|
||||||
# and so on
|
# and so on
|
||||||
self.onimport = conf["plugins"].get("extract", {}).get("onimport", False)
|
self.onimport = conf["plugins"].get("extract", {}).get("onimport", False)
|
||||||
# self.manual = conf['plugins'].get('git', {}).get('manual', False)
|
|
||||||
# self.force_color = conf['plugins'].get('git', {}).get('force_color', True)
|
|
||||||
# self.list_of_changes = []
|
|
||||||
|
|
||||||
def update_parser(self, subparsers, conf):
|
def update_parser(self, subparsers, conf):
|
||||||
"""Allow the usage of the pubs git subcommand"""
|
"""Allow the usage of the pubs extract subcommand"""
|
||||||
# TODO option for ignoring missing documents or erroring.
|
# TODO option for ignoring missing documents or erroring.
|
||||||
extract_parser = subparsers.add_parser(self.name, help=self.description)
|
extract_parser = subparsers.add_parser(self.name, help=self.description)
|
||||||
extract_parser.add_argument(
|
extract_parser.add_argument(
|
||||||
|
@ -69,7 +66,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
extract_parser.set_defaults(func=self.command)
|
extract_parser.set_defaults(func=self.command)
|
||||||
|
|
||||||
def command(self, conf, args):
|
def command(self, conf, args):
|
||||||
"""Run the annotation extraction"""
|
"""Run the annotation extraction command."""
|
||||||
citekeys = resolve_citekey_list(
|
citekeys = resolve_citekey_list(
|
||||||
self.repository, conf, args.citekeys, ui=self.ui, exit_on_fail=True
|
self.repository, conf, args.citekeys, ui=self.ui, exit_on_fail=True
|
||||||
)
|
)
|
||||||
|
@ -83,6 +80,11 @@ class ExtractPlugin(PapersPlugin):
|
||||||
self.repository.close()
|
self.repository.close()
|
||||||
|
|
||||||
def extract(self, citekeys):
|
def extract(self, citekeys):
|
||||||
|
"""Extracts annotations from citekeys.
|
||||||
|
|
||||||
|
Returns all annotations belonging to the papers that
|
||||||
|
are described by the citekeys passed in.
|
||||||
|
"""
|
||||||
papers = self._gather_papers(citekeys)
|
papers = self._gather_papers(citekeys)
|
||||||
papers_annotated = []
|
papers_annotated = []
|
||||||
for paper in papers:
|
for paper in papers:
|
||||||
|
@ -94,18 +96,35 @@ class ExtractPlugin(PapersPlugin):
|
||||||
return papers_annotated
|
return papers_annotated
|
||||||
|
|
||||||
def _gather_papers(self, citekeys):
|
def _gather_papers(self, citekeys):
|
||||||
|
"""Get all papers for citekeys.
|
||||||
|
|
||||||
|
Returns all Paper objects described by the citekeys
|
||||||
|
passed in.
|
||||||
|
"""
|
||||||
papers = []
|
papers = []
|
||||||
for key in citekeys:
|
for key in citekeys:
|
||||||
papers.append(self.repository.pull_paper(key))
|
papers.append(self.repository.pull_paper(key))
|
||||||
return papers
|
return papers
|
||||||
|
|
||||||
def _get_file(self, paper):
|
def _get_file(self, paper):
|
||||||
|
"""Get path of document belonging to paper.
|
||||||
|
|
||||||
|
Returns the real path to the document which belongs
|
||||||
|
to the paper passed in. Emits a warning if no
|
||||||
|
document belongs to paper.
|
||||||
|
"""
|
||||||
path = self.broker.real_docpath(paper.docpath)
|
path = self.broker.real_docpath(paper.docpath)
|
||||||
if not path:
|
if not path:
|
||||||
self.ui.warning(f"{paper.citekey} has no valid document.")
|
self.ui.warning(f"{paper.citekey} has no valid document.")
|
||||||
return path
|
return path
|
||||||
|
|
||||||
def _get_annotations(self, filename):
|
def _get_annotations(self, filename):
|
||||||
|
"""Extract annotations from a file.
|
||||||
|
|
||||||
|
Returns all readable annotations contained in the file
|
||||||
|
passed in. Only returns Highlight or Text annotations
|
||||||
|
currently.
|
||||||
|
"""
|
||||||
annotations = []
|
annotations = []
|
||||||
with fitz.Document(filename) as doc:
|
with fitz.Document(filename) as doc:
|
||||||
for page in doc:
|
for page in doc:
|
||||||
|
@ -118,6 +137,11 @@ class ExtractPlugin(PapersPlugin):
|
||||||
return annotations
|
return annotations
|
||||||
|
|
||||||
def _to_stdout(self, annotated_papers):
|
def _to_stdout(self, annotated_papers):
|
||||||
|
"""Write annotations to stdout.
|
||||||
|
|
||||||
|
Simply outputs the gathered annotations over stdout
|
||||||
|
ready to be passed on through pipelines etc.
|
||||||
|
"""
|
||||||
output = ""
|
output = ""
|
||||||
for contents in annotated_papers:
|
for contents in annotated_papers:
|
||||||
paper = contents[0]
|
paper = contents[0]
|
||||||
|
@ -130,6 +154,12 @@ class ExtractPlugin(PapersPlugin):
|
||||||
print(output)
|
print(output)
|
||||||
|
|
||||||
def _to_notes(self, annotated_papers, note_extension="txt", edit=False):
|
def _to_notes(self, annotated_papers, note_extension="txt", edit=False):
|
||||||
|
"""Write annotations into pubs notes.
|
||||||
|
|
||||||
|
Permanently writes the given annotations into notes
|
||||||
|
in the pubs notes directory. Creates new notes for
|
||||||
|
citekeys missing a note or appends to existing.
|
||||||
|
"""
|
||||||
for contents in annotated_papers:
|
for contents in annotated_papers:
|
||||||
paper = contents[0]
|
paper = contents[0]
|
||||||
annotations = contents[1]
|
annotations = contents[1]
|
||||||
|
@ -145,6 +175,11 @@ class ExtractPlugin(PapersPlugin):
|
||||||
NoteEvent(paper.citekey).send()
|
NoteEvent(paper.citekey).send()
|
||||||
|
|
||||||
def _write_new_note(self, notepath, annotations):
|
def _write_new_note(self, notepath, annotations):
|
||||||
|
"""Create a new note containing the annotations.
|
||||||
|
|
||||||
|
Will create a new note in the notes folder of pubs
|
||||||
|
and fill it with the annotations extracted from pdf.
|
||||||
|
"""
|
||||||
output = "# Annotations\n\n"
|
output = "# Annotations\n\n"
|
||||||
for annotation in annotations:
|
for annotation in annotations:
|
||||||
output += f"> {annotation}\n\n"
|
output += f"> {annotation}\n\n"
|
||||||
|
|
Loading…
Reference in a new issue