Add simple docstrings

This commit is contained in:
Marty Oehme 2022-12-22 21:02:01 +01:00
parent 1bb3502709
commit 7b27f3291d
Signed by: Marty
GPG Key ID: 73BA40D5AFAF49C9
1 changed file with 48 additions and 13 deletions

View File

@@ -1,9 +1,6 @@
import os import os
import argparse import argparse
# from subprocess import Popen, PIPE, STDOUT
# from pipes import quote as shell_quote
import fitz import fitz
from pubs.plugins import PapersPlugin from pubs.plugins import PapersPlugin
@@ -15,13 +12,16 @@ from pubs.content import check_file, read_text_file, write_file
class ExtractPlugin(PapersPlugin): class ExtractPlugin(PapersPlugin):
"""Make the pubs repository also a git repository. """Extract annotations from any pdf document.
The git plugin creates a git repository in the pubs directory The extract plugin allows manual or automatic extraction of all annotations
and commit the changes to the pubs repository. contained in the pdf documents belonging to entries of the pubs library.
It also add the `pubs git` subcommand, so git commands can be executed It can write those annotations to stdout or directly create and update notes
in the git repository from the command line. for the pubs entries.
It adds a `pubs extract` subcommand through which it is invoked, but can
optionally run whenever a new document is imported for a pubs entry.
""" """
name = "extract" name = "extract"
@@ -39,12 +39,9 @@ class ExtractPlugin(PapersPlugin):
# or `:: {annotation} :: {page} ::` # or `:: {annotation} :: {page} ::`
# and so on # and so on
self.onimport = conf["plugins"].get("extract", {}).get("onimport", False) self.onimport = conf["plugins"].get("extract", {}).get("onimport", False)
# self.manual = conf['plugins'].get('git', {}).get('manual', False)
# self.force_color = conf['plugins'].get('git', {}).get('force_color', True)
# self.list_of_changes = []
def update_parser(self, subparsers, conf): def update_parser(self, subparsers, conf):
"""Allow the usage of the pubs git subcommand""" """Allow the usage of the pubs extract subcommand"""
# TODO option for ignoring missing documents or erroring. # TODO option for ignoring missing documents or erroring.
extract_parser = subparsers.add_parser(self.name, help=self.description) extract_parser = subparsers.add_parser(self.name, help=self.description)
extract_parser.add_argument( extract_parser.add_argument(
@@ -69,7 +66,7 @@ class ExtractPlugin(PapersPlugin):
extract_parser.set_defaults(func=self.command) extract_parser.set_defaults(func=self.command)
def command(self, conf, args): def command(self, conf, args):
"""Run the annotation extraction""" """Run the annotation extraction command."""
citekeys = resolve_citekey_list( citekeys = resolve_citekey_list(
self.repository, conf, args.citekeys, ui=self.ui, exit_on_fail=True self.repository, conf, args.citekeys, ui=self.ui, exit_on_fail=True
) )
@@ -83,6 +80,11 @@ class ExtractPlugin(PapersPlugin):
self.repository.close() self.repository.close()
def extract(self, citekeys): def extract(self, citekeys):
"""Extracts annotations from citekeys.
Returns all annotations belonging to the papers that
are described by the citekeys passed in.
"""
papers = self._gather_papers(citekeys) papers = self._gather_papers(citekeys)
papers_annotated = [] papers_annotated = []
for paper in papers: for paper in papers:
@@ -94,18 +96,35 @@ class ExtractPlugin(PapersPlugin):
return papers_annotated return papers_annotated
def _gather_papers(self, citekeys): def _gather_papers(self, citekeys):
"""Get all papers for citekeys.
Returns all Paper objects described by the citekeys
passed in.
"""
papers = [] papers = []
for key in citekeys: for key in citekeys:
papers.append(self.repository.pull_paper(key)) papers.append(self.repository.pull_paper(key))
return papers return papers
def _get_file(self, paper): def _get_file(self, paper):
"""Get path of document belonging to paper.
Returns the real path to the document which belongs
to the paper passed in. Emits a warning if no
document belongs to paper.
"""
path = self.broker.real_docpath(paper.docpath) path = self.broker.real_docpath(paper.docpath)
if not path: if not path:
self.ui.warning(f"{paper.citekey} has no valid document.") self.ui.warning(f"{paper.citekey} has no valid document.")
return path return path
def _get_annotations(self, filename): def _get_annotations(self, filename):
"""Extract annotations from a file.
Returns all readable annotations contained in the file
passed in. Only returns Highlight or Text annotations
currently.
"""
annotations = [] annotations = []
with fitz.Document(filename) as doc: with fitz.Document(filename) as doc:
for page in doc: for page in doc:
@@ -118,6 +137,11 @@ class ExtractPlugin(PapersPlugin):
return annotations return annotations
def _to_stdout(self, annotated_papers): def _to_stdout(self, annotated_papers):
"""Write annotations to stdout.
Simply outputs the gathered annotations over stdout
ready to be passed on through pipelines etc.
"""
output = "" output = ""
for contents in annotated_papers: for contents in annotated_papers:
paper = contents[0] paper = contents[0]
@@ -130,6 +154,12 @@ class ExtractPlugin(PapersPlugin):
print(output) print(output)
def _to_notes(self, annotated_papers, note_extension="txt", edit=False): def _to_notes(self, annotated_papers, note_extension="txt", edit=False):
"""Write annotations into pubs notes.
Permanently writes the given annotations into notes
in the pubs notes directory. Creates new notes for
citekeys missing a note or appends to existing.
"""
for contents in annotated_papers: for contents in annotated_papers:
paper = contents[0] paper = contents[0]
annotations = contents[1] annotations = contents[1]
@@ -145,6 +175,11 @@ class ExtractPlugin(PapersPlugin):
NoteEvent(paper.citekey).send() NoteEvent(paper.citekey).send()
def _write_new_note(self, notepath, annotations): def _write_new_note(self, notepath, annotations):
"""Create a new note containing the annotations.
Will create a new note in the notes folder of pubs
and fill it with the annotations extracted from pdf.
"""
output = "# Annotations\n\n" output = "# Annotations\n\n"
for annotation in annotations: for annotation in annotations:
output += f"> {annotation}\n\n" output += f"> {annotation}\n\n"