Compare commits
14 commits
Author | SHA1 | Date | |
---|---|---|---|
42e05b848d | |||
e05560a0b8 | |||
676b42eec9 | |||
0b11381069 | |||
db9294d8f2 | |||
dcfd306451 | |||
836079cecb | |||
7684a0370a | |||
dcf367b385 | |||
76737b3467 | |||
becc529626 | |||
2c5d096d08 | |||
0634cbb381 | |||
e1a7d9f613 |
9 changed files with 377 additions and 72 deletions
70
.woodpecker.yml
Normal file
70
.woodpecker.yml
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
branches: main
|
||||||
|
|
||||||
|
pipeline:
|
||||||
|
code_lint:
  image: python
  commands:
    - pip install poetry
    - poetry install
    - pip install black
    - echo "----------------- running lint ------------------"
    - python --version && poetry --version && black --version
    # --check makes black exit non-zero when files would be reformatted.
    # Without it black rewrites files inside the throwaway CI container
    # and the step always succeeds, so it never catches unformatted code.
    - poetry run black --check .
|
||||||
|
|
||||||
|
build_dist:
|
||||||
|
image: python
|
||||||
|
commands:
|
||||||
|
- pip install poetry
|
||||||
|
- poetry install
|
||||||
|
- echo "----------------- running analysis ------------------"
|
||||||
|
- python --version && poetry --version
|
||||||
|
- poetry build
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
tag: v*
|
||||||
|
|
||||||
|
release_prep:
|
||||||
|
image: python
|
||||||
|
commands:
|
||||||
|
- echo "----------------- preparing release ------------------"
|
||||||
|
- python tools/extract-changelog.py
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
tag: v*
|
||||||
|
|
||||||
|
pypi_release:
|
||||||
|
image: python
|
||||||
|
commands:
|
||||||
|
- pip install poetry
|
||||||
|
- poetry install
|
||||||
|
- echo "----------------- publishing to pypi ------------------"
|
||||||
|
- poetry publish --username "$PYPI_USERNAME" --password "$PYPI_PASSWORD"
|
||||||
|
secrets: [ pypi_username, pypi_password ]
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
tag: v*
|
||||||
|
|
||||||
|
gitea_release:
|
||||||
|
image: plugins/gitea-release
|
||||||
|
settings:
|
||||||
|
api_key:
|
||||||
|
from_secret: gitea_release_token
|
||||||
|
base_url: https://git.martyoeh.me
|
||||||
|
files: dist/*
|
||||||
|
title: NEWEST_VERSION.md
|
||||||
|
note: NEWEST_CHANGES.md
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
tag: v*
|
||||||
|
|
||||||
|
github_release:
|
||||||
|
image: plugins/github-release
|
||||||
|
settings:
|
||||||
|
api_key:
|
||||||
|
from_secret: github_release_token
|
||||||
|
files: dist/*
|
||||||
|
title: NEWEST_VERSION.md
|
||||||
|
note: NEWEST_CHANGES.md
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
tag: v*
|
35
CHANGELOG.md
Normal file
35
CHANGELOG.md
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project tries to adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
<!--
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
-->
|
||||||
|
|
||||||
|
## [0.2.0] - 2023-01-13
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
* Added pypi release publication
|
||||||
|
* Try not to duplicate extracted annotations in existing notes
|
||||||
|
* Map annotation colors to custom tags in notes
|
||||||
|
* Add querying for publications to command (like list command)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
* Grab annotations even if their content is empty or contains custom text
|
||||||
|
|
||||||
|
## [0.1.0] - 2022-12-25
|
||||||
|
|
||||||
|
* Extract highlights and annotations from a pubs doc file
|
||||||
|
* Optionally run automatically whenever file is added to pubs
|
||||||
|
* Optionally write annotations to pubs note file
|
165
LICENSE
Normal file
165
LICENSE
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
|
Version 3, 29 June 2007
|
||||||
|
|
||||||
|
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
|
||||||
|
This version of the GNU Lesser General Public License incorporates
|
||||||
|
the terms and conditions of version 3 of the GNU General Public
|
||||||
|
License, supplemented by the additional permissions listed below.
|
||||||
|
|
||||||
|
0. Additional Definitions.
|
||||||
|
|
||||||
|
As used herein, "this License" refers to version 3 of the GNU Lesser
|
||||||
|
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
||||||
|
General Public License.
|
||||||
|
|
||||||
|
"The Library" refers to a covered work governed by this License,
|
||||||
|
other than an Application or a Combined Work as defined below.
|
||||||
|
|
||||||
|
An "Application" is any work that makes use of an interface provided
|
||||||
|
by the Library, but which is not otherwise based on the Library.
|
||||||
|
Defining a subclass of a class defined by the Library is deemed a mode
|
||||||
|
of using an interface provided by the Library.
|
||||||
|
|
||||||
|
A "Combined Work" is a work produced by combining or linking an
|
||||||
|
Application with the Library. The particular version of the Library
|
||||||
|
with which the Combined Work was made is also called the "Linked
|
||||||
|
Version".
|
||||||
|
|
||||||
|
The "Minimal Corresponding Source" for a Combined Work means the
|
||||||
|
Corresponding Source for the Combined Work, excluding any source code
|
||||||
|
for portions of the Combined Work that, considered in isolation, are
|
||||||
|
based on the Application, and not on the Linked Version.
|
||||||
|
|
||||||
|
The "Corresponding Application Code" for a Combined Work means the
|
||||||
|
object code and/or source code for the Application, including any data
|
||||||
|
and utility programs needed for reproducing the Combined Work from the
|
||||||
|
Application, but excluding the System Libraries of the Combined Work.
|
||||||
|
|
||||||
|
1. Exception to Section 3 of the GNU GPL.
|
||||||
|
|
||||||
|
You may convey a covered work under sections 3 and 4 of this License
|
||||||
|
without being bound by section 3 of the GNU GPL.
|
||||||
|
|
||||||
|
2. Conveying Modified Versions.
|
||||||
|
|
||||||
|
If you modify a copy of the Library, and, in your modifications, a
|
||||||
|
facility refers to a function or data to be supplied by an Application
|
||||||
|
that uses the facility (other than as an argument passed when the
|
||||||
|
facility is invoked), then you may convey a copy of the modified
|
||||||
|
version:
|
||||||
|
|
||||||
|
a) under this License, provided that you make a good faith effort to
|
||||||
|
ensure that, in the event an Application does not supply the
|
||||||
|
function or data, the facility still operates, and performs
|
||||||
|
whatever part of its purpose remains meaningful, or
|
||||||
|
|
||||||
|
b) under the GNU GPL, with none of the additional permissions of
|
||||||
|
this License applicable to that copy.
|
||||||
|
|
||||||
|
3. Object Code Incorporating Material from Library Header Files.
|
||||||
|
|
||||||
|
The object code form of an Application may incorporate material from
|
||||||
|
a header file that is part of the Library. You may convey such object
|
||||||
|
code under terms of your choice, provided that, if the incorporated
|
||||||
|
material is not limited to numerical parameters, data structure
|
||||||
|
layouts and accessors, or small macros, inline functions and templates
|
||||||
|
(ten or fewer lines in length), you do both of the following:
|
||||||
|
|
||||||
|
a) Give prominent notice with each copy of the object code that the
|
||||||
|
Library is used in it and that the Library and its use are
|
||||||
|
covered by this License.
|
||||||
|
|
||||||
|
b) Accompany the object code with a copy of the GNU GPL and this license
|
||||||
|
document.
|
||||||
|
|
||||||
|
4. Combined Works.
|
||||||
|
|
||||||
|
You may convey a Combined Work under terms of your choice that,
|
||||||
|
taken together, effectively do not restrict modification of the
|
||||||
|
portions of the Library contained in the Combined Work and reverse
|
||||||
|
engineering for debugging such modifications, if you also do each of
|
||||||
|
the following:
|
||||||
|
|
||||||
|
a) Give prominent notice with each copy of the Combined Work that
|
||||||
|
the Library is used in it and that the Library and its use are
|
||||||
|
covered by this License.
|
||||||
|
|
||||||
|
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
||||||
|
document.
|
||||||
|
|
||||||
|
c) For a Combined Work that displays copyright notices during
|
||||||
|
execution, include the copyright notice for the Library among
|
||||||
|
these notices, as well as a reference directing the user to the
|
||||||
|
copies of the GNU GPL and this license document.
|
||||||
|
|
||||||
|
d) Do one of the following:
|
||||||
|
|
||||||
|
0) Convey the Minimal Corresponding Source under the terms of this
|
||||||
|
License, and the Corresponding Application Code in a form
|
||||||
|
suitable for, and under terms that permit, the user to
|
||||||
|
recombine or relink the Application with a modified version of
|
||||||
|
the Linked Version to produce a modified Combined Work, in the
|
||||||
|
manner specified by section 6 of the GNU GPL for conveying
|
||||||
|
Corresponding Source.
|
||||||
|
|
||||||
|
1) Use a suitable shared library mechanism for linking with the
|
||||||
|
Library. A suitable mechanism is one that (a) uses at run time
|
||||||
|
a copy of the Library already present on the user's computer
|
||||||
|
system, and (b) will operate properly with a modified version
|
||||||
|
of the Library that is interface-compatible with the Linked
|
||||||
|
Version.
|
||||||
|
|
||||||
|
e) Provide Installation Information, but only if you would otherwise
|
||||||
|
be required to provide such information under section 6 of the
|
||||||
|
GNU GPL, and only to the extent that such information is
|
||||||
|
necessary to install and execute a modified version of the
|
||||||
|
Combined Work produced by recombining or relinking the
|
||||||
|
Application with a modified version of the Linked Version. (If
|
||||||
|
you use option 4d0, the Installation Information must accompany
|
||||||
|
the Minimal Corresponding Source and Corresponding Application
|
||||||
|
Code. If you use option 4d1, you must provide the Installation
|
||||||
|
Information in the manner specified by section 6 of the GNU GPL
|
||||||
|
for conveying Corresponding Source.)
|
||||||
|
|
||||||
|
5. Combined Libraries.
|
||||||
|
|
||||||
|
You may place library facilities that are a work based on the
|
||||||
|
Library side by side in a single library together with other library
|
||||||
|
facilities that are not Applications and are not covered by this
|
||||||
|
License, and convey such a combined library under terms of your
|
||||||
|
choice, if you do both of the following:
|
||||||
|
|
||||||
|
a) Accompany the combined library with a copy of the same work based
|
||||||
|
on the Library, uncombined with any other library facilities,
|
||||||
|
conveyed under the terms of this License.
|
||||||
|
|
||||||
|
b) Give prominent notice with the combined library that part of it
|
||||||
|
is a work based on the Library, and explaining where to find the
|
||||||
|
accompanying uncombined form of the same work.
|
||||||
|
|
||||||
|
6. Revised Versions of the GNU Lesser General Public License.
|
||||||
|
|
||||||
|
The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the GNU Lesser General Public License from time to time. Such new
|
||||||
|
versions will be similar in spirit to the present version, but may
|
||||||
|
differ in detail to address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the
|
||||||
|
Library as you received it specifies that a certain numbered version
|
||||||
|
of the GNU Lesser General Public License "or any later version"
|
||||||
|
applies to it, you have the option of following the terms and
|
||||||
|
conditions either of that published version or of any later version
|
||||||
|
published by the Free Software Foundation. If the Library as you
|
||||||
|
received it does not specify a version number of the GNU Lesser
|
||||||
|
General Public License, you may choose any version of the GNU Lesser
|
||||||
|
General Public License ever published by the Free Software Foundation.
|
||||||
|
|
||||||
|
If the Library as you received it specifies that a proxy can decide
|
||||||
|
whether future versions of the GNU Lesser General Public License shall
|
||||||
|
apply, that proxy's public statement of acceptance of any version is
|
||||||
|
permanent authorization for you to choose that version for the
|
||||||
|
Library.
|
103
README.md
103
README.md
|
@ -1,14 +1,30 @@
|
||||||
# pubs-extract
|
# pubs-extract
|
||||||
|
|
||||||
|
[![status-badge](https://ci.martyoeh.me/api/badges/Marty/pubs-extract/status.svg)](https://ci.martyoeh.me/Marty/pubs-extract)
|
||||||
|
![PyPI](https://img.shields.io/pypi/v/pubs-extract)
|
||||||
|
|
||||||
Quickly extract annotations from your pdf files with the help of the pubs bibliography manager.
|
Quickly extract annotations from your pdf files with the help of the pubs bibliography manager.
|
||||||
|
Easily organize your highlights and thoughts next to your documents.
|
||||||
|
|
||||||
## Installation:
|
## Installation:
|
||||||
|
|
||||||
Still a bit painful since I have not set up any package management:
|
You can install from pypi with `pip install pubs-extract`.
|
||||||
|
|
||||||
Put `extract` folder in your pubs `plugs` directory.
|
Or you can install manually by moving the `extract` directory into your pubs `plugs` directory,
|
||||||
|
so that the hierarchy is `pubs/plugs/extract/`
|
||||||
|
|
||||||
Then add `extract` to your plugin list in the pubs configuration file.
|
Then add `extract` to your plugin list in the pubs configuration file:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[plugins]
|
||||||
|
active = extract
|
||||||
|
```
|
||||||
|
|
||||||
|
To check if everything is working you can do `pubs --help` which should show you the new extract command.
|
||||||
|
You will be set up with the default options but if you want to change anything, read on in configuration below.
|
||||||
|
|
||||||
|
> **Note**
|
||||||
|
> This plugin is in fairly early development. It does what I need it to do, but if you have a meticulously organized library *please* make backups before doing any operation on your notes, or make use of the pubs-included git plugin.
|
||||||
|
|
||||||
## Configuration:
|
## Configuration:
|
||||||
|
|
||||||
|
@ -110,19 +126,20 @@ the annotations. If a note already exists for any of the entries, it will instea
|
||||||
the annotations to the end of it, dropping all those that it already finds in the note
|
the annotations to the end of it, dropping all those that it already finds in the note
|
||||||
(essentially only adding new annotations to the end).
|
(essentially only adding new annotations to the end).
|
||||||
|
|
||||||
**PLEASE** Be aware that so far, I spent a single afternoon coding this plugin, it
|
**PLEASE** Heed the note above and exercise caution with the `-w` option. It is
|
||||||
contains no tests and operates on your notes. In my use nothing too bad happened but
|
not intended to be destructive, but nevertheless create backups or version
|
||||||
only use it with adequate backup in place, or with your library being version controlled.
|
control your files.
|
||||||
|
|
||||||
You can invoke the command with `-e` to instantly edit the notes:
|
You can invoke the command with `-e` to instantly edit the notes in your editor:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pubs extract -w -e Bayat2015 Peck2004
|
pubs extract -w -e Bayat2015 Peck2004
|
||||||
```
|
```
|
||||||
|
|
||||||
Will create/append annotations and drop you into the Bayat2015 note, when you close it
|
Will create/append annotations and drop you into the Bayat2015 note, when you
|
||||||
directly into the Peck2004 note. Take care that it will be fairly annoying if you use this
|
close it directly into the Peck2004 note. Take care that it will be fairly
|
||||||
option with hundreds of entries being annotated.
|
annoying if you use this option with hundreds of entries being annotated as it
|
||||||
|
will open one entry after another for editing.
|
||||||
|
|
||||||
To extract the annotations for all your existing entries in one go, you can use:
|
To extract the annotations for all your existing entries in one go, you can use:
|
||||||
|
|
||||||
|
@ -141,7 +158,7 @@ Pull requests tackling one of these areas of course very welcome.
|
||||||
|
|
||||||
## Issues
|
## Issues
|
||||||
|
|
||||||
A note on the extraction. Highlights in pdfs are somewhat difficult to parse
|
A note on the extraction: Highlights in pdfs can be somewhat difficult to parse
|
||||||
(as are most things in them). Sometimes they contain the selected text that is written on the
|
(as are most things in them). Sometimes they contain the selected text that is written on the
|
||||||
page, sometimes they contain the annotator's thoughts as a note, sometimes they contain nothing.
|
page, sometimes they contain the annotator's thoughts as a note, sometimes they contain nothing.
|
||||||
This plugin makes an effort to find the right combination and extract the written words,
|
This plugin makes an effort to find the right combination and extract the written words,
|
||||||
|
@ -149,60 +166,18 @@ as well as any additional notes made - but things *will* slip through or extract
|
||||||
and again.
|
and again.
|
||||||
|
|
||||||
The easiest extraction is provided if your program writes the selection itself into the highlight
|
The easiest extraction is provided if your program writes the selection itself into the highlight
|
||||||
content, because then we can just use that. It is harder to parse if it does not.
|
content, because then we can just use that. It is harder to parse if it does not and will sometimes
|
||||||
|
get additional words in front or behind (especially if the highlight ends in the middle of a line)
|
||||||
|
or even cut a few off.
|
||||||
|
|
||||||
## Roadmap:
|
I am not sure if there is much I can do about this.
|
||||||
|
|
||||||
- [x] extracts highlights and annotations from a doc file (e.g. using PyMuPDF)
|
---
|
||||||
- [x] puts those in the annotation file of a doc in a customizable format
|
|
||||||
- [x] option to have it automatically run after a file is added?
|
|
||||||
- option to have it run whenever a pdf in the library was updated?
|
|
||||||
- [ ] needs some way to delimit where it puts stuff and user stuff is in note
|
|
||||||
- [ ] one way is to have it look at `> [17] here be extracted annotation from page seventeen` annotations and put it in between
|
|
||||||
- [x] another, probably simpler first, is to just append missing annotations to the end of the note
|
|
||||||
- [ ] use similarity search instead of literal search for existing annotation (levenshtein)
|
|
||||||
- [x] some highlights (or annotations in general) do not contain text as content
|
|
||||||
- [x] pymupdf can extract the content of the underlying rectangle (mostly)
|
|
||||||
- [x] issue is that sometimes the highlight contents are in content, sometimes a user comment instead
|
|
||||||
- [x] we could have a comparison function which estimates how 'close' the two text snippets are and act accordingly -> using levenshtein distance
|
|
||||||
- [x] config option to map colors in annotations to meaning ('read', 'important', 'extra') in pubs
|
|
||||||
- [x] colors are given in very exact 0.6509979 RGB values, meaning we could once again estimate if a color is 'close enough' in distance to tag it accordingly -> using euclidian distance
|
|
||||||
- [ ] support custom colors by setting a float tuple in configuration
|
|
||||||
- [x] make invoking the command run a query if corresponding option provided (or whatever) in pubs syntax and use resulting papers
|
|
||||||
- [x] confirm for many papers?
|
|
||||||
- [ ] warning when the amount of annotations in file is different than the amount extracted?
|
|
||||||
- [ ] tests tests tests tests tests, lah-di-dah
|
|
||||||
|
|
||||||
## Things that would also be nice in pubs in general and don't really belong in this repository
|
If you spot a bug or have an idea feel free to open an issue.\
|
||||||
|
I might be slow to respond but will consider them all!
|
||||||
|
|
||||||
- `show` command which simply displays given entry in a nice way
|
Pull requests are warmly welcomed.\
|
||||||
- could take multiple entries but present them all in the same larger way
|
If they are big changes or additions let's talk about them in an issue first.
|
||||||
- a metadata command which shows the metadata connected to an entry (e.g. `show --meta`)
|
|
||||||
- XDG compliance
|
Thanks for using my software ❤️
|
||||||
- a way to insert env vars into the configuration paths
|
|
||||||
- looking in XDG_CONFIG_HOME and XDG_DATA_HOME by default
|
|
||||||
- accepting env vars for overriding the directories
|
|
||||||
- isbn import re-enabled with -> `api.paperpile.com/api/public/convert`
|
|
||||||
- example request: `curl -X POST -d '{"fromIds":true,"input":"9780816530441","targetFormat":"Bibtex"}' -H "Content-Type: application/json" https://api.paperpile.com/api/public/convert`
|
|
||||||
- example reponse: `{"output":"@BOOK{Igoe2017-cu,\n title = \"The nature of spectacle\",\n author = \"Igoe, James\",\n publisher = \"University of Arizona Press\",\n series = \"Critical Green Engagements: Investigating the Green Economy and\n its Alternatives\",\n month = jun,\n year = 2017,\n address = \"Tucson, AZ\",\n language = \"en\"\n}\n","token":"3ca6b666-2b9d-4962-8017-a0c8f1f86bfd","tags":[],"withErrors":false}`
|
|
||||||
- side-by-side command to open annotation file and document at the same time
|
|
||||||
- fzf-mode/bemenu mode to look through documents
|
|
||||||
- batch-edit? a way to quickly modify items matching a query, e.g. removing file entry for all those from year:2022 or whatever
|
|
||||||
- link related items
|
|
||||||
- a special tag?
|
|
||||||
- building relationships: two-way (related, e.g. same working paper), or single direction, e.g. a re-print, a compendium, etc
|
|
||||||
- should still always be traceable from both sides
|
|
||||||
- automatically keeping a main bibtex file up-to-date
|
|
||||||
- can be done through the `export` command, e.g. as a git hook when the repo is updated
|
|
||||||
- better git commit names for git plugin
|
|
||||||
- more direct linking to individual annotations
|
|
||||||
- e.g. you have an annotation on page 17, allow opening that page from there and vice versa
|
|
||||||
- can use e.g. existing markdown quote pattern:
|
|
||||||
> [17] To be or not to be blabla
|
|
||||||
which would then open page 17 in the document
|
|
||||||
- makes most sense as plugin probably (which also allows setting the pattern by which it finds citations in the notes)
|
|
||||||
- fuzzy matching
|
|
||||||
- either by default, as a config setting or with the ~prefix
|
|
||||||
- why are we doing tags in metadata not in the bibtex files?
|
|
||||||
- default replacement bibkey for files which are missing part of what makes it up
|
|
||||||
- e.g. if you use {authorname}{year} as bibkey, a file missing author would substitute with this
|
|
||||||
|
|
|
@ -87,7 +87,8 @@ class Annotation:
|
||||||
def colorname(self):
|
def colorname(self):
|
||||||
"""Return the stringified version of the annotation color.
|
"""Return the stringified version of the annotation color.
|
||||||
|
|
||||||
Finds the closest named color to the annotation and returns it.
|
Finds the closest named color to the annotation and returns it,
|
||||||
|
using Euclidean distance between the two color vectors.
|
||||||
"""
|
"""
|
||||||
annot_colors = (
|
annot_colors = (
|
||||||
self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
|
self.colors.get("stroke") or self.colors.get("fill") or (0.0, 0.0, 0.0)
|
|
@ -225,7 +225,8 @@ class ExtractPlugin(PapersPlugin):
|
||||||
that is only the written words, sometimes that is only
|
that is only the written words, sometimes that is only
|
||||||
annotation notes, sometimes it is both. Runs a similarity
|
annotation notes, sometimes it is both. Runs a similarity
|
||||||
comparison between strings to find out whether they
|
comparison between strings to find out whether they
|
||||||
should both be included or are doubling up.
|
should both be included or are doubling up, using
|
||||||
|
Levenshtein distance.
|
||||||
"""
|
"""
|
||||||
content = annotation.info["content"].replace("\n", " ")
|
content = annotation.info["content"].replace("\n", " ")
|
||||||
written = page.get_textbox(annotation.rect).replace("\n", " ")
|
written = page.get_textbox(annotation.rect).replace("\n", " ")
|
||||||
|
@ -250,9 +251,7 @@ class ExtractPlugin(PapersPlugin):
|
||||||
"""
|
"""
|
||||||
output = ""
|
output = ""
|
||||||
for paper in annotated_papers:
|
for paper in annotated_papers:
|
||||||
output += (
|
output += f"\n------ {paper.headline(self.short_header, self.max_authors)} ------\n\n"
|
||||||
f"\n------ {paper.headline(self.short_header, self.max_authors)} ------\n\n"
|
|
||||||
)
|
|
||||||
for annotation in paper.annotations:
|
for annotation in paper.annotations:
|
||||||
output += f"{annotation.format(self.formatting)}\n"
|
output += f"{annotation.format(self.formatting)}\n"
|
||||||
output += "\n"
|
output += "\n"
|
||||||
|
@ -271,7 +270,11 @@ class ExtractPlugin(PapersPlugin):
|
||||||
if check_file(notepath, fail=False):
|
if check_file(notepath, fail=False):
|
||||||
self._append_to_note(notepath, paper)
|
self._append_to_note(notepath, paper)
|
||||||
else:
|
else:
|
||||||
self._write_new_note(notepath, paper, paper.headline(short=True, max_authors=self.max_authors))
|
self._write_new_note(
|
||||||
|
notepath,
|
||||||
|
paper,
|
||||||
|
paper.headline(short=True, max_authors=self.max_authors),
|
||||||
|
)
|
||||||
self.ui.info(f"Wrote annotations to {paper.citekey} note {notepath}.")
|
self.ui.info(f"Wrote annotations to {paper.citekey} note {notepath}.")
|
||||||
|
|
||||||
if edit is True:
|
if edit is True:
|
|
@ -1,11 +1,11 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "pubs-extract"
|
name = "pubs-extract"
|
||||||
version = "0.1.0"
|
version = "0.2.0"
|
||||||
description = "A pdf annotation extraction plugin for pubs bibliography manager"
|
description = "A pdf annotation extraction plugin for pubs bibliography manager"
|
||||||
authors = ["Marty Oehme <marty.oehme@gmail.com>"]
|
authors = ["Marty Oehme <marty.oehme@gmail.com>"]
|
||||||
license = "LGPL-3.0"
|
license = "LGPL-3.0"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
packages = [{include = "extract"}]
|
packages = [{include = "pubs"}]
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
|
|
56
tools/extract-changelog.py
Normal file
56
tools/extract-changelog.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
## Extracts the version and newest changes from a semantic changelog.
|
||||||
|
#
|
||||||
|
# Important, it only works with three-parted version numbers
|
||||||
|
# à la 1.2.3 or 313.01.1888 -- the heading must match \d+\.\d+\.\d+ to work.
|
||||||
|
#
|
||||||
|
# The version number and changeset will be put in `NEWEST_VERSION.md`
|
||||||
|
# and `NEWEST_CHANGES.md` respectively, for further use in releases.
|
||||||
|
OUTPUT_FILE_VERSION = "NEWEST_VERSION.md"
|
||||||
|
OUTPUT_FILE_CHANGES = "NEWEST_CHANGES.md"
|
||||||
|
|
||||||
|
|
||||||
|
def getVersion(file):
    """Return the newest version number found in a changelog.

    Scans ``file`` (any iterable of text lines) from the top and returns
    the version of the first heading of the form ``## [1.2.3]``. Only
    three-part, purely numeric versions are recognized, so headings such
    as ``## [Unreleased]`` are skipped.

    Returns:
        The version string without brackets (e.g. ``"0.2.0"``), or
        ``None`` when no matching heading exists.
    """
    for line in file:
        m = re.match(r"^## \[(\d+\.\d+\.\d+)\]", line)
        # On a match, group 1 always holds at least "d.d.d", so no extra
        # truthiness check on the group is needed.
        if m:
            return m.group(1)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def getSection(file):
    """Yield the lines belonging to the newest release in a changelog.

    Scans ``file`` (any iterable of text lines) for the first heading of
    the form ``## [1.2.3]`` and yields every following non-blank line
    until the next such heading or the end of the input. The release
    headings themselves are not yielded; sub-headings such as
    ``### Added`` are.

    Yields:
        The section's lines, unchanged (including their line endings).
        Nothing is yielded when no release heading is found.
    """
    version_heading = re.compile(r"^## \[\d+\.\d+\.\d+\]")
    in_section = False
    for line in file:
        if version_heading.match(line):
            if in_section:
                # A second release heading marks the end of the newest section.
                return
            in_section = True
        elif in_section and line not in ("", "\n"):
            # Blank lines are dropped so the extracted notes stay compact.
            yield line
|
||||||
|
|
||||||
|
|
||||||
|
def toFile(fname, content):
    """Write ``content`` to the file ``fname``, replacing what was there.

    Args:
        fname: Path of the file to create or truncate.
        content: The text to write.

    The file is opened in a ``with`` block so the handle is closed even
    when writing raises (for example when ``content`` is not a string).
    """
    with open(fname, "w") as file:
        file.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
# Step 1: find the newest version number and store it for the release title.
with open("CHANGELOG.md") as file:
    title = getVersion(file)

if title is None:
    # Stop with a readable message; otherwise toFile() would fail with an
    # opaque TypeError when asked to write None.
    raise SystemExit("No version heading like '## [1.2.3]' found in CHANGELOG.md")

print(title)
toFile(OUTPUT_FILE_VERSION, title)

# Step 2: re-read the changelog from the top and collect the lines of the
# newest release section for the release notes.
with open("CHANGELOG.md") as file:
    newest_changes = "".join(getSection(file))

print("[Extracted Changelog]")
print(newest_changes)
toFile(OUTPUT_FILE_CHANGES, newest_changes)
|
Loading…
Reference in a new issue