Add CI
Added static linting and type checking for each commit, as well as pytest testing for each master push. Squashed commits: Add linting and static analysis CI; Use deprecated pipeline key for server woodpecker version; Format black; Install poetry environment before pyright; Add pytest CI; Update dependencies, Fix black; Ensure tests run on master branch.
This commit is contained in:
parent
c6b95a4742
commit
a4f48bb24e
8 changed files with 157 additions and 35 deletions
16
.woodpecker/lint.yml
Normal file
16
.woodpecker/lint.yml
Normal file
|
@ -0,0 +1,16 @@
|
|||
pipeline:
|
||||
lint_ruff:
|
||||
image: python
|
||||
commands:
|
||||
- pip install ruff
|
||||
- python --version && poetry --version && ruff --version
|
||||
- echo "----------------- running ruff lint ------------------"
|
||||
- ruff check .
|
||||
|
||||
lint_black:
|
||||
image: python
|
||||
commands:
|
||||
- pip install black
|
||||
- python --version && poetry --version && black --version
|
||||
- echo "----------------- running black lint ----------------"
|
||||
- black --check .
|
9
.woodpecker/static_analysis.yml
Normal file
9
.woodpecker/static_analysis.yml
Normal file
|
@ -0,0 +1,9 @@
|
|||
pipeline:
|
||||
pyright:
|
||||
image: ghcr.io/withlogicco/poetry:1.5.1
|
||||
commands:
|
||||
- pip install pyright
|
||||
- poetry install
|
||||
- python --version && poetry --version && pyright --version
|
||||
- echo "------------- running pyright typecheck -------------"
|
||||
- poetry run pyright
|
10
.woodpecker/test.yml
Normal file
10
.woodpecker/test.yml
Normal file
|
@ -0,0 +1,10 @@
|
|||
branches: master
|
||||
|
||||
pipeline:
|
||||
pytest:
|
||||
image: ghcr.io/withlogicco/poetry:1.5.1
|
||||
commands:
|
||||
- poetry install
|
||||
- python --version && poetry --version
|
||||
- echo "------------- running pytest -------------"
|
||||
- poetry run pytest
|
21
README.md
21
README.md
|
@ -173,12 +173,22 @@ Known issues to be fixed:
|
|||
- [x] if both content and text are empty, do not extract an annotation
|
||||
- [x] Speed?
|
||||
- should be fine, on my machine (old i5 laptop) it takes around 90s for ~1000 documents with ~4000 annotations
|
||||
- [ ] ensure all cmdline options do what they should
|
||||
- [x] ensure all cmdline options do what they should
|
||||
- [ ] annotations carry over color object from fitz, should just be Color object or simple tuple with rgb vals
|
||||
- [ ] docstrings, docstrings!
|
||||
- [ ] testing testing testing!!
|
||||
- [ ] refactor into some better abstractions (e.g. Exporter Protocol -> stdout/markdown implementations; Extractor Protocol -> PDF implementation)
|
||||
|
||||
features to be implemented:
|
||||
|
||||
- [ ] CICD
|
||||
- [x] static analysis (lint, typecheck etc) on pushes
|
||||
- [ ] test pipeline on master pushes
|
||||
- [ ] release pipeline to pypi on tags
|
||||
- [ ] on_add hook to extract annotations as files are added
|
||||
- needs upstream help, 'on_add' hook, and pass-through of affected documents
|
||||
- [ ] add page number if available
|
||||
- exists in Annotation, just need to place in output
|
||||
- [ ] show overall amount of extractions at the end
|
||||
- [ ] custom formatting decided by user
|
||||
- [ ] improved default exporters
|
||||
|
@ -186,7 +196,14 @@ features to be implemented:
|
|||
- pretty display on stdout (rich?)
|
||||
- csv/tsv to stdout
|
||||
- table fmt stdout?
|
||||
- [ ] arbitrary color -> name settings not dependent on color name existing
|
||||
- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)})
|
||||
- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note
|
||||
- [ ] `--force` mode where we simply do not drop anything
|
||||
|
||||
upstream changes:
|
||||
|
||||
- [ ] need a hook for adding a document/file
|
||||
- [ ] need hooks to actually pass through information on the thing they worked on (i.e. their document)
|
||||
|
||||
## Issues
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ def start(filename: Path) -> list[Annotation]:
|
|||
|
||||
|
||||
def _tag_from_colorname(colorname: str) -> str:
|
||||
color_mapping: dict[str,str] = getdict("tags", "plugins.extract")
|
||||
color_mapping: dict[str, str] = getdict("tags", "plugins.extract")
|
||||
if not color_mapping:
|
||||
return ""
|
||||
|
||||
|
@ -82,6 +82,7 @@ def _retrieve_annotation_content(
|
|||
# just a highlight without any text
|
||||
return (None, None)
|
||||
|
||||
|
||||
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
|
||||
def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
|
||||
"""Dict getter
|
||||
|
@ -97,13 +98,14 @@ def getdict(key: str, section: Optional[str] = None) -> dict[str, str]:
|
|||
rawvalue = eval(rawvalue)
|
||||
except Exception:
|
||||
raise SyntaxError(
|
||||
"The key '{}' must be a valid Python object: {}"
|
||||
.format(key, rawvalue))
|
||||
"The key '{}' must be a valid Python object: {}".format(key, rawvalue)
|
||||
)
|
||||
else:
|
||||
if not isinstance(rawvalue, dict):
|
||||
raise SyntaxError(
|
||||
"The key '{}' must be a valid Python dict. Got: {} (type {!r})"
|
||||
.format(key, rawvalue, type(rawvalue).__name__))
|
||||
"The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
|
||||
key, rawvalue, type(rawvalue).__name__
|
||||
)
|
||||
)
|
||||
|
||||
return rawvalue
|
||||
|
||||
|
|
111
poetry.lock
generated
111
poetry.lock
generated
|
@ -224,6 +224,17 @@ files = [
|
|||
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.0.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "isbnlib"
|
||||
version = "3.10.14"
|
||||
|
@ -456,6 +467,17 @@ html5 = ["html5lib"]
|
|||
htmlsoup = ["BeautifulSoup4"]
|
||||
source = ["Cython (>=0.29.35)"]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "23.1"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
|
||||
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "papis"
|
||||
version = "0.13"
|
||||
|
@ -504,6 +526,21 @@ files = [
|
|||
{file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.3.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
|
||||
{file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "prompt-toolkit"
|
||||
version = "3.0.39"
|
||||
|
@ -534,36 +571,36 @@ plugins = ["importlib-metadata"]
|
|||
|
||||
[[package]]
|
||||
name = "pymupdf"
|
||||
version = "1.23.1"
|
||||
version = "1.23.2"
|
||||
description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:eb85e8c8d5ceebe078206f779e5cd918919616e865e081fad3993f90159e61c1"},
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3a23857d655180f6147dbc9886f109e627b6654e37ba5088d1b038f6d7861c0"},
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:9216ca0805892b481be9544f3d3f9efbc06258fd26ba95295d540c2fa0ae0fea"},
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:c8173a1b8c8539ee466f74263a7c6bdf1427b8f5dc32c66ccaca22222a89e339"},
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-win32.whl", hash = "sha256:0d709d3d7c9b894ad5400fa5037bf34e82133b3386b5f3f7bb60b16e9666a1bb"},
|
||||
{file = "PyMuPDF-1.23.1-cp310-none-win_amd64.whl", hash = "sha256:11af6e9e86e2a4ad1ecf8085bb64a48323614769188fbf5eaa5a198acef5de39"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:08b93208eaccac85a78b7aa2b3020645824fb97dda2ab4f6d3a07290ac99b078"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ead3422bc17607e8e174e87b9d66639b10f5649fbb55c3cde13d0ba937e9c9f8"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:322ef02cefad4c705461d64a89b33bf95147958277fd422206c4ea32323dc89e"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8ce4dcef3df3c959053a5f35df8dd35c9272cfe5fd3df54039ea3e270a9ef69c"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-win32.whl", hash = "sha256:245ba657c3f8e39a7e2b17ede676f8519031f70f11963cb1f39b76c00aace3cf"},
|
||||
{file = "PyMuPDF-1.23.1-cp311-none-win_amd64.whl", hash = "sha256:55109e3c49f7854e2a471ca92bef5e5f8324841474a3074fb03bee99537d11cd"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:9f48302062e8d3d2440d197379a1ac9da201c78b2f80c414bccf7a0a4109ac16"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:67a442a2235ba82fb4b604e1f2acb023131fcb5be4263d615dfef10319b1d617"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:83db19d9d3b27fc14f66d222623a15b014db88f04682a33f9ed4054c31a65184"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:756f24923bb28a8165f603ebe34ae72f951d9fc29ed7d72d8a3dad48001c19c4"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-win32.whl", hash = "sha256:a69279f4b85db33733eecb4cc42b1b959775b31f61931ae6de2b0d640ac82ad8"},
|
||||
{file = "PyMuPDF-1.23.1-cp38-none-win_amd64.whl", hash = "sha256:89d54dce9ca91f204e7f9cf4f91111cf6feede08f710259682f0f3bd2bc77ba7"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:0d57716ff547a17365eed5f1485fac33b60d4c15776f041503b718834a9a8fe1"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:bdbf28e9ce255b8a221d1015f8a86810f70d1db45b6c4f90033fa8d8c2fb2b11"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:6ac7ee501ed13932878d52bb291ef7abf88e9bceb6c04144bd896cd079b193c0"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:74e20395bbaa5fd5bc815e5459f017edeb668ddf1ddf4f848f05587ec71b1326"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-win32.whl", hash = "sha256:25f554c861039ade6fcdb12cd17c942687e530afdfd337d72820529dc9bd440d"},
|
||||
{file = "PyMuPDF-1.23.1-cp39-none-win_amd64.whl", hash = "sha256:a3b2ce649b5bba352eebb4415529c81ec937595f751cc02079be2b314c5972b5"},
|
||||
{file = "PyMuPDF-1.23.1.tar.gz", hash = "sha256:34cac956024f1a30f5204a1a987d7b2c8d4b2b5df57806b82c2842e3e108786b"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:701aff64dbf0635c2c875b518979b46b935ed4d3b3d2aee1c449e2960831d766"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:949c071b50825cf341f03546e7354cef942c36fcc071a72a0417c035d6ee7e33"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:5209612dfc9038fbbb1a61dc01bd298d5279646d5e1c98cfe80878db3d862a3e"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:f20ba77a61440220bd2e380ceef8a86bf51f97ac9374a8af00aeedea904dad46"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-win32.whl", hash = "sha256:01c45723fbc389fac2ab8150e5ba80c357706ca69a74c29ec1a83a05921c53d1"},
|
||||
{file = "PyMuPDF-1.23.2-cp310-none-win_amd64.whl", hash = "sha256:ff799db717d5b0e423bd81fbae8131cf3463a80a642524a96952f6f3deaf2a95"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:1f372bcc70b888f0c953add8b15627efb9f3cc2c7b8ad0916560b6081093932c"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:6389eb4bfc27264a951497847089e5e4485f6609c351ac321071d62881a21982"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:0fc5f600b3a72c29a0944cbcbc1375962ad669023265c50cd1d8f794d7ae95f7"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:4fc4a6183a7a8006b83476fc0b26d6fb849996050e8c3c911b9d6a66fe6ccc1e"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-win32.whl", hash = "sha256:66f94d35fd48e2b5cbe70a4601f036f76cb826318b893994ab7bd4186a65e78f"},
|
||||
{file = "PyMuPDF-1.23.2-cp311-none-win_amd64.whl", hash = "sha256:1ec04285451231c68a024657b75d59a43ce0dcdade582edf3a9cc1d86c75b826"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:40a713ea439548cf3c6bd910dc904cb868eae9d7bc1c2d0aebc04c84431822af"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:f0d57e40bdbf6c6deacf94387d3aad918535d8723aa6e3a27e4bef1f3d52158a"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:2de9da709e14a0b32ca1ed7e268615189a8c1e76a26920dd45a92d9f0e207d1f"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:7edc4b4542041a28f5644c09c1e670215ae014adc28a81d32786db73077d4cf3"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-win32.whl", hash = "sha256:18f19be85f277a36536277f3f4991a2d1d1b9c2d0c3a515925e9bef41780efe0"},
|
||||
{file = "PyMuPDF-1.23.2-cp38-none-win_amd64.whl", hash = "sha256:a98cf7bb1ba8d64de78f443005c0f60c0c9644f73b3ebd57cbd20e232e2e5a30"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:ac236156688627bca0a8062bb4153f77108f072dd4a06a80626fd089c2879e04"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:c1a08531194d038e068641be92fdc31276efbee2b718a8dc4281dc593f1a99e7"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:78b6c87fd375d1b017c63a426432be7ee4859f2142108b9c5dc8283599c112eb"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:41d9cd45bb61cda890d446baeeded454fb4404086cf7f7e385e440123e9ecb56"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-win32.whl", hash = "sha256:d34da29cd4305c4b85ea57528c1a31cfc6abfac7921d27153e633470e9dac104"},
|
||||
{file = "PyMuPDF-1.23.2-cp39-none-win_amd64.whl", hash = "sha256:86127075227f868a6b115eb96a74405539dde90168cd1a98781b0f1f6d4f9d7c"},
|
||||
{file = "PyMuPDF-1.23.2.tar.gz", hash = "sha256:32302d0eb0e28d60ba305f5d74702fb0fab2ed9d9f6b3a9d853429e5023bc6bb"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
@ -598,6 +635,26 @@ files = [
|
|||
[package.extras]
|
||||
diagrams = ["jinja2", "railroad-diagrams"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.4.0"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"},
|
||||
{file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
iniconfig = "*"
|
||||
packaging = "*"
|
||||
pluggy = ">=0.12,<2.0"
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "python-doi"
|
||||
version = "0.2.0"
|
||||
|
@ -923,4 +980,4 @@ files = [
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "2e158747df6100e105c98494d8b5f4b23b7076ae76295ce7a28facf02488ebd5"
|
||||
content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f"
|
||||
|
|
|
@ -18,6 +18,10 @@ python-magic = "^0.4.27"
|
|||
[tool.poetry.plugins."papis.command"]
|
||||
extract = "papis_extract:main"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
7
tests/test_annotation.py
Normal file
7
tests/test_annotation.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
from papis_extract.annotation_data import Annotation
|
||||
|
||||
|
||||
def test_matches_colorname_exact():
|
||||
sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)})
|
||||
c_name = sut.colorname
|
||||
assert c_name == "red"
|
Loading…
Reference in a new issue