From a4f48bb24e4b0bd379a89914aa9e5c560a7274af Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Tue, 29 Aug 2023 09:55:08 +0200 Subject: [PATCH] Add CI Added static linting, type checking for each commit as well as pytest testing for each master push. Add linting and static analysis CI Use deprecated pipeline key for server woodpecker version Format black Install poetry environment before pyright Add pytest CI Update dependencies, Fix black Ensure tests run on master branch --- .woodpecker/lint.yml | 16 +++++ .woodpecker/static_analysis.yml | 9 +++ .woodpecker/test.yml | 10 +++ README.md | 21 +++++- papis_extract/extractor.py | 14 ++-- poetry.lock | 111 ++++++++++++++++++++++++-------- pyproject.toml | 4 ++ tests/test_annotation.py | 7 ++ 8 files changed, 157 insertions(+), 35 deletions(-) create mode 100644 .woodpecker/lint.yml create mode 100644 .woodpecker/static_analysis.yml create mode 100644 .woodpecker/test.yml create mode 100644 tests/test_annotation.py diff --git a/.woodpecker/lint.yml b/.woodpecker/lint.yml new file mode 100644 index 0000000..eb67dbe --- /dev/null +++ b/.woodpecker/lint.yml @@ -0,0 +1,16 @@ +pipeline: + lint_ruff: + image: python + commands: + - pip install ruff + - python --version && poetry --version && ruff --version + - echo "----------------- running ruff lint ------------------" + - ruff check . + + lint_black: + image: python + commands: + - pip install black + - python --version && poetry --version && black --version + - echo "----------------- running black lint ----------------" + - black --check . diff --git a/.woodpecker/static_analysis.yml b/.woodpecker/static_analysis.yml new file mode 100644 index 0000000..3506c5c --- /dev/null +++ b/.woodpecker/static_analysis.yml @@ -0,0 +1,9 @@ +pipeline: + pyright: + image: ghcr.io/withlogicco/poetry:1.5.1 + commands: + - pip install pyright + - poetry install + - python --version && poetry --version && pyright --version + - echo "------------- running pyright typecheck -------------" + - poetry run pyright diff --git a/.woodpecker/test.yml b/.woodpecker/test.yml new file mode 100644 index 0000000..44e264d --- /dev/null +++ b/.woodpecker/test.yml @@ -0,0 +1,10 @@ +branches: master + +pipeline: + pytest: + image: ghcr.io/withlogicco/poetry:1.5.1 + commands: + - poetry install + - python --version && poetry --version + - echo "------------- running pytest -------------" + - poetry run pytest diff --git a/README.md b/README.md index b403962..8aa6b5b 100644 --- a/README.md +++ b/README.md @@ -173,12 +173,22 @@ Known issues to be fixed: - [x] if both content and text are empty, do not extract an annotation - [x] Speed? - should be fine, on my machine (old i5 laptop) it takes around 90s for ~1000 documents with ~4000 annotations -- [ ] ensure all cmdline options do what they should +- [x] ensure all cmdline options do what they should +- [ ] annotations carry over color object from fitz, should just be Color object or simple tuple with rgb vals +- [ ] docstrings, docstrings! +- [ ] testing testing testing!! + - [ ] refactor into some better abstractions (e.g. Exporter Protocol -> stdout/markdown implementations; Extractor Protocol -> PDF implementation) features to be implemented: +- [ ] CICD + - [x] static analysis (lint, typecheck etc) on pushes + - [ ] test pipeline on master pushes + - [ ] release pipeline to pypi on tags - [ ] on_add hook to extract annotations as files are added + - needs upstream help, 'on_add' hook, and pass-through of affected documents - [ ] add page number if available + - exists in Annotation, just need to place in output - [ ] show overall amount of extractions at the end - [ ] custom formatting decided by user - [ ] improved default exporters @@ -186,7 +196,14 @@ features to be implemented: - pretty display on stdout (rich?) - csv/tsv to stdout - table fmt stdout? -- [ ] arbitrary color -> name settings not dependent on color name existing +- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)}) +- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note +- [ ] `--force` mode where we simply do not drop anything + +upstream changes: + +- [ ] need a hook for adding a document/file +- [ ] need hooks to actually pass through information on the thing they worked on (i.e. their document) ## Issues diff --git a/papis_extract/extractor.py b/papis_extract/extractor.py index 9467231..88ff012 100644 --- a/papis_extract/extractor.py +++ b/papis_extract/extractor.py @@ -42,7 +42,7 @@ def start(filename: Path) -> list[Annotation]: def _tag_from_colorname(colorname: str) -> str: - color_mapping: dict[str,str] = getdict("tags", "plugins.extract") + color_mapping: dict[str, str] = getdict("tags", "plugins.extract") if not color_mapping: return "" @@ -82,6 +82,7 @@ def _retrieve_annotation_content( # just a highlight without any text return (None, None) + # mimics the functions in papis.config.{getlist,getint,getfloat} etc. def getdict(key: str, section: Optional[str] = None) -> dict[str, str]: """Dict getter @@ -97,13 +98,14 @@ def getdict(key: str, section: Optional[str] = None) -> dict[str, str]: rawvalue = eval(rawvalue) except Exception: raise SyntaxError( - "The key '{}' must be a valid Python object: {}" - .format(key, rawvalue)) + "The key '{}' must be a valid Python object: {}".format(key, rawvalue) + ) else: if not isinstance(rawvalue, dict): raise SyntaxError( - "The key '{}' must be a valid Python dict. Got: {} (type {!r})" - .format(key, rawvalue, type(rawvalue).__name__)) + "The key '{}' must be a valid Python dict. Got: {} (type {!r})".format( + key, rawvalue, type(rawvalue).__name__ + ) + ) return rawvalue - diff --git a/poetry.lock b/poetry.lock index e225e20..8d6734d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -224,6 +224,17 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "isbnlib" version = "3.10.14" @@ -456,6 +467,17 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=0.29.35)"] +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] + [[package]] name = "papis" version = "0.13" @@ -504,6 +526,21 @@ files = [ {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, ] +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prompt-toolkit" version = "3.0.39" @@ -534,36 +571,36 @@ plugins = ["importlib-metadata"] [[package]] name = "pymupdf" -version = "1.23.1" +version = "1.23.2" description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." optional = false python-versions = ">=3.8" files = [ - {file = "PyMuPDF-1.23.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:eb85e8c8d5ceebe078206f779e5cd918919616e865e081fad3993f90159e61c1"}, - {file = "PyMuPDF-1.23.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3a23857d655180f6147dbc9886f109e627b6654e37ba5088d1b038f6d7861c0"}, - {file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:9216ca0805892b481be9544f3d3f9efbc06258fd26ba95295d540c2fa0ae0fea"}, - {file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:c8173a1b8c8539ee466f74263a7c6bdf1427b8f5dc32c66ccaca22222a89e339"}, - {file = "PyMuPDF-1.23.1-cp310-none-win32.whl", hash = "sha256:0d709d3d7c9b894ad5400fa5037bf34e82133b3386b5f3f7bb60b16e9666a1bb"}, - {file = "PyMuPDF-1.23.1-cp310-none-win_amd64.whl", hash = "sha256:11af6e9e86e2a4ad1ecf8085bb64a48323614769188fbf5eaa5a198acef5de39"}, - {file = "PyMuPDF-1.23.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:08b93208eaccac85a78b7aa2b3020645824fb97dda2ab4f6d3a07290ac99b078"}, - {file = "PyMuPDF-1.23.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ead3422bc17607e8e174e87b9d66639b10f5649fbb55c3cde13d0ba937e9c9f8"}, - {file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:322ef02cefad4c705461d64a89b33bf95147958277fd422206c4ea32323dc89e"}, - {file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8ce4dcef3df3c959053a5f35df8dd35c9272cfe5fd3df54039ea3e270a9ef69c"}, - {file = "PyMuPDF-1.23.1-cp311-none-win32.whl", hash = "sha256:245ba657c3f8e39a7e2b17ede676f8519031f70f11963cb1f39b76c00aace3cf"}, - {file = "PyMuPDF-1.23.1-cp311-none-win_amd64.whl", hash = "sha256:55109e3c49f7854e2a471ca92bef5e5f8324841474a3074fb03bee99537d11cd"}, - {file = "PyMuPDF-1.23.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:9f48302062e8d3d2440d197379a1ac9da201c78b2f80c414bccf7a0a4109ac16"}, - {file = "PyMuPDF-1.23.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:67a442a2235ba82fb4b604e1f2acb023131fcb5be4263d615dfef10319b1d617"}, - {file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:83db19d9d3b27fc14f66d222623a15b014db88f04682a33f9ed4054c31a65184"}, - {file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:756f24923bb28a8165f603ebe34ae72f951d9fc29ed7d72d8a3dad48001c19c4"}, - {file = "PyMuPDF-1.23.1-cp38-none-win32.whl", hash = "sha256:a69279f4b85db33733eecb4cc42b1b959775b31f61931ae6de2b0d640ac82ad8"}, - {file = "PyMuPDF-1.23.1-cp38-none-win_amd64.whl", hash = "sha256:89d54dce9ca91f204e7f9cf4f91111cf6feede08f710259682f0f3bd2bc77ba7"}, - {file = "PyMuPDF-1.23.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:0d57716ff547a17365eed5f1485fac33b60d4c15776f041503b718834a9a8fe1"}, - {file = "PyMuPDF-1.23.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:bdbf28e9ce255b8a221d1015f8a86810f70d1db45b6c4f90033fa8d8c2fb2b11"}, - {file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:6ac7ee501ed13932878d52bb291ef7abf88e9bceb6c04144bd896cd079b193c0"}, - {file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:74e20395bbaa5fd5bc815e5459f017edeb668ddf1ddf4f848f05587ec71b1326"}, - {file = "PyMuPDF-1.23.1-cp39-none-win32.whl", hash = "sha256:25f554c861039ade6fcdb12cd17c942687e530afdfd337d72820529dc9bd440d"}, - {file = "PyMuPDF-1.23.1-cp39-none-win_amd64.whl", hash = "sha256:a3b2ce649b5bba352eebb4415529c81ec937595f751cc02079be2b314c5972b5"}, - {file = "PyMuPDF-1.23.1.tar.gz", hash = "sha256:34cac956024f1a30f5204a1a987d7b2c8d4b2b5df57806b82c2842e3e108786b"}, + {file = "PyMuPDF-1.23.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:701aff64dbf0635c2c875b518979b46b935ed4d3b3d2aee1c449e2960831d766"}, + {file = "PyMuPDF-1.23.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:949c071b50825cf341f03546e7354cef942c36fcc071a72a0417c035d6ee7e33"}, + {file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:5209612dfc9038fbbb1a61dc01bd298d5279646d5e1c98cfe80878db3d862a3e"}, + {file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:f20ba77a61440220bd2e380ceef8a86bf51f97ac9374a8af00aeedea904dad46"}, + {file = "PyMuPDF-1.23.2-cp310-none-win32.whl", hash = "sha256:01c45723fbc389fac2ab8150e5ba80c357706ca69a74c29ec1a83a05921c53d1"}, + {file = "PyMuPDF-1.23.2-cp310-none-win_amd64.whl", hash = "sha256:ff799db717d5b0e423bd81fbae8131cf3463a80a642524a96952f6f3deaf2a95"}, + {file = "PyMuPDF-1.23.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:1f372bcc70b888f0c953add8b15627efb9f3cc2c7b8ad0916560b6081093932c"}, + {file = "PyMuPDF-1.23.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:6389eb4bfc27264a951497847089e5e4485f6609c351ac321071d62881a21982"}, + {file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:0fc5f600b3a72c29a0944cbcbc1375962ad669023265c50cd1d8f794d7ae95f7"}, + {file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:4fc4a6183a7a8006b83476fc0b26d6fb849996050e8c3c911b9d6a66fe6ccc1e"}, + {file = "PyMuPDF-1.23.2-cp311-none-win32.whl", hash = "sha256:66f94d35fd48e2b5cbe70a4601f036f76cb826318b893994ab7bd4186a65e78f"}, + {file = "PyMuPDF-1.23.2-cp311-none-win_amd64.whl", hash = "sha256:1ec04285451231c68a024657b75d59a43ce0dcdade582edf3a9cc1d86c75b826"}, + {file = "PyMuPDF-1.23.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:40a713ea439548cf3c6bd910dc904cb868eae9d7bc1c2d0aebc04c84431822af"}, + {file = "PyMuPDF-1.23.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:f0d57e40bdbf6c6deacf94387d3aad918535d8723aa6e3a27e4bef1f3d52158a"}, + {file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:2de9da709e14a0b32ca1ed7e268615189a8c1e76a26920dd45a92d9f0e207d1f"}, + {file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:7edc4b4542041a28f5644c09c1e670215ae014adc28a81d32786db73077d4cf3"}, + {file = "PyMuPDF-1.23.2-cp38-none-win32.whl", hash = "sha256:18f19be85f277a36536277f3f4991a2d1d1b9c2d0c3a515925e9bef41780efe0"}, + {file = "PyMuPDF-1.23.2-cp38-none-win_amd64.whl", hash = "sha256:a98cf7bb1ba8d64de78f443005c0f60c0c9644f73b3ebd57cbd20e232e2e5a30"}, + {file = "PyMuPDF-1.23.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:ac236156688627bca0a8062bb4153f77108f072dd4a06a80626fd089c2879e04"}, + {file = "PyMuPDF-1.23.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:c1a08531194d038e068641be92fdc31276efbee2b718a8dc4281dc593f1a99e7"}, + {file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:78b6c87fd375d1b017c63a426432be7ee4859f2142108b9c5dc8283599c112eb"}, + {file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:41d9cd45bb61cda890d446baeeded454fb4404086cf7f7e385e440123e9ecb56"}, + {file = "PyMuPDF-1.23.2-cp39-none-win32.whl", hash = "sha256:d34da29cd4305c4b85ea57528c1a31cfc6abfac7921d27153e633470e9dac104"}, + {file = "PyMuPDF-1.23.2-cp39-none-win_amd64.whl", hash = "sha256:86127075227f868a6b115eb96a74405539dde90168cd1a98781b0f1f6d4f9d7c"}, + {file = "PyMuPDF-1.23.2.tar.gz", hash = "sha256:32302d0eb0e28d60ba305f5d74702fb0fab2ed9d9f6b3a9d853429e5023bc6bb"}, ] [package.dependencies] @@ -598,6 +635,26 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "7.4.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, + {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "python-doi" version = "0.2.0" @@ -923,4 +980,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "2e158747df6100e105c98494d8b5f4b23b7076ae76295ce7a28facf02488ebd5" +content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f" diff --git a/pyproject.toml b/pyproject.toml index 8ee3741..4ca3257 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,10 @@ python-magic = "^0.4.27" [tool.poetry.plugins."papis.command"] extract = "papis_extract:main" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.0" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/tests/test_annotation.py b/tests/test_annotation.py new file mode 100644 index 0000000..72c7a75 --- /dev/null +++ b/tests/test_annotation.py @@ -0,0 +1,7 @@ +from papis_extract.annotation_data import Annotation + + +def test_matches_colorname_exact(): + sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)}) + c_name = sut.colorname + assert c_name == "red"