diff --git a/.woodpecker/lint.yml b/.woodpecker/lint.yml new file mode 100644 index 0000000..eb67dbe --- /dev/null +++ b/.woodpecker/lint.yml @@ -0,0 +1,16 @@ +pipeline: + lint_ruff: + image: python + commands: + - pip install ruff + - python --version && poetry --version && ruff --version + - echo "----------------- running ruff lint ------------------" + - ruff check . + + lint_black: + image: python + commands: + - pip install black + - python --version && poetry --version && black --version + - echo "----------------- running black lint ----------------" + - black --check . diff --git a/.woodpecker/static_analysis.yml b/.woodpecker/static_analysis.yml new file mode 100644 index 0000000..3506c5c --- /dev/null +++ b/.woodpecker/static_analysis.yml @@ -0,0 +1,9 @@ +pipeline: + pyright: + image: ghcr.io/withlogicco/poetry:1.5.1 + commands: + - pip install pyright + - poetry install + - python --version && poetry --version && pyright --version + - echo "------------- running pyright typecheck -------------" + - poetry run pyright diff --git a/.woodpecker/test.yml b/.woodpecker/test.yml new file mode 100644 index 0000000..44e264d --- /dev/null +++ b/.woodpecker/test.yml @@ -0,0 +1,10 @@ +branches: master + +pipeline: + pytest: + image: ghcr.io/withlogicco/poetry:1.5.1 + commands: + - poetry install + - python --version && poetry --version + - echo "------------- running pytest -------------" + - poetry run pytest diff --git a/README.md b/README.md index b403962..8aa6b5b 100644 --- a/README.md +++ b/README.md @@ -173,12 +173,22 @@ Known issues to be fixed: - [x] if both content and text are empty, do not extract an annotation - [x] Speed? - should be fine, on my machine (old i5 laptop) it takes around 90s for ~1000 documents with ~4000 annotations -- [ ] ensure all cmdline options do what they should +- [x] ensure all cmdline options do what they should +- [ ] annotations carry over color object from fitz, should just be Color object or simple tuple with rgb vals +- [ ] docstrings, docstrings! +- [ ] testing testing testing!! + - [ ] refactor into some better abstractions (e.g. Exporter Protocol -> stdout/markdown implementations; Extractor Protocol -> PDF implementation) features to be implemented: +- [ ] CICD + - [x] static analysis (lint, typecheck etc) on pushes + - [ ] test pipeline on master pushes + - [ ] release pipeline to pypi on tags - [ ] on_add hook to extract annotations as files are added + - needs upstream help, 'on_add' hook, and pass-through of affected documents - [ ] add page number if available + - exists in Annotation, just need to place in output - [ ] show overall amount of extractions at the end - [ ] custom formatting decided by user - [ ] improved default exporters @@ -186,7 +196,14 @@ features to be implemented: - pretty display on stdout (rich?) - csv/tsv to stdout - table fmt stdout? -- [ ] arbitrary color -> name settings not dependent on color name existing +- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)}) +- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note +- [ ] `--force` mode where we simply do not drop anything + +upstream changes: + +- [ ] need a hook for adding a document/file +- [ ] need hooks to actually pass through information on the thing they worked on (i.e. their document) ## Issues diff --git a/papis_extract/extractor.py b/papis_extract/extractor.py index 9467231..88ff012 100644 --- a/papis_extract/extractor.py +++ b/papis_extract/extractor.py @@ -42,7 +42,7 @@ def start(filename: Path) -> list[Annotation]: def _tag_from_colorname(colorname: str) -> str: - color_mapping: dict[str,str] = getdict("tags", "plugins.extract") + color_mapping: dict[str, str] = getdict("tags", "plugins.extract") if not color_mapping: return "" @@ -82,6 +82,7 @@ def _retrieve_annotation_content( # just a highlight without any text return (None, None) + # mimics the functions in papis.config.{getlist,getint,getfloat} etc. def getdict(key: str, section: Optional[str] = None) -> dict[str, str]: """Dict getter @@ -97,13 +98,14 @@ def getdict(key: str, section: Optional[str] = None) -> dict[str, str]: rawvalue = eval(rawvalue) except Exception: raise SyntaxError( - "The key '{}' must be a valid Python object: {}" - .format(key, rawvalue)) + "The key '{}' must be a valid Python object: {}".format(key, rawvalue) + ) else: if not isinstance(rawvalue, dict): raise SyntaxError( - "The key '{}' must be a valid Python dict. Got: {} (type {!r})" - .format(key, rawvalue, type(rawvalue).__name__)) + "The key '{}' must be a valid Python dict. Got: {} (type {!r})".format( + key, rawvalue, type(rawvalue).__name__ + ) + ) return rawvalue - diff --git a/poetry.lock b/poetry.lock index e225e20..8d6734d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -224,6 +224,17 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "isbnlib" version = "3.10.14" @@ -456,6 +467,17 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=0.29.35)"] +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] + [[package]] name = "papis" version = "0.13" @@ -504,6 +526,21 @@ files = [ {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, ] +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prompt-toolkit" version = "3.0.39" @@ -534,36 +571,36 @@ plugins = ["importlib-metadata"] [[package]] name = "pymupdf" -version = "1.23.1" +version = "1.23.2" description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." optional = false python-versions = ">=3.8" files = [ - {file = "PyMuPDF-1.23.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:eb85e8c8d5ceebe078206f779e5cd918919616e865e081fad3993f90159e61c1"}, - {file = "PyMuPDF-1.23.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3a23857d655180f6147dbc9886f109e627b6654e37ba5088d1b038f6d7861c0"}, - {file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:9216ca0805892b481be9544f3d3f9efbc06258fd26ba95295d540c2fa0ae0fea"}, - {file = "PyMuPDF-1.23.1-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:c8173a1b8c8539ee466f74263a7c6bdf1427b8f5dc32c66ccaca22222a89e339"}, - {file = "PyMuPDF-1.23.1-cp310-none-win32.whl", hash = "sha256:0d709d3d7c9b894ad5400fa5037bf34e82133b3386b5f3f7bb60b16e9666a1bb"}, - {file = "PyMuPDF-1.23.1-cp310-none-win_amd64.whl", hash = "sha256:11af6e9e86e2a4ad1ecf8085bb64a48323614769188fbf5eaa5a198acef5de39"}, - {file = "PyMuPDF-1.23.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:08b93208eaccac85a78b7aa2b3020645824fb97dda2ab4f6d3a07290ac99b078"}, - {file = "PyMuPDF-1.23.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ead3422bc17607e8e174e87b9d66639b10f5649fbb55c3cde13d0ba937e9c9f8"}, - {file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:322ef02cefad4c705461d64a89b33bf95147958277fd422206c4ea32323dc89e"}, - {file = "PyMuPDF-1.23.1-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8ce4dcef3df3c959053a5f35df8dd35c9272cfe5fd3df54039ea3e270a9ef69c"}, - {file = "PyMuPDF-1.23.1-cp311-none-win32.whl", hash = "sha256:245ba657c3f8e39a7e2b17ede676f8519031f70f11963cb1f39b76c00aace3cf"}, - {file = "PyMuPDF-1.23.1-cp311-none-win_amd64.whl", hash = "sha256:55109e3c49f7854e2a471ca92bef5e5f8324841474a3074fb03bee99537d11cd"}, - {file = "PyMuPDF-1.23.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:9f48302062e8d3d2440d197379a1ac9da201c78b2f80c414bccf7a0a4109ac16"}, - {file = "PyMuPDF-1.23.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:67a442a2235ba82fb4b604e1f2acb023131fcb5be4263d615dfef10319b1d617"}, - {file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:83db19d9d3b27fc14f66d222623a15b014db88f04682a33f9ed4054c31a65184"}, - {file = "PyMuPDF-1.23.1-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:756f24923bb28a8165f603ebe34ae72f951d9fc29ed7d72d8a3dad48001c19c4"}, - {file = "PyMuPDF-1.23.1-cp38-none-win32.whl", hash = "sha256:a69279f4b85db33733eecb4cc42b1b959775b31f61931ae6de2b0d640ac82ad8"}, - {file = "PyMuPDF-1.23.1-cp38-none-win_amd64.whl", hash = "sha256:89d54dce9ca91f204e7f9cf4f91111cf6feede08f710259682f0f3bd2bc77ba7"}, - {file = "PyMuPDF-1.23.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:0d57716ff547a17365eed5f1485fac33b60d4c15776f041503b718834a9a8fe1"}, - {file = "PyMuPDF-1.23.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:bdbf28e9ce255b8a221d1015f8a86810f70d1db45b6c4f90033fa8d8c2fb2b11"}, - {file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:6ac7ee501ed13932878d52bb291ef7abf88e9bceb6c04144bd896cd079b193c0"}, - {file = "PyMuPDF-1.23.1-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:74e20395bbaa5fd5bc815e5459f017edeb668ddf1ddf4f848f05587ec71b1326"}, - {file = "PyMuPDF-1.23.1-cp39-none-win32.whl", hash = "sha256:25f554c861039ade6fcdb12cd17c942687e530afdfd337d72820529dc9bd440d"}, - {file = "PyMuPDF-1.23.1-cp39-none-win_amd64.whl", hash = "sha256:a3b2ce649b5bba352eebb4415529c81ec937595f751cc02079be2b314c5972b5"}, - {file = "PyMuPDF-1.23.1.tar.gz", hash = "sha256:34cac956024f1a30f5204a1a987d7b2c8d4b2b5df57806b82c2842e3e108786b"}, + {file = "PyMuPDF-1.23.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:701aff64dbf0635c2c875b518979b46b935ed4d3b3d2aee1c449e2960831d766"}, + {file = "PyMuPDF-1.23.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:949c071b50825cf341f03546e7354cef942c36fcc071a72a0417c035d6ee7e33"}, + {file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:5209612dfc9038fbbb1a61dc01bd298d5279646d5e1c98cfe80878db3d862a3e"}, + {file = "PyMuPDF-1.23.2-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:f20ba77a61440220bd2e380ceef8a86bf51f97ac9374a8af00aeedea904dad46"}, + {file = "PyMuPDF-1.23.2-cp310-none-win32.whl", hash = "sha256:01c45723fbc389fac2ab8150e5ba80c357706ca69a74c29ec1a83a05921c53d1"}, + {file = "PyMuPDF-1.23.2-cp310-none-win_amd64.whl", hash = "sha256:ff799db717d5b0e423bd81fbae8131cf3463a80a642524a96952f6f3deaf2a95"}, + {file = "PyMuPDF-1.23.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:1f372bcc70b888f0c953add8b15627efb9f3cc2c7b8ad0916560b6081093932c"}, + {file = "PyMuPDF-1.23.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:6389eb4bfc27264a951497847089e5e4485f6609c351ac321071d62881a21982"}, + {file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:0fc5f600b3a72c29a0944cbcbc1375962ad669023265c50cd1d8f794d7ae95f7"}, + {file = "PyMuPDF-1.23.2-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:4fc4a6183a7a8006b83476fc0b26d6fb849996050e8c3c911b9d6a66fe6ccc1e"}, + {file = "PyMuPDF-1.23.2-cp311-none-win32.whl", hash = "sha256:66f94d35fd48e2b5cbe70a4601f036f76cb826318b893994ab7bd4186a65e78f"}, + {file = "PyMuPDF-1.23.2-cp311-none-win_amd64.whl", hash = "sha256:1ec04285451231c68a024657b75d59a43ce0dcdade582edf3a9cc1d86c75b826"}, + {file = "PyMuPDF-1.23.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:40a713ea439548cf3c6bd910dc904cb868eae9d7bc1c2d0aebc04c84431822af"}, + {file = "PyMuPDF-1.23.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:f0d57e40bdbf6c6deacf94387d3aad918535d8723aa6e3a27e4bef1f3d52158a"}, + {file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:2de9da709e14a0b32ca1ed7e268615189a8c1e76a26920dd45a92d9f0e207d1f"}, + {file = "PyMuPDF-1.23.2-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:7edc4b4542041a28f5644c09c1e670215ae014adc28a81d32786db73077d4cf3"}, + {file = "PyMuPDF-1.23.2-cp38-none-win32.whl", hash = "sha256:18f19be85f277a36536277f3f4991a2d1d1b9c2d0c3a515925e9bef41780efe0"}, + {file = "PyMuPDF-1.23.2-cp38-none-win_amd64.whl", hash = "sha256:a98cf7bb1ba8d64de78f443005c0f60c0c9644f73b3ebd57cbd20e232e2e5a30"}, + {file = "PyMuPDF-1.23.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:ac236156688627bca0a8062bb4153f77108f072dd4a06a80626fd089c2879e04"}, + {file = "PyMuPDF-1.23.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:c1a08531194d038e068641be92fdc31276efbee2b718a8dc4281dc593f1a99e7"}, + {file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:78b6c87fd375d1b017c63a426432be7ee4859f2142108b9c5dc8283599c112eb"}, + {file = "PyMuPDF-1.23.2-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:41d9cd45bb61cda890d446baeeded454fb4404086cf7f7e385e440123e9ecb56"}, + {file = "PyMuPDF-1.23.2-cp39-none-win32.whl", hash = "sha256:d34da29cd4305c4b85ea57528c1a31cfc6abfac7921d27153e633470e9dac104"}, + {file = "PyMuPDF-1.23.2-cp39-none-win_amd64.whl", hash = "sha256:86127075227f868a6b115eb96a74405539dde90168cd1a98781b0f1f6d4f9d7c"}, + {file = "PyMuPDF-1.23.2.tar.gz", hash = "sha256:32302d0eb0e28d60ba305f5d74702fb0fab2ed9d9f6b3a9d853429e5023bc6bb"}, ] [package.dependencies] @@ -598,6 +635,26 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "7.4.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, + {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "python-doi" version = "0.2.0" @@ -923,4 +980,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "2e158747df6100e105c98494d8b5f4b23b7076ae76295ce7a28facf02488ebd5" +content-hash = "d519605837788792d06ffc7bca7a92b315612ca6052227c53c558ec49dffec9f" diff --git a/pyproject.toml b/pyproject.toml index 8ee3741..4ca3257 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,10 @@ python-magic = "^0.4.27" [tool.poetry.plugins."papis.command"] extract = "papis_extract:main" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.0" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/tests/test_annotation.py b/tests/test_annotation.py new file mode 100644 index 0000000..72c7a75 --- /dev/null +++ b/tests/test_annotation.py @@ -0,0 +1,7 @@ +from papis_extract.annotation_data import Annotation + + +def test_matches_colorname_exact(): + sut = Annotation("testfile", colors={"stroke": (1.0, 0.0, 0.0)}) + c_name = sut.colorname + assert c_name == "red"