Compare commits
18 commits
0ccb262557
...
15606b293f
| Author | SHA1 | Date | |
|---|---|---|---|
| 15606b293f | |||
| 7fd0e65453 | |||
| f2c048caa9 | |||
| 9e88054736 | |||
| 81f8690b18 | |||
| 32e735baef | |||
| bd51e17221 | |||
| 24a4812051 | |||
| ff6cdf3cca | |||
| ecb999a49e | |||
| 1a4b5e3a70 | |||
| 17c6fefd89 | |||
| 3eb7f3f1c7 | |||
| fd71482526 | |||
| a9ff4152af | |||
| db47ad686d | |||
| d840609ecb | |||
| 3344147f1f |
22 changed files with 1053 additions and 475 deletions
|
|
@ -1,7 +1,12 @@
|
|||
[settings]
|
||||
default-library = main
|
||||
picktool = papis
|
||||
formater = python
|
||||
formatter = python
|
||||
|
||||
[main]
|
||||
database-backend = whoosh
|
||||
use-cache = False
|
||||
dir = temp/lib
|
||||
|
||||
[plugins.extract]
|
||||
tags = {"red": "important", "green": "extra", "blue": "toread"}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,19 @@
|
|||
when:
|
||||
- event: push
|
||||
|
||||
steps:
|
||||
lint_ruff:
|
||||
image: python
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- pip install ruff
|
||||
- python --version && poetry --version && ruff --version
|
||||
- uv tool install ruff
|
||||
- python --version && uv --version && ruff --version
|
||||
- echo "----------------- running ruff lint ------------------"
|
||||
- ruff check .
|
||||
|
||||
lint_black:
|
||||
image: python
|
||||
format_ruff:
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- pip install black
|
||||
- python --version && poetry --version && black --version
|
||||
- echo "----------------- running black lint ----------------"
|
||||
- black --check .
|
||||
- uv tool install ruff
|
||||
- python --version && uv --version && ruff --version
|
||||
- echo "----------------- running ruff format ------------------"
|
||||
- ruff format --check .
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
when:
|
||||
- event: push
|
||||
|
||||
steps:
|
||||
pyright:
|
||||
image: nikolaik/python-nodejs
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- npm install --global pyright
|
||||
- uv sync
|
||||
- python --version && uv version && pyright --version
|
||||
- uv tool install pyright
|
||||
- uv sync --locked
|
||||
- python --version && uv --version && pyright --version
|
||||
- echo "------------- running pyright typecheck -------------"
|
||||
- uv run pyright
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
when:
|
||||
branch: main
|
||||
- event: push
|
||||
|
||||
steps:
|
||||
pytest:
|
||||
image: nikolaik/python-nodejs
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- uv sync
|
||||
- python --version && uv version
|
||||
- uv sync --locked
|
||||
- python --version && uv --version
|
||||
- echo "------------- running pytest -------------"
|
||||
- uv run pytest
|
||||
|
|
|
|||
79
README.md
79
README.md
|
|
@ -1,6 +1,7 @@
|
|||
# papis-extract
|
||||
|
||||
[](https://ci.martyoeh.me/Marty/papis-extract)
|
||||
|
||||
<!-- TODO have to set up pypi badge
|
||||

|
||||
-->
|
||||
|
|
@ -11,6 +12,7 @@ Easily organize all your highlights and thoughts next to your documents and refe
|
|||
## Installation
|
||||
|
||||
<!-- TODO set up pypi repository / explain git install path -->
|
||||
|
||||
You can install through pip with `pip install git+https://git.martyoeh.me/Marty/papis-extract.git`.
|
||||
|
||||
That's it! If you have papis and papis-extract installed in the same environment (whether virtual or global),
|
||||
|
|
@ -26,7 +28,7 @@ You will be set up with the default options but if you want to change anything,
|
|||
|
||||
> **Note**
|
||||
> This plugin is still in fairly early development.
|
||||
> It does what I need it to do, but if you have a meticulously organized library *please* make backups before doing any operation which could affect your notes, or make use of the papis-included git options.
|
||||
> It does what I need it to do, but if you have a meticulously organized library _please_ make backups before doing any operation which could affect your notes, or make use of the papis-included git options.
|
||||
> Take care to read the Issues section of this README if you intend to run it over a large collection.
|
||||
|
||||
## Usage
|
||||
|
|
@ -66,7 +68,7 @@ use `--all` as usual with papis:
|
|||
papis extract --all "author:Einstein"
|
||||
```
|
||||
|
||||
The above command will print out your annotations made on *all* papers by Einstein.
|
||||
The above command will print out your annotations made on _all_ papers by Einstein.
|
||||
|
||||
You can invoke the command with `--manual` to instantly edit the notes in your editor:
|
||||
|
||||
|
|
@ -85,7 +87,7 @@ papis extract --write --all
|
|||
```
|
||||
|
||||
However, the warning for your notes' safety goes doubly for this command since it will touch
|
||||
*most* or *all* of your notes, depending on how many entries in your library have pdfs with annotations attached.
|
||||
_most_ or _all_ of your notes, depending on how many entries in your library have pdfs with annotations attached.
|
||||
|
||||
While I have not done extensive optimizations the process should be relatively quick even for larger libraries:
|
||||
On my current laptop, extracting ~4000 annotations from ~1000 library documents takes around 90 seconds,
|
||||
|
|
@ -116,7 +118,7 @@ papis extract --output count
|
|||
|
||||
For now, these are the only formatters the plugin knows about.
|
||||
|
||||
Be aware that if you re-write to your notes using a completely different output format than the original the plugin will *not* detect old annotations and drop them,
|
||||
Be aware that if you re-write to your notes using a completely different output format than the original the plugin will _not_ detect old annotations and drop them,
|
||||
so you will be doubling up your annotations.
|
||||
See the `minimum_similarity` configuration option for more details.
|
||||
|
||||
|
|
@ -150,7 +152,7 @@ If you set `on_import` to `True`,
|
|||
extraction into notes is automatically run whenever a new document is added to the library,
|
||||
if `False` extraction only happens when you explicitly invoke it.
|
||||
|
||||
Extraction will *not* happen automatically when you add new annotations to an existing document,
|
||||
Extraction will _not_ happen automatically when you add new annotations to an existing document,
|
||||
regardless of this setting.
|
||||
|
||||
> **Note**
|
||||
|
|
@ -190,11 +192,11 @@ Generally, this should be fine as it is but you should change this value if you
|
|||
|
||||
---
|
||||
|
||||
`minimum_similarity_content` sets the required similarity of an annotation's note and in-pdf written words to be viewed as one. Any annotation that has both and is *under* the minimum similarity will be added in the following form:
|
||||
`minimum_similarity_content` sets the required similarity of an annotation's note and in-pdf written words to be viewed as one. Any annotation that has both and is _under_ the minimum similarity will be added in the following form:
|
||||
|
||||
```markdown
|
||||
> my annotation
|
||||
Note: my additional thoughts
|
||||
> Note: my additional thoughts
|
||||
```
|
||||
|
||||
That is, the extractor detects additional written words by whoever annotated and adds them to the extraction.
|
||||
|
|
@ -220,55 +222,6 @@ Then import the resulting `.html` file into the library like any other document
|
|||
(or `papis addto` to add it to existing document references).
|
||||
You are then ready to use extract to get those annotations from the exported list into your notes.
|
||||
|
||||
## TODO: Roadmap to first release
|
||||
|
||||
Known issues to be fixed:
|
||||
|
||||
- [x] if both content and text are empty, do not extract an annotation
|
||||
- [x] Speed?
|
||||
- should be fine, on my machine (old i5 laptop) it takes around 90s for ~1000 documents with ~4000 annotations
|
||||
- [x] ensure all cmdline options do what they should
|
||||
- [x] annotations carry over color object from fitz, should just be Color object or simple tuple with rgb vals
|
||||
- [x] docstrings, docstrings!
|
||||
- [ ] testing testing testing!!
|
||||
- [ ] refactor into some better abstractions (e.g. Exporter Protocol -> stdout/markdown implementations; Extractor Protocol -> PDF implementation)
|
||||
- [ ] dependency injection for extractor/exporter/formatter/annotation modules
|
||||
- [ ] any call to papis.config should start from init and be injected?
|
||||
|
||||
features to be implemented:
|
||||
|
||||
- [ ] CICD
|
||||
- [x] static analysis (lint, typecheck etc) on pushes
|
||||
- [x] test pipeline on master pushes
|
||||
- [ ] release pipeline to pypi on tags
|
||||
- [x] add page number if available
|
||||
- exists in Annotation, just need to place in output
|
||||
- [ ] show overall amount of extractions at the end
|
||||
- implemented for writing to notes (notes exporter)
|
||||
- KNOWN ISSUE: currently returns number of annotation rows (may be multiple per annot)
|
||||
- [ ] custom formatting decided by user
|
||||
- in config as { "myformatter": ">{tag}\n{quote}\n{note}\n{page} etc"}
|
||||
- [ ] improved default exporters
|
||||
- [x] markdown into notes
|
||||
- [ ] pretty display on stdout (rich?)
|
||||
- [x] csv/tsv to stdout
|
||||
- [ ] table fmt stdout?
|
||||
- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)})
|
||||
- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note
|
||||
- [x] `--force` mode where we simply do not drop anything
|
||||
- [x] `--format` option to choose from default or set up a custom formatter
|
||||
- called `--output` in current implementation
|
||||
- [ ] on_add hook to extract annotations as files are added
|
||||
- needs upstream help, 'on_add' hook, and pass-through of affected documents
|
||||
- [ ] target same minimum Python version as papis upstream (3.8 as of papis 0.13)
|
||||
- [ ] change detection:
|
||||
- how does it handle updated citations? updated colors? should it be configurable?
|
||||
|
||||
upstream changes:
|
||||
|
||||
- [ ] need a hook for adding a document/file
|
||||
- [ ] need hooks to actually pass through information on the thing they worked on (i.e. their document)
|
||||
|
||||
## Issues
|
||||
|
||||
First, a note in general: There is the functionality to run this plugin over your whole library in a single command and also in a way that makes permanent changes to that library.
|
||||
|
|
@ -281,12 +234,12 @@ A note on the extraction: Highlights in pdfs can be somewhat difficult to parse
|
|||
(as are most things in them). Sometimes they contain the selected text that is written on the
|
||||
page, sometimes they contain the annotator's thoughts as a note, sometimes they contain nothing.
|
||||
This plugin makes an effort to find the right combination and extract the written words,
|
||||
as well as any additional notes made - but things *will* slip through or extract weirdly every now
|
||||
as well as any additional notes made - but things _will_ slip through or extract weirdly every now
|
||||
and again.
|
||||
|
||||
Secondly, a note on the pages: I use the page number that the mupdf library gives me when it
|
||||
extracts anything from the pdf file. Sometimes that number will be correct for the document,
|
||||
sometimes it will however be the number of the *pdf document* internally. This can happen if
|
||||
sometimes it will however be the number of the _pdf document_ internally. This can happen if
|
||||
e.g. an article or a book has frontmatter without numbering scheme or with a different one.
|
||||
Sometimes the correct pages will still be embedded in the pdf and everything will work,
|
||||
others it won't. So always double check your page numbers!
|
||||
|
|
@ -298,19 +251,19 @@ I am not sure if there is much I can do about these issues for now.
|
|||
and for myself whenever I forget. The basic building blocks currently in here are four:
|
||||
|
||||
- extractors (= input format)
|
||||
: Extract data from a source file attached to a papis document.
|
||||
: Extract data from a source file attached to a papis document.
|
||||
Crawls the actual files attached to documents to put them into annotation-friendly formats.
|
||||
|
||||
- formatters (= output format)
|
||||
: Make sure the exporter saves the annotation data according to your preferred layout,
|
||||
: Make sure the exporter saves the annotation data according to your preferred layout,
|
||||
such as a markdown syntax or csv-structure.
|
||||
|
||||
- annotations
|
||||
: The actual extracted blocks of text, containing some metadata
|
||||
: The actual extracted blocks of text, containing some metadata
|
||||
info as well, such as their color, type, page.
|
||||
|
||||
- exporters
|
||||
: Put the extracted data somewhere. For now stdout or into your notes.
|
||||
: Put the extracted data somewhere. For now stdout or into your notes.
|
||||
|
||||
Splitting it into those building blocks makes it easier to recombine them in any way,
|
||||
should someone want to save highlights as csv data in their notes,
|
||||
|
|
@ -329,7 +282,7 @@ However, what I do on my system instead to enable quick testing is inject it int
|
|||
system-wide (but isolated with `pipx`) papis setup:
|
||||
|
||||
```bash
|
||||
pipx install papis # create an isolated papis installation reachable form anywhere
|
||||
pipx install papis # create an isolated papis installation reachable from anywhere
|
||||
pipx inject --editable papis . # inject this folder into the environment and keep up with any changes
|
||||
```
|
||||
|
||||
|
|
|
|||
49
ROADMAP
Normal file
49
ROADMAP
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# ROADMAP to first release
|
||||
|
||||
Known issues to be fixed:
|
||||
|
||||
- [x] if both content and text are empty, do not extract an annotation
|
||||
- [x] Speed?
|
||||
- should be fine, on my machine (old i5 laptop) it takes around 90s for ~1000 documents with ~4000 annotations
|
||||
- [x] ensure all cmdline options do what they should
|
||||
- [x] annotations carry over color object from fitz, should just be Color object or simple tuple with rgb vals
|
||||
- [x] docstrings, docstrings!
|
||||
- [ ] testing testing testing!!
|
||||
- [ ] refactor into some better abstractions (e.g. Exporter Protocol -> stdout/markdown implementations; Extractor Protocol -> PDF implementation)
|
||||
- [ ] dependency injection for extractor/exporter/formatter/annotation modules
|
||||
- [ ] any call to papis.config should start from init and be injected?
|
||||
|
||||
features to be implemented:
|
||||
|
||||
- [ ] CICD
|
||||
- [x] static analysis (lint, typecheck etc) on pushes
|
||||
- [x] test pipeline on master pushes
|
||||
- [ ] release pipeline to pypi on tags
|
||||
- [x] add page number if available
|
||||
- exists in Annotation, just need to place in output
|
||||
- [ ] show overall amount of extractions at the end
|
||||
- implemented for writing to notes (notes exporter)
|
||||
- KNOWN ISSUE: currently returns number of annotation rows (may be multiple per annot)
|
||||
- [ ] custom formatting decided by user
|
||||
- in config as { "myformatter": ">{tag}\n{quote}\n{note}\n{page} etc"}
|
||||
- [ ] improved default exporters
|
||||
- [x] markdown into notes
|
||||
- [ ] pretty display on stdout (rich?)
|
||||
- [x] csv/tsv to stdout
|
||||
- [ ] table fmt stdout?
|
||||
- [ ] allow custom colors -> tag name settings not dependent on color name existing (e.g. {"important": (1.0,0.0,0.0)})
|
||||
- [ ] `--overwrite` mode where existing annotations are not dropped but overwritten on same line of note
|
||||
- [x] `--force` mode where we simply do not drop anything
|
||||
- called `--duplicates` in current implementation
|
||||
- [x] `--format` option to choose from default or set up a custom formatter
|
||||
- called `--output` in current implementation
|
||||
- [ ] on_add hook to extract annotations as files are added
|
||||
- needs upstream help, 'on_add' hook, and pass-through of affected documents
|
||||
- [ ] target same minimum Python version as papis upstream (3.8 as of papis 0.14, 3.10 for upcoming papis ~0.15)
|
||||
- [ ] change detection:
|
||||
- how does it handle updated citations? updated colors? should it be configurable?
|
||||
|
||||
upstream changes:
|
||||
|
||||
- [ ] need a hook for adding a document/file
|
||||
- [ ] need hooks to actually pass through information on the thing they worked on (i.e. their document)
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import math
|
||||
from dataclasses import dataclass
|
||||
from functools import total_ordering
|
||||
from types import NotImplementedType
|
||||
from typing import Any, cast
|
||||
|
||||
import chevron
|
||||
|
|
@ -21,7 +22,7 @@ COLORS: dict[str, tuple[float, float, float]] = {
|
|||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
@total_ordering
|
||||
class Annotation:
|
||||
"""A PDF annotation object.
|
||||
|
||||
|
|
@ -140,3 +141,41 @@ class Annotation:
|
|||
)
|
||||
|
||||
return cast("dict[str, str]", rawvalue)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"Annotation({self.type}: '{self.file}', color: {self.color}, tag: '{self.tag}', page: {self.page}, content: '{self.content}', note: '{self.note}', minimum_similarity_color: {self.minimum_similarity_color})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Annotation(type={self.type}, file='{self.file}', color={self.color}, tag='{self.tag}', page={self.page}, content='{self.content}', note='{self.note}', minimum_similarity_color={self.minimum_similarity_color})"
|
||||
|
||||
def __eq__(self, other: object) -> bool | NotImplementedType:
|
||||
if not isinstance(other, Annotation):
|
||||
return NotImplemented
|
||||
|
||||
return (
|
||||
self.content.lower(),
|
||||
self.note.lower(),
|
||||
self.type,
|
||||
self.file,
|
||||
self.color,
|
||||
self.tag,
|
||||
self.page,
|
||||
) == (
|
||||
other.content.lower(),
|
||||
other.note.lower(),
|
||||
other.type,
|
||||
other.file,
|
||||
other.color,
|
||||
other.tag,
|
||||
other.page,
|
||||
)
|
||||
|
||||
def __lt__(self, other: object) -> bool:
|
||||
if not hasattr(other, "page"):
|
||||
return NotImplemented
|
||||
|
||||
other = cast("Annotation", other)
|
||||
selfpage = self.page if self.page != 0 else float("inf")
|
||||
otherpage = other.page if other.page != 0 else float("inf")
|
||||
|
||||
return selfpage < otherpage
|
||||
|
|
|
|||
|
|
@ -1,13 +1,8 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Protocol
|
||||
|
||||
import papis.api
|
||||
import papis.commands.edit
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.git
|
||||
import papis.logging
|
||||
import papis.notes
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.logging
|
||||
from papis.document import Document
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from importlib.util import find_spec
|
|||
import papis.logging
|
||||
|
||||
from papis_extract.extraction import Extractor
|
||||
from papis_extract.extractors import pdf, readera
|
||||
from papis_extract.extractors import pdf, readera, readest
|
||||
from papis_extract.extractors.pocketbook import PocketBookExtractor
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
|
@ -12,6 +12,7 @@ all_extractors: dict[str, Extractor] = {}
|
|||
|
||||
all_extractors["pdf"] = pdf.PdfExtractor()
|
||||
all_extractors["readera"] = readera.ReadEraExtractor()
|
||||
all_extractors["readest"] = readest.ReadestExtractor()
|
||||
|
||||
if find_spec("bs4") and find_spec("magic"):
|
||||
all_extractors["pocketbook"] = PocketBookExtractor()
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
# pyright: strict, reportMissingTypeStubs=false, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple, cast
|
||||
|
||||
import Levenshtein
|
||||
import magic
|
||||
import papis.config
|
||||
import papis.logging
|
||||
import pymupdf as mu
|
||||
|
|
@ -25,7 +25,12 @@ class PdfExtractor:
|
|||
if not filename.is_file():
|
||||
logger.error(f"File {str(filename)} not readable.")
|
||||
return False
|
||||
return self._is_pdf(filename)
|
||||
|
||||
if not self._is_pdf(filename):
|
||||
return False
|
||||
|
||||
logger.debug(f"Found processable annotation file: {filename}")
|
||||
return True
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
"""Extract annotations from a file.
|
||||
|
|
@ -72,7 +77,7 @@ class PdfExtractor:
|
|||
|
||||
def _is_pdf(self, fname: Path) -> bool:
|
||||
"""Check if file is a pdf, using mime type."""
|
||||
return magic.from_file(fname, mime=True) == "application/pdf"
|
||||
return mimetypes.guess_type(fname)[0] == "application/pdf"
|
||||
|
||||
def _get_annotation_content(
|
||||
self, page: mu.Page, annotation: mu.Annot
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import magic
|
||||
import papis.logging
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
|
@ -12,7 +13,7 @@ logger = papis.logging.get_logger(__name__)
|
|||
|
||||
class PocketBookExtractor:
|
||||
def can_process(self, filename: Path) -> bool:
|
||||
if magic.from_file(filename, mime=True) != "text/xml":
|
||||
if not self._is_html(filename):
|
||||
return False
|
||||
|
||||
content = self._read_file(filename)
|
||||
|
|
@ -20,11 +21,16 @@ class PocketBookExtractor:
|
|||
return False
|
||||
|
||||
html = BeautifulSoup(content, features="xml")
|
||||
return bool(
|
||||
html.find(
|
||||
"meta", {"name": "generator", "content": "PocketBook Bookmarks Export"}
|
||||
)
|
||||
)
|
||||
if not html.find(
|
||||
"meta", {"name": "generator", "content": "PocketBook Bookmarks Export"}
|
||||
):
|
||||
return False
|
||||
|
||||
logger.debug(f"Found processable annotation file: {filename}")
|
||||
return True
|
||||
|
||||
def _is_html(self, filename: Path) -> bool:
|
||||
return mimetypes.guess_type(filename)[0] == "text/html"
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
"""Extract annotations from pocketbook html file.
|
||||
|
|
@ -43,16 +49,14 @@ class PocketBookExtractor:
|
|||
annotations: list[Annotation] = []
|
||||
for bm in html.select("div.bookmark"):
|
||||
content = str(
|
||||
(bm.select_one("div.bm-text>p") or html.new_string("")).text or "" # pyright: ignore [reportUnknownArgumentType]
|
||||
(bm.select_one("div.bm-text>p") or html.new_string("")).text or ""
|
||||
)
|
||||
note = str(
|
||||
(bm.select_one("div.bm-note>p") or html.new_string("")).text or "" # pyright: ignore [reportUnknownArgumentType]
|
||||
)
|
||||
page = int(
|
||||
(bm.select_one("p.bm-page") or html.new_string("")).text or 0 # pyright: ignore [reportUnknownArgumentType]
|
||||
(bm.select_one("div.bm-note>p") or html.new_string("")).text or ""
|
||||
)
|
||||
page = int((bm.select_one("p.bm-page") or html.new_string("")).text or 0)
|
||||
|
||||
el_classes = bm.attrs.get("class", "").split(" ")
|
||||
el_classes = cast("str", bm.attrs.get("class", "")).split(" ")
|
||||
color = (0, 0, 0)
|
||||
for c in el_classes:
|
||||
if "bm-color-" in c:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import magic
|
||||
import papis.logging
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
|
|
@ -17,7 +17,7 @@ class ReadEraExtractor:
|
|||
"""
|
||||
|
||||
def can_process(self, filename: Path) -> bool:
|
||||
if magic.from_file(filename, mime=True) != "text/plain":
|
||||
if not self._is_txt(filename):
|
||||
return False
|
||||
|
||||
content = self._read_file(filename)
|
||||
|
|
@ -36,19 +36,19 @@ class ReadEraExtractor:
|
|||
if not re.search(r"\n\*\*\*\*\*\n\n$", "".join(content)):
|
||||
return False
|
||||
|
||||
logger.debug(
|
||||
f"Found annotation file processable with ReadEraExtractor: {filename}"
|
||||
)
|
||||
logger.debug(f"Found processable annotation file: {filename}")
|
||||
return True
|
||||
|
||||
def _is_txt(self, filename: Path) -> bool:
|
||||
return mimetypes.guess_type(filename)[0] == "text/plain"
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
"""Extract annotations from readera txt file.
|
||||
|
||||
Returns all readable annotations contained in the file
|
||||
passed in, with highlights and notes if available.
|
||||
Could theoretically return the annotation color but I
|
||||
do not have access to a premium version of ReadEra so
|
||||
I cannot add this feature.
|
||||
Returns all readable annotations contained in the file passed in, with
|
||||
highlights and notes if available. Could theoretically return the
|
||||
annotation color but I do not have access to a premium version of
|
||||
ReadEra so I cannot add this feature.
|
||||
"""
|
||||
content = self._read_file(filename)[2:]
|
||||
if not content:
|
||||
|
|
@ -56,7 +56,9 @@ class ReadEraExtractor:
|
|||
|
||||
annotations: list[Annotation] = []
|
||||
|
||||
split = "\n".join(content).split("\n*****\n")
|
||||
# split for *** separators and remove the last entry since it is always
|
||||
# empty
|
||||
split = "\n".join(content).split("\n*****\n")[:-1]
|
||||
note_pattern = re.compile(r"\n--.*")
|
||||
for entry in split:
|
||||
entry = entry.strip()
|
||||
|
|
|
|||
96
papis_extract/extractors/readest.py
Normal file
96
papis_extract/extractors/readest.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import papis.logging
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
ACCEPTED_EXTENSIONS = [".txt", ".md", ".qmd", ".rmd"]
|
||||
TEXTCHARS = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
|
||||
|
||||
|
||||
class ReadestExtractor:
    """Extracts exported annotations from the FOSS Readest book reading app.

    https://readest.com/
    """

    def can_process(self, filename: Path) -> bool:
        """Check whether a file looks like a Readest annotation export.

        The file has to pass :meth:`_is_readable_text`, must not be empty and
        has to contain a ``**Exported from Readest**: YYYY-MM-DD`` line.

        :returns: A boolean indicating whether :meth:`run` can handle the file.
        """
        if not self._is_readable_text(filename):
            return False

        content = self._read_file(filename)
        if not content:
            return False

        # look for the 'Exported from Readest' date line of the export
        if not re.search(
            r"\n\*\*Exported from Readest\*\*: \d{4}-\d{2}-\d{2}\n", "".join(content)
        ):
            return False

        logger.debug(f"Found processable annotation file: {filename}")
        return True

    def _is_readable_text(self, filename: Path) -> bool:
        """Checks whether a file has a valid text extension and is not a binary file.

        A file is considered a valid text file if its extension (compared
        case-insensitively) is in :data:`ACCEPTED_EXTENSIONS` and its first
        1024 bytes do not contain any non-text characters.

        :returns: A boolean indicating whether the file is a valid text file.
        """
        if filename.suffix.lower() not in ACCEPTED_EXTENSIONS:
            return False
        try:
            with filename.open("rb") as rb:
                # Deleting every byte listed in TEXTCHARS leaves only the
                # non-text bytes; any remainder marks the file as binary.
                return not rb.read(1024).translate(None, TEXTCHARS)
        except (FileNotFoundError, PermissionError):
            return False

    def run(self, filename: Path) -> list[Annotation]:
        """Extract annotations from readest txt file.

        Returns all readable annotations contained in the file passed in, with
        highlights and notes if available. The first two lines of the file are
        skipped. Every remaining line starting with ``> `` becomes one
        annotation; a directly following line starting with ``**Note**:: `` is
        attached to it as its note.
        """
        content = self._read_file(filename)[2:]
        if not content:
            return []

        annotations: list[Annotation] = []

        for i, line in enumerate(content):
            if not line.startswith("> "):
                continue

            entry_content = line.lstrip('> "').rstrip('\n" ')
            entry_note = ""
            # A highlight on the very last line has no following line which
            # could carry a note, so guard the look-ahead against IndexError.
            nextline = content[i + 1] if i + 1 < len(content) else ""
            if nextline.startswith("**Note**:: "):
                entry_note = nextline.removeprefix("**Note**:: ").strip()

            annotations.append(
                Annotation(
                    file=str(filename),
                    content=entry_content,
                    note=entry_note,
                    # NOTE: Unfortunately Readest currently does not export color information
                )
            )

        logger.debug(
            f"Found {len(annotations)} "
            f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
        )
        return annotations

    def _read_file(self, filename: Path) -> list[str]:
        """Read a file into a list of lines, line endings included.

        :returns: The lines of the file, or an empty list (after logging an
            error) if the file does not exist or may not be read.
        """
        try:
            with filename.open("r") as fr:
                return fr.readlines()
        except (FileNotFoundError, PermissionError):
            logger.error(f"Could not open file {filename} for extraction.")
            return []
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
[project]
|
||||
authors = [{ name = "Marty Oehme", email = "contact@martyoeh.me" }]
|
||||
license = { text = "GPL-3.0-or-later" }
|
||||
requires-python = "<4.0,>=3.11"
|
||||
requires-python = "<4.0,>=3.10"
|
||||
dependencies = [
|
||||
"pymupdf<2.0.0,>=1.24.2",
|
||||
"levenshtein<1.0.0,>=0.25.1",
|
||||
|
|
@ -22,7 +22,6 @@ keywords = [
|
|||
"bibliography",
|
||||
"reference manager",
|
||||
"research",
|
||||
"science",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
@ -63,3 +62,11 @@ build-backend = "hatchling.build"
|
|||
|
||||
[tool.pyright]
|
||||
typeCheckingMode = "strict"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
filterwarnings = [
|
||||
# see https://github.com/swig/swig/issues/2881#issuecomment-2877988848
|
||||
# should hopefully be redundant with swig-4.4
|
||||
"ignore:.*builtin type SwigPy\\w* has no __module__ attribute.*:DeprecationWarning",
|
||||
"ignore:.*builtin type swigvarlink has no __module__ attribute.*:DeprecationWarning"
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,25 +0,0 @@
|
|||
The Circle of the Snake
|
||||
Grafton Tanner
|
||||
|
||||
digital technologies of the twenty-first century can only exist thanks to this kind of outsourced labor. The relative invisibility of the tech supply chain is part of the ruse; American consumers do not see where smartphones come from. They do not see the conflict zones where coltan is mined to be used in electronic devices, or the sweatshops in which digital products are manufactured. The latest technologies arrive instead in pristine condition, as if delivered from on high.
|
||||
|
||||
*****
|
||||
|
||||
We don’t necessarily want our leaders to be average persons like us, even though we often enjoy hearing that famous celebrities eat the same fast food as regular people. But in the beginning of the twenty-first century, we carefully watch our public figures to ensure they do not commit an unconscionable act. Doing so helps to rid the public stage of bigots, even as it also threatens the last known walls of privacy. This is an inevitable tension that must be maintained as we open the door on the private lives of others
|
||||
--We continuously demystify our leaders - first through television, now through social media, streams, docus, etc. this brings them down to our level, which is what this paragraph talks about
|
||||
|
||||
*****
|
||||
|
||||
Initially, the Internet was praised as a freer way to encounter information. In the early 1990s, digital theorist George Landow saw hypertext as a liberatory reading strategy.¹⁶ He embraced it as a common good and thought hyperlinks would emancipate readers from the prison of the fixed word, allowing them to flow freely between various information sources. But what Landow never imagined is the exhaustion that could come from the endless, rootless process of reading in this way. Not everyone wants to jump from point to point without any center. Infinity might not always be alluring.
|
||||
Landow puts the agency in the reader when he writes, ‘anyone who uses hypertext makes his or her own interests the de facto organizing principle (or center) for the investigation at the moment.’¹⁷ But how much agency do we actually have when falling down the rabbit hole? What if we get lost in the hole where the center was? How much attention do we end up losing in a world where we must always multi-task and where reading itself is disrupted by one hyperlink after another? And who
|
||||
|
||||
*****
|
||||
|
||||
In the Dust of This Planet
|
||||
Horror of Philosophy vol. 1
|
||||
Eugene Thacker
|
||||
In the first of a series of three books on the Horror of Philosophy,In the Dust of This Planetoffers the genre of horror as a way of thinking about the unthinkable.
|
||||
Paperback: 978-1-84694-676-9 ebook: 978-1-78099-010-1
|
||||
|
||||
*****
|
||||
|
||||
40
tests/extractors/test_readera.py
Normal file
40
tests/extractors/test_readera.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
from pathlib import Path
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.extractors.readera import ReadEraExtractor
|
||||
|
||||
valid_file = Path("tests/resources/ReadEra_sample.txt")
|
||||
invalid_file = Path("tests/resources/Readest_sample.txt")
|
||||
|
||||
expected = [
|
||||
Annotation(
|
||||
file="tests/resources/ReadEra_sample.txt",
|
||||
content="digital technologies of the twenty-first century can only exist thanks to this kind of outsourced labor. The relative invisibility of the tech supply chain is part of the ruse; American consumers do not see where smartphones come from.",
|
||||
),
|
||||
Annotation(
|
||||
file="tests/resources/ReadEra_sample.txt",
|
||||
content="We don’t necessarily want our leaders to be average persons like us, even though we often enjoy hearing that famous celebrities eat the same fast food as regular people. ",
|
||||
note="We continuously demystify our leaders - first through television, now through social media",
|
||||
),
|
||||
Annotation(
|
||||
file="tests/resources/ReadEra_sample.txt",
|
||||
content="Initially, the Internet was praised as a freer way to encounter information. In the early 1990s, digital theorist George Landow saw hypertext as a liberatory reading strategy.",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def test_identifies_readera_exports():
    """The extractor accepts the ReadEra sample export."""
    assert ReadEraExtractor().can_process(valid_file)
|
||||
|
||||
|
||||
# Readest exports are very similar so we should ensure it ignores them
|
||||
def test_ignores_readest_exports():
    """The extractor rejects the Readest sample export."""
    extractor = ReadEraExtractor()
    recognised = extractor.can_process(invalid_file)
    assert not recognised
|
||||
|
||||
|
||||
def test_entry_extractions():
    """Running the extractor on the sample yields the expected annotations."""
    extractor = ReadEraExtractor()
    assert extractor.run(valid_file) == expected
|
||||
17
tests/resources/ReadEra_sample.txt
Normal file
17
tests/resources/ReadEra_sample.txt
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
The Circle of the Snake
|
||||
Grafton Tanner
|
||||
|
||||
digital technologies of the twenty-first century can only exist thanks to this kind of outsourced labor. The relative invisibility of the tech supply chain is part of the ruse; American consumers do not see where smartphones come from.
|
||||
|
||||
*****
|
||||
|
||||
We don’t necessarily want our leaders to be average persons like us, even though we often enjoy hearing that famous celebrities eat the same fast food as regular people.
|
||||
--We continuously demystify our leaders - first through television, now through social media
|
||||
|
||||
*****
|
||||
|
||||
Initially, the Internet was praised as a freer way to encounter information.
|
||||
In the early 1990s, digital theorist George Landow saw hypertext as a liberatory reading strategy.
|
||||
|
||||
*****
|
||||
|
||||
70
tests/resources/Readest_sample.txt
Normal file
70
tests/resources/Readest_sample.txt
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# Ideology and the Virtual City
|
||||
**Author**:
|
||||
|
||||
**Exported from Readest**: 2025-09-10
|
||||
|
||||
---
|
||||
|
||||
## Highlights & Annotations
|
||||
|
||||
### 5 No More Heroes: The City as Wasteland
|
||||
> "As an ideological response, this “escapist defeatism” contains elements of cynicism, in that it also involves an outward rejection of normal social demands and a pessimistic outlook. The difference between it and cynical self-interest, however, is that the defeatist doesn’t still want to thrive within the existing order. Whereas cynical self-interest distances behaviour from moral values only to really enjoy following dominant demands after all, the defeatist is less excited by regular notions of success, but cannot imagine a way out. I believe this position embodies various features of what Mark Fisher calls “capitalist realism,” which is less about competitive spirit or “making it” and more a kind of depressed state of low expectation within a totalizing capitalist reality."
|
||||
|
||||
> "As an ideological response, this “escapist defeatism” contains elements of cynicism, in that it also involves an outward rejection of normal social demands and a pessimistic outlook. The difference between it and cynical self-interest, however, is that the defeatist doesn’t still want to thrive within the existing order. Whereas cynical self-interest distances behaviour from moral values only to really enjoy following dominant demands after all, the defeatist is less excited by regular notions of success, but cannot imagine a way out. I believe this position embodies various features of what Mark Fisher calls “capitalist realism,” which is less about competitive spirit or “making it” and more a kind of depressed state of low expectation within a totalizing capitalist reality."
|
||||
**Note**:: Opposing cynical defeatism to cynical self-interest
|
||||
|
||||
> "As he says, this modern depression “is constituted not by an inability to get pleasure so much as it is by an inability to do anything else except pursue pleasure. There is a sense that ‘something is missing’—but no appreciation that this mysterious, missing enjoyment can only be accessed beyond the pleasure principle.”22 And of course, the obsessive retreat into consumer pleasures only connects the defeatist even more deeply to the depressing reality, as it becomes part of the work-leisure cycle in which body and mind are allowed to relax, only to be rejuvenated for more draining labour. Even in strictly economic terms, if the escape is all we live for it still must be funded through labour; the only way to get more escape time is to return to the prison and earn it."
|
||||
**Note**:: cf. Mark Fisher
|
||||
|
||||
> "As he says, this modern depression “is constituted not by an inability to get pleasure so much as it is by an inability to do anything else except pursue pleasure. There is a sense that ‘something is missing’—but no appreciation that this mysterious, missing enjoyment can only be accessed beyond the pleasure principle.”22 And of course, the obsessive retreat into consumer pleasures only connects the defeatist even more deeply to the depressing reality, as it becomes part of the work-leisure cycle in which body and mind are allowed to relax, only to be rejuvenated for more draining labour. Even in strictly economic terms, if the escape is all we live for it still must be funded through labour; the only way to get more escape time is to return to the prison and earn it."
|
||||
|
||||
> "His position then reflects the idea developed by Žižek through the terminology of psychoanalyst Jacques Lacan that, in placing increasing emphasis on personal responsibility for success and enjoyment, neoliberalized societies appear to have no “big Other,” or no generalized symbolic guarantee of meaning that explicitly prescribes standards of behaviour. With no singular authoritative voice to aid our decision-making, the attraction of consumerist distractions is perhaps unsurprising, but as the seductive promises of consumer entertainment remain unfulfilled, the pressure remains on us to take responsibility for this dissatisfaction as well. Neoliberalism’s constant demand to do more, without telling us what to do or where we might find it, means we are always guilty of failure, and can’t really identify once and for all what we wish to become"
|
||||
|
||||
> "NMH highlights how consumerist escapist fantasies must be funded by the very normality we wish to escape, and the deeper the escape the more funding is required. The paradox is represented superbly in the kind of jobs Travis must do between matches. While there are extra contract killing jobs on offer, the game only grants access to these after the player completes more menial tasks taken from the local job centre, from picking coconuts and mowing lawns to collecting litter and cleaning graffiti. At these points, Travis is plunged back into an ideological normalcy where work is deemed valuable and fulfilling"
|
||||
|
||||
> "NMH highlights how consumerist escapist fantasies must be funded by the very normality we wish to escape, and the deeper the escape the more funding is required. The paradox is represented superbly in the kind of jobs Travis must do between matches. While there are extra contract killing jobs on offer, the game only grants access to these after the player completes more menial tasks taken from the local job centre, from picking coconuts and mowing lawns to collecting litter and cleaning graffiti. At these points, Travis is plunged back into an ideological normalcy where work is deemed valuable and fulfilling"
|
||||
**Note**:: 'No more heroes'
|
||||
|
||||
> "“something deeper” may be seen in psychoanalytic terms as “death drive,” or a kind of gap in our subjective identities that means we are never quite fully reconciled with ourselves. It is an inevitable sense of uncertainty that emerges because there is no final truth about what things mean, only different understandings that emerge in human consciousness through language. No matter how knowledgeable we are, each of us only has our own perspective and cannot know the innermost feelings and thoughts of others. Moreover, our own deepest desires are not even compatible (for example, the desires for risk and security), so that satisfying one may leave us unsatisfied in another way. In short, nothing is ever really, conclusively fulfilling, and our ideological fantasies which make us feel that there is some absolute meaning merely distract us from that realization."
|
||||
|
||||
---
|
||||
|
||||
### 6 Persona 5: The City as Prison
|
||||
> "missing here is any appreciation of the extent to which neoliberalized political, financial, legal and cultural institutions themselves cause social disengagement, or how the logic of profit actively works against deeper democratic participation. In other words, it does not ask whether democracy may itself be problematic in its existing form, or whether people might be politically disengaged because of privatized media’s tendency to trivialize politics, the influence of corporate money on political parties, or the excessive demands on other aspects of their lives. Indeed, meaningful political change is unlikely, as Jodi Dean points out, when “the appeal to democracy presupposes democracy is the solution to the problems of democracy, because it incorporates in advance any hope things might be otherwise as already the fundamental democratic promise and provision.”25 "
|
||||
|
||||
> "one is represented by the general public, who are shown to be uncommitted, fickle, apathetic and ignorant individuals that may want a just society, but take no responsibility for it. In what Žižek calls the “culture of complaint” in modern societies, they constantly ask some authority figure to make life better, and even “enjoy” playing the perpetual victim, effectively recognizing themselves as subordinates, rather than as agents who could actually affect change. Likewise, in P5, public opinion first imagines the Phantom Thieves as saviours, but later disowns them when they appear to have become too radical, and when Shido promises to restore order through conventional political means."
|
||||
|
||||
---
|
||||
|
||||
### 7 Conclusion: Conformism and Critique
|
||||
> "“hedonism,” in the context of neoliberalism I am thinking of a lifestyle that focuses substantially on consumerist entertainment or pleasure. This hedonist need not be an extreme thrill seeker or a wild party-goer. It would be equally “hedonistic,” in the sense I am defining here, for someone to fixate on what they might want for dinner every night, be genuinely excited about seeing the latest superhero movie, or constantly want to go shopping. In this way, for this hedonist, work is predominantly a means to an end that must be done to pay for enjoyment. The hedonist wouldn’t think twice about stopping work if they came into a large amount of money, but until then work is on balance a good thing, because it enables the purchase of pleasurable goods and services. At the same time, many big social issues, including politics, would appear boring to the hedonist. It is of course better when the economy is thriving, but it makes more sense to leave that stuff to the experts. In all this, such hedonism is not necessarily motivated by pure self-interest. Rather, it involves lines of rationalization based on neoliberal ideas, including consumerist concepts of identity creation but also an understanding of freedom in individual terms and a notion that there are no political alternatives."
|
||||
|
||||
> "while it recognizes the hypocrisy when powerful figures claim to be acting in the best interests of all, it has no time for radical critique, whether left or right, that actually seeks significant changes to the status quo. Instead, the cynic wants things to stay as they are and to exploit them to his/her advantage. This self-interest is supported by pragmatic “realism.” The market is flawed, so the argument goes, but better than the alternative; at least we aren’t subjected to the oppressive and broken systems of, say, Soviet Russia or North Korea. We must remember that humans are a selfish and aggressive bunch, and grand social projects always fail, so we should make the most of what we have."
|
||||
**Note**:: Cynical self-interest
|
||||
|
||||
> "while it recognizes the hypocrisy when powerful figures claim to be acting in the best interests of all, it has no time for radical critique, whether left or right, that actually seeks significant changes to the status quo. Instead, the cynic wants things to stay as they are and to exploit them to his/her advantage. This self-interest is supported by pragmatic “realism.” The market is flawed, so the argument goes, but better than the alternative; at least we aren’t subjected to the oppressive and broken systems of, say, Soviet Russia or North Korea. We must remember that humans are a selfish and aggressive bunch, and grand social projects always fail, so we should make the most of what we have."
|
||||
|
||||
> "it obscures that many people in the world still do have it bad, and that their situation is deeply connected to the wealth and freedoms elsewhere, so could improve under a different social order. Here, the cynic may claim that nothing can be done about this, or that someone somewhere always gets a raw deal, so we may as well just enjoy our luck. What is still missing, however, is any consideration that “human nature” has other aspects and adapts to different conditions, or that humans also share and act collectively, and are more likely to do so in more equal societies, where everyone has a stake and a reasonable quality of life."
|
||||
**Note**:: Cynical self-interest
|
||||
|
||||
> "it obscures that many people in the world still do have it bad, and that their situation is deeply connected to the wealth and freedoms elsewhere, so could improve under a different social order. Here, the cynic may claim that nothing can be done about this, or that someone somewhere always gets a raw deal, so we may as well just enjoy our luck. What is still missing, however, is any consideration that “human nature” has other aspects and adapts to different conditions, or that humans also share and act collectively, and are more likely to do so in more equal societies, where everyone has a stake and a reasonable quality of life."
|
||||
|
||||
> "Central to cynicism is an idea that it rejects ideology for rationality, because ideologies are dogmatic and lead to authoritarianism. But with this alibi it fails to account for its own presumption of neoliberal ideas, particularly its onus on individual self-sufficiency and the impossibility of even potentially workable political or economic alternatives"
|
||||
|
||||
> "In “escapist defeatism” there is also recognition of social problems and also a notion that not much can be done about them, at least by the average citizen. It does perhaps entail more of a historical dimension than cynical self-interest, in focusing less on human nature and more on the way things are now, under a seemingly omnipotent global capitalist system. Moreover, for the defeatist, taking advantage of the situation isn’t the most attractive option, because dominant notions of success simply don’t seem that satisfying, or because an outward pursuit of self-interest remains ethically abhorrent."
|
||||
|
||||
> "Such media consumption goes beyond the throwaway pleasures of hedonism to a deeper identification with the chosen material, even if, deep down, the defeatist knows it is meaningless"
|
||||
|
||||
> "how heavily tied they are to the economy, forcing the individuals concerned back into the normalcy of work-leisure routines to financially maintain their obsession. Another possible aspect of this lifestyle is a kind of victim complex, in which the powers that be are held responsible for life’s difficulties, but cannot be resisted, only mocked or angrily reproached. Either way, the defeatist does not take responsibility for his/her part in reproducing the status quo through excessive consumerist habits, or make any attempt to organize politically and create change."
|
||||
|
||||
> "he answer is somewhere in between: the system works in principle but needs attention if it’s ever going to run smoothly enough to produce a proper meritocracy. What is required, therefore, is collective vigilance and effort to get involved in improving the system, to stop anyone from having an overly corruptive impact. In short, no major overhaul is required, merely greater commitment to properly enforce the concepts of justice and equal opportunity that already exist. This position involves strong political commitment and a good working knowledge of existing systems. But it also implies strict adherence to a particular set of principles, specifically ones that do not question the actual political, social and economic institutions that are currently in place. In addition, the very idea of “making it” inscribed into its meritocratic ideal is limited by neoliberal notions of success: fulfilling career potentials, accumulating property, developing personal relationships and expressing identities through consumerist"
|
||||
**Note**:: Political reformism
|
||||
|
||||
> "he answer is somewhere in between: the system works in principle but needs attention if it’s ever going to run smoothly enough to produce a proper meritocracy. What is required, therefore, is collective vigilance and effort to get involved in improving the system, to stop anyone from having an overly corruptive impact. In short, no major overhaul is required, merely greater commitment to properly enforce the concepts of justice and equal opportunity that already exist. This position involves strong political commitment and a good working knowledge of existing systems. But it also implies strict adherence to a particular set of principles, specifically ones that do not question the actual political, social and economic institutions that are currently in place. In addition, the very idea of “making it” inscribed into its meritocratic ideal is limited by neoliberal notions of success: fulfilling career potentials, accumulating property, developing personal relationships and expressing identities through consumerist"
|
||||
|
||||
> "It does not consider whether these aims are really universal, or whether some people may have valid alternate desires that would require extensive social change to realize. Nor does it question whether meritocracy is fair in the first place, or whether people who simply lack marketable personal attributes deserve a lesser standard of life."
|
||||
|
||||
> "Such a perspective may, for example, contemplate whether the kinds of work we are generally obliged to do are socially necessary or personally fulfilling. It may ask whether the demand to “enjoy responsibly” by balancing ever growing work and consumer pressures actually constitutes a meaningful or coherent life goal, or whether alternative goals could be more satisfactory. It can also imagine the possibility of greater political engagement from the majority of people, and what it means to take collective responsibility for social conditions, rather than following individual desires. And if all these ideas are framed in relation to neoliberal capitalist systems of economics, politics and culture, they can even consider whether the systems as such produce demands that are inherently unrealistic, antagonistic and socially destructive.
|
||||
The specific questions that then arise from such critical thinking are many and varied. For example, are there even enough jobs, especially satisfying, well-paid jobs, for everyone to do today? Is throwaway mass consumerism environmentally sustainable? Does an individualized focus on employment and consumerism detract from more meaningful social activity? Are the demands of globalized markets compatible with free and effective political decision-making? To what extent does corporate advertising, PR and lobbying help frame our political desires? And how might we need to change society to provide the kind of meaning, involvement and satisfaction we desire? If some of these questions seem overly radical even for progressives who perceive a need to address existing social problems, it should be clear by now that getting to the root of these problems may require re-evaluation of even the most deeply embedded assumptions about how societies work and how they could work."
|
||||
|
||||
---
|
||||
|
|
@ -4,6 +4,42 @@ from papis.document import Document
|
|||
from papis_extract.annotation import Annotation
|
||||
|
||||
|
||||
def test_value_inequality_comparison():
    """Annotations with different content and note do not compare equal."""
    first = Annotation("myfile", content="Here be content!", note="and a note")
    second = Annotation(
        "myfile",
        content="Here be different content!",
        note="but still a note",
    )
    assert first != second
|
||||
|
||||
|
||||
def test_oder_lt_comparison():
    """An annotation on page 2 orders before one on page 10."""
    earlier = Annotation(
        "myfile", content="Here be content!", note="and a note", page=2
    )
    later = Annotation(
        "myfile",
        content="Here be different content!",
        note="but still a note",
        page=10,
    )
    assert earlier < later
|
||||
|
||||
|
||||
def test_oder_ge_comparison():
    """``>=`` holds for a higher page as well as for an equal page."""
    shared = {"content": "ct", "note": "nt"}
    big = Annotation("mf", page=10, **shared)
    small = Annotation("mf", page=2, **shared)
    alsosmall = Annotation("mf", page=2, **shared)
    assert big >= small
    assert small >= alsosmall
|
||||
|
||||
|
||||
def test_oder_gt_comparison_single_nopage():
    """An annotation with page 0 compares greater than one on page 2.

    NOTE(review): page 0 presumably stands for "no page information", which
    would explain why it is expected to order last; confirm against
    ``Annotation``.
    """
    unpaged = Annotation("mf", content="ct", note="nt", page=0)
    paged = Annotation("mf", content="ct", note="nt", page=2)
    assert unpaged > paged
|
||||
|
||||
|
||||
def test_oder_le_comparison_all_nopage():
    """Two annotations that both have page 0 satisfy ``<=``."""
    left = Annotation("mf", content="ct", note="nt", page=0)
    right = Annotation("mf", content="ct", note="nt", page=0)
    assert left <= right
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fmt_string,expected",
|
||||
[
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from papis.document import Document
|
||||
from papis_extract.annotation import Annotation
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import (
|
||||
format_count,
|
||||
format_csv,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue