Compare commits
23 commits
7a69bd509f
...
4e5a9e92a7
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e5a9e92a7 | |||
| 528052967f | |||
| 1dd00adaee | |||
| dfc3bdea70 | |||
| 7459fbeb0b | |||
| 30bc8452fa | |||
| d312f75655 | |||
| b2ce6023a2 | |||
| 729b6aa62a | |||
| 9c27ea1f6f | |||
| 04bc256a16 | |||
| 764b3204a1 | |||
| e46219151b | |||
| ff36d30f91 | |||
| f5455b6946 | |||
| 96cd4929c9 | |||
| a854ef00d6 | |||
| f7801365f0 | |||
| e90a123f88 | |||
| 5f01aa1f2b | |||
| 3ef45e24f7 | |||
| 5350b9215e | |||
| 1f65317d65 |
25 changed files with 1145 additions and 529 deletions
|
|
@ -1,16 +1,16 @@
|
|||
steps:
|
||||
lint_ruff:
|
||||
image: python
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- pip install ruff
|
||||
- python --version && poetry --version && ruff --version
|
||||
- uv tool install ruff
|
||||
- python --version && uv --version && ruff --version
|
||||
- echo "----------------- running ruff lint ------------------"
|
||||
- ruff check .
|
||||
|
||||
lint_black:
|
||||
image: python
|
||||
format_ruff:
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- pip install black
|
||||
- python --version && poetry --version && black --version
|
||||
- echo "----------------- running black lint ----------------"
|
||||
- black --check .
|
||||
- uv tool install ruff
|
||||
- python --version && uv --version && ruff --version
|
||||
- echo "----------------- running ruff format ------------------"
|
||||
- ruff format --check .
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
steps:
|
||||
pyright:
|
||||
image: nikolaik/python-nodejs
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- npm install --global pyright
|
||||
- uv sync
|
||||
- python --version && uv version && pyright --version
|
||||
- uv tool install pyright
|
||||
- uv sync --locked
|
||||
- python --version && uv --version && pyright --version
|
||||
- echo "------------- running pyright typecheck -------------"
|
||||
- uv run pyright
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ when:
|
|||
|
||||
steps:
|
||||
pytest:
|
||||
image: nikolaik/python-nodejs
|
||||
image: ghcr.io/astral-sh/uv:python3.11-trixie-slim
|
||||
commands:
|
||||
- uv sync
|
||||
- python --version && uv version
|
||||
- uv sync --locked
|
||||
- python --version && uv --version
|
||||
- echo "------------- running pytest -------------"
|
||||
- uv run pytest
|
||||
|
|
|
|||
57
CHANGELOG.md
Normal file
57
CHANGELOG.md
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
### Changed
|
||||
|
||||
### Fixed
|
||||
|
||||
### Removed
|
||||
|
||||
## [0.2.1]
|
||||
|
||||
### Added
|
||||
|
||||
- Add option to force-add duplicated annotations
|
||||
- Add CLI option to choose extractor
|
||||
- Add CSV formatter
|
||||
- Add count formatter (displays the annotation count per item)
|
||||
|
||||
### Changed
|
||||
|
||||
- Switch to uv packaging and hatch backend
|
||||
|
||||
### Fixed
|
||||
|
||||
- Only inform if no extractor finds valid files
|
||||
- Respect minimum color similarity option
|
||||
|
||||
## [0.2.0]
|
||||
|
||||
### Added
|
||||
|
||||
- Add PocketBook extractor (requires BeautifulSoup4)
|
||||
- Add ReadEra extractor
|
||||
- Allow different formatting for first format entry
|
||||
- Add Markdown style formatting
|
||||
- Add stdout and write-to-note exporters
|
||||
|
||||
### Changed
|
||||
|
||||
- Update dependencies
|
||||
- Update to papis 0.14
|
||||
- Refactor and simplify test dependencies
|
||||
|
||||
## [0.1.0]
|
||||
|
||||
### Added
|
||||
|
||||
- Add extractor and install info
|
||||
- Add PDF extractor
|
||||
- Allow cli option for choosing a template
|
||||
- Add mustache templating
|
||||
- Add preliminary README
|
||||
|
|
@ -5,7 +5,7 @@
|
|||

|
||||
-->
|
||||
|
||||
Quickly extract annotations from your pdf files with the help of the [papis](https://github.com/papis/papis) bibliography manager.\
|
||||
Quickly extract annotations from your files with the help of the [papis](https://github.com/papis/papis) bibliography manager.\
|
||||
Easily organize all your highlights and thoughts next to your documents and references.\
|
||||
|
||||
## Installation
|
||||
|
|
|
|||
|
|
@ -1,17 +1,19 @@
|
|||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.exporter import Exporter
|
||||
|
||||
import click
|
||||
import papis.cli
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.logging
|
||||
import papis.notes
|
||||
import papis.strings
|
||||
from papis.document import Document
|
||||
|
||||
from papis_extract import extraction
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.exporter import Exporter
|
||||
from papis_extract.exporters import all_exporters
|
||||
from papis_extract.extractors import all_extractors
|
||||
from papis_extract.formatter import Formatter, formatters
|
||||
|
|
@ -71,8 +73,8 @@ papis.config.register_default_settings(DEFAULT_OPTIONS)
|
|||
help="Choose which input formats to gather annotations from. [default: all]",
|
||||
)
|
||||
@click.option(
|
||||
"--force/--no-force",
|
||||
"-f",
|
||||
"--duplicates/--no-duplicates",
|
||||
"-d",
|
||||
help="Do not drop any annotations because they already exist.",
|
||||
show_default=True,
|
||||
)
|
||||
|
|
@ -88,7 +90,7 @@ def main(
|
|||
extractors: list[str],
|
||||
output: str,
|
||||
git: bool,
|
||||
force: bool,
|
||||
duplicates: bool,
|
||||
) -> None:
|
||||
"""Extract annotations from any documents.
|
||||
|
||||
|
|
@ -118,7 +120,7 @@ def main(
|
|||
git=git,
|
||||
formatter=formatter,
|
||||
extractors=[all_extractors.get(e) for e in extractors],
|
||||
force=force,
|
||||
duplicates=duplicates,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -129,7 +131,7 @@ def run(
|
|||
edit: bool = False,
|
||||
write: bool = False,
|
||||
git: bool = False,
|
||||
force: bool = False,
|
||||
duplicates: bool = False,
|
||||
) -> None:
|
||||
exporter: Exporter
|
||||
if write:
|
||||
|
|
@ -137,7 +139,7 @@ def run(
|
|||
formatter=formatter or formatters["markdown-atx"],
|
||||
edit=edit,
|
||||
git=git,
|
||||
force=force,
|
||||
duplicates=duplicates,
|
||||
)
|
||||
else:
|
||||
exporter = all_exporters["stdout"](
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional, cast
|
||||
from types import NotImplementedType
|
||||
from typing import Any, cast
|
||||
|
||||
import chevron
|
||||
import papis.config
|
||||
|
|
@ -21,7 +21,6 @@ COLORS: dict[str, tuple[float, float, float]] = {
|
|||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Annotation:
|
||||
"""A PDF annotation object.
|
||||
|
||||
|
|
@ -117,7 +116,7 @@ class Annotation:
|
|||
return color_mapping.get(colorname, "")
|
||||
|
||||
# mimics the functions in papis.config.{getlist,getint,getfloat} etc.
|
||||
def _getdict(self, key: str, section: Optional[str] = None) -> dict[str, str]:
|
||||
def _getdict(self, key: str, section: str | None = None) -> dict[str, str]:
|
||||
"""Dict getter
|
||||
|
||||
:returns: A python dict
|
||||
|
|
@ -126,19 +125,55 @@ class Annotation:
|
|||
"""
|
||||
rawvalue: Any = papis.config.general_get(key, section=section)
|
||||
if isinstance(rawvalue, dict):
|
||||
return cast(dict[str, str], rawvalue)
|
||||
return cast("dict[str, str]", rawvalue)
|
||||
try:
|
||||
rawvalue = eval(rawvalue)
|
||||
except Exception:
|
||||
raise SyntaxError(
|
||||
"The key '{}' must be a valid Python object: {}".format(key, rawvalue)
|
||||
f"The configuration key '{key}' must be a valid Python dict: {rawvalue}"
|
||||
)
|
||||
else:
|
||||
if not isinstance(rawvalue, dict):
|
||||
raise SyntaxError(
|
||||
"The key '{}' must be a valid Python dict. Got: {} (type {!r})".format(
|
||||
key, rawvalue, type(rawvalue).__name__
|
||||
)
|
||||
f"The configuration key '{key}' must be a valid Python dict. Got: {rawvalue} (type {type(rawvalue).__name__})"
|
||||
)
|
||||
|
||||
return cast(dict[str, str], rawvalue)
|
||||
return cast("dict[str, str]", rawvalue)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"Annotation({self.type}: '{self.file}', color: {self.color}, tag: '{self.tag}', page: {self.page}, content: '{self.content}', note: '{self.note}', minimum_similarity_color: {self.minimum_similarity_color})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Annotation(type={self.type}, file='{self.file}', color={self.color}, tag='{self.tag}', page={self.page}, content='{self.content}', note='{self.note}', minimum_similarity_color={self.minimum_similarity_color})"
|
||||
|
||||
def __eq__(self, other: object) -> bool | NotImplementedType:
|
||||
if not isinstance(other, Annotation):
|
||||
return NotImplemented
|
||||
|
||||
return (
|
||||
self.content.lower(),
|
||||
self.note.lower(),
|
||||
self.type,
|
||||
self.file,
|
||||
self.color,
|
||||
self.tag,
|
||||
self.page,
|
||||
) == (
|
||||
other.content.lower(),
|
||||
other.note.lower(),
|
||||
other.type,
|
||||
other.file,
|
||||
other.color,
|
||||
other.tag,
|
||||
other.page,
|
||||
)
|
||||
|
||||
def __lt__(self, other: object) -> bool:
|
||||
if not hasattr(other, "page"):
|
||||
return NotImplemented
|
||||
|
||||
other = cast("Annotation", other)
|
||||
selfpage = self.page if self.page != 0 else float("inf")
|
||||
otherpage = other.page if other.page != 0 else float("inf")
|
||||
|
||||
return selfpage < otherpage
|
||||
|
|
|
|||
|
|
@ -1,13 +1,8 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Protocol
|
||||
|
||||
import papis.api
|
||||
import papis.commands.edit
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.git
|
||||
import papis.logging
|
||||
import papis.notes
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
|
|
@ -20,7 +15,7 @@ class Exporter(Protocol):
|
|||
formatter: Formatter
|
||||
edit: bool = False
|
||||
git: bool = False
|
||||
force: bool = False
|
||||
duplicates: bool = False
|
||||
|
||||
def run(
|
||||
self, annot_docs: list[tuple[papis.document.Document, list[Annotation]]]
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ from papis_extract.exporter import Exporter
|
|||
from papis_extract.exporters.notes import NotesExporter
|
||||
from papis_extract.exporters.stdout import StdoutExporter
|
||||
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
all_exporters: dict[str, type[Exporter]] = {}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,17 @@
|
|||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import Levenshtein
|
||||
from papis.document import Document
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
from papis.logging import get_logger
|
||||
import papis.notes
|
||||
import papis.commands.edit
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.git
|
||||
import papis.config
|
||||
import papis.commands.edit
|
||||
import papis.notes
|
||||
from papis.document import Document
|
||||
from papis.logging import get_logger
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import Formatter
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
@ -18,7 +21,7 @@ class NotesExporter:
|
|||
formatter: Formatter
|
||||
edit: bool = False
|
||||
git: bool = False
|
||||
force: bool = False
|
||||
duplicates: bool = False
|
||||
|
||||
def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
|
||||
"""Write annotations into document notes.
|
||||
|
|
@ -33,7 +36,9 @@ class NotesExporter:
|
|||
doc, annots, first=True
|
||||
).split("\n")
|
||||
if formatted_annotations:
|
||||
self._add_annots_to_note(doc, formatted_annotations, force=self.force)
|
||||
self._add_annots_to_note(
|
||||
doc, formatted_annotations, duplicates=self.duplicates
|
||||
)
|
||||
|
||||
if self.edit:
|
||||
papis.commands.edit.edit_notes(doc, git=self.git)
|
||||
|
|
@ -43,15 +48,15 @@ class NotesExporter:
|
|||
document: Document,
|
||||
formatted_annotations: list[str],
|
||||
git: bool = False,
|
||||
force: bool = False,
|
||||
duplicates: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Append new annotations to the end of a note.
|
||||
|
||||
This function appends new annotations to the end of a note file. It takes in a
|
||||
document object containing the note, a list of formatted annotations to be
|
||||
added, and optional flags git and force. If git is True, the changes will be
|
||||
committed to git. If force is True, the annotations will be added even if they
|
||||
added, and optional flags git and duplicates. If git is True, the changes will be
|
||||
committed to git. If duplicates is True, the annotations will be added even if they
|
||||
already exist in the note.
|
||||
|
||||
:param document: The document object representing the note
|
||||
|
|
@ -60,45 +65,45 @@ class NotesExporter:
|
|||
:type formatted_annotations: list[str]
|
||||
:param git: Flag indicating whether to commit changes to git, defaults to False.
|
||||
:type git: bool, optional
|
||||
:param force: Flag indicating whether to force adding annotations even if they
|
||||
already exist, defaults to False.
|
||||
:type force: bool, optional
|
||||
:param duplicates: Flag indicating whether to force adding annotations as duplicates
|
||||
even if they already exist, defaults to False.
|
||||
:type duplicates: bool, optional
|
||||
"""
|
||||
logger.debug("Adding annotations to note.")
|
||||
notes_path = papis.notes.notes_path_ensured(document)
|
||||
logger.debug("Adding annotations to note...")
|
||||
notes_path = Path(papis.notes.notes_path_ensured(document))
|
||||
|
||||
existing: list[str] = []
|
||||
with open(notes_path, "r") as file_read:
|
||||
existing = file_read.readlines()
|
||||
with notes_path.open("r") as fr:
|
||||
existing = fr.readlines()
|
||||
|
||||
new_annotations: list[str] = []
|
||||
if not force:
|
||||
new_annotations: list[str] = formatted_annotations
|
||||
if not duplicates:
|
||||
new_annotations = self._drop_existing_annotations(
|
||||
formatted_annotations, existing
|
||||
)
|
||||
if not new_annotations:
|
||||
logger.debug("No new annotations to be added.")
|
||||
return
|
||||
|
||||
with open(notes_path, "a") as f:
|
||||
with notes_path.open("a") as fa:
|
||||
# add a newline if the file does not already end with a blank line
|
||||
if len(existing) > 0 and existing[-1].strip() != "":
|
||||
f.write("\n")
|
||||
# FIXME this either joins them too close or moves them too far apart
|
||||
# We need a better algorithm which knows what a full 'annotation' is.
|
||||
f.write("\n".join(new_annotations))
|
||||
f.write("\n")
|
||||
fa.write("\n")
|
||||
# We filter out any empty lines from the annotations
|
||||
filtered_annotations = [annot for annot in new_annotations if annot != ""]
|
||||
fa.write("\n\n".join(filtered_annotations))
|
||||
logger.info(
|
||||
f"Wrote {len(new_annotations)} "
|
||||
f"{'line' if len(new_annotations) == 1 else 'lines'} "
|
||||
f"Wrote {len(filtered_annotations)} "
|
||||
f"{'line' if len(filtered_annotations) == 1 else 'lines'} "
|
||||
f"to {papis.document.describe(document)}"
|
||||
)
|
||||
|
||||
if git:
|
||||
msg = "Update notes for '{0}'".format(papis.document.describe(document))
|
||||
msg = f"Update annotations for '{papis.document.describe(document)}'"
|
||||
folder = document.get_main_folder()
|
||||
if folder:
|
||||
papis.git.add_and_commit_resources(
|
||||
folder, [notes_path, document.get_info_file()], msg
|
||||
folder, [str(notes_path), document.get_info_file()], msg
|
||||
)
|
||||
|
||||
def _drop_existing_annotations(
|
||||
|
|
@ -107,7 +112,7 @@ class NotesExporter:
|
|||
"""Returns the input annotations dropping any existing.
|
||||
|
||||
Takes a list of formatted annotations and a list of strings
|
||||
(most probably existing lines in a file). If anny annotations
|
||||
(most probably existing lines in a file). If any annotations
|
||||
match an existing line closely enough, they will be dropped.
|
||||
|
||||
Returns list of annotations without duplicates.
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ class StdoutExporter:
|
|||
formatter: Formatter
|
||||
edit: bool = False
|
||||
git: bool = False
|
||||
force: bool = False
|
||||
duplicates: bool = False
|
||||
|
||||
def run(self, annot_docs: list[tuple[Document, list[Annotation]]]) -> None:
|
||||
"""Pretty print annotations to stdout.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
|
||||
import papis.config
|
||||
import papis.document
|
||||
import papis.logging
|
||||
from papis.document import Document
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from importlib.util import find_spec
|
|||
import papis.logging
|
||||
|
||||
from papis_extract.extraction import Extractor
|
||||
from papis_extract.extractors import pdf
|
||||
from papis_extract.extractors import pdf, readera, readest
|
||||
from papis_extract.extractors.pocketbook import PocketBookExtractor
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
|
@ -11,6 +11,8 @@ logger = papis.logging.get_logger(__name__)
|
|||
all_extractors: dict[str, Extractor] = {}
|
||||
|
||||
all_extractors["pdf"] = pdf.PdfExtractor()
|
||||
all_extractors["readera"] = readera.ReadEraExtractor()
|
||||
all_extractors["readest"] = readest.ReadestExtractor()
|
||||
|
||||
if find_spec("bs4") and find_spec("magic"):
|
||||
all_extractors["pocketbook"] = PocketBookExtractor()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
# pyright: strict, reportMissingTypeStubs=false, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
from typing import NamedTuple, cast
|
||||
|
||||
import Levenshtein
|
||||
import magic
|
||||
import papis.config
|
||||
import papis.logging
|
||||
import pymupdf as mu
|
||||
|
|
@ -14,13 +15,21 @@ from papis_extract.exceptions import ExtractionError
|
|||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
|
||||
class PdfAnnot(NamedTuple):
|
||||
page: mu.Page
|
||||
annot: mu.Annot
|
||||
|
||||
|
||||
class PdfExtractor:
|
||||
def can_process(self, filename: Path) -> bool:
|
||||
if not filename.is_file():
|
||||
logger.error(f"File {str(filename)} not readable.")
|
||||
return False
|
||||
|
||||
if not self._is_pdf(filename):
|
||||
return False
|
||||
|
||||
logger.debug(f"Found processable annotation file: {filename}")
|
||||
return True
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
|
|
@ -31,34 +40,24 @@ class PdfExtractor:
|
|||
"""
|
||||
annotations: list[Annotation] = []
|
||||
try:
|
||||
with mu.Document(filename) as doc:
|
||||
for (
|
||||
page
|
||||
) in doc: # pyright: ignore [reportUnknownVariableType] - missing stub
|
||||
annot: mu.Annot
|
||||
for annot in page.annots():
|
||||
quote, note = self._retrieve_annotation_content(page, annot)
|
||||
if not quote and not note:
|
||||
continue
|
||||
color: tuple[float, float, float] = cast(
|
||||
tuple[float, float, float],
|
||||
(
|
||||
annot.colors.get("fill")
|
||||
or annot.colors.get("stroke")
|
||||
or (0.0, 0.0, 0.0)
|
||||
),
|
||||
)
|
||||
page_nr: int = cast(int, page.number or 0)
|
||||
highlight_type: str = cast(str, annot.type[1] or "")
|
||||
a = Annotation(
|
||||
file=str(filename),
|
||||
content=quote or "",
|
||||
note=note or "",
|
||||
color=color,
|
||||
type=highlight_type,
|
||||
page=page_nr,
|
||||
)
|
||||
annotations.append(a)
|
||||
for page, annot in self._all_pdf_annots(filename):
|
||||
quote, note = self._get_annotation_content(page, annot)
|
||||
if not quote and not note:
|
||||
continue
|
||||
|
||||
color = self._get_correct_color(annot)
|
||||
page_nr: int = cast("int", page.number or 0)
|
||||
highlight_type: str = cast("str", annot.type[1] or "")
|
||||
|
||||
a = Annotation(
|
||||
file=str(filename),
|
||||
content=quote or "",
|
||||
note=note or "",
|
||||
color=color,
|
||||
type=highlight_type,
|
||||
page=page_nr,
|
||||
)
|
||||
annotations.append(a)
|
||||
logger.debug(
|
||||
f"Found {len(annotations)} "
|
||||
f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
|
||||
|
|
@ -69,11 +68,18 @@ class PdfExtractor:
|
|||
|
||||
return annotations
|
||||
|
||||
def _all_pdf_annots(self, filename: Path) -> Generator[PdfAnnot]:
|
||||
with mu.Document(filename) as doc:
|
||||
for page in doc:
|
||||
annot: mu.Annot
|
||||
for annot in page.annots():
|
||||
yield PdfAnnot(page, annot)
|
||||
|
||||
def _is_pdf(self, fname: Path) -> bool:
|
||||
"""Check if file is a pdf, using mime type."""
|
||||
return magic.from_file(fname, mime=True) == "application/pdf"
|
||||
return mimetypes.guess_type(fname)[0] == "application/pdf"
|
||||
|
||||
def _retrieve_annotation_content(
|
||||
def _get_annotation_content(
|
||||
self, page: mu.Page, annotation: mu.Annot
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Gets the text content of an annotation.
|
||||
|
|
@ -85,7 +91,7 @@ class PdfExtractor:
|
|||
should both be included or are the same, using
|
||||
Levenshtein distance.
|
||||
"""
|
||||
content = cast(str, annotation.info["content"].replace("\n", " "))
|
||||
content = cast("str", annotation.info["content"].replace("\n", " "))
|
||||
written = page.get_textbox(annotation.rect).replace("\n", " ")
|
||||
|
||||
# highlight with selection in note
|
||||
|
|
@ -96,13 +102,20 @@ class PdfExtractor:
|
|||
if Levenshtein.ratio(content, written) > minimum_similarity:
|
||||
return (content, None)
|
||||
# both a highlight and a note
|
||||
elif content and written:
|
||||
if content and written:
|
||||
return (written, content)
|
||||
# an independent note, not a highlight
|
||||
elif content:
|
||||
if content:
|
||||
return (None, content)
|
||||
# highlight with selection not in note
|
||||
elif written:
|
||||
if written:
|
||||
return (written, None)
|
||||
# just a highlight without any text
|
||||
return (None, None)
|
||||
|
||||
def _get_correct_color(self, annot: mu.Annot):
|
||||
color: tuple[float, float, float] = cast(
|
||||
"tuple[float, float, float]",
|
||||
(annot.colors.get("fill") or annot.colors.get("stroke") or (0.0, 0.0, 0.0)),
|
||||
)
|
||||
return color
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import magic
|
||||
import papis.config
|
||||
import papis.logging
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ logger = papis.logging.get_logger(__name__)
|
|||
|
||||
class PocketBookExtractor:
|
||||
def can_process(self, filename: Path) -> bool:
|
||||
if not magic.from_file(filename, mime=True) == "text/xml":
|
||||
if not self._is_html(filename):
|
||||
return False
|
||||
|
||||
content = self._read_file(filename)
|
||||
|
|
@ -25,8 +25,13 @@ class PocketBookExtractor:
|
|||
"meta", {"name": "generator", "content": "PocketBook Bookmarks Export"}
|
||||
):
|
||||
return False
|
||||
|
||||
logger.debug(f"Found processable annotation file: {filename}")
|
||||
return True
|
||||
|
||||
def _is_html(self, filename: Path) -> bool:
|
||||
return mimetypes.guess_type(filename)[0] == "text/html"
|
||||
|
||||
def run(self, filename: Path) -> list[Annotation]:
|
||||
"""Extract annotations from pocketbook html file.
|
||||
|
||||
|
|
@ -44,19 +49,14 @@ class PocketBookExtractor:
|
|||
annotations: list[Annotation] = []
|
||||
for bm in html.select("div.bookmark"):
|
||||
content = str(
|
||||
(bm.select_one("div.bm-text>p") or html.new_string("")).text
|
||||
or "" # pyright: ignore [reportUnknownArgumentType]
|
||||
(bm.select_one("div.bm-text>p") or html.new_string("")).text or ""
|
||||
)
|
||||
note = str(
|
||||
(bm.select_one("div.bm-note>p") or html.new_string("")).text
|
||||
or "" # pyright: ignore [reportUnknownArgumentType]
|
||||
)
|
||||
page = int(
|
||||
(bm.select_one("p.bm-page") or html.new_string("")).text
|
||||
or 0 # pyright: ignore [reportUnknownArgumentType]
|
||||
(bm.select_one("div.bm-note>p") or html.new_string("")).text or ""
|
||||
)
|
||||
page = int((bm.select_one("p.bm-page") or html.new_string("")).text or 0)
|
||||
|
||||
el_classes = bm.attrs.get("class", "").split(" ")
|
||||
el_classes = cast("str", bm.attrs.get("class", "")).split(" ")
|
||||
color = (0, 0, 0)
|
||||
for c in el_classes:
|
||||
if "bm-color-" in c:
|
||||
|
|
@ -81,8 +81,8 @@ class PocketBookExtractor:
|
|||
|
||||
def _read_file(self, filename: Path) -> str:
|
||||
try:
|
||||
with open(filename) as f:
|
||||
return f.read()
|
||||
with filename.open("r") as fr:
|
||||
return fr.read()
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Could not open file {filename} for extraction.")
|
||||
return ""
|
||||
|
|
|
|||
92
papis_extract/extractors/readera.py
Normal file
92
papis_extract/extractors/readera.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import mimetypes
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import papis.logging
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
|
||||
class ReadEraExtractor:
    """Extracts exported annotations from the ReadEra book reading app for Android and iOS.

    https://readera.org/
    """

    def can_process(self, filename: Path) -> bool:
        """Check whether a file looks like a ReadEra annotation export.

        A file is accepted when its name maps to the ``text/plain`` mime type,
        it has at least two leading lines (title and author), it contains a
        ``*****`` divider line, and it ends with such a divider followed by
        an empty line.

        :param filename: Path of the file to probe.
        :returns: True if the file can be handed to :meth:`run`.
        """
        if not self._is_txt(filename):
            return False

        content = self._read_file(filename)
        # look for title and author lines up top; the length check also keeps
        # empty and one-line text files from raising IndexError below
        if len(content) < 2 or not content[0] or not content[1]:
            return False

        text = "".join(content)

        # look for star-shaped divider pattern
        if not re.search(r"\n\*\*\*\*\*\n", text):
            return False

        # look for star-shaped pattern at end of file
        if not re.search(r"\n\*\*\*\*\*\n\n$", text):
            return False

        logger.debug(f"Found processable annotation file: {filename}")
        return True

    def _is_txt(self, filename: Path) -> bool:
        """Check if the file name maps to the plain text mime type."""
        return mimetypes.guess_type(filename)[0] == "text/plain"

    def run(self, filename: Path) -> list[Annotation]:
        """Extract annotations from readera txt file.

        Returns all readable annotations contained in the file passed in, with
        highlights and notes if available. Could theoretically return the
        annotation color but I do not have access to a premium version of
        ReadEra so I cannot add this feature.

        :param filename: Path of the ReadEra export to read.
        :returns: One annotation per ``*****``-terminated entry; an empty list
            if nothing is left after dropping the first two lines.
        """
        # the first two lines (title and author, see can_process) are skipped
        content = self._read_file(filename)[2:]
        if not content:
            return []

        annotations: list[Annotation] = []

        # split for *** separators and remove the last entry since it is always
        # empty
        entries = "\n".join(content).split("\n*****\n")[:-1]
        # a line starting with '--' inside an entry carries the note
        note_pattern = re.compile(r"\n--.*")
        for raw_entry in entries:
            entry = raw_entry.strip()
            note = ""
            found = note_pattern.search(entry)
            if found:
                entry = note_pattern.sub("", entry)
                note = re.sub(r"\n--", "", found.group())

            # flatten what remains of the entry onto a single line
            entry = re.sub(r"\n", " ", entry)

            a = Annotation(
                file=str(filename),
                content=entry,
                note=note,
                # color=color,  # TODO: Implement for premium ReadEra version
            )
            annotations.append(a)

        logger.debug(
            f"Found {len(annotations)} "
            f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
        )
        return annotations

    def _read_file(self, filename: Path) -> list[str]:
        """Return the lines of the file, or an empty list if it does not exist."""
        try:
            with filename.open("r") as fr:
                return fr.readlines()
        except FileNotFoundError:
            logger.error(f"Could not open file {filename} for extraction.")
            return []
|
||||
96
papis_extract/extractors/readest.py
Normal file
96
papis_extract/extractors/readest.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# pyright: strict, reportUnknownMemberType=false
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import papis.logging
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
|
||||
logger = papis.logging.get_logger(__name__)
|
||||
|
||||
ACCEPTED_EXTENSIONS = [".txt", ".md", ".qmd", ".rmd"]
|
||||
TEXTCHARS = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
|
||||
|
||||
|
||||
class ReadestExtractor:
    """Extracts exported annotations from the FOSS Readest book reading app.

    https://readest.com/
    """

    def can_process(self, filename: Path) -> bool:
        """Check whether a file looks like a Readest annotation export.

        :param filename: Path of the file to probe.
        :returns: True if the file is readable text and contains the
            "Exported from Readest" line with a ``YYYY-MM-DD`` shaped date.
        """
        if not self._is_readable_text(filename):
            return False

        content = self._read_file(filename)
        if not content:
            return False

        # look for the export marker line that Readest writes, including its date
        if not re.search(
            r"\n\*\*Exported from Readest\*\*: \d{4}-\d{2}-\d{2}\n", "".join(content)
        ):
            return False

        logger.debug(f"Found processable annotation file: {filename}")
        return True

    def _is_readable_text(self, filename: Path) -> bool:
        """Checks whether a file has a valid text extension and is not a binary file.

        A file is considered a valid text file if its extension is in
        :data:`ACCEPTED_EXTENSIONS` and does not contain any non-text characters.
        Only the first 1024 bytes of the file are inspected.

        :returns: A boolean indicating whether the file is a valid text file.
        """
        if filename.suffix not in ACCEPTED_EXTENSIONS:
            return False
        try:
            with filename.open("rb") as rb:
                # anything left after deleting all text bytes is binary content
                return not bool(rb.read(1024).translate(None, TEXTCHARS))
        except (FileNotFoundError, PermissionError):
            return False

    def run(self, filename: Path) -> list[Annotation]:
        """Extract annotations from readest txt file.

        Returns all readable annotations contained in the file passed in, with
        highlights and notes if available.

        :param filename: Path of the Readest export to read.
        :returns: One annotation per line starting with ``"> "``; an empty
            list if nothing is left after dropping the first two lines.
        """
        content = self._read_file(filename)[2:]
        if not content:
            return []

        annotations: list[Annotation] = []

        for i, line in enumerate(content):
            if not line.startswith("> "):
                continue

            # The note, if there is one, sits on the line right after the
            # quote. The quote may be the last line of the file, so do not
            # index past the end.
            nextline = content[i + 1] if i + 1 < len(content) else ""
            entry_note = ""
            if nextline.startswith("**Note**:: "):
                entry_note = nextline.removeprefix("**Note**:: ").strip()

            a = Annotation(
                file=str(filename),
                content=self._clean_quote(line),
                note=entry_note,
                # NOTE: Unfortunately Readest currently does not export color information
                # color=color,
            )
            annotations.append(a)

        logger.debug(
            f"Found {len(annotations)} "
            f"{'annotation' if len(annotations) == 1 else 'annotations'} for {filename}."
        )
        return annotations

    @staticmethod
    def _clean_quote(line: str) -> str:
        """Return the highlighted text of a ``> "..."`` quote line.

        Drops the leading blockquote marker and surrounding whitespace, then
        removes at most one wrapping pair of double quotes so that quote marks
        belonging to the highlighted text itself are kept.
        """
        text = line.lstrip("> ").rstrip()
        text = text.removeprefix('"').removesuffix('"')
        return text.strip()

    def _read_file(self, filename: Path) -> list[str]:
        """Return the lines of the file, or an empty list if it does not exist."""
        try:
            with filename.open("r") as fr:
                return fr.readlines()
        except FileNotFoundError:
            logger.error(f"Could not open file {filename} for extraction.")
            return []
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
from typing import Protocol
|
||||
|
||||
from papis.document import Document
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
|
|
|
|||
|
|
@ -1,19 +1,17 @@
|
|||
[project]
|
||||
authors = [
|
||||
{name = "Marty Oehme", email = "contact@martyoeh.me"},
|
||||
]
|
||||
license = {text = "GPL-3.0-or-later"}
|
||||
authors = [{ name = "Marty Oehme", email = "contact@martyoeh.me" }]
|
||||
license = { text = "GPL-3.0-or-later" }
|
||||
requires-python = "<4.0,>=3.11"
|
||||
dependencies = [
|
||||
"pymupdf<2.0.0,>=1.24.2",
|
||||
"levenshtein<1.0.0,>=0.25.1",
|
||||
"papis<1.0,>=0.13",
|
||||
"papis<1.0,>=0.14",
|
||||
"click<9.0.0,>=8.1.7",
|
||||
"python-magic<1.0.0,>=0.4.27",
|
||||
"chevron<1.0.0,>=0.14.0",
|
||||
]
|
||||
name = "papis-extract"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
description = ""
|
||||
readme = "README.md"
|
||||
keywords = [
|
||||
|
|
@ -24,7 +22,6 @@ keywords = [
|
|||
"bibliography",
|
||||
"reference manager",
|
||||
"research",
|
||||
"science",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
@ -35,17 +32,28 @@ repository = "https://github.com/marty-oehme/papis-extract"
|
|||
extract = "papis_extract:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
whoosh = [
|
||||
"whoosh<3.0.0,>=2.7.4",
|
||||
]
|
||||
pocketbook = [
|
||||
"beautifulsoup4<5.0.0,>=4.12.3",
|
||||
]
|
||||
whoosh = ["whoosh<3.0.0,>=2.7.4"]
|
||||
pocketbook = ["beautifulsoup4<5.0.0,>=4.12.3"]
|
||||
|
||||
[tool.uv]
|
||||
dev-dependencies = [
|
||||
"pytest<9.0.0,>=8.0.0",
|
||||
"pytest-cov<7.0.0,>=6.0.0",
|
||||
dev-dependencies = ["pytest<9.0.0,>=8.0.0", "pytest-cov<7.0.0,>=6.0.0"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
extend-select = [
|
||||
"C4", # Catch incorrect use of comprehensions, dict, list, etc
|
||||
"F", # Pyflakes rules
|
||||
"FA", # Enforce from __future__ import annotations
|
||||
"I", # Sort imports properly
|
||||
"ICN", # Use common import conventions
|
||||
"ISC", # Good use of string concatenation
|
||||
"NPY", # Some numpy-specific things
|
||||
"PTH", # Use pathlib instead of os.path
|
||||
"RET", # Good return practices
|
||||
"SIM", # Common simplification rules
|
||||
"TC", # Enforce importing certain types in a TYPE_CHECKING block
|
||||
"TID", # Some good import practices
|
||||
"UP", # Warn if certain things can be changed due to newer Python versions
|
||||
"W", # PyCodeStyle warnings
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
|
|
|||
40
tests/extractors/test_readera.py
Normal file
40
tests/extractors/test_readera.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
from pathlib import Path
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.extractors.readera import ReadEraExtractor
|
||||
|
||||
# Sample exports used by the tests below: a genuine ReadEra export and a
# Readest export which the ReadEra extractor is expected to reject.
_RESOURCES = Path("tests/resources")
valid_file = _RESOURCES / "ReadEra_sample.txt"
invalid_file = _RESOURCES / "Readest_sample.txt"

# Annotations the extractor should produce for `valid_file`, in file order.
expected = [
    # Plain highlight without a note.
    Annotation(
        file="tests/resources/ReadEra_sample.txt",
        content="digital technologies of the twenty-first century can only exist thanks to this kind of outsourced labor. The relative invisibility of the tech supply chain is part of the ruse; American consumers do not see where smartphones come from.",
    ),
    # Highlight that carries a note.
    Annotation(
        file="tests/resources/ReadEra_sample.txt",
        content="We don’t necessarily want our leaders to be average persons like us, even though we often enjoy hearing that famous celebrities eat the same fast food as regular people. ",
        note="We continuously demystify our leaders - first through television, now through social media",
    ),
    # Highlight that spans two lines in the sample export.
    Annotation(
        file="tests/resources/ReadEra_sample.txt",
        content="Initially, the Internet was praised as a freer way to encounter information. In the early 1990s, digital theorist George Landow saw hypertext as a liberatory reading strategy.",
    ),
]
|
||||
|
||||
|
||||
def test_identifies_readera_exports():
    """The extractor reports that it can process the ReadEra sample file."""
    extractor = ReadEraExtractor()
    accepted = extractor.can_process(valid_file)
    assert accepted
|
||||
|
||||
|
||||
# Readest exports are very similar so we should ensure it ignores them
|
||||
def test_ignores_readest_exports():
    """The extractor reports that it cannot process the Readest sample file."""
    extractor = ReadEraExtractor()
    accepted = extractor.can_process(invalid_file)
    assert not accepted
|
||||
|
||||
|
||||
def test_entry_extractions():
    """Running the extractor on the sample file yields the expected annotations."""
    extracted = ReadEraExtractor().run(valid_file)
    assert extracted == expected
|
||||
17
tests/resources/ReadEra_sample.txt
Normal file
17
tests/resources/ReadEra_sample.txt
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
The Circle of the Snake
|
||||
Grafton Tanner
|
||||
|
||||
digital technologies of the twenty-first century can only exist thanks to this kind of outsourced labor. The relative invisibility of the tech supply chain is part of the ruse; American consumers do not see where smartphones come from.
|
||||
|
||||
*****
|
||||
|
||||
We don’t necessarily want our leaders to be average persons like us, even though we often enjoy hearing that famous celebrities eat the same fast food as regular people.
|
||||
--We continuously demystify our leaders - first through television, now through social media
|
||||
|
||||
*****
|
||||
|
||||
Initially, the Internet was praised as a freer way to encounter information.
|
||||
In the early 1990s, digital theorist George Landow saw hypertext as a liberatory reading strategy.
|
||||
|
||||
*****
|
||||
|
||||
70
tests/resources/Readest_sample.txt
Normal file
70
tests/resources/Readest_sample.txt
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# Ideology and the Virtual City
|
||||
**Author**:
|
||||
|
||||
**Exported from Readest**: 2025-09-10
|
||||
|
||||
---
|
||||
|
||||
## Highlights & Annotations
|
||||
|
||||
### 5 No More Heroes: The City as Wasteland
|
||||
> "As an ideological response, this “escapist defeatism” contains elements of cynicism, in that it also involves an outward rejection of normal social demands and a pessimistic outlook. The difference between it and cynical self-interest, however, is that the defeatist doesn’t still want to thrive within the existing order. Whereas cynical self-interest distances behaviour from moral values only to really enjoy following dominant demands after all, the defeatist is less excited by regular notions of success, but cannot imagine a way out. I believe this position embodies various features of what Mark Fisher calls “capitalist realism,” which is less about competitive spirit or “making it” and more a kind of depressed state of low expectation within a totalizing capitalist reality."
|
||||
|
||||
> "As an ideological response, this “escapist defeatism” contains elements of cynicism, in that it also involves an outward rejection of normal social demands and a pessimistic outlook. The difference between it and cynical self-interest, however, is that the defeatist doesn’t still want to thrive within the existing order. Whereas cynical self-interest distances behaviour from moral values only to really enjoy following dominant demands after all, the defeatist is less excited by regular notions of success, but cannot imagine a way out. I believe this position embodies various features of what Mark Fisher calls “capitalist realism,” which is less about competitive spirit or “making it” and more a kind of depressed state of low expectation within a totalizing capitalist reality."
|
||||
**Note**:: Opposing cynical defeatism to cynical self-interest
|
||||
|
||||
> "As he says, this modern depression “is constituted not by an inability to get pleasure so much as it is by an inability to do anything else except pursue pleasure. There is a sense that ‘something is missing’—but no appreciation that this mysterious, missing enjoyment can only be accessed beyond the pleasure principle.”22 And of course, the obsessive retreat into consumer pleasures only connects the defeatist even more deeply to the depressing reality, as it becomes part of the work-leisure cycle in which body and mind are allowed to relax, only to be rejuvenated for more draining labour. Even in strictly economic terms, if the escape is all we live for it still must be funded through labour; the only way to get more escape time is to return to the prison and earn it."
|
||||
**Note**:: cf. Mark Fisher
|
||||
|
||||
> "As he says, this modern depression “is constituted not by an inability to get pleasure so much as it is by an inability to do anything else except pursue pleasure. There is a sense that ‘something is missing’—but no appreciation that this mysterious, missing enjoyment can only be accessed beyond the pleasure principle.”22 And of course, the obsessive retreat into consumer pleasures only connects the defeatist even more deeply to the depressing reality, as it becomes part of the work-leisure cycle in which body and mind are allowed to relax, only to be rejuvenated for more draining labour. Even in strictly economic terms, if the escape is all we live for it still must be funded through labour; the only way to get more escape time is to return to the prison and earn it."
|
||||
|
||||
> "His position then reflects the idea developed by Žižek through the terminology of psychoanalyst Jacques Lacan that, in placing increasing emphasis on personal responsibility for success and enjoyment, neoliberalized societies appear to have no “big Other,” or no generalized symbolic guarantee of meaning that explicitly prescribes standards of behaviour. With no singular authoritative voice to aid our decision-making, the attraction of consumerist distractions is perhaps unsurprising, but as the seductive promises of consumer entertainment remain unfulfilled, the pressure remains on us to take responsibility for this dissatisfaction as well. Neoliberalism’s constant demand to do more, without telling us what to do or where we might find it, means we are always guilty of failure, and can’t really identify once and for all what we wish to become"
|
||||
|
||||
> "NMH highlights how consumerist escapist fantasies must be funded by the very normality we wish to escape, and the deeper the escape the more funding is required. The paradox is represented superbly in the kind of jobs Travis must do between matches. While there are extra contract killing jobs on offer, the game only grants access to these after the player completes more menial tasks taken from the local job centre, from picking coconuts and mowing lawns to collecting litter and cleaning graffiti. At these points, Travis is plunged back into an ideological normalcy where work is deemed valuable and fulfilling"
|
||||
|
||||
> "NMH highlights how consumerist escapist fantasies must be funded by the very normality we wish to escape, and the deeper the escape the more funding is required. The paradox is represented superbly in the kind of jobs Travis must do between matches. While there are extra contract killing jobs on offer, the game only grants access to these after the player completes more menial tasks taken from the local job centre, from picking coconuts and mowing lawns to collecting litter and cleaning graffiti. At these points, Travis is plunged back into an ideological normalcy where work is deemed valuable and fulfilling"
|
||||
**Note**:: 'No more heroes'
|
||||
|
||||
> "“something deeper” may be seen in psychoanalytic terms as “death drive,” or a kind of gap in our subjective identities that means we are never quite fully reconciled with ourselves. It is an inevitable sense of uncertainty that emerges because there is no final truth about what things mean, only different understandings that emerge in human consciousness through language. No matter how knowledgeable we are, each of us only has our own perspective and cannot know the innermost feelings and thoughts of others. Moreover, our own deepest desires are not even compatible (for example, the desires for risk and security), so that satisfying one may leave us unsatisfied in another way. In short, nothing is ever really, conclusively fulfilling, and our ideological fantasies which make us feel that there is some absolute meaning merely distract us from that realization."
|
||||
|
||||
---
|
||||
|
||||
### 6 Persona 5: The City as Prison
|
||||
> "missing here is any appreciation of the extent to which neoliberalized political, financial, legal and cultural institutions themselves cause social disengagement, or how the logic of profit actively works against deeper democratic participation. In other words, it does not ask whether democracy may itself be problematic in its existing form, or whether people might be politically disengaged because of privatized media’s tendency to trivialize politics, the influence of corporate money on political parties, or the excessive demands on other aspects of their lives. Indeed, meaningful political change is unlikely, as Jodi Dean points out, when “the appeal to democracy presupposes democracy is the solution to the problems of democracy, because it incorporates in advance any hope things might be otherwise as already the fundamental democratic promise and provision.”25 "
|
||||
|
||||
> "one is represented by the general public, who are shown to be uncommitted, fickle, apathetic and ignorant individuals that may want a just society, but take no responsibility for it. In what Žižek calls the “culture of complaint” in modern societies, they constantly ask some authority figure to make life better, and even “enjoy” playing the perpetual victim, effectively recognizing themselves as subordinates, rather than as agents who could actually affect change. Likewise, in P5, public opinion first imagines the Phantom Thieves as saviours, but later disowns them when they appear to have become too radical, and when Shido promises to restore order through conventional political means."
|
||||
|
||||
---
|
||||
|
||||
### 7 Conclusion: Conformism and Critique
|
||||
> "“hedonism,” in the context of neoliberalism I am thinking of a lifestyle that focuses substantially on consumerist entertainment or pleasure. This hedonist need not be an extreme thrill seeker or a wild party-goer. It would be equally “hedonistic,” in the sense I am defining here, for someone to fixate on what they might want for dinner every night, be genuinely excited about seeing the latest superhero movie, or constantly want to go shopping. In this way, for this hedonist, work is predominantly a means to an end that must be done to pay for enjoyment. The hedonist wouldn’t think twice about stopping work if they came into a large amount of money, but until then work is on balance a good thing, because it enables the purchase of pleasurable goods and services. At the same time, many big social issues, including politics, would appear boring to the hedonist. It is of course better when the economy is thriving, but it makes more sense to leave that stuff to the experts. In all this, such hedonism is not necessarily motivated by pure self-interest. Rather, it involves lines of rationalization based on neoliberal ideas, including consumerist concepts of identity creation but also an understanding of freedom in individual terms and a notion that there are no political alternatives."
|
||||
|
||||
> "while it recognizes the hypocrisy when powerful figures claim to be acting in the best interests of all, it has no time for radical critique, whether left or right, that actually seeks significant changes to the status quo. Instead, the cynic wants things to stay as they are and to exploit them to his/her advantage. This self-interest is supported by pragmatic “realism.” The market is flawed, so the argument goes, but better than the alternative; at least we aren’t subjected to the oppressive and broken systems of, say, Soviet Russia or North Korea. We must remember that humans are a selfish and aggressive bunch, and grand social projects always fail, so we should make the most of what we have."
|
||||
**Note**:: Cynical self-interest
|
||||
|
||||
> "while it recognizes the hypocrisy when powerful figures claim to be acting in the best interests of all, it has no time for radical critique, whether left or right, that actually seeks significant changes to the status quo. Instead, the cynic wants things to stay as they are and to exploit them to his/her advantage. This self-interest is supported by pragmatic “realism.” The market is flawed, so the argument goes, but better than the alternative; at least we aren’t subjected to the oppressive and broken systems of, say, Soviet Russia or North Korea. We must remember that humans are a selfish and aggressive bunch, and grand social projects always fail, so we should make the most of what we have."
|
||||
|
||||
> "it obscures that many people in the world still do have it bad, and that their situation is deeply connected to the wealth and freedoms elsewhere, so could improve under a different social order. Here, the cynic may claim that nothing can be done about this, or that someone somewhere always gets a raw deal, so we may as well just enjoy our luck. What is still missing, however, is any consideration that “human nature” has other aspects and adapts to different conditions, or that humans also share and act collectively, and are more likely to do so in more equal societies, where everyone has a stake and a reasonable quality of life."
|
||||
**Note**:: Cynical self-interest
|
||||
|
||||
> "it obscures that many people in the world still do have it bad, and that their situation is deeply connected to the wealth and freedoms elsewhere, so could improve under a different social order. Here, the cynic may claim that nothing can be done about this, or that someone somewhere always gets a raw deal, so we may as well just enjoy our luck. What is still missing, however, is any consideration that “human nature” has other aspects and adapts to different conditions, or that humans also share and act collectively, and are more likely to do so in more equal societies, where everyone has a stake and a reasonable quality of life."
|
||||
|
||||
> "Central to cynicism is an idea that it rejects ideology for rationality, because ideologies are dogmatic and lead to authoritarianism. But with this alibi it fails to account for its own presumption of neoliberal ideas, particularly its onus on individual self-sufficiency and the impossibility of even potentially workable political or economic alternatives"
|
||||
|
||||
> "In “escapist defeatism” there is also recognition of social problems and also a notion that not much can be done about them, at least by the average citizen. It does perhaps entail more of a historical dimension than cynical self-interest, in focusing less on human nature and more on the way things are now, under a seemingly omnipotent global capitalist system. Moreover, for the defeatist, taking advantage of the situation isn’t the most attractive option, because dominant notions of success simply don’t seem that satisfying, or because an outward pursuit of self-interest remains ethically abhorrent."
|
||||
|
||||
> "Such media consumption goes beyond the throwaway pleasures of hedonism to a deeper identification with the chosen material, even if, deep down, the defeatist knows it is meaningless"
|
||||
|
||||
> "how heavily tied they are to the economy, forcing the individuals concerned back into the normalcy of work-leisure routines to financially maintain their obsession. Another possible aspect of this lifestyle is a kind of victim complex, in which the powers that be are held responsible for life’s difficulties, but cannot be resisted, only mocked or angrily reproached. Either way, the defeatist does not take responsibility for his/her part in reproducing the status quo through excessive consumerist habits, or make any attempt to organize politically and create change."
|
||||
|
||||
> "he answer is somewhere in between: the system works in principle but needs attention if it’s ever going to run smoothly enough to produce a proper meritocracy. What is required, therefore, is collective vigilance and effort to get involved in improving the system, to stop anyone from having an overly corruptive impact. In short, no major overhaul is required, merely greater commitment to properly enforce the concepts of justice and equal opportunity that already exist. This position involves strong political commitment and a good working knowledge of existing systems. But it also implies strict adherence to a particular set of principles, specifically ones that do not question the actual political, social and economic institutions that are currently in place. In addition, the very idea of “making it” inscribed into its meritocratic ideal is limited by neoliberal notions of success: fulfilling career potentials, accumulating property, developing personal relationships and expressing identities through consumerist"
|
||||
**Note**:: Political reformism
|
||||
|
||||
> "he answer is somewhere in between: the system works in principle but needs attention if it’s ever going to run smoothly enough to produce a proper meritocracy. What is required, therefore, is collective vigilance and effort to get involved in improving the system, to stop anyone from having an overly corruptive impact. In short, no major overhaul is required, merely greater commitment to properly enforce the concepts of justice and equal opportunity that already exist. This position involves strong political commitment and a good working knowledge of existing systems. But it also implies strict adherence to a particular set of principles, specifically ones that do not question the actual political, social and economic institutions that are currently in place. In addition, the very idea of “making it” inscribed into its meritocratic ideal is limited by neoliberal notions of success: fulfilling career potentials, accumulating property, developing personal relationships and expressing identities through consumerist"
|
||||
|
||||
> "It does not consider whether these aims are really universal, or whether some people may have valid alternate desires that would require extensive social change to realize. Nor does it question whether meritocracy is fair in the first place, or whether people who simply lack marketable personal attributes deserve a lesser standard of life."
|
||||
|
||||
> "Such a perspective may, for example, contemplate whether the kinds of work we are generally obliged to do are socially necessary or personally fulfilling. It may ask whether the demand to “enjoy responsibly” by balancing ever growing work and consumer pressures actually constitutes a meaningful or coherent life goal, or whether alternative goals could be more satisfactory. It can also imagine the possibility of greater political engagement from the majority of people, and what it means to take collective responsibility for social conditions, rather than following individual desires. And if all these ideas are framed in relation to neoliberal capitalist systems of economics, politics and culture, they can even consider whether the systems as such produce demands that are inherently unrealistic, antagonistic and socially destructive.
|
||||
The specific questions that then arise from such critical thinking are many and varied. For example, are there even enough jobs, especially satisfying, well-paid jobs, for everyone to do today? Is throwaway mass consumerism environmentally sustainable? Does an individualized focus on employment and consumerism detract from more meaningful social activity? Are the demands of globalized markets compatible with free and effective political decision-making? To what extent does corporate advertising, PR and lobbying help frame our political desires? And how might we need to change society to provide the kind of meaning, involvement and satisfaction we desire? If some of these questions seem overly radical even for progressives who perceive a need to address existing social problems, it should be clear by now that getting to the root of these problems may require re-evaluation of even the most deeply embedded assumptions about how societies work and how they could work."
|
||||
|
||||
---
|
||||
|
|
@ -4,6 +4,14 @@ from papis.document import Document
|
|||
from papis_extract.annotation import Annotation
|
||||
|
||||
|
||||
def test_value_comparison_works():
    """Annotations with differing content and note do not compare equal."""
    first = Annotation("myfile", content="Here be content!", note="and a note")
    second = Annotation(
        "myfile",
        content="Here be different content!",
        note="but still a note",
    )
    assert first != second
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fmt_string,expected",
|
||||
[
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from papis.document import Document
|
||||
from papis_extract.annotation import Annotation
|
||||
|
||||
from papis_extract.annotation import Annotation
|
||||
from papis_extract.formatter import (
|
||||
format_count,
|
||||
format_csv,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue