papis-marvin/papis-marvin

#!/usr/bin/env python
# papis-short-help: Import iOS Marvin exported csv annotations
#
# This script can be used to import your highlights and notes from
# the iOS application 'Marvin Reader'. In the app, export your
# annotations as 'csv' format and then point the script to the
# resulting file.
# https://git.martyoeh.me/Marty/papis-marvin
import os
import sys
import re
import logging
from typing import Dict
import papis.api
import papis.pick
import papis.format
import papis.commands.edit
import papis.commands.list
import papis.commands.add
import papis.notes
import papis.config
import papis.database
import isbnlib
import papis.isbn

logger = logging.getLogger("marvin")
logger.setLevel(logging.DEBUG)

DEFAULT_CSV_PATH = "/home/marty/Nextcloud/Personal/Backups/Journal.csv"


def main(fpath, db):
    """Import the annotations found in the csv file at *fpath*.

    The file is parsed with ``csv.DictReader``, the rows are grouped per
    note file by ``get_all_annotations`` and the result is handed to
    ``write_to_files`` once the csv file has been closed again.

    Args:
        fpath: Path of the exported csv file.
        db: Database object used for looking up documents.
    """
    import csv

    with open(fpath) as csv_file:
        rows = csv.DictReader(csv_file)
        collected = get_all_annotations(db, rows)

    write_to_files(collected)


def get_all_annotations(db, csv) -> Dict:
    """Collect the formatted annotations of every csv row per note file.

    Rows are expected to be grouped by book: whenever the "Title" of a row
    differs from the one of the previous row, the matching document is
    looked up again and its note file becomes the current target.

    Args:
        db: Database object used to find documents and update them.
        csv: Iterable of row mappings providing at least the "Title" and
            "Author" keys, plus whatever ``format_entry`` reads.

    Returns:
        A dict mapping each note file path to the list of formatted
        entries that belong into it, in row order.
    """
    notes = {}
    note_file = ""
    for row in csv:
        # switch to next book
        if not is_same_book(row["Title"]):
            doc = get_document(db, row["Author"], row["Title"])
            # Always replace the previous target, even when nothing was
            # found: otherwise the remaining rows of an unknown book would
            # be filed under the note file of the book before it.
            note_file = get_notefile(db, doc) if doc else ""

        if not note_file:
            continue

        text = format_entry(row)
        if text:
            notes.setdefault(note_file, []).append(text)
    return notes


def get_document(db, author, title):
    """Return the document matching *author* and *title*, if any.

    When the first query yields nothing, ``add_to_database`` is asked to
    add the book and the query is repeated once. If there is still no
    result a warning is logged and ``None`` is returned.
    """
    found = query_document(db, author, title)
    if found:
        return found

    # Not in the library yet: try to add it, then look once more.
    add_to_database(author, title)
    found = query_document(db, author, title)
    if found:
        return found

    logger.warning(f"Nothing found for {author}: {title}.\nPlease create manually.")
    return None


# TODO: warn the user / let them choose with the picker if multiple documents are found
def query_document(db, author, title):
    """Query *db* for a document by *author* and *title*.

    The title is passed through ``strip_string`` before it is placed into
    the query; the author is used as given. The query string is printed
    to stdout.

    Returns:
        The first result of ``db.query`` or ``None`` if there was none.
    """
    title = strip_string(title)
    query = f"author:({author}) title:({title})"
    print(f"query: {query}")
    matches = db.query(query)
    if len(matches) < 1:
        return None
    return matches[0]


def add_to_database(author, title, confirm=True, edit=False):
    """Search for book metadata and add the first hit to the library.

    The lookup via ``papis.isbn.get_data`` uses the *title* only; *author*
    appears just in the log message. An ``ISBNLibException`` raised by the
    lookup is logged as an error and nothing is added. If the lookup
    returns data, its first element is converted with
    ``papis.isbn.data_to_papis`` and passed to ``papis.commands.add.run``.

    Args:
        author: Author of the book (only used for logging).
        title: Title of the book, used as the search term.
        confirm: Forwarded to ``papis.commands.add.run``.
        edit: Forwarded to ``papis.commands.add.run``.
    """
    logger.info(f"Searching - '{title} {author}'")
    try:
        results = papis.isbn.get_data(f"{title}")
    except isbnlib.ISBNLibException as e:
        logger.error(e)
        return

    logger.warning(f"Found: {results}")
    if not results:
        return

    # Only the first search result is taken over.
    entry = papis.isbn.data_to_papis(results[0])
    papis.commands.add.run([], data=entry, confirm=confirm, edit=edit)


def get_notefile(db, document) -> str | None:
    if not document.has("notes"):
        notes_name = papis.config.getstring("notes-name")
        document["notes"] = papis.format.format(notes_name, document)
        document.save()
        db.update(document)

    notes_path = os.path.join(str(document.get_main_folder()), document["notes"])

    if not os.path.exists(notes_path):
        # TODO reimplement logger: logger.debug("Creating '%s'", notes_path)
        papis.notes.notes_path_ensured(document)
    return notes_path


# TODO implement custom formatting (akin to pubs-extract)
def format_entry(row) -> str:
    """Format one csv row as a quoted note entry.

    Args:
        row: Mapping with the keys "HighlightText" and "EntryText"
            (presumably the highlighted passage and the user's own note).

    Returns:
        * ``"> <highlight>"`` if only a highlight is present,
        * ``"> <highlight>\\n<note>"`` if both are present,
        * ``"> <note>"`` if only a note is present,
        * an empty string if the row carries neither, so that callers can
          skip it with a simple truthiness check.
    """
    highlight = row["HighlightText"]
    entry = row["EntryText"]

    # Only start a quote when there is something to quote; a bare "> "
    # would otherwise count as a non-empty entry.
    text = f"> {highlight}" if highlight else ""
    if entry:
        if text:
            text += "\n"
        else:
            text = "> "
        text += f"{entry}"
    return text


# Title passed to the most recent is_same_book() call ("" before the first).
_old_title = ""


def is_same_book(title):
    """Tell whether *title* equals the title of the previous call.

    The given title is remembered in the module-level ``_old_title`` so
    the next call compares against it.

    Returns:
        ``True`` if *title* matches the previously remembered title,
        ``False`` otherwise.
    """
    global _old_title

    previous, _old_title = _old_title, title
    return previous == title


def write_to_files(notes: Dict):
    """Append the collected entries to their note files.

    An entry is only written if its text does not already occur in the
    note file, so running the import repeatedly does not duplicate notes.
    Each written entry is followed by an empty line.

    Args:
        notes: Mapping of note file path to the list of entries for it.
            Items with an empty path are skipped.
    """
    for f, entries in notes.items():
        if not f:
            continue

        # Read the current content once instead of re-reading the file for
        # every entry. Re-reading also could not see the buffered writes
        # of this run, so duplicates inside one batch went undetected.
        try:
            with open(f) as noteread:
                existing = noteread.read()
        except FileNotFoundError:
            existing = ""

        with open(f, "a") as note:
            logger.info(f"Editing {f}...")
            num_added = 0
            for entry in entries:
                if entry in existing:
                    continue
                written = f"{entry}\n\n"
                note.write(written)
                # Keep the in-memory copy in step with the file content.
                existing += written
                num_added += 1
            logger.info(f"Added {num_added} entries to it.")


# Matches a run of characters that are neither whitespace nor word characters
# (underscores count as matches too), plus any word characters directly
# following that run.
strip_pattern = re.compile(r"([^\s\w]|_)+\w*")


def strip_string(title) -> str:
    """Return *title* with every match of ``strip_pattern`` removed."""
    cleaned = re.sub(strip_pattern, "", title)
    return cleaned


if __name__ == "__main__":
    # The first command line argument, if present, names the csv file to
    # import; otherwise the default path is used.
    cli_args = sys.argv[1:]
    fpath = cli_args[0] if cli_args else DEFAULT_CSV_PATH

    main(fpath, papis.database.get())