From 36252f6f19ac5284687c228a56b9e8b12a970055 Mon Sep 17 00:00:00 2001
From: Marty Oehme <contact@martyoeh.me>
Date: Wed, 12 Mar 2025 14:16:59 +0100
Subject: [PATCH] Add shaarli2hoarder

---
 pyproject.toml                  | 11 ++++-
 shaarli2hoarder/.gitignore      | 14 ++++++
 shaarli2hoarder/.python-version |  1 +
 shaarli2hoarder/README.md       | 12 +++++
 shaarli2hoarder/convert.py      | 80 +++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 shaarli2hoarder/.gitignore
 create mode 100644 shaarli2hoarder/.python-version
 create mode 100644 shaarli2hoarder/README.md
 create mode 100644 shaarli2hoarder/convert.py

diff --git a/pyproject.toml b/pyproject.toml
index d8a4b28..93a02e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,16 @@ version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
-dependencies = []
+dependencies = [
+    # REQUIRED FOR CURRENT SHAARLI2HOARDER IMPLEMENTATION ONLY
+    # "beautifulsoup4>=4.13.3",
+    # "netscape-bookmarks-file-parser",
+    # "pyjwt>=2.10.1",
+    # "requests>=2.32.3",
+]
 
 [tool.pyright]
 typeCheckingMode = "basic"
+
+[tool.uv.sources]
+netscape-bookmarks-file-parser = { git = "https://github.com/FlyingWolFox/Netscape-Bookmarks-File-Parser.git" }
diff --git a/shaarli2hoarder/.gitignore b/shaarli2hoarder/.gitignore
new file mode 100644
index 0000000..97d03a5
--- /dev/null
+++ b/shaarli2hoarder/.gitignore
@@ -0,0 +1,14 @@
+*.json
+*.html
+
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+
diff --git a/shaarli2hoarder/.python-version b/shaarli2hoarder/.python-version
new file mode 100644
index 0000000..24ee5b1
--- /dev/null
+++ b/shaarli2hoarder/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/shaarli2hoarder/README.md b/shaarli2hoarder/README.md
new file mode 100644
index 0000000..d89281d
--- /dev/null
+++ b/shaarli2hoarder/README.md
@@ -0,0 +1,12 @@
+# Shaarli 2 Hoarder converter
+
+Convert your Shaarli bookmarks to Hoarder's JSON import format.
+
+Run it with `uv run python convert.py <shaarli-export-file>`,
+pointing the file argument at the HTML file exported from Shaarli.
+
+It will print the JSON representation of those bookmarks,
+readable by Hoarder.
+
+For example, `uv run python convert.py bookmarks.html > out.json`
+generates a valid JSON file which you can then import through the Hoarder interface.
diff --git a/shaarli2hoarder/convert.py b/shaarli2hoarder/convert.py
new file mode 100644
index 0000000..428e5b9
--- /dev/null
+++ b/shaarli2hoarder/convert.py
@@ -0,0 +1,80 @@
+import json
+import sys
+
+from bs4 import BeautifulSoup
+
+if len(sys.argv) < 2:
+    print("ERROR: Pass the bookmarks file as argument.")
+    sys.exit(1)
+path = sys.argv[1]
+
+
+def parse_bookmark(html_content):
+    soup = BeautifulSoup(html_content, "html.parser")
+
+    bookmarks = []
+
+    if len(soup.find_all("dl")) != 1:
+        print("WARNING! Expected exactly one bookmark list (<dl>) element. File may be corrupt.")
+
+    first = True
+    last_desc = ""
+    for el in soup.find_all("a"):
+        bm_el = {}
+        url = el["href"]
+        title = el.string.strip() if el.string else url
+
+        # date elements
+        add_date = el.get("add_date", "")
+        last_modified = el.get("last_modified", "")
+        tag_string = el.get("tags", "")
+        tags = tag_string.split(",") if tag_string else []
+
+        # TODO: url contains '/shaare/' == note type
+
+        # desc / note
+        desc_el = el.parent.find_next_sibling("dd")
+        # Have to fix the non-closed <dt> tags :\
+        # For now, in vim do: `:%s/<DT>.*/\0<\/DT>` to add a closing el to each line
+        description = desc_el.contents[0].strip() if desc_el else ""
+        if description and description == last_desc:
+            description = ""
+        if description:
+            last_desc = description
+
+        # print(f"URL: {url}, TITLE: {title}")
+        # print(f"ADD: {add_date}, MOD: {last_modified}, TAGS: {tags}")
+        # print(f"DESC: {description.strip()}")
+
+        content = {}
+        if "/shaare/" in url:
+            content = {"type": "text", "text": description}
+            # print(f"Detected note-style url ({url}) turning description to content.")
+            description = ""
+        else:
+            content = {"type": "link", "url": url}
+
+        bm_el = {
+            "title": title,
+            "note": description,
+            "createdAt": int(last_modified if last_modified else add_date),
+            "content": content,
+        }
+        if tags:
+            bm_el["tags"] = tags
+        if description:
+            bm_el["note"] = description
+        bookmarks.append(bm_el)
+
+    return bookmarks
+
+
+with open(path) as f:
+    contents = f.readlines()
+    for i, line in enumerate(contents):
+        if "<DT>" in line:
+            contents[i] = f"{line.rstrip()}</DT>"
+
+    bookmarks = parse_bookmark("\n".join(contents))
+
+    print(json.dumps({"bookmarks": bookmarks}, indent=2))
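
For reference, this is roughly the shape of the JSON the script prints, as implied by convert.py above: a top-level `bookmarks` array whose entries carry `title`, `note`, `createdAt` (a Unix timestamp), a `content` object that is either a link or, for Shaarli `/shaare/` notes, a text entry, plus `tags` when present. The values below are a hand-written illustration, not output from a real export:

    {
      "bookmarks": [
        {
          "title": "Example bookmark",
          "note": "Saved for later",
          "createdAt": 1741787819,
          "content": { "type": "link", "url": "https://example.com/" },
          "tags": ["example", "reading"]
        },
        {
          "title": "A note-style shaare",
          "note": "",
          "createdAt": 1741787819,
          "content": { "type": "text", "text": "The description of a /shaare/ entry becomes the text content." }
        }
      ]
    }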