Add shaarli2hoarder
This commit is contained in:
parent
bd04d5bbde
commit
36252f6f19
5 changed files with 117 additions and 1 deletions
|
@ -4,7 +4,16 @@ version = "0.1.0"
|
|||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = []
|
||||
dependencies = [
|
||||
# REQUIRED FOR CURRENT SHAARLI2HOARDER IMPLEMENTATION ONLY
|
||||
# "beautifulsoup4>=4.13.3",
|
||||
# "netscape-bookmarks-file-parser",
|
||||
# "pyjwt>=2.10.1",
|
||||
# "requests>=2.32.3",
|
||||
]
|
||||
|
||||
[tool.pyright]
|
||||
typeCheckingMode = "basic"
|
||||
|
||||
[tool.uv.sources]
|
||||
netscape-bookmarks-file-parser = { git = "https://github.com/FlyingWolFox/Netscape-Bookmarks-File-Parser.git" }
|
||||
|
|
14
shaarli2hoarder/.gitignore
vendored
Normal file
14
shaarli2hoarder/.gitignore
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
*.json
|
||||
*.html
|
||||
|
||||
# Python-generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
|
1
shaarli2hoarder/.python-version
Normal file
1
shaarli2hoarder/.python-version
Normal file
|
@ -0,0 +1 @@
|
|||
3.13
|
12
shaarli2hoarder/README.md
Normal file
12
shaarli2hoarder/README.md
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Shaarli 2 Hoarder converter
|
||||
|
||||
Convert your shaarli bookmarks to hoarder json format.
|
||||
|
||||
Simply run it like the following `uv run python convert.py <shaarli-export-file>`,
|
||||
pointing it at your exported html file from shaarli.
|
||||
|
||||
It will print out the JSON representation of those bookmarks,
|
||||
readable by Hoarder.
|
||||
|
||||
Run it like the following `uv run python convert.py bookmarks.html > out.json`
|
||||
to generate a valid json file which you can then import through the hoarder interface.
|
80
shaarli2hoarder/convert.py
Normal file
80
shaarli2hoarder/convert.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
import json
|
||||
import sys
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# The bookmarks export file is the single required command-line argument.
if len(sys.argv) < 2:
    # Report on stderr: stdout carries the generated JSON and is usually
    # redirected into a file, where this message would otherwise end up.
    print("ERROR: Pass the bookmarks file as argument.", file=sys.stderr)
    sys.exit(1)
path = sys.argv[1]
|
||||
|
||||
|
||||
def parse_bookmark(html_content):
    """Convert a Shaarli bookmark export into a list of Hoarder bookmark dicts.

    Args:
        html_content: The export file contents as one string. Each ``<DT>``
            entry is expected to be closed with ``</DT>`` so that an optional
            ``<DD>`` description becomes a sibling of its ``<DT>``.

    Returns:
        A list with one dict per ``<a>`` element that has an ``href``. Every
        dict has the keys ``title``, ``note``, ``createdAt`` and ``content``;
        ``tags`` is added only when the anchor carries at least one tag.

    Raises:
        ValueError: If an anchor has neither a numeric ``last_modified`` nor a
            numeric ``add_date`` attribute.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # Diagnostics go to stderr so they never end up inside the JSON that the
    # caller prints to stdout.
    dl_count = len(soup.find_all("dl"))
    if dl_count != 1:
        print(
            "WARNING! Expected exactly one Bookmark list element, "
            f"found {dl_count}. File may be corrupt.",
            file=sys.stderr,
        )

    bookmarks = []
    for el in soup.find_all("a"):
        url = el.get("href")
        if not url:
            # An anchor without a target cannot be turned into a bookmark.
            continue

        # Fall back to the URL when the anchor has no (non-blank) text.
        title = (el.string or "").strip() or url

        # date elements
        add_date = el.get("add_date", "")
        last_modified = el.get("last_modified", "")

        tag_string = el.get("tags", "")
        tags = [tag.strip() for tag in tag_string.split(",") if tag.strip()]

        # desc / note
        # Only the element *directly* following this entry's <dt> may be its
        # description. Searching for any later <dd> sibling would hand the
        # description of a following bookmark to a bookmark that has none.
        description = ""
        next_el = el.parent.find_next_sibling(True)
        if next_el is not None and next_el.name == "dd" and next_el.contents:
            # The <dd> is never closed in the export, so later entries may be
            # nested inside it; the description is its leading text node.
            leading = next_el.contents[0]
            if isinstance(leading, str):
                description = leading.strip()

        if "/shaare/" in url:
            # Note-style entry: the description is the bookmark content itself.
            content = {"type": "text", "text": description}
            description = ""
        else:
            content = {"type": "link", "url": url}

        bm_el = {
            "title": title,
            "note": description,
            # The modification time is preferred over the creation time.
            "createdAt": int(last_modified or add_date),
            "content": content,
        }
        if tags:
            bm_el["tags"] = tags
        bookmarks.append(bm_el)

    return bookmarks
|
||||
|
||||
|
||||
# Read the export explicitly as UTF-8 instead of relying on the platform's
# default encoding, which may differ from the encoding of the export file.
with open(path, encoding="utf-8") as f:
    contents = f.readlines()

# The export leaves every <DT> entry unclosed; append the closing tag so the
# parser sees each entry as a closed element.
for i, line in enumerate(contents):
    if "<DT>" in line:
        contents[i] = f"{line.rstrip()}</DT>"

bookmarks = parse_bookmark("\n".join(contents))

# Emit the bookmarks on stdout wrapped in a top-level "bookmarks" object.
print(json.dumps({"bookmarks": bookmarks}, indent=2))
|
Loading…
Reference in a new issue