shaarli2hoarder/convert.py
2025-02-12 18:25:51 +01:00

155 lines
9.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Target: one bookmark entry in the Hoarder JSON import format (line-numbered excerpt)
# {
# 68 │ "createdAt": 1739262800,
# 69 │ "title": "Errands Simple (GNOME) Tasks app",
# 70 │ "tags": [
# 71 │ "linux",
# 72 │ "opensource",
# 73 │ "productivity",
# 74 │ "Task Management",
# 75 │ "GNOME Apps",
# 76 │ "To-Do List",
# 77 │ "Linux Utility"
# 78 │ ],
# 79 │ "content": {
# 80 │ "type": "link",
# 81 │ "url": "https://apps.gnome.org/List/"
# 82 │ },
# 83 │ "note": "Errands is an open source GNOME app!"
# 84 │ },
# Source: Shaarli bookmark export (Netscape bookmark HTML format)
# <DL><p>
# <DT><A HREF="https://ggia.berkeley.edu/" ADD_DATE="1557865058" LAST_MODIFIED="1616417694" PRIVATE="0" TAGS="life">Greater Good in Action</A>
# <DD>Science based practices for a meaningful life
# <DT><A HREF="https://apps.gnome.org/List/" ADD_DATE="1739262800" LAST_MODIFIED="1739262951" PRIVATE="0" TAGS="productivity,linux,opensource">Errands Simple (GNOME) Tasks app</A>
# <DD>Kept _very_ simple as a todo list application. Can sync with CalDAV/Nextcloud, or manually import lists. Can display today's tasks, multiple lists and has a trashcan functionality but not much more.
#
# Supports displaying tags, but have not even been able to find a way to filter by tag.
# <DT><A HREF="https://easyeda.com/" ADD_DATE="1739221882" LAST_MODIFIED="1739221883" PRIVATE="0" TAGS="hardware">EasyEDA - Online PCB design & circuit simulator</A>
# <DD>EasyEDA is a free and easy to use circuit design, circuit simulator and pcb design that runs in your web browser.
#
# PCB Layouting software. Has a nice free (beer) version but is not free (freedom) software. Closely integrated w/ JLCPCB so you can see which parts you can get for what money.
# <DT><A HREF="https://awesome-docker-compose.com/" ADD_DATE="1739221700" LAST_MODIFIED="1739221701" PRIVATE="0" TAGS="hosting,docker,library">Awesome Docker Compose</A>
# <DD>A collection of self-hosted apps you can quickly set up with Docker Compose.
#
# Nice collection of compose files. Fairly simple and not _super_ useful if you've done a couple composes, but they can provide good starter boilerplate until then.
# <DT><A HREF="https://www.kicad.org/" ADD_DATE="1739221520" LAST_MODIFIED="1739221521" PRIVATE="0" TAGS="hardware,opensource">KiCad EDA - Schematic Capture & PCB Design Software</A>
# <DD>Open source EDA / electronics CAD software for Windows, macOS and Linux. Use schematic capture, create PCB designs and view them in 3D, all forever free.
#
# Open source PCB schematic design.
# <DT><A HREF="https://github.com/madeofpendletonwool/PinePods" ADD_DATE="1739221291" LAST_MODIFIED="1739221295" PRIVATE="0" TAGS="podcast,opensource,audio,hosting">GitHub - madeofpendletonwool/PinePods: Pinepods is a complete podcast management system and allows you to play, download, and keep track of podcasts you enjoy. All self hosted and enjoyed on your own server!</A>
# <DD>Pinepods is a complete podcast management system and allows you to play, download, and keep track of podcasts you enjoy.
#
# Integrates with gpodder sync! Integrates with Nextcloud gpodder app! Can thus use android clients like AntennaPod to listen and still have your own server! This makes me happy!
# <DT><A HREF="https://github.com/usebruno/bruno" ADD_DATE="1739181843" LAST_MODIFIED="1739221293" PRIVATE="0" TAGS="networking,opensource">GitHub - usebruno/bruno: Opensource IDE For Exploring and Testing Api's (lightweight alternative to postman/insomnia)</A>
# <DD>Opensource IDE For Exploring and Testing Api's (lightweight alternative to postman/insomnia)
#
# FOSS and fully 'offline' (except for where you send your requests) alternative to Postman
# <DT><A HREF="https://codeberg.org/mergiraf/mergiraf" ADD_DATE="1739002063" LAST_MODIFIED="1739002064" PRIVATE="0" TAGS="git">mergiraf/mergiraf: A syntax-aware git merge driver for a growing collection of programming languages and file formats. - Codeberg.org</A>
# <DD>mergiraf - A syntax-aware git merge driver for a growing collection of programming languages and file formats.
#
# Interesting: A merge driver which tries to do its best to really only leave _conflicts_ over,
# using its knowledge of various syntax trees of languages.
# <DT><A HREF="https://www.visualjj.com/" ADD_DATE="1738942177" LAST_MODIFIED="1738942178" PRIVATE="0" TAGS="jujutsu">VisualJJ Jujutsu in Visual Studio Code</A>
# <DD>"Publish changes on GitHub in seconds. Effortlessly manage branches. Resolve conflicts with confidence."
#
# A visual (GUI) interface for JJ, including interesting PR merge ability with GitHub (which is a little more difficult with jj usually).
#
# Proprietary but currently free (beer) software.
# <DT><A HREF="https://github.com/kyoheiu/felix" ADD_DATE="1738920203" LAST_MODIFIED="1738920205" PRIVATE="0" TAGS="commandline">GitHub - kyoheiu/felix: tui file manager with vim-like key mapping</A>
# <DD>tui file manager with vim-like key mapping. Yet another file manager. Has nicely working-by-default (in wezterm) jumping by integrating zoxide and image display by integrating chafa.
# <DT><A HREF="https://github.com/dahlia/iterfzf" ADD_DATE="1738919873" LAST_MODIFIED="1738919875" PRIVATE="0" TAGS="python,commandline">GitHub - dahlia/iterfzf: Pythonic interface to fzf, a CLI fuzzy finder</A>
# <DD>Pythonic interface to fzf, a CLI fuzzy finder. Super nice library to use - does come bundled with fzf binary in the wheels though.
# <DT><A HREF="https://github.com/mrusme/neonmodem" ADD_DATE="1738919608" LAST_MODIFIED="1738919609" PRIVATE="0" TAGS="commandline">GitHub - mrusme/neonmodem: Neon Modem Overdrive</A>
# <DD>TUI application for: Lemmy, Hackernews, Lobsters, Discourse (forums). A little.. overloaded in my opinion (including big huge splash screen when starting) but at the very least nice to learn from its code.
# <DT><A HREF="https://dhall-lang.org/" ADD_DATE="1738859355" LAST_MODIFIED="1738859356" PRIVATE="0" TAGS="hosting,programming">The Dhall configuration language</A>
# <DD>Configuration language which seeks to be maintainable and can be mapped onto a wide variety of other config langs (yaml, toml, ini etc). Functional and stable, has 'behavior-driven' hashing to compare versions and semantic diffs. Very cool.
# <DT><A HREF="https://codeberg.org/flohmarkt/flohmarkt" ADD_DATE="1738836981" LAST_MODIFIED="1738836982" PRIVATE="0" TAGS="life,hosting,fediverse">flohmarkt/flohmarkt: federated decentral classified ad software using activitypub - Codeberg.org</A>
# <DD>Up-and-coming activitypub/fediverse implementation of a bulletin board software (similar to Craigslist/Kleinanzeigen/similar small announcement software), fully federated.
#
# Not entirely sure how it works with location-aware options, so you could search within a certain radius only which is an important part of any such list.
# <DT><A HREF="https://github.com/monasticacademy/httptap?tab=readme-ov-file" ADD_DATE="1738836703" LAST_MODIFIED="1738836704" PRIVATE="0" TAGS="networking,golang,commandline">GitHub - monasticacademy/httptap: View HTTP/HTTPS requests made by any Linux program</A>
# <DD>View HTTP/HTTPS requests made by any Linux program.
# </DL><p>
import json
import sys
from bs4 import BeautifulSoup
# The bookmark export to convert is taken from the first command-line argument.
# The usage error goes to stderr because stdout is reserved for the JSON result
# (which is typically redirected into a file).
if len(sys.argv) < 2:
    print("ERROR: Pass the bookmarks file as argument.", file=sys.stderr)
    sys.exit(1)
path = sys.argv[1]
def parse_bookmark(html_content):
    """Convert a bookmark HTML export into a list of Hoarder-style entries.

    Every ``<a>`` element that carries an ``href`` becomes one dict with:

    * ``title``     -- the anchor text, or the URL when the text is empty
    * ``note``      -- the description from the ``<dd>`` that directly follows
                       the anchor's parent element ("" when there is none)
    * ``createdAt`` -- ``last_modified`` if it is a valid integer, otherwise
                       ``add_date``; omitted when neither can be parsed
    * ``content``   -- ``{"type": "link", "url": ...}``; URLs containing
                       ``/shaare/`` are treated as notes and become
                       ``{"type": "text", "text": <description>}`` with an
                       empty ``note``
    * ``tags``      -- list of trimmed, non-empty tags (only when present)

    Args:
        html_content: The export markup. Each ``<DT>`` is expected to be
            closed already; otherwise the ``<dd>`` is not a sibling of the
            ``<dt>`` and no descriptions are found.

    Returns:
        A list of bookmark dicts in document order.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # Diagnostics go to stderr: stdout carries the JSON result, and a warning
    # mixed into it would make the output unparseable.
    dl_count = len(soup.find_all("dl"))
    if dl_count == 0:
        print(
            "WARNING! No Bookmark element found. File may be corrupt.",
            file=sys.stderr,
        )
    elif dl_count > 1:
        print(
            "WARNING! More than one Bookmark element found. File may be corrupt.",
            file=sys.stderr,
        )

    bookmarks = []
    for el in soup.find_all("a"):
        url = el.get("href")
        if not url:
            # An anchor without a target is not a bookmark.
            continue

        title = el.get_text().strip() or url
        tags = [tag.strip() for tag in el.get("tags", "").split(",") if tag.strip()]
        description = _description_after(el.parent)

        if "/shaare/" in url:
            # Note-style entry: the description is the content itself.
            content = {"type": "text", "text": description}
            description = ""
        else:
            content = {"type": "link", "url": url}

        bm_el = {"title": title, "note": description}
        created_at = _first_timestamp(el.get("last_modified"), el.get("add_date"))
        if created_at is not None:
            bm_el["createdAt"] = created_at
        bm_el["content"] = content
        if tags:
            bm_el["tags"] = tags
        bookmarks.append(bm_el)
    return bookmarks


def _description_after(dt_el):
    """Return the leading text of the ``<dd>`` that immediately follows *dt_el*.

    Only the very next tag sibling is considered. Searching further would
    hand a description-less bookmark the description of a later bookmark.
    Only the first text node is used because html.parser never closes the
    ``<dd>``, so everything after the description text (including later
    bookmarks) ends up nested inside it.
    """
    desc_el = dt_el.find_next_sibling(True)
    if desc_el is None or desc_el.name != "dd" or not desc_el.contents:
        return ""
    first_child = desc_el.contents[0]
    # Text nodes are str subclasses; a leading tag means there is no plain text.
    return first_child.strip() if isinstance(first_child, str) else ""


def _first_timestamp(*candidates):
    """Return the first candidate that parses as an int, or None if none do."""
    for raw in candidates:
        try:
            return int(raw)
        except (TypeError, ValueError):
            continue
    return None
# Read the export and close every <DT> so that the parser sees each <dd> as a
# sibling of its <dt> (the export leaves the <DT> elements unclosed).
# Line endings are stripped on read and re-added exactly once by the join;
# keeping them would double every line break inside multi-line descriptions.
with open(path, encoding="utf-8") as f:
    contents = [line.rstrip("\n") for line in f]
for i, line in enumerate(contents):
    if "<DT>" in line:
        contents[i] = f"{line.rstrip()}</DT>"
bookmarks = parse_bookmark("\n".join(contents))
# The JSON document is the script's only stdout output.
print(json.dumps({"bookmarks": bookmarks}, indent=2))