Refactor wallabag conversion to have a simple CLI

This commit is contained in:
Marty Oehme 2025-03-12 14:16:59 +01:00
parent 813327f939
commit 59aaa74d76
Signed by: Marty
GPG key ID: 4E535BC19C61886E
8 changed files with 149 additions and 79 deletions

View file

@@ -1,52 +1,61 @@
#!/usr/bin/env python
from datetime import datetime
import argparse
import json
import sys
from pathlib import Path
from jinja2 import Template
from convert_netscape import Netscape_Converter
from convert_native_json import JSON_Converter
# USAGE: ./convert.py file exportfile
if len(sys.argv) < 2 or not Path(sys.argv[1]).is_file():
print("Please provide a file to import as the first argument.")
sys.exit(1)
def main():
parser = argparse.ArgumentParser(description="Process input file(s)")
parser.add_argument("input", help="Input file")
parser.add_argument("--output", help="Output file")
parser.add_argument(
"--flavour", choices=["html", "json"], default="json", help="Flavour of output"
)
# TODO implement
parser.add_argument(
"--num", type=int, default=10, help="Number of items to process"
)
INPUT_FILE = Path(sys.argv[1])
OUTPUT_FILE = (
Path(sys.argv[2]) if len(sys.argv) > 2 else Path("exported_bookmarks.json")
)
print(f"[DEBUG] inputfile: {INPUT_FILE}")
print(f"[DEBUG] outputfile: {OUTPUT_FILE}")
args = parser.parse_args()
if not args.input:
print("Please provide a file to import as the first argument.")
sys.exit(1)
# Read JSON file
with open(INPUT_FILE, "r") as f:
data_in = json.load(f)
INPUT_FILE = Path(args.input)
if not INPUT_FILE.exists():
print(f"Input file {INPUT_FILE} can not be accessed.")
sys.exit(1)
# NOTE: Wallabag annotation format is as follows:
# [{'text': '', 'quote': "A while back they raised their prices, which lost them a lot of subscribers, because they were losing money per search at the old prices. They were actually still losing money per search on the new prices. They eventually lowered the prices back down a bit (and maybe raised them again? I've completely lost the plot on their pricing at this point) and have claimed that at 25,000 users they would be breaking even.", 'ranges': [{'start': '/p[6]', 'startOffset': '429', 'end': '/p[6]', 'endOffset': '844'}]}]
# with /p signifying the paragraph? Hoarder only has a concept of offset, so probably have to transform the paragraphs into lengths and then add them up to convert from one format to the other.
# Read JSON file
with open(INPUT_FILE, "r") as f:
data = json.load(f)
print(f"[DEBUG] Found {len(data_in)} wallabag entries.")
if args.num:
data = data[: args.num]
data_out = {"bookmarks":[]}
n = 0
for entry in data_in:
bm = {
"createdAt": datetime.strptime(entry["created_at"], "%Y-%m-%dT%H:%M:%S%z").timestamp(),
"content": {"type": "link", "url": entry["url"]},
"title": entry["title"] if entry["title"] else None,
"tags": entry["tags"] + ["wallabag"],
# FIXME: Need to wait for better hoarder annotation handling to import them in a good format
# for now we just turn them _all_ into a single note.
# DOABLE WITH API? https://docs.hoarder.app/api/create-a-new-highlight
"note": json.dumps(entry["annotations"]) if entry["annotations"] else None,
}
data_out["bookmarks"].append(bm)
n += 1
if n > 50:
break
OUTPUT=""
OUTPUT_FILE = args.output
print(f"[DEBUG] input: {INPUT_FILE}")
print(f"[DEBUG] output: {OUTPUT_FILE}")
match args.flavour:
case "html":
print("[DEBUG] style: html")
OUTPUT = Netscape_Converter(data).convert()
case "json":
print("[DEBUG] style: json")
OUTPUT = JSON_Converter(data).convert()
with open(OUTPUT_FILE, "w") as f:
json.dump(data_out, f)
if OUTPUT_FILE:
with open(OUTPUT_FILE, "w") as f:
f.write(OUTPUT)
else:
print(OUTPUT)
if __name__ == "__main__":
main()