Add API converter
commit 6f79a12d2b
parent 59aaa74d76
5 changed files with 190 additions and 7 deletions

pyproject.toml

@@ -10,6 +10,7 @@ dependencies = [
     # "netscape-bookmarks-file-parser",
     # "pyjwt>=2.10.1",
     # "requests>=2.32.3",
+    "requests>=2.32.3",
 ]

 [tool.pyright]

uv.lock (70 additions)

@@ -6,3 +6,73 @@ requires-python = ">=3.13"
 name = "2hoarder"
 version = "0.1.0"
 source = { virtual = "." }
+dependencies = [
+    { name = "requests" },
+]
+
+[package.metadata]
+requires-dist = [{ name = "requests", specifier = ">=2.32.3" }]
+
+[[package]]
+name = "certifi"
+version = "2025.1.31"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 },
+    { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 },
+    { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 },
+    { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 },
+    { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 },
+    { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 },
+    { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 },
+    { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 },
+    { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 },
+    { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 },
+    { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 },
+    { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 },
+    { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 },
+    { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 },
+]
+
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 },
+]

@@ -7,16 +7,18 @@ from pathlib import Path

 from convert_netscape import Netscape_Converter
 from convert_native_json import JSON_Converter
+from convert_api import API_Converter


 def main():
     parser = argparse.ArgumentParser(description="Process input file(s)")
     parser.add_argument("input", help="Input file")
     parser.add_argument("--output", help="Output file")
+    parser.add_argument("--hoarder-url", help="Hoarder URL destination")
+    parser.add_argument("--hoarder-key", help="Hoarder API key")
     parser.add_argument(
-        "--flavour", choices=["html", "json"], default="json", help="Flavour of output"
+        "--flavour", choices=["api", "html", "json"], default="json", help="Flavour of output"
     )
     # TODO implement
     parser.add_argument(
         "--num", type=int, default=10, help="Number of items to process"
     )

@@ -49,6 +51,15 @@ def main():
         case "json":
             print("[DEBUG] style: json")
             OUTPUT = JSON_Converter(data).convert()
+        case "api":
+            print("[DEBUG] style: api")
+            if not args.hoarder_url or not args.hoarder_key:
+                print("Please provide valid hoarder url and api key.")
+                sys.exit(1)
+            OUTPUT = API_Converter(data, args.hoarder_url, args.hoarder_key).convert()
+        case _:
+            print("No valid conversion flavour given.")
+            sys.exit(1)

     if OUTPUT_FILE:
         with open(OUTPUT_FILE, "w") as f:
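
For reference, here is roughly how the new flavour would be exercised end to end. This is a sketch, not part of the commit: the export filename and the Hoarder URL/key values are placeholders, and loading the export mirrors what the entry script already does before dispatching on --flavour.

    import json

    from convert_api import API_Converter

    # Placeholder values; point these at a real Hoarder instance and key.
    HOARDER_URL = "https://hoarder.example.com"
    HOARDER_KEY = "ak1_xxxxxxxxxx_xxxxxxxxxx"

    # A Wallabag JSON export is a list of entry dicts, matching the
    # `data: list[dict]` parameter that API_Converter expects.
    with open("wallabag_export.json") as f:
        data = json.load(f)

    print(API_Converter(data, HOARDER_URL, HOARDER_KEY).convert())

Note that convert() as committed stops after the first entry (the trailing `break` in its loop), so a full import would need that testing guard removed.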

wallabag2hoarder/convert_api.py (new file, 106 lines)

@@ -0,0 +1,106 @@
import json

from base import Wallabag_Converter

# test_api_key: ak1_d202e69375c111461882_b0271d0e739ce0234f96
from requests import Response, request

# NOTE: Wallabag annotation format is as follows:
# [{'text': '', 'quote': "A while back they raised their prices, which lost them a lot of subscribers, because they were losing money per search at the old prices. They were actually still losing money per search on the new prices. They eventually lowered the prices back down a bit (and maybe raised them again? I've completely lost the plot on their pricing at this point) and have claimed that at 25,000 users they would be breaking even.", 'ranges': [{'start': '/p[6]', 'startOffset': '429', 'end': '/p[6]', 'endOffset': '844'}]}]
# with /p signifying the paragraph? Hoarder only has a concept of offset, so probably have to transform the paragraphs into lengths and then add them up to convert from one format to the other.


class API_Converter(Wallabag_Converter):
    def __init__(self, data: list[dict], hoarder_url: str, hoarder_key: str):
        self.data = data
        self.url = hoarder_url
        self.key = hoarder_key

        self.api_url = f"{self.url}/api/v1"
        self.bm_url = f"{self.api_url}/bookmarks"
        self.hl_url = f"{self.api_url}/highlights"
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {self.key}",
        }

    def convert(self) -> str:
        print(f"[DEBUG] Found {len(self.data)} wallabag entries.")

        for entry in self.data:
            # bm = {
            #     "content": {"type": "link", "url": entry["url"]},
            # }
            response = self._create_bookmark(entry).json()
            id = response["id"]

            if "alreadyExists" in response and response["alreadyExists"]:
                print(f"[INFO] Skip adding url: {entry['url']} already exists.")

            if entry["tags"]:
                self._create_tags(id, entry)

            if entry["annotations"]:
                self._create_annotations(id, entry)

            break
        return json.dumps("Done.")

    def _create_bookmark(self, entry) -> Response:
        payload = json.dumps(
            {
                "title": entry["title"] if entry["title"] else None,
                "archived": True if entry["is_archived"] == 1 else False,
                "favourited": True if entry["is_starred"] == 1 else False,
                "type": "link",
                "url": entry["url"],
                "tags": entry["tags"] + ["_wallabag"],
                # "note": "string",
                # "summary": "string",
                # "createdAt": datetime.strptime(
                #     entry["created_at"], "%Y-%m-%dT%H:%M:%S%z"
                # ).timestamp(),
                "createdAt": entry["created_at"],
            }
        )
        response = request("POST", self.bm_url, headers=self.headers, data=payload)
        return response

    def _create_tags(self, id, entry) -> Response:
        payload = json.dumps({"tags": [{"tagName": tag} for tag in entry["tags"]]})
        print(f"[DEBUG] Found {len(entry['tags'])} tags for {entry['url']}.")
        tag_attach_url = f"{self.bm_url}/{id}/tags"
        response = request(
            "POST",
            tag_attach_url,
            headers=self.headers,
            data=payload,
        )
        print(f"[DEBUG] TAGS: {response.json()}")
        return response

    def _create_annotations(self, entry_id, entry) -> Response:
        payload = json.dumps(
            {
                "bookmarkId": entry_id,
                "startOffset": 100,
                "endOffset": 200,
                "color": "yellow",
                "text": "mytext",
                "note": "mynote",
            }
        )
        annot_url = f"{self.api_url}/highlights"
        for annot in entry["annotations"]:
            response = request(
                "POST",
                annot_url,
                headers=self.headers,
                data=payload,
            )
            print(response.json())

        return response

    def _calc_annot_offsets(self, content): ...
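
The closing `_calc_annot_offsets` stub is where the NOTE's paragraph-to-offset translation would land, and the hardcoded startOffset/endOffset in _create_annotations would then give way to computed values. What follows is a minimal sketch of that translation under my own assumptions, none of which the commit confirms: that `content` is the entry's HTML, that `/p[6]` is a 1-based paragraph index as in XPath, and that Hoarder counts offsets over the concatenated paragraph text.

    import re
    from html.parser import HTMLParser


    class _ParagraphText(HTMLParser):
        """Collects the text content of each <p> element, in document order."""

        def __init__(self):
            super().__init__()
            self.paragraphs: list[str] = []
            self._in_p = False

        def handle_starttag(self, tag, attrs):
            if tag == "p":
                self._in_p = True
                self.paragraphs.append("")

        def handle_endtag(self, tag):
            if tag == "p":
                self._in_p = False

        def handle_data(self, data):
            if self._in_p:
                self.paragraphs[-1] += data


    def calc_annot_offsets(content: str, annot_range: dict) -> tuple[int, int]:
        """Translate a Wallabag range like
        {'start': '/p[6]', 'startOffset': '429', 'end': '/p[6]', 'endOffset': '844'}
        into absolute (start, end) offsets over the concatenated paragraph text.
        """
        parser = _ParagraphText()
        parser.feed(content)

        def absolute(xpath: str, local_offset: str) -> int:
            # '/p[6]' -> paragraph index 6 (assumed 1-based, as in XPath).
            match = re.fullmatch(r"/p\[(\d+)\]", xpath)
            if match is None:
                raise ValueError(f"unsupported range anchor: {xpath}")
            index = int(match.group(1)) - 1
            # Sum the lengths of all preceding paragraphs, then add the
            # offset local to the target paragraph.
            return sum(len(p) for p in parser.paragraphs[:index]) + int(local_offset)

        return (
            absolute(annot_range["start"], annot_range["startOffset"]),
            absolute(annot_range["end"], annot_range["endOffset"]),
        )

Similarly, the commented-out createdAt lines in _create_bookmark sketch an epoch conversion that the commit leaves unused (it sends Wallabag's ISO 8601 string through unchanged). Completed, with the format string taken straight from those comments, it would look like:

    from datetime import datetime


    def created_at_epoch(created_at: str) -> float:
        # Wallabag exports timestamps like "2024-05-01T12:34:56+0200".
        return datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S%z").timestamp()

Whether the Hoarder endpoint actually wants an epoch value or the ISO string is an open question the commit leaves to the commented code.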

wallabag2hoarder/convert_native_json.py

@@ -3,11 +3,6 @@ from datetime import datetime

 from base import Wallabag_Converter

-# NOTE: Wallabag annotation format is as follows:
-# [{'text': '', 'quote': "A while back they raised their prices, which lost them a lot of subscribers, because they were losing money per search at the old prices. They were actually still losing money per search on the new prices. They eventually lowered the prices back down a bit (and maybe raised them again? I've completely lost the plot on their pricing at this point) and have claimed that at 25,000 users they would be breaking even.", 'ranges': [{'start': '/p[6]', 'startOffset': '429', 'end': '/p[6]', 'endOffset': '844'}]}]
-# with /p signifying the paragraph? Hoarder only has a concept of offset, so probably have to transform the paragraphs into lengths and then add them up to convert from one format to the other.


 class JSON_Converter(Wallabag_Converter):
     def __init__(self, data: list[dict]):
         self.data = data