dotfiles/qutebrowser/data/userscripts/qute-gemini

399 lines
14 KiB
Text
Raw Permalink Normal View History

#!/usr/bin/env python3
# qute-gemini - Open Gemini links in qutebrowser and render them as HTML
#
# SPDX-FileCopyrightText: 2019-2020 solderpunk
# SPDX-FileCopyrightText: 2020 Aaron Janse
# SPDX-FileCopyrightText: 2020 petedussin
# SPDX-FileCopyrightText: 2020-2021 Sotiris Papatheodorou
# SPDX-License-Identifier: GPL-3.0-or-later
# 2022-2023 Marty Oehme (added stand-alone script capability)
# Use it as a qutebrowser userscript to open gemini pages:
# Put this file in the qutebrowser userscripts folder and run the command
# `:spawn --userscript qute-gemini "gemini://my-gemini-url.org"`
# or
# `:hint links userscript qute-gemini` to open a selected link.
# Rename the file to `qute-gemini-tab` (or create a symlink with that name)
# to open any gemini URL in a new tab.
# Since the script also opens normal URLs, you can even replace your
# normal link hint mapping with it (usually f, or F for tabbed) and
# continue browsing as usual, except that you can now also access
# gemini pages as if they were part of the normal http protocol.
import cgi
import html
import os
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import urllib.parse
from typing import Tuple
# Version string shown in the "Fetched by" footer of generated pages.
_version = "1.0.0"
# Maximum number of Gemini redirects that are followed before giving up.
_max_redirects = 5
# Skeleton of the error page. The placeholder words URL, URL_TEXT,
# DESCRIPTION and CSS are substituted by qute_error_page().
_error_page_template = """<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Error opening page: URL</title>
<style>
CSS
</style>
</head>
<body>
<h1>qute-gemini error</h1>
<p>Error while opening:<br/><a href="URL">URL_TEXT</a></p>
<p>DESCRIPTION</p>
</body>
</html>
"""
# Error descriptions keyed by Gemini status code, given as a string.
# One-character keys are the fallback for a whole status class; they are
# used when no two-character entry matches. The placeholder word META is
# replaced with the meta field sent by the server.
_status_code_desc = {
# 1x: input requested
"1": "Gemini status code 1 Input. This is not implemented in qute-gemini.",
"10": "Gemini status code 10 Input. This is not implemented in qute-gemini.",
"11": """Gemini status code 11 Sensitive Input. This is not implemented
in qute-gemini.""",
# 3x: redirects (only reported once the redirect limit was exceeded)
"3": "Gemini status code 3 Redirect. Stopped after "
+ str(_max_redirects)
+ " redirects.",
"30": "Gemini status code 30 Temporary Redirect. Stopped after "
+ str(_max_redirects)
+ " redirects.",
"31": "Gemini status code 31 Permanent Redirect. Stopped after "
+ str(_max_redirects)
+ " redirects.",
# 4x: temporary failures
"4": "Gemini status code 4 Temporary Failure. Server message: META",
"40": "Gemini status code 40 Temporary Failure. Server message: META",
"41": """Gemini status code 41 Server Unavailable.
The server is unavailable due to overload or maintenance. Server message: META""",
"42": """Gemini status code 42 CGI Error.
A CGI process, or similar system for generating dynamic content,
died unexpectedly or timed out. Server message: META""",
"43": """Gemini status code 43 Proxy Error.
A proxy request failed because the server was unable to successfully
complete a transaction with the remote host. Server message: META""",
"44": """Gemini status code 44 Slow Down. Rate limiting is in effect.
Please wait META seconds before making another request to this server.""",
# 5x: permanent failures
"5": "Gemini status code 5 Permanent Failure. Server message: META",
"50": "Gemini status code 50 Permanent Failure. Server message: META",
"51": """Gemini status code 51 Not Found. The requested resource could
not be found but may be available in the future. Server message: META""",
"52": """Gemini status code 52 Gone. The resource requested is no longer
available and will not be available again. Server message: META""",
"53": """Gemini status code 53 Proxy Request Refused. The request was for
a resource at a domain not served by the server and the server does
not accept proxy requests. Server message: META""",
"59": """Gemini status code 59 Bad Request. The server was unable to
parse the client's request, presumably due to a malformed request.
Server message: META""",
# 6x: client certificates
"6": """Gemini status code 6 Client Certificate Required.
This is not implemented in qute-gemini.""",
}
def qute_url() -> str:
    """Return the URL qutebrowser handed to the script.

    The value is read from the ``QUTE_URL`` environment variable; an
    empty string is returned when the variable is not set.
    """
    return os.getenv("QUTE_URL", "")
def qute_fifo() -> str:
    """Return the FIFO (or file) that qutebrowser commands are written to.

    The path is read from the ``QUTE_FIFO`` environment variable; an
    empty string is returned when the variable is not set.
    """
    return os.getenv("QUTE_FIFO", "")
def html_href(url: str, description: str) -> str:
    """Build an HTML anchor element pointing at url.

    url: Inserted as the value of the href attribute.
    description: Inserted as the content of the element.
    Both values are inserted verbatim; no escaping is performed by this
    function.
    """
    return f'<a href="{url}">{description}</a>'
def qute_gemini_css_path() -> str:
    """Return the path where the custom CSS file is expected to be.

    The file is ``qutebrowser/userscripts/qute-gemini.css`` below
    ``$XDG_DATA_HOME``. When that variable is unset or empty, the
    ``.local/share`` directory inside the user's home directory is used
    instead.
    """
    base_dir = os.environ.get("XDG_DATA_HOME")
    if not base_dir:
        # expanduser() honours $HOME but still finds the home directory
        # when $HOME is missing, instead of raising KeyError
        base_dir = os.path.join(os.path.expanduser("~"), ".local/share")
    return os.path.join(base_dir, "qutebrowser/userscripts/qute-gemini.css")
def gemini_absolutise_url(base_url: str, relative_url: str) -> str:
    """Absolutise relative gemini URLs.

    base_url: The URL of the document the reference was found in.
    relative_url: The reference to resolve against base_url.
    Returns relative_url unchanged when it already carries a scheme,
    otherwise the reference resolved against base_url.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    # Decide by the parsed scheme rather than by searching for "://", so
    # that a relative reference with a URL in its query is still resolved
    if urllib.parse.urlsplit(relative_url).scheme:
        return relative_url
    gemini_prefix = "gemini://"
    http_prefix = "http://"
    # urljoin() only resolves references for schemes it knows about, so a
    # gemini base is temporarily presented as http. Only the leading scheme
    # is swapped; the rest of the URL is left untouched.
    swapped = base_url.startswith(gemini_prefix)
    if swapped:
        base_url = http_prefix + base_url[len(gemini_prefix):]
    absolute_url = urllib.parse.urljoin(base_url, relative_url)
    if swapped and absolute_url.startswith(http_prefix):
        absolute_url = gemini_prefix + absolute_url[len(http_prefix):]
    return absolute_url
def _parse_media_type(meta: str) -> Tuple[str, str]:
    """Split a MIME type header value into (media type, charset).

    The charset defaults to UTF-8 when none is declared. The value is
    parsed by hand so that this code does not rely on the cgi module,
    which is no longer part of the standard library in Python 3.13.
    """
    media_type, _, params = meta.partition(";")
    charset = "UTF-8"
    for param in params.split(";"):
        key, _, value = param.partition("=")
        value = value.strip().strip('"')
        if key.strip().lower() == "charset" and value:
            charset = value
    return media_type.strip(), charset


def _gemini_request(url: str, hostname: str, port: int) -> Tuple[str, str, bytes]:
    """Perform a single Gemini request.

    Returns the status, the meta field and the raw response body. The
    body is only read for a successful (2x) text/gemini response and is
    empty otherwise. Every socket is closed before returning.
    Raises OSError (including ssl.SSLError) on connection problems.
    """
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    # Certificate and host name verification are deliberately switched
    # off. check_hostname has to be disabled before verify_mode can be
    # set to CERT_NONE.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    with socket.create_connection((hostname, port)) as raw_sock:
        # SNI takes the bare host name, not the netloc (which may carry
        # a port or user information)
        with context.wrap_socket(raw_sock, server_hostname=hostname) as tls_sock:
            tls_sock.sendall((url + "\r\n").encode("UTF-8"))
            with tls_sock.makefile("rb") as response:
                header = response.readline().decode("UTF-8", errors="replace")
                # The meta field may itself contain spaces, so split
                # only once
                fields = header.strip().split(None, 1)
                status = fields[0] if fields else ""
                meta = fields[1] if len(fields) > 1 else ""
                body = b""
                if status.startswith("2"):
                    media_type, _ = _parse_media_type(meta)
                    if media_type.lower() == "text/gemini":
                        body = response.read()
    return status, meta, body


def gemini_fetch_url(url: str) -> Tuple[str, str, str, str, str]:
    """Fetch a Gemini URL and return the content as a string.

    url: URL with gemini:// or no scheme.
    Returns 5 strings: the content, the URL the content was fetched from,
    the Gemini status code, the value of the meta field and an error
    message. The error message is empty on success. The status code is
    empty when no response was received at all.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    parsed_url = urllib.parse.urlparse(url)
    if not parsed_url.scheme:
        url = "gemini://" + url
        parsed_url = urllib.parse.urlparse(url)
    if parsed_url.scheme != "gemini":
        return "", url, "59", "Received non-gemini:// URL: " + url, "Non-gemini URL"

    # Do the Gemini transaction, looping for redirects
    redirects = 0
    while True:
        # Host and port are taken from the current URL on every round so
        # that a redirect to another host or port is honoured
        hostname = parsed_url.hostname
        try:
            port = parsed_url.port
        except ValueError:
            return "", url, "", "", "Invalid port in URL: " + url
        if not hostname:
            return "", url, "", "", "No host name in URL: " + url
        if port is None:
            port = 1965
        try:
            status, meta, body = _gemini_request(url, hostname, port)
        except OSError as exc:
            # Report connection problems as an error page, not a crash
            return "", url, "", "", "Could not fetch " + url + ": " + str(exc)
        if not status.startswith("3"):
            break
        url = gemini_absolutise_url(url, meta)
        parsed_url = urllib.parse.urlparse(url)
        redirects += 1
        if redirects > _max_redirects:
            # Too many redirects
            break
        if parsed_url.scheme != "gemini":
            # Other protocols cannot be spoken over a Gemini connection
            return "", url, status, meta, "Redirected to a non-gemini:// URL: " + url

    # Process the response
    content = ""
    error_msg = ""
    if status.startswith("2"):
        media_type, charset = _parse_media_type(meta)
        if media_type.lower() == "text/gemini":
            # Decode according to declared charset defaulting to UTF-8
            try:
                content = body.decode(charset, errors="replace")
            except LookupError:
                error_msg = "Unknown charset in server response: " + charset
        else:
            error_msg = "Expected media type text/gemini but received " + media_type
    else:
        # Try matching a 2-digit and then a 1-digit status code
        error_msg = (
            _status_code_desc.get(status[0:2])
            or _status_code_desc.get(status[0:1])
            or "The server sent back something weird."
        )
        # Substitute the contents of meta into the error message if needed
        error_msg = error_msg.replace("META", meta)
    return content, url, status, meta, error_msg
def gemtext_to_html(
    gemtext: str, url: str, original_url: str, status: str, meta: str
) -> str:
    """Convert gemtext to HTML.

    gemtext: The gemtext document to convert.
    url: The URL the gemtext was received from. Used as the document
         title and to resolve relative URLs in the gemtext content.
    original_url: The URL the original request was made at.
    status: The Gemini status code returned by the server.
    meta: The meta returned by the server.
    Returns the HTML representation as a string.

    All text taken from the document, the URLs or the server response is
    HTML-escaped before it is inserted into the page.
    """
    safe_url = html.escape(url)
    # Accumulate converted gemtext lines
    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">',
        "\t<head>",
        "\t\t<title>" + safe_url + "</title>",
        "\t\t<style>",
        get_css(),
        "\t\t</style>",
        "\t</head>",
        "\t<body>",
        "\t<article>",
    ]
    in_pre = False
    in_list = False
    for line in gemtext.splitlines():
        is_toggle = line.startswith("```")
        # Inside a preformatted block nothing but the closing toggle is
        # interpreted; blank lines and link-like lines are kept as text
        if in_pre and not is_toggle:
            lines.append(html.escape(line))
            continue
        # Any line that is not a list item ends the current list
        if in_list and not line.startswith("* "):
            lines.append("\t\t</ul>")
            in_list = False
        # Preformatted toggle
        if is_toggle:
            # The closing tag is not indented: leading tabs would be
            # rendered as part of the preformatted text
            lines.append("</pre>" if in_pre else "\t\t<pre>")
            in_pre = not in_pre
        # Blank line, ignore
        elif not line:
            pass
        # Link
        elif line.startswith("=>"):
            parts = line[2:].split(None, 1)
            if not parts:
                # No target given, nothing to link to: show the line as text
                lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
            else:
                # Use the URL itself as the description if there is none
                label = html.escape(parts[1] if len(parts) > 1 else parts[0])
                # Resolve relative URLs
                target = html.escape(gemini_absolutise_url(url, parts[0]))
                lines.append("\t\t<p>" + html_href(target, label) + "</p>")
        # Header
        elif line.startswith("###"):
            lines.append("\t\t<h3>" + html.escape(line[3:].strip()) + "</h3>")
        elif line.startswith("##"):
            lines.append("\t\t<h2>" + html.escape(line[2:].strip()) + "</h2>")
        elif line.startswith("#"):
            lines.append("\t\t<h1>" + html.escape(line[1:].strip()) + "</h1>")
        # List item: an asterisk followed by a space
        elif line.startswith("* "):
            if not in_list:
                lines.append("\t\t<ul>")
                in_list = True
            lines.append("\t\t\t<li>" + html.escape(line[2:].strip()) + "</li>")
        # Quote
        elif line.startswith(">"):
            lines.extend(
                [
                    "\t\t<blockquote>",
                    "\t\t\t<p>" + html.escape(line[1:].strip()) + "</p>",
                    "\t\t</blockquote>",
                ]
            )
        # Normal text
        else:
            lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
    # Close whatever is still open at the end of the document
    if in_list:
        lines.append("\t\t</ul>")
    if in_pre:
        lines.append("</pre>")
    url_html = html_href(safe_url, safe_url)
    safe_original_url = html.escape(original_url)
    original_url_html = html_href(safe_original_url, safe_original_url)
    lines.extend(
        [
            "",
            "\t</article>",
            "\t<details>",
            "\t\t<summary>",
            "\t\t\tContent from " + url_html,
            "\t\t</summary>",
            "\t\t<dl>",
            "\t\t\t<dt>Original URL</dt>",
            "\t\t\t<dd>" + original_url_html + "</dd>",
            "\t\t\t<dt>Status</dt>",
            "\t\t\t<dd>" + html.escape(status) + "</dd>",
            "\t\t\t<dt>Meta</dt>",
            "\t\t\t<dd>" + html.escape(meta) + "</dd>",
            "\t\t\t<dt>Fetched by</dt>",
            '\t\t\t<dd><a href="https://git.sr.ht/~sotirisp/qute-gemini">qute-gemini '
            + str(_version)
            + "</a></dd>",
            "\t\t</dl>",
            "\t</details>",
            "\t</body>",
            "</html>",
        ]
    )
    return "\n".join(lines)
def get_css() -> str:
    """Return the contents of the custom CSS file.

    The file is looked up at the path given by qute_gemini_css_path()
    and read as UTF-8. Surrounding whitespace is stripped.
    Returns an empty string (no CSS) when the file does not exist or
    cannot be read.
    """
    try:
        with open(qute_gemini_css_path(), "r", encoding="utf-8") as f:
            return f.read().strip()
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable stylesheet: use no CSS
        return ""
def qute_error_page(url: str, description: str) -> str:
    """Return a data URI error page like qutebrowser does.

    url: The URL of the page that failed to load.
    description: A description of the error.
    Returns a data URI containing the error page.

    Both url and description are HTML-escaped before they are inserted
    into the page.
    """
    safe_url = html.escape(url)
    substitutions = {
        "URL_TEXT": safe_url,
        "URL": safe_url,
        "DESCRIPTION": html.escape(description),
        "CSS": get_css(),
    }
    # Fill in all placeholders in a single pass. Replacing them one after
    # another would turn URL_TEXT into "<url>_TEXT" and would substitute
    # placeholder words that merely occur inside an inserted value.
    # URL_TEXT is listed before URL so that the longer name wins.
    html_page = re.sub(
        "URL_TEXT|URL|DESCRIPTION|CSS",
        lambda match: substitutions[match.group(0)],
        _error_page_template,
    )
    # URL encode and return as a data URI
    return "data:text/html;charset=UTF-8," + urllib.parse.quote(html_page)
def open_gemini(url: str) -> str:
    """Fetch a Gemini URL and prepare it for display in qutebrowser.

    url: The gemini:// URL to fetch.
    Returns the URL qutebrowser should open: a file:// URL of a temporary
    HTML file holding the converted page on success, or a data URI
    containing an error page when fetching failed.
    """
    # Get the Gemini content
    content, content_url, status, meta, error_msg = gemini_fetch_url(url)
    if error_msg:
        # Generate an error page in a data URI
        return qute_error_page(url, error_msg)
    # Success, convert to HTML first so that no empty file is left behind
    # should the conversion fail
    page = gemtext_to_html(content, content_url, url, status, meta)
    # The file is kept (delete=False) because the browser reads it after
    # this script has finished. It is written as UTF-8 to match the
    # encoding declared in the generated page.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".html", delete=False, encoding="utf-8"
    ) as tmpf:
        tmpf.write(page)
    return "file://" + tmpf.name
def open_url(url: str, open_args: str) -> None:
    """Open a URL, converting gemini:// pages to HTML first.

    url: The URL to open. URLs with any scheme other than gemini are
         passed on unchanged.
    open_args: Extra arguments for the qutebrowser open command.

    When a qutebrowser FIFO is available, an open command is written to
    it. Otherwise the URL is handed to xdg-open.
    """
    parsed_url = urllib.parse.urlparse(url)
    to_open = open_gemini(url) if parsed_url.scheme == "gemini" else url
    # Surrounding whitespace would end up inside the single argument
    # passed to xdg-open below
    to_open = to_open.strip()
    if not to_open:
        return
    fifo = qute_fifo()
    if fifo:
        with open(fifo, "w") as qfifo:
            qfifo.write(f"open {open_args} {to_open}")
        return
    # Pass the URL as one argument without involving a shell, so that
    # shell metacharacters in the URL cannot run commands
    try:
        subprocess.run(["xdg-open", to_open], check=False)
    except OSError as exc:
        print(f"qute-gemini: could not run xdg-open: {exc}", file=sys.stderr)
if __name__ == "__main__":
    # A script name ending in "-tab" opens the page in a new tab,
    # any other name opens it in the current one
    tab_flags = "-b -r" if sys.argv[0].endswith("-tab") else ""
    # The first command line argument wins over the URL from qutebrowser
    target = sys.argv[1] if len(sys.argv) > 1 else qute_url()
    # How the URL is opened depends on its scheme
    open_url(target, tab_flags)