From 7e2da571e75025689088ff118fe955c7b6e24b49 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Fri, 9 Dec 2022 12:51:10 +0100
Subject: [PATCH] qutebrowser: Add doi2scihub script

Added script which takes you to the corresponding sci-hub entry for
any DOI. DOIs can be passed in three ways:

- via hinted link (shortcut `;p` to start hinting)
- via selected text (select text then invoke `send-to-scihub` command
  with `"p`)
- or from meta tags in current page (invoke `send-to-scihub` command
  with `"p` when on article page)

It will grab the newest sci-hub link and attempt to bring you the
corresponding pdf file.
---
 qutebrowser/.config/qutebrowser/alias.py      |   2 +
 qutebrowser/.config/qutebrowser/maps.py       |   2 +
 qutebrowser/.config/qutebrowser/url.py        |   2 +-
 .../share/qutebrowser/userscripts/doi2scihub  | 105 ++++++++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100755 qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub

diff --git a/qutebrowser/.config/qutebrowser/alias.py b/qutebrowser/.config/qutebrowser/alias.py
index 95026ea..e54b389 100644
--- a/qutebrowser/.config/qutebrowser/alias.py
+++ b/qutebrowser/.config/qutebrowser/alias.py
@@ -16,6 +16,8 @@ c.aliases["send-to-archive"] = "open https://web.archive.org/web/{url}"
 
 # save current page to pdf file
 c.aliases["save_to_pdf"] = "spawn --userscript pagetopdf.sh"
+# open sci-hub pdf for doi
+c.aliases["send-to-scihub"] = "spawn --userscript doi2scihub"
 
 # translate current page / selection with google translate
 c.aliases["translate-page-google"] = "spawn --userscript translate_google.sh"
diff --git a/qutebrowser/.config/qutebrowser/maps.py b/qutebrowser/.config/qutebrowser/maps.py
index 2b1a0d6..511070d 100644
--- a/qutebrowser/.config/qutebrowser/maps.py
+++ b/qutebrowser/.config/qutebrowser/maps.py
@@ -77,6 +77,8 @@ config.bind('"T', "translate-selection-google", mode="normal")
 
 config.bind('"q', "show-qr")
 config.bind(lleader + "r", "spawn --userscript readability")
+config.bind('"p', "send-to-scihub", mode="normal")  # view current page doi on scihub
+config.bind(";p", "hint links run send-to-scihub")  # view linked doi on scihub
 
 # set stylesheets for the browser to use
 config.bind(
diff --git a/qutebrowser/.config/qutebrowser/url.py b/qutebrowser/.config/qutebrowser/url.py
index c2d63f0..3048daf 100644
--- a/qutebrowser/.config/qutebrowser/url.py
+++ b/qutebrowser/.config/qutebrowser/url.py
@@ -1,7 +1,7 @@
 from qutebrowser.api import interceptor
 
 c.url.searchengines = {
-    "#sci": "https://sci-hub.do/{}",
+    "sci": "https://sci-hub.ru/{}",
     "DEFAULT": "https://search.martyoeh.me/?q={}",
     "al": "https://wiki.archlinux.org/index.php/{}",
     "alt": "https://alternativeto.net/software/{}/?license=opensource",
diff --git a/qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub b/qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub
new file mode 100755
index 0000000..5f47a84
--- /dev/null
+++ b/qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Goes to the sci-hub page for the current article, based on DOI.
+
+Can be invoked with DOI on a page selected,
+through the hinting mode when selecting a DOI link
+or on a publisher page (any page where doi meta-tags are set) -
+works on ScienceDirect, Taylor&Francis, Springer, etc.
+
+Updates its sci-hub link based on the one listed on sci-hub wiki page.
+
+Based on the work in
+https://github.com/cadadr/configuration/blob/4b6a241d04d113f322b960890a0d0a0ab783a7b3/dotfiles/qutebrowser/userscripts/doi
+with much gratitude.
+"""
+
+
+import os
+import re
+import sys
+import html.parser
+import requests
+
+
+mode = os.getenv("QUTE_MODE")
+
+text = None
+
+
+class DoiTagParser(html.parser.HTMLParser):
+    """Collect the DOI announced in a page's citation <meta> tags."""
+    doi = None  # content of the first DOI-bearing <meta> tag, once seen
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        """Store the ``content`` of the first DOI-bearing <meta> tag."""
+        if self.doi is None and tag == "meta":
+            if (
+                ("name", "citation_doi") in attrs
+                or ("name", "dc.identifier") in attrs
+                or ("scheme", "doi") in attrs
+            ):
+                self.doi = next((v for k, v in attrs if k == "content"), None)
+
+
+class SciHubLinkParser(html.parser.HTMLParser):
+    """Find the current sci-hub base URL among a page's <a href> links."""
+    current = None  # base URL such as "https://sci-hub.ru", once found
+    link_patt = re.compile(r"^(?P<url>https?://sci-hub\..+)/about$")
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        """Record the base URL from the first sci-hub ``.../about`` link."""
+        if self.current is None and tag == "a":
+            for name, value in attrs:
+                if name == "href" and (match := self.link_patt.match(value or "")):
+                    self.current = match["url"]
+
+
+def get_scihub_url(wiki_page: str = "https://wikiless.org/wiki/Sci-Hub"):
+    """Return the sci-hub base URL linked from *wiki_page*, else a fallback."""
+    parser = SciHubLinkParser()
+    try:
+        parser.feed(requests.get(wiki_page, timeout=10).text)
+    except requests.RequestException:
+        pass  # wiki page unreachable: use the hard-coded fallback below
+    return parser.current or "https://sci-hub.ru"
+
+
+if mode == "hints":
+    text = os.getenv("QUTE_URL", "").strip()
+elif mode == "command" and os.getenv("QUTE_SELECTED_TEXT"):
+    text = os.getenv("QUTE_SELECTED_TEXT", "").strip()
+elif os.getenv("QUTE_HTML"):
+    # no hint or selection: look for DOI meta tags in the page source
+    with open(os.getenv("QUTE_HTML", ""), encoding="utf-8") as source:
+        parser = DoiTagParser()
+        parser.feed(source.read())
+        text = parser.doi
+
+with open(os.getenv("QUTE_FIFO", ""), "w") as fifo:
+    if not text:
+        fifo.write('message-warning "Could not find a valid DOI"')
+        sys.exit()
+
+    # DOI syntax: https://www.doi.org/doi_handbook/2_Numbering.html#2.2.
+    #
+    # Note that this probably matches a subset of possible DOIs, as it
+    # seems that there’s no practical limitation on either the length or
+    # the contents of the DOI. But IMHO this is a healthy subset.
+    doi_re = re.compile(
+        # match possible URI prefix
+        r"(?P<prefix>((https?)?://)?doi\.org/)?"
+        # match actual DOI
+        r"(?P<meat>10\.[0-9.]+/[a-zA-Z0-9\./\-_]+)"
+    )
+
+    # search, not match: the DOI may sit behind e.g. "doi:" or dx.doi.org
+    match = doi_re.search(text)
+
+    if match is None:
+        # repr() escapes newlines so page text cannot inject extra commands
+        fifo.write(
+            f'message-warning "{text!r} is probably not a DOI, or update regexp"'
+        )
+    else:
+        fifo.write(f"open -t {get_scihub_url()}/{match['meat']}")