qutebrowser: Add doi2scihub script
Added a script which takes you to the corresponding sci-hub entry for any DOI. DOIs can be passed in three ways:
- via a hinted link (shortcut `;p` to start hinting)
- via selected text (select the text, then invoke the `send-to-scihub` command with `"p`)
- or from the meta tags of the current page (invoke the `send-to-scihub` command with `"p` while on an article page)
It grabs the newest sci-hub link and attempts to open the corresponding pdf file.
This commit is contained in:
parent
b2ee02e474
commit
7e2da571e7
4 changed files with 110 additions and 1 deletions
|
@ -16,6 +16,8 @@ c.aliases["send-to-archive"] = "open https://web.archive.org/web/{url}"
|
||||||
|
|
||||||
# save current page to pdf file
|
# save current page to pdf file
|
||||||
c.aliases["save_to_pdf"] = "spawn --userscript pagetopdf.sh"
|
c.aliases["save_to_pdf"] = "spawn --userscript pagetopdf.sh"
|
||||||
|
# open sci-hub pdf for doi
|
||||||
|
c.aliases["send-to-scihub"] = "spawn --userscript doi2scihub"
|
||||||
|
|
||||||
# translate current page / selection with google translate
|
# translate current page / selection with google translate
|
||||||
c.aliases["translate-page-google"] = "spawn --userscript translate_google.sh"
|
c.aliases["translate-page-google"] = "spawn --userscript translate_google.sh"
|
||||||
|
|
|
@ -77,6 +77,8 @@ config.bind('"T', "translate-selection-google", mode="normal")
|
||||||
config.bind('"q', "show-qr")
|
config.bind('"q', "show-qr")
|
||||||
|
|
||||||
config.bind(lleader + "r", "spawn --userscript readability")
|
config.bind(lleader + "r", "spawn --userscript readability")
|
||||||
|
config.bind('"p', "send-to-scihub", mode="normal") # view current page doi on scihub
|
||||||
|
config.bind(";p", "hint links run send-to-scihub") # view linked doi on scihub
|
||||||
|
|
||||||
# set stylesheets for the browser to use
|
# set stylesheets for the browser to use
|
||||||
config.bind(
|
config.bind(
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from qutebrowser.api import interceptor
|
from qutebrowser.api import interceptor
|
||||||
|
|
||||||
c.url.searchengines = {
|
c.url.searchengines = {
|
||||||
"#sci": "https://sci-hub.do/{}",
|
"sci": "https://sci-hub.ru/{}",
|
||||||
"DEFAULT": "https://search.martyoeh.me/?q={}",
|
"DEFAULT": "https://search.martyoeh.me/?q={}",
|
||||||
"al": "https://wiki.archlinux.org/index.php/{}",
|
"al": "https://wiki.archlinux.org/index.php/{}",
|
||||||
"alt": "https://alternativeto.net/software/{}/?license=opensource",
|
"alt": "https://alternativeto.net/software/{}/?license=opensource",
|
||||||
|
|
105
qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub
Executable file
105
qutebrowser/.local/share/qutebrowser/userscripts/doi2scihub
Executable file
|
@ -0,0 +1,105 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Goes to the sci-hub page for the current article, based on DOI.
|
||||||
|
|
||||||
|
Can be invoked with a DOI selected on the page,
|
||||||
|
through the hinting mode when selecting a DOI link
|
||||||
|
or on a publisher page (any page where doi meta-tags are set) -
|
||||||
|
works on ScienceDirect, Taylor&Francis, Springer, etc.
|
||||||
|
|
||||||
|
Updates its sci-hub link based on the one listed on sci-hub wiki page.
|
||||||
|
|
||||||
|
Based on the work in
|
||||||
|
https://github.com/cadadr/configuration/blob/4b6a241d04d113f322b960890a0d0a0ab783a7b3/dotfiles/qutebrowser/userscripts/doi
|
||||||
|
with much gratitude.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import html.parser
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# Value of the QUTE_MODE environment variable; the input selection further
# down compares it against "hints" and "command".
mode = os.getenv("QUTE_MODE")
|
||||||
|
|
||||||
|
# Candidate DOI text; stays None unless one of the input branches further
# down (hinted URL, selected text, page meta tags) fills it in.
text = None
|
||||||
|
|
||||||
|
|
||||||
|
class DoiTagParser(html.parser.HTMLParser):
|
||||||
|
doi = None
|
||||||
|
|
||||||
|
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||||
|
if self.doi == None and tag == "meta":
|
||||||
|
if (
|
||||||
|
("name", "citation_doi") in attrs
|
||||||
|
or ("name", "dc.identifier") in attrs
|
||||||
|
or ("scheme", "doi") in attrs
|
||||||
|
):
|
||||||
|
for att in attrs:
|
||||||
|
if att[0] == "content":
|
||||||
|
self.doi = att[1]
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
class SciHubLinkParser(html.parser.HTMLParser):
|
||||||
|
current = None
|
||||||
|
link_patt = re.compile(r"^(?P<url>https?://sci-hub\..+)/about$")
|
||||||
|
|
||||||
|
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||||
|
if self.current == None and tag == "a":
|
||||||
|
for att in attrs:
|
||||||
|
if att[0] == "href" and self.link_patt.match(att[1] or ""):
|
||||||
|
match = self.link_patt.match(att[1] or "")
|
||||||
|
self.current = match["url"] if match and match["url"] else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_scihub_url(
    wiki_page: str = "https://wikiless.org/wiki/Sci-Hub", timeout: float = 10.0
) -> str:
    """Return the sci-hub base URL currently linked from ``wiki_page``.

    Args:
        wiki_page: Page to download and scan for a sci-hub ``/about`` link.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        The first sci-hub base URL found on the page, or
        ``"https://sci-hub.ru"`` when the page cannot be fetched or contains
        no matching link.
    """
    fallback = "https://sci-hub.ru"

    # Without a timeout requests waits indefinitely; any network problem
    # should degrade to the fallback mirror instead of killing the script.
    try:
        resp = requests.get(wiki_page, timeout=timeout)
        resp.raise_for_status()
    except requests.RequestException:
        return fallback

    parser = SciHubLinkParser()
    parser.feed(resp.text)
    return parser.current or fallback
|
||||||
|
|
||||||
|
|
||||||
|
# Pick the text to search for a DOI, in order of preference:
# the hinted link, the current selection, then the page's meta tags.
if mode == "hints":
    text = os.getenv("QUTE_URL", "").strip()
elif mode == "command" and os.getenv("QUTE_SELECTED_TEXT"):
    text = os.getenv("QUTE_SELECTED_TEXT", "").strip()
elif os.getenv("QUTE_HTML"):
    # Scan the saved page source for DOI meta tags (citation_doi,
    # dc.identifier, scheme="doi"). Decode explicitly and leniently so the
    # locale encoding or a stray byte cannot abort the script.
    with open(
        os.getenv("QUTE_HTML", ""), "r", encoding="utf-8", errors="replace"
    ) as source:
        parser = DoiTagParser()
        parser.feed(source.read())
        text = parser.doi

# Commands written to the FIFO are executed by qutebrowser.
with open(os.getenv("QUTE_FIFO", ""), "w") as fifo:
    if not text:
        fifo.write('message-warning "Could not find a valid DOI"')
        sys.exit()

    # DOI syntax: https://www.doi.org/doi_handbook/2_Numbering.html#2.2.
    #
    # Every DOI starts with the directory indicator "10." followed by a
    # numeric registrant code (optionally subdivided with dots), a slash and
    # a suffix with practically no restrictions on length or content.
    # Searching for that shape anywhere in the text copes with URL prefixes
    # (doi.org, dx.doi.org, publisher links) and "doi:" labels, and keeps
    # suffix characters such as "(", ")", ":" and ";" intact. URL query and
    # fragment delimiters end the match.
    doi_re = re.compile(r"10\.\d{4,}(?:\.\d+)*/[^\s\"'?#&]+")

    match = doi_re.search(text)

    if match is None:
        fifo.write(
            f"message-warning \"'{text}' is probably not a DOI, or update regexp\""
        )
    else:
        url = get_scihub_url()
        # Drop sentence punctuation that tends to get caught when a DOI is
        # selected from running text.
        doi = match[0].rstrip(".,;")

        fifo.write(f"open -t {url}/{doi}")
|
Loading…
Reference in a new issue