humblist/grab.nu
2025-11-21 17:16:20 +01:00

96 lines
3.1 KiB
Text
Executable file

#!/usr/bin/env nu
# Debug switch. When true, the page and OpenLibrary edition lookups are served
# from local fixtures under `intermediate/` instead of the network.
# Declared with `const` (not `let`) because `def` bodies do not capture outer
# runtime variables, while parse-time constants are visible inside them.
const debugging = false
# Script entry point.
#
# Fetches the bundle page at `url`, extracts the book rows from it, enriches
# them via OpenLibrary and returns the BookWyrm-shaped table rendered as CSV.
def main [url: string]: nothing -> any {
    # Stage 1: page -> embedded JSON record -> one row per bundle item.
    let books = (get_humble_page $url | grab_bundle_data | extract_books)

    # Stage 2: add query-safe search terms, then look up edition details.
    let enriched = ($books | url_sanitize_author_title | fill_details_from_openlibrary)

    # Stage 3: keep only the export columns and serialise.
    $enriched | to_bookwyrm_csv | to csv
}
# TODO: Use for testing w/o hammering bundle page
#
# Returns the content of `file` as raw text.
# `--raw` stops `open` from auto-parsing by file extension (e.g. `.json`),
# so the declared `string` return type holds for any fixture file.
def fake_get_page [file: string]: nothing -> string {
    open --raw $file
}
# Returns the bundle page as a string.
#
# Normally the page is downloaded from `url`; when `$debugging` is set the
# saved copy at `intermediate/out.html` is read through `fake_get_page` instead.
def get_humble_page [url: string]: nothing -> string {
    if not $debugging {
        http get $url | into string
    } else {
        fake_get_page intermediate/out.html
    }
}
# Extracts the JSON embedded in the page's `script#webpack-bundle-page-data`
# element and parses it.
#
# Input:  the page HTML as a string.
# Output: the parsed JSON.
# NOTE(review): `pup` is not defined in this file; presumably the external
# `pup` HTML-selector CLI must be on PATH — confirm.
def grab_bundle_data []: string -> record {
    $in | pup "script#webpack-bundle-page-data text{}" | from json
}
# Turns the bundle-page record into one row per bundle item.
#
# Input:  record containing `bundleData.tier_item_data` (a record keyed by
#         machine id).
# Output: table with columns machine_id, human_name, cover_art, publisher,
#         author. Fields missing on an item become null, except `author`,
#         which falls back to "".
def extract_books []: record -> table {
    get bundleData.tier_item_data |
    transpose machine_id item |
    # `item` always exists after `transpose`; the optional part is the field
    # inside it, so the `?` goes on the leaf member.
    insert human_name {|row| $row.item.human_name? } |
    insert cover_art {|row| $row.item.resolved_paths?.front_page_art_imgix_retina? } |
    # Only the first publisher / developer entry is used.
    insert publisher {|row| $row.item.publishers?.0?.publisher-name? } |
    insert author {|row| $row.item.developers?.0?.developer-name? | default "" } |
    reject item
}
# Adds query-string-safe copies of each row's title and author.
#
# Input:  table with `human_name` and `author` columns (null is treated as "").
# Output: the same table plus `human_name_sanitized` and `author_sanitized`.
#
# Every non-alphanumeric character is percent-encoded so that characters such
# as `&`, `#`, `?` or a literal `+` cannot break the search URL built from
# these values. Spaces are then written as `+`, as before.
def url_sanitize_author_title []: table -> table {
    insert human_name_sanitized {|row|
        $row.human_name | default "" | url encode --all | str replace --all "%20" "+"
    } |
    insert author_sanitized {|row|
        $row.author | default "" | url encode --all | str replace --all "%20" "+"
    }
}
# Debug stand-in for `get_olid`: ignores both arguments and always returns
# the same fixed OpenLibrary edition id.
def fake_get_olid [author: string, title: string]: nothing -> string {
    let canned_olid = "OL52558571M"
    $canned_olid
}
# Looks up a book on the OpenLibrary search API and returns the
# `cover_edition_key` of the first result, or "" when there is none.
#
# `author` and `title` are inserted into the URL verbatim, so callers must
# pass values that are already query-string safe. An empty value is left out
# of the query. When both are empty no request is made and "" is returned.
def get_olid [author: string, title: string]: nothing -> string {
    # Keep only the non-empty search terms so the URL never contains a
    # dangling `&` (e.g. `?&author=...`).
    let params = (
        [[key value]; [title $title] [author $author]]
        | where value != ""
        | each {|p| $"($p.key)=($p.value)" }
    )
    if ($params | is-empty) {
        return ""
    }
    let query = ($params | str join "&")
    http get $"https://openlibrary.org/search.json?($query)" |
    get docs.0?.cover_edition_key? |
    default ""
}
# Debug stand-in for `get_ol_edition`: ignores `olid` and returns the content
# of the local fixture file `intermediate/work_result.json`.
def fake_get_ol_edition [olid: string]: nothing -> record {
    let fixture = "intermediate/work_result.json"
    open $fixture
}
# Fetches the OpenLibrary edition document for the edition id `olid`
# from `https://openlibrary.org/books/<olid>.json`.
def get_ol_edition [olid: string]: nothing -> record {
    let edition_url = $"https://openlibrary.org/books/($olid).json"
    http get $edition_url
}
# Enriches every book row with edition details from OpenLibrary.
#
# Input:  table with `human_name`, `author`, `human_name_sanitized` and
#         `author_sanitized` columns.
# Output: the same rows, in input order, plus `openlibrary_key`, `isbn_13`,
#         `isbn_10`, `publish_date`, `subtitle`, `fulltitle`, and with
#         `publisher` / `title` set from the edition when available.
#
# Rows are processed in parallel. Progress messages go to stderr so they do
# not mix with the piped result.
def fill_details_from_openlibrary []: table -> table {
    # --keep-order makes the output follow the input order instead of the
    # order in which the parallel lookups happen to finish.
    par-each --keep-order { |row|
        print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"
        # NOTE(review): with $debugging set, rows with an empty author still
        # go through the real `get_olid` — confirm this is intended.
        let olid = if $debugging and $row.author_sanitized != "" {
            fake_get_olid $row.author_sanitized $row.human_name_sanitized
        } else {
            get_olid $row.author_sanitized $row.human_name_sanitized
        }
        # null when no edition id was found; the optional accesses below then
        # all evaluate to null.
        let result = if $olid != "" {
            print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
            if $debugging {
                fake_get_ol_edition $olid
            } else {
                get_ol_edition $olid
            }
        } else {
            null
        }
        $row |
        insert openlibrary_key $olid |
        # `.0?` so an empty list yields null instead of raising.
        insert isbn_13 ($result.isbn_13?.0?) |
        insert isbn_10 ($result.isbn_10?.0?) |
        insert publish_date ($result.publish_date?) |
        # Keep the publisher already on the row when the edition has none.
        upsert publisher ($result.publishers?.0? | default $row.publisher?) |
        upsert title ($result.title? | default $row.human_name?) |
        insert subtitle ($result.subtitle? | default "") |
        insert fulltitle ($result.full_title? | default "")
    }
}
# Reduces the enriched rows to the export columns:
# author, title, openlibrary_key, isbn_10, isbn_13.
#
# `title` becomes `fulltitle` when that is non-empty; otherwise it is
# "<title> <subtitle>", trimmed so that a missing subtitle does not leave a
# trailing space.
def to_bookwyrm_csv [] {
    $in |
    upsert title {|row|
        # A missing/null fulltitle counts as empty.
        let full = ($row.fulltitle? | default "")
        if $full != "" {
            $full
        } else {
            $"($row.title?) ($row.subtitle?)" | str trim
        }
    } |
    select author title openlibrary_key isbn_10 isbn_13
}
# Placeholder command: returns the fixed string "also here".
# The optional `write` flag is accepted but not used.
def sample_func [write?: bool] {
    "also here"
}