#!/usr/bin/env nu

# When true, the bundle page and Open Library responses are read from local
# fixture files instead of the network.
# Declared with `const` rather than `let`: custom commands (`def`) do not
# capture `let` variables from the enclosing scope, but constants are visible.
const debugging = false

# Convert a Humble Bundle book bundle page into a BookWyrm-importable CSV.
#
# Pipeline: fetch page -> pull embedded JSON -> one row per book ->
# URL-encode search terms -> enrich from Open Library -> select CSV columns.
def main [
    url: string # URL of the Humble Bundle bundle page
]: nothing -> any {
    get_humble_page $url
    | grab_bundle_data
    | extract_books
    | url_sanitize_author_title
    | fill_details_from_openlibrary
    | to_bookwyrm_csv
    | to csv
}

# TODO: Use for testing w/o hammering bundle page
# Read a previously saved copy of the bundle page from disk.
def fake_get_page [
    file: string # path of the saved page
]: nothing -> string {
    open $file
}

# Return the bundle page content as a string (from the fixture file when
# `$debugging` is set, otherwise via HTTP).
def get_humble_page [
    url: string # URL of the Humble Bundle bundle page
]: nothing -> string {
    if $debugging {
        fake_get_page intermediate/out.html
    } else {
        http get $url | into string
    }
}

# Extract the JSON payload embedded in the page's
# `script#webpack-bundle-page-data` element. Requires the external `pup` tool.
def grab_bundle_data []: string -> record {
    pup "script#webpack-bundle-page-data text{}"
    | from json
}

# Turn `bundleData.tier_item_data` into a table with one row per item and the
# columns machine_id, human_name, cover_art, publisher and author.
# Every nested lookup is optional so an item missing a key yields null (or ""
# for author) instead of aborting the whole run.
def extract_books []: record -> table {
    get bundleData.tier_item_data
    | transpose machine_id item
    | insert human_name {$in.item?.human_name?}
    | insert cover_art {$in.item?.resolved_paths?.front_page_art_imgix_retina?}
    | insert publisher {$in.item?.publishers?.0?.publisher-name?}
    | insert author {$in.item?.developers?.0?.developer-name? | default ""}
    | reject item
}

# Add `human_name_sanitized` and `author_sanitized` columns holding
# percent-encoded versions of the title and author, safe to place in a URL
# query string. Encoding everything non-alphanumeric (not just spaces) keeps
# characters such as `&`, `#` and `=` from corrupting the query.
def url_sanitize_author_title []: table -> table {
    insert human_name_sanitized {$in.human_name? | default "" | url encode --all}
    | insert author_sanitized {$in.author? | default "" | url encode --all}
}

# Debug stand-in for `get_olid`: always returns the same edition key.
def fake_get_olid [author: string, title: string]: nothing -> string {
    "OL52558571M"
}

# Search Open Library and return the `cover_edition_key` of the first hit,
# or "" when there is nothing to search for or no usable hit.
def get_olid [
    author: string # already URL-encoded author, may be ""
    title: string  # already URL-encoded title, may be ""
]: nothing -> string {
    # Only include the parameters that actually carry a value.
    let params = [
        (if $title == "" { null } else { $"title=($title)" })
        (if $author == "" { null } else { $"author=($author)" })
    ] | compact

    # An empty query cannot identify a book; skip the request entirely.
    if ($params | is-empty) {
        return ""
    }

    let query = $params | str join "&"
    http get $"https://openlibrary.org/search.json?($query)"
    | get docs?.0?.cover_edition_key?
    | default ""
}

# Debug stand-in for `get_ol_edition`: reads a saved edition record from disk.
def fake_get_ol_edition [olid: string]: nothing -> record {
    open intermediate/work_result.json
}

# Fetch the Open Library edition record for the given edition key.
def get_ol_edition [
    olid: string # Open Library edition key
]: nothing -> record {
    http get $"https://openlibrary.org/books/($olid).json"
}

# Enrich every row with Open Library data: openlibrary_key, isbn_13, isbn_10,
# publish_date, publisher, title, subtitle and fulltitle.
# Rows are processed with `par-each`; progress is reported on stderr.
def fill_details_from_openlibrary []: table -> table {
    par-each { |row|
        print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"

        # In debug mode never touch the network: rows without an author get no
        # key instead of falling through to the real lookup.
        let olid = if $debugging {
            if $row.author_sanitized != "" {
                fake_get_olid $row.author_sanitized $row.human_name_sanitized
            } else {
                ""
            }
        } else {
            get_olid $row.author_sanitized $row.human_name_sanitized
        }

        # `null` when no edition key was found; the optional accesses below
        # then all evaluate to null / their defaults.
        let result = if $olid != "" {
            print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
            if $debugging {
                fake_get_ol_edition $olid
            } else {
                get_ol_edition $olid
            }
        } else {
            null
        }

        $row
        | insert openlibrary_key $olid
        # `.0?` so an empty list yields null instead of an index error.
        | insert isbn_13 $result.isbn_13?.0?
        | insert isbn_10 $result.isbn_10?.0?
        | insert publish_date $result.publish_date?
        # Keep the publisher from the bundle page when Open Library has none.
        | upsert publisher ($result.publishers?.0? | default $row.publisher?)
        | upsert title ($result.title? | default $row.human_name?)
        | insert subtitle ($result.subtitle? | default "")
        | insert fulltitle ($result.full_title? | default "")
    }
}

# Reduce the enriched table to the columns used for the BookWyrm CSV import.
# The title is the full title when present, otherwise "title subtitle" with
# surrounding whitespace trimmed (no trailing space when subtitle is empty).
def to_bookwyrm_csv [] {
    $in
    | upsert title {|row|
        let full = $row.fulltitle? | default ""
        if $full != "" {
            $full
        } else {
            $"($row.title?) ($row.subtitle?)" | str trim
        }
    }
    | select author title openlibrary_key isbn_10 isbn_13
}

# Placeholder command; the optional `write` argument is currently unused.
def sample_func [write?: bool] {
    echo "also here"
}