96 lines
3.1 KiB
Text
Executable file
96 lines
3.1 KiB
Text
Executable file
#!/usr/bin/env nu
|
|
|
|
# Debug switch: when true, the pipeline prefers the canned `fake_*` helpers
# over live HTTP requests.
# Declared with `const` (parse-time constant) rather than `let` so that it is
# in scope inside the `def` bodies that read it; `def` does not capture
# runtime `let` variables from the enclosing script.
const debugging = false
|
|
|
|
# Script entry point.
#
# Fetches the bundle page at `url`, extracts the book list, enriches each book
# via the Open Library helpers and emits the result as CSV text.
def main [url: string]: nothing -> any {
  (
    get_humble_page $url
    | grab_bundle_data
    | extract_books
    | url_sanitize_author_title
    | fill_details_from_openlibrary
    | to_bookwyrm_csv
    | to csv
  )
}
|
|
|
|
# Debug stand-in for fetching a page: returns the contents of a local file.
# TODO: Use for testing w/o hammering bundle page
#
# file: path of the saved page to read.
def fake_get_page [file: string]: nothing -> string {
  # --raw disables extension-based parsing, so the result is always the file's
  # text (matching the declared `string` output) whatever the fixture is named.
  open --raw $file
}
|
|
|
|
# Returns the bundle page as a string.
#
# With `$debugging` off the page is downloaded from `url`; with it on, the
# saved copy at intermediate/out.html is read through `fake_get_page` instead.
def get_humble_page [url: string]: nothing -> string {
  if not $debugging {
    http get $url | into string
  } else {
    fake_get_page intermediate/out.html
  }
}
|
|
|
|
# Extracts the embedded bundle JSON from the page HTML received on input.
#
# Pipes the HTML through the external `pup` tool to select the text of the
# `script#webpack-bundle-page-data` element, then parses that text as JSON.
def grab_bundle_data []: string -> record {
  (
    $in
    | pup "script#webpack-bundle-page-data text{}"
    | from json
  )
}
|
|
|
|
# Flattens `bundleData.tier_item_data` into one row per item.
#
# Output columns: machine_id (the item's key), human_name, cover_art,
# publisher (first publisher's name) and author (first developer's name,
# "" when absent). The nested `item` record is dropped at the end.
def extract_books []: record -> table {
  (
    $in
    | get bundleData.tier_item_data
    | transpose machine_id item
    | insert human_name {|entry| $entry.item?.human_name }
    | insert cover_art {|entry| $entry.item.resolved_paths.front_page_art_imgix_retina }
    | insert publisher {|entry| $entry.item.publishers.0?.publisher-name }
    | insert author {|entry| $entry.item.developers.0?.developer-name | default "" }
    | reject item
  )
}
|
|
|
|
# Adds URL-safe copies of each row's title and author.
#
# Inserts `human_name_sanitized` and `author_sanitized`, holding the
# percent-encoded `human_name` and `author` (null is treated as "").
def url_sanitize_author_title []: table -> table {
  (
    $in
    # --all also encodes reserved characters such as `&`, `#`, `?` and `=`,
    # which would otherwise break a query string these values are spliced into.
    | insert human_name_sanitized {|row| $row.human_name? | default "" | url encode --all }
    | insert author_sanitized {|row| $row.author? | default "" | url encode --all }
  )
}
|
|
|
|
# Debug stand-in for `get_olid`: ignores both arguments and always returns
# the same fixed Open Library ID.
def fake_get_olid [author: string, title: string]: nothing -> string {
  return "OL52558571M"
}
|
|
# Looks up a book on the Open Library search endpoint and returns the
# `cover_edition_key` of the first hit.
#
# author, title: search terms, already URL-safe; an empty string means
#                "do not filter on this field".
# Returns "" when there is nothing to search for, when the search has no
# hits, or when the first hit carries no `cover_edition_key`.
def get_olid [author: string, title: string]: nothing -> string {
  # Keep only the terms that were actually supplied so the query string has
  # no empty fragments or dangling `&`.
  let terms = (
    [[key value]; [title $title] [author $author]]
    | where value != ""
    | each {|term| $"($term.key)=($term.value)" }
  )

  # Nothing to search for: skip the request instead of sending an
  # unfiltered query.
  if ($terms | is-empty) {
    return ""
  }

  let query = ($terms | str join "&")
  (
    http get $"https://openlibrary.org/search.json?($query)"
    | get docs.0?.cover_edition_key?
    | default ""
  )
}
|
|
|
|
# Debug stand-in for `get_ol_edition`: ignores `olid` and returns whatever
# is stored in the local fixture intermediate/work_result.json.
def fake_get_ol_edition [olid: string]: nothing -> record {
  return (open intermediate/work_result.json)
}
|
|
# Fetches the Open Library JSON document for the book identified by `olid`.
def get_ol_edition [olid: string]: nothing -> record {
  let edition_url = $"https://openlibrary.org/books/($olid).json"
  http get $edition_url
}
|
|
|
|
# Enriches every input row with data looked up on Open Library.
#
# Rows are processed with `par-each`. For each row the Open Library ID is
# resolved from the sanitized author/title; if one is found, its edition
# record is fetched. Added columns: openlibrary_key, isbn_13, isbn_10,
# publish_date, subtitle, fulltitle. `publisher` and `title` are replaced by
# the Open Library values when available and otherwise keep the row's own
# publisher / human_name. Progress messages go to stderr.
def fill_details_from_openlibrary []: table -> table {
  par-each {|row|
    print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"

    # NOTE(review): in debug mode a row with an empty author still goes
    # through the live `get_olid` — confirm this is intended.
    let olid = if $debugging and $row.author_sanitized != "" {
      fake_get_olid $row.author_sanitized $row.human_name_sanitized
    } else {
      get_olid $row.author_sanitized $row.human_name_sanitized
    }

    # `result` stays null when no ID was found; every access below is
    # optional so a missing record or field simply yields null.
    let result = if $olid != "" {
      print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
      if $debugging {
        fake_get_ol_edition $olid
      } else {
        get_ol_edition $olid
      }
    } else {
      null
    }

    (
      $row
      | insert openlibrary_key $olid
      # `.0?` tolerates an empty list as well as a missing field.
      | insert isbn_13 $result.isbn_13?.0?
      | insert isbn_10 $result.isbn_10?.0?
      | insert publish_date $result.publish_date?
      # Fall back to the publisher already on the row instead of overwriting
      # it with null, mirroring the title fallback below.
      | upsert publisher ($result.publishers?.0? | default $row.publisher?)
      | upsert title ($result.title? | default $row.human_name?)
      | insert subtitle ($result.subtitle? | default "")
      | insert fulltitle ($result.full_title? | default "")
    )
  }
}
|
|
|
|
# Reduces the enriched table to the columns kept for the BookWyrm CSV.
#
# `title` becomes `fulltitle` when that is non-empty, otherwise
# "<title> <subtitle>" with surrounding whitespace trimmed. The output keeps
# only: author, title, openlibrary_key, isbn_10, isbn_13.
def to_bookwyrm_csv [] {
  (
    $in
    | upsert title {|row|
      # Treat a missing/null fulltitle like an empty one.
      let full = ($row.fulltitle? | default "")
      if $full != "" {
        $full
      } else {
        # Trim so an empty subtitle does not leave a trailing space.
        $"($row.title?) ($row.subtitle?)" | str trim
      }
    }
    | select author title openlibrary_key isbn_10 isbn_13
  )
}
|
|
|
|
# Placeholder command: the optional `write` flag is accepted but unused;
# the command always yields the string "also here".
def sample_func [write?: bool] {
  "also here"
}
|