initial commit
This commit is contained in:
commit
380a50ec33
6 changed files with 430 additions and 0 deletions
96
grab.nu
Executable file
96
grab.nu
Executable file
|
|
@ -0,0 +1,96 @@
|
|||
#!/usr/bin/env nu
|
||||
|
||||
# Debug switch: when true, the canned fixtures under `intermediate/` are used
# instead of live HTTP requests where the commands below support it.
# Declared as `const` (not `let`) because custom commands defined with `def`
# cannot see `let` variables from the enclosing scope, only constants.
const debugging = false
|
||||
|
||||
# Entry point: fetch the bundle page at `url`, extract its books, look each
# one up on Open Library, and emit the result as CSV text.
def main [url: string]: nothing -> any {
    # Stage 1: page HTML -> embedded bundle JSON -> one row per book.
    let books = (get_humble_page $url | grab_bundle_data | extract_books)

    # Stage 2: add URL-safe search terms, then Open Library details.
    let detailed = ($books | url_sanitize_author_title | fill_details_from_openlibrary)

    # Stage 3: reduce to the export columns and serialise.
    $detailed | to_bookwyrm_csv | to csv
}
|
||||
|
||||
# Stand-in for the live page fetch: returns the contents of the local `file`
# as read by `open`.
# TODO: Use for testing w/o hammering bundle page
def fake_get_page [file: string]: nothing -> string {
    return (open $file)
}
|
||||
|
||||
# Return the bundle page as a string.
# With `$debugging` off the page is downloaded from `url`; with it on, the
# saved copy at intermediate/out.html is read instead.
def get_humble_page [url: string]: nothing -> string {
    if not $debugging {
        return (http get $url | into string)
    }
    fake_get_page intermediate/out.html
}
|
||||
|
||||
# Extract the embedded bundle data from the page HTML on the pipeline.
# The HTML is fed to the external `pup` tool, which selects the text of the
# `script#webpack-bundle-page-data` element; that text is parsed as JSON.
def grab_bundle_data []: string -> record {
    $in | pup "script#webpack-bundle-page-data text{}" | from json
}
|
||||
|
||||
# Turn the bundle record into a table with one row per tier item.
# Output columns: machine_id, human_name, cover_art, publisher, author.
# `publisher` is null when the item lists no publishers; `author` falls back
# to an empty string when the item lists no developers.
def extract_books []: record -> table {
    $in |
    get bundleData.tier_item_data |
    # tier_item_data is keyed by machine id; transpose makes each key a row.
    transpose machine_id item |
    insert human_name {|entry| $entry.item?.human_name} |
    insert cover_art {|entry| $entry.item.resolved_paths.front_page_art_imgix_retina} |
    insert publisher {|entry| $entry.item.publishers.0?.publisher-name} |
    insert author {|entry| $entry.item.developers.0?.developer-name | default ""} |
    # The raw item record is no longer needed once the fields are lifted out.
    reject item
}
|
||||
|
||||
# Add URL-safe copies of each row's title and author for use in query strings.
# Adds the columns `human_name_sanitized` and `author_sanitized`; a missing
# (null) value becomes an empty string.
# Values are percent-encoded with `url encode --all` rather than only swapping
# spaces for "+", so characters such as `&`, `#`, `+` or `?` inside a title or
# author name cannot break or truncate the search query they are spliced into.
def url_sanitize_author_title []: table -> table {
    insert human_name_sanitized {|row| $row.human_name | default "" | url encode --all} |
    insert author_sanitized {|row| $row.author | default "" | url encode --all}
}
|
||||
|
||||
# Stand-in for `get_olid`: ignores both arguments and always returns the same
# fixed edition key.
def fake_get_olid [author: string, title: string]: nothing -> string {
    return "OL52558571M"
}
|
||||
# Search Open Library and return the cover edition key of the first hit.
# `author` and `title` are spliced into the query string as-is, so callers
# must pass them already URL-safe. An empty argument drops that search term.
# Returns "" when there are no results or the first result has no
# `cover_edition_key`.
def get_olid [author: string, title: string]: nothing -> string {
    let authorsearch = if $author == "" {
        ""
    } else {
        $"author=($author)"
    }
    let titlesearch = if $title == "" {
        ""
    } else {
        $"title=($title)"
    }
    let found = (http get $"https://openlibrary.org/search.json?($titlesearch)&($authorsearch)")
    $found | get docs.0?.cover_edition_key? | default ""
}
|
||||
|
||||
# Stand-in for `get_ol_edition`: ignores `olid` and returns the contents of
# the saved fixture intermediate/work_result.json.
def fake_get_ol_edition [olid: string]: nothing -> record {
    return (open intermediate/work_result.json)
}
|
||||
# Download the Open Library edition document for the edition key `olid`.
def get_ol_edition [olid: string]: nothing -> record {
    let edition_url = $"https://openlibrary.org/books/($olid).json"
    http get $edition_url
}
|
||||
|
||||
# Enrich every book row with edition details from Open Library.
# Expects the columns human_name, author, human_name_sanitized and
# author_sanitized on each input row.
# Adds: openlibrary_key, isbn_13, isbn_10, publish_date, subtitle, fulltitle.
# Sets: title (edition title, else the row's human_name) and publisher
# (edition publisher, else whatever the row already carried).
# Progress messages go to stderr so stdout stays clean for the CSV.
def fill_details_from_openlibrary []: table -> table {
    # Rows are processed in parallel; --keep-order returns them in input order
    # so repeated runs produce the same output ordering.
    par-each --keep-order { |row|
        print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"
        # The canned key is only used in debug mode for rows that have an
        # author; every other case performs the real search.
        let olid = if $debugging and $row.author_sanitized != "" {
            fake_get_olid $row.author_sanitized $row.human_name_sanitized
        } else {
            get_olid $row.author_sanitized $row.human_name_sanitized
        }
        # No key means nothing to look up: leave the edition as null and let
        # the optional accesses below resolve to null / defaults.
        let result = if $olid != "" {
            print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
            if $debugging {
                fake_get_ol_edition $olid
            } else {
                get_ol_edition $olid
            }
        } else { null }
        $row |
        insert openlibrary_key $olid |
        # `.0?` so an empty list yields null instead of raising.
        insert isbn_13 $result.isbn_13?.0? |
        insert isbn_10 $result.isbn_10?.0? |
        insert publish_date $result.publish_date? |
        # Keep the publisher already on the row when the edition has none,
        # instead of overwriting it with null.
        upsert publisher ($result.publishers?.0? | default $row.publisher?) |
        upsert title ($result.title? | default $row.human_name?) |
        insert subtitle ($result.subtitle? | default "") |
        insert fulltitle ($result.full_title? | default "")
    }
}
|
||||
|
||||
# Reduce the enriched table to the export columns:
# author, title, openlibrary_key, isbn_10, isbn_13.
# `title` becomes the row's `fulltitle` when that is non-empty; otherwise it is
# "<title> <subtitle>". The combined form is trimmed so a missing subtitle does
# not leave a trailing space, and a null `fulltitle` is treated like an empty
# one rather than producing a null title.
def to_bookwyrm_csv [] {
    $in |
    upsert title {|row|
        let full = ($row.fulltitle? | default "")
        if $full != "" {
            $full
        } else {
            $"($row.title?) ($row.subtitle?)" | str trim
        }
    } |
    select author title openlibrary_key isbn_10 isbn_13
}
|
||||
|
||||
# Placeholder command: returns the string "also here".
# The optional `write` flag is accepted but not read by the body.
def sample_func [write?: bool] {
    "also here"
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue