humblist/grab.nu

#!/usr/bin/env nu

# Debug switch: when true, the commands below read canned fixtures instead of
# hitting the network. Declared with `const` (not `let`) because custom
# command bodies cannot see runtime `let` variables from the enclosing scope,
# while parse-time constants are visible inside `def`.
const debugging = false

# Script entry point.
#
# Fetches the bundle page at `url`, pulls the embedded bundle data out of it,
# turns it into a table of books, enriches each row via OpenLibrary, and
# returns the result rendered as CSV text.
def main [url: string]: nothing -> any {
  # Stage 1: page -> embedded bundle record.
  let bundle = (get_humble_page $url | grab_bundle_data)
  # Stage 2: bundle record -> book rows with query-ready author/title.
  let books = ($bundle | extract_books | url_sanitize_author_title)
  # Stage 3: look up each book on OpenLibrary.
  let enriched = ($books | fill_details_from_openlibrary)
  # Stage 4: keep only the export columns and serialise.
  $enriched | to_bookwyrm_csv | to csv
}

# Test stand-in for fetching the bundle page: returns the contents of a local
# file so development runs do not hammer the real bundle page.
# TODO: wire this into the tests.
def fake_get_page [file: string]: nothing -> string {
  let contents = (open $file)
  $contents
}

# Returns the bundle page for `url` as a string.
# With `$debugging` set, the saved copy at intermediate/out.html is returned
# instead and no HTTP request is made.
def get_humble_page [url: string]: nothing -> string {
  if not $debugging {
    http get $url | into string
  } else {
    fake_get_page intermediate/out.html
  }
}

# Extracts the bundle data embedded in the page HTML.
# The piped-in HTML is handed to `pup`, which selects the text of the
# `script#webpack-bundle-page-data` element; that text is parsed as JSON.
def grab_bundle_data []: string -> record {
  let page_html = $in
  $page_html | pup "script#webpack-bundle-page-data text{}" | from json
}

# Turns the bundle record into one row per item.
# Reads `bundleData.tier_item_data` (a record keyed by machine id) and emits
# the columns machine_id, human_name, cover_art, publisher and author.
# Only the first publisher / developer entry is used; a missing first
# developer yields an empty author string.
def extract_books []: record -> table {
  get bundleData.tier_item_data |
  transpose machine_id item |
  each {|entry|
    {
      machine_id: $entry.machine_id,
      human_name: $entry.item?.human_name,
      cover_art: $entry.item.resolved_paths.front_page_art_imgix_retina,
      publisher: $entry.item.publishers.0?.publisher-name,
      author: ($entry.item.developers.0?.developer-name | default "")
    }
  }
}

# Adds query-string-safe copies of the title and author to every row.
#
# Adds columns `human_name_sanitized` and `author_sanitized`. A missing (null)
# value becomes "". Values are percent-encoded so that characters with a
# meaning inside a URL query (`&`, `#`, `?`, `=`, a literal `+`, ...) cannot
# corrupt the search request; previously only spaces were replaced. Spaces
# are still rendered as `+`, as before.
def url_sanitize_author_title []: table -> table {
  insert human_name_sanitized {|row|
    $row.human_name | default "" | url encode --all | str replace --all "%20" "+"
  } |
  insert author_sanitized {|row|
    $row.author | default "" | url encode --all | str replace --all "%20" "+"
  }
}

# Offline stand-in for `get_olid`: ignores both arguments and always answers
# with the same fixed OpenLibrary edition id.
def fake_get_olid [author: string, title: string]: nothing -> string {
  let canned_olid = "OL52558571M"
  $canned_olid
}
# Searches OpenLibrary for a book and returns the `cover_edition_key` of the
# first hit, or "" when there is no hit or the hit has no such key.
# `author` and `title` are expected to be query-ready already; an empty one
# simply contributes no `author=` / `title=` term.
def get_olid [author: string, title: string]: nothing -> string {
  let title_term = if $title != "" { $"title=($title)" } else { "" }
  let author_term = if $author != "" { $"author=($author)" } else { "" }
  let response = (http get $"https://openlibrary.org/search.json?($title_term)&($author_term)")
  $response | get docs.0?.cover_edition_key? | default ""
}

# Offline stand-in for `get_ol_edition`: ignores `olid` and returns the saved
# response stored in intermediate/work_result.json.
def fake_get_ol_edition [olid: string]: nothing -> record {
  let canned_edition = (open intermediate/work_result.json)
  $canned_edition
}
# Fetches the OpenLibrary edition document for the given edition id.
def get_ol_edition [olid: string]: nothing -> record {
  let edition_url = $"https://openlibrary.org/books/($olid).json"
  http get $edition_url
}

# Enriches every book row with data from OpenLibrary.
#
# For each row (processed in parallel via `par-each`) this looks up an edition
# id from the sanitized author/title and, when one is found, fetches that
# edition. It then adds `openlibrary_key`, `isbn_13`, `isbn_10`,
# `publish_date`, `subtitle` and `fulltitle`, and sets `title` / `publisher`.
# Progress messages go to stderr so they do not pollute the CSV on stdout.
#
# Fields OpenLibrary does not provide end up null (ISBNs, publish date) or ""
# (subtitle, fulltitle); `title` falls back to the row's `human_name` and
# `publisher` to the publisher already on the row.
def fill_details_from_openlibrary []: table -> table {
  par-each { |row|
    print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"
    # NOTE(review): with debugging on, a row with an empty author still goes
    # to the real search endpoint - confirm that this is intended.
    let olid = if $debugging and $row.author_sanitized != "" {
            fake_get_olid $row.author_sanitized $row.human_name_sanitized
        } else {
            get_olid $row.author_sanitized $row.human_name_sanitized
        }
    # No edition id means nothing to fetch; `result` stays null and every
    # optional lookup below yields null.
    let result = if $olid != "" {
        print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
        if $debugging {
            fake_get_ol_edition $olid
        } else {
            get_ol_edition $olid
        }
    } else { null }
    # The `.0?` index is optional so a present-but-empty list gives null
    # instead of raising.
    $row |
        insert openlibrary_key $olid |
        insert isbn_13 $result.isbn_13?.0? |
        insert isbn_10 $result.isbn_10?.0? |
        insert publish_date $result.publish_date? |
        # Keep the bundle's publisher when OpenLibrary has none.
        upsert publisher ($result.publishers?.0? | default $row.publisher?) |
        upsert title ($result.title? | default $row.human_name?) |
        insert subtitle ($result.subtitle? | default "") |
        insert fulltitle ($result.full_title? | default "")
  }
}

# Reduces the enriched table to the export columns:
# author, title, openlibrary_key, isbn_10, isbn_13.
#
# `title` is the row's `fulltitle` when that is non-empty; otherwise it is
# "<title> <subtitle>", trimmed so that an empty subtitle does not leave a
# trailing space. A missing/null `fulltitle` is treated like an empty one.
def to_bookwyrm_csv [] {
    $in |
        upsert title {|row|
            if ($row.fulltitle? | is-empty) {
                $"($row.title?) ($row.subtitle?)" | str trim
            } else {
                $row.fulltitle?
            }
        } |
        select author title openlibrary_key isbn_10 isbn_13
}

# Sample command: returns a fixed string. The optional `write` flag is
# accepted but not used.
def sample_func [write?: bool] {
  let message = "also here"
  $message
}