initial commit
This commit is contained in:
commit
380a50ec33
6 changed files with 430 additions and 0 deletions
252
.gitignore
vendored
Normal file
252
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
/test/
|
||||
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/linux,markdown,ansible,terraform,vim,nushell,python
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,markdown,ansible,terraform,vim,nushell,python
|
||||
|
||||
### Ansible ###
|
||||
*.retry
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### Terraform ###
|
||||
# Local .terraform directories
|
||||
**/.terraform/*
|
||||
|
||||
# .tfstate files
|
||||
*.tfstate
|
||||
*.tfstate.*
|
||||
|
||||
# Crash log files
|
||||
crash.log
|
||||
crash.*.log
|
||||
|
||||
# Exclude all .tfvars files, which are likely to contain sensitive data, such as
|
||||
# password, private keys, and other secrets. These should not be part of version
|
||||
# control as they are data points which are potentially sensitive and subject
|
||||
# to change depending on the environment.
|
||||
*.tfvars
|
||||
*.tfvars.json
|
||||
|
||||
# Ignore override files as they are usually used to override resources locally and so
|
||||
# are not checked in
|
||||
override.tf
|
||||
override.tf.json
|
||||
*_override.tf
|
||||
*_override.tf.json
|
||||
|
||||
# Include override files you do wish to add to version control using negated pattern
|
||||
# !example_override.tf
|
||||
|
||||
# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
|
||||
# example: *tfplan*
|
||||
|
||||
# Ignore CLI configuration files
|
||||
.terraformrc
|
||||
terraform.rc
|
||||
|
||||
### Vim ###
|
||||
# Swap
|
||||
[._]*.s[a-v][a-z]
|
||||
# comment out the next line if you don't need vector files
!*.svg
|
||||
[._]*.sw[a-p]
|
||||
[._]s[a-rt-v][a-z]
|
||||
[._]ss[a-gi-z]
|
||||
[._]sw[a-p]
|
||||
|
||||
# Session
|
||||
Session.vim
|
||||
Sessionx.vim
|
||||
|
||||
# Temporary
|
||||
.netrwhist
|
||||
# Auto-generated tag files
|
||||
tags
|
||||
# Persistent undo
|
||||
[._]*.un~
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,markdown,ansible,terraform,vim,nushell,python
|
||||
42
README.md
Normal file
42
README.md
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# Create a BookWyrm-ready import list from a Humble Bundle book bundle
|
||||
|
||||
Usage: `./grab.nu <link-to-humble-book-bundle-page>`
|
||||
|
||||
For example: `./grab.nu https://www.humblebundle.com/books/data-engineering-science-oreilly-books | save -f data-eng.csv`
|
||||
|
||||
Book details are looked up on Open Library in parallel. The lookup often finds no match, so not _all_ books end up with an Open Library key or ISBN.
|
||||
|
||||
## Internal logic
|
||||
|
||||
- get humble bundle page
|
||||
`http get https://www.humblebundle.com/books/software-architecture-pearson-books`
|
||||
- extract correct element for json data
|
||||
`open out.html | pup "script#webpack-bundle-page-data text{}" | from json`
|
||||
|
||||
- get transposed version
|
||||
`open books.json | get bundleData.tier_item_data | transpose machine_id item | insert human_name {$in.item.human_name} | insert cover_art {$in.item.resolved_paths.front_page_art_imgix_retina} | insert publisher {$in.item.publishers.0.publisher-name} | reject item`
|
||||
- get details of books into tabled format
|
||||
`open books_transposed.json | where machine_id != "code_org" | insert human_name {$in.item.human_name} | insert cover_art {$in.item.resolved_paths.front_page_art_imgix_retina} | insert publisher {$in.item.publishers.0.publisher-name} | insert author {$in.item.developers.0.developer-name} | reject item | save books.csv`
|
||||
|
||||
- search openlibrary for the book
|
||||
`http get https://openlibrary.org/search.json?author=Oliver+Goldman&title=Effective+Software+Architecture | save search_result.json`
|
||||
- get isbn from openlibrary (currently first result, primary edition)
|
||||
`open search_result.json | get docs.0.cover_edition_key | http get $"https://openlibrary.org/books/($in).json" | save work_result.json`
|
||||
- fill out other information from edition info if available
|
||||
- publishers
|
||||
- publish_date
|
||||
- languages
|
||||
- authors?
|
||||
- title
|
||||
- subtitle
|
||||
- isbn_13
|
||||
- isbn_10 if exists
|
||||
- olid (Openlibrary ID) = key above
|
||||
|
||||
- save it into csv
|
||||
- title
|
||||
- author_text (First Last, First Last)
|
||||
- remote_id (link, nothing)
|
||||
- openlibrary_key (olid above)
|
||||
- isbn_10
|
||||
- isbn_13
|
||||
96
grab.nu
Executable file
96
grab.nu
Executable file
|
|
@ -0,0 +1,96 @@
|
|||
#!/usr/bin/env nu
|
||||
|
||||
# Debug switch read by the custom commands below: when true they use local
# fixtures / canned values instead of issuing HTTP requests.
# Declared as `const` because `def` bodies cannot capture `let` variables from
# the enclosing script scope, whereas parse-time constants are visible in them.
const debugging = false
|
||||
|
||||
# Script entry point.
#
# Fetches the bundle page at `url`, extracts the book list, enriches each book
# via the Open Library lookup commands and returns the result as CSV text.
def main [url: string]: nothing -> any {
  # Stage 1: bundle page -> table of books.
  let books = (get_humble_page $url | grab_bundle_data | extract_books)

  # Stage 2: add URL-ready search terms and the Open Library details.
  let enriched = ($books | url_sanitize_author_title | fill_details_from_openlibrary)

  # Stage 3: keep only the export columns and serialise.
  $enriched | to_bookwyrm_csv | to csv
}
|
||||
|
||||
# TODO: Use for testing w/o hammering bundle page
|
||||
# Stand-in for the page download: returns whatever `open` yields for the
# local file `file`.
def fake_get_page [file: string]: nothing -> string {
  return (open $file)
}
|
||||
|
||||
# Return the bundle page as a string.
#
# Normally downloads `url`; when `$debugging` is set, the saved copy at
# intermediate/out.html is read instead and `url` is ignored.
def get_humble_page [url: string]: nothing -> string {
  if not $debugging {
    return (http get $url | into string)
  }
  fake_get_page intermediate/out.html
}
|
||||
|
||||
# Extract the JSON embedded in the page's `script#webpack-bundle-page-data`
# element and parse it.
#
# Input: the page HTML as a string. Relies on the external `pup` tool.
def grab_bundle_data []: string -> record {
  $in |
  pup "script#webpack-bundle-page-data text{}" |
  from json
}
|
||||
|
||||
# Turn the parsed bundle data into one row per bundle item.
#
# Reads `bundleData.tier_item_data` (a record keyed by machine id) and yields
# a table with the columns machine_id, human_name, cover_art, publisher and
# author. A missing first developer gives an empty author string.
def extract_books []: record -> table {
  let items = ($in | get bundleData.tier_item_data | transpose machine_id item)

  $items |
  insert human_name {|row| $row.item?.human_name } |
  insert cover_art {|row| $row.item.resolved_paths.front_page_art_imgix_retina } |
  insert publisher {|row| $row.item.publishers.0?.publisher-name } |
  insert author {|row| $row.item.developers.0?.developer-name | default "" } |
  reject item
}
|
||||
|
||||
# Add URL-safe copies of each row's title and author.
#
# Adds two columns:
#   human_name_sanitized - `human_name`, percent-encoded
#   author_sanitized     - `author`, percent-encoded
# Missing values become empty strings.
#
# The values are interpolated verbatim into a query string by the lookup
# command, so every reserved character (`&`, `#`, `+`, `?`, ...) has to be
# encoded, not only spaces; otherwise such a title or author would corrupt
# the request.
def url_sanitize_author_title []: table -> table {
  insert human_name_sanitized {|row| $row.human_name | default "" | url encode --all } |
  insert author_sanitized {|row| $row.author | default "" | url encode --all }
}
|
||||
|
||||
# Stand-in for the Open Library search: ignores both arguments and always
# returns the same fixed edition ID.
def fake_get_olid [author: string, title: string]: nothing -> string {
  return "OL52558571M"
}
|
||||
# Search Open Library and return the cover edition key of the first hit.
#
# Parameters:
#   author - author search term, already URL-safe; "" to omit
#   title  - title search term, already URL-safe; "" to omit
# Returns the `cover_edition_key` of the first entry in `docs`, or "" when
# there is no hit, the hit has no such key, or both terms are empty.
def get_olid [author: string, title: string]: nothing -> string {
  # Only include the fields that have a value, so the query string never
  # contains a stray `&` or an empty parameter.
  let params = ([
    (if $title != "" { $"title=($title)" })
    (if $author != "" { $"author=($author)" })
  ] | compact)

  # Nothing to search for: skip the request instead of sending a filterless query.
  if ($params | is-empty) {
    return ""
  }

  let query = ($params | str join "&")
  http get $"https://openlibrary.org/search.json?($query)" |
  get docs.0?.cover_edition_key? |
  default ""
}
|
||||
|
||||
# Stand-in for the edition download: ignores `olid` and loads the saved
# response from intermediate/work_result.json.
def fake_get_ol_edition [olid: string]: nothing -> record {
  let fixture = "intermediate/work_result.json"
  open $fixture
}
|
||||
# Download the Open Library edition record for the edition ID `olid`.
def get_ol_edition [olid: string]: nothing -> record {
  let edition_url = $"https://openlibrary.org/books/($olid).json"
  http get $edition_url
}
|
||||
|
||||
# Enrich every book row with details from Open Library.
#
# Input: table carrying human_name, author, human_name_sanitized and
# author_sanitized (plus, optionally, publisher).
# Output: the same rows with openlibrary_key, isbn_13, isbn_10, publish_date,
# publisher, title, subtitle and fulltitle added or updated. Rows without a
# match keep an empty openlibrary_key and null ISBNs.
#
# Rows are processed with `par-each`; progress messages go to stderr so they
# do not mix with the data written to stdout.
def fill_details_from_openlibrary []: table -> table {
  par-each {|row|
    print -e $"Grabbing OLID for ($row.human_name) by ($row.author)"

    # In debugging mode the canned OLID is only used for rows that have an
    # author; rows without one still go through the real search.
    let olid = if $debugging and $row.author_sanitized != "" {
      fake_get_olid $row.author_sanitized $row.human_name_sanitized
    } else {
      get_olid $row.author_sanitized $row.human_name_sanitized
    }

    # `result` stays null when no edition was found; the optional cell paths
    # below then all evaluate to null.
    let result = if $olid != "" {
      print -e $"Grabbing edition \(($olid)\) info for ($row.human_name)"
      if $debugging {
        fake_get_ol_edition $olid
      } else {
        get_ol_edition $olid
      }
    } else {
      null
    }

    $row |
    insert openlibrary_key $olid |
    # `.0?` so an empty list yields null instead of an index error.
    insert isbn_13 $result.isbn_13?.0? |
    insert isbn_10 $result.isbn_10?.0? |
    insert publish_date $result.publish_date? |
    # Keep the publisher the row already had when the edition has none.
    upsert publisher ($result.publishers?.0? | default $row.publisher?) |
    upsert title ($result.title? | default $row.human_name?) |
    insert subtitle ($result.subtitle? | default "") |
    insert fulltitle ($result.full_title? | default "")
  }
}
|
||||
|
||||
# Reduce enriched book rows to the columns exported for the import list.
#
# Input: table with author, title, subtitle, fulltitle, openlibrary_key,
# isbn_10 and isbn_13 columns.
# Output: table with exactly author, title, openlibrary_key, isbn_10, isbn_13.
# The exported title is `fulltitle` when that is non-empty, otherwise
# "<title> <subtitle>".
def to_bookwyrm_csv [] {
  $in |
  upsert title {|row|
    # Treat a missing/null fulltitle like an empty one.
    let full = ($row.fulltitle? | default "")
    if $full != "" {
      $full
    } else {
      # Trim so that an empty subtitle does not leave a trailing space.
      $"($row.title?) ($row.subtitle?)" | str trim
    }
  } |
  select author title openlibrary_key isbn_10 isbn_13
}
|
||||
|
||||
# Placeholder command; not called by any other command visible in this script.
# Ignores the optional `write` flag and returns a fixed string.
def sample_func [write?: bool] {
  "also here"
}
|
||||
18
output/maths-and-physics.csv
Normal file
18
output/maths-and-physics.csv
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
author,title,openlibrary_key,isbn_10,isbn_13
|
||||
Deborah J. Rumsey,Statistics All-In-One for Dummies ,OL37984899M,,9781119902560
|
||||
Galen C. Duree Jr.,Optics For Dummies ,,,
|
||||
Mary Jane Sterling,"Pre-Calculus For Dummies, 3rd Edition ",,,
|
||||
Mark Ryan,"Calculus For Dummies, 2nd Edition ",,,
|
||||
Mark Ryan,Geometry Essentials For Dummies ,,,
|
||||
Mary Jane Sterling,Algebra I essentials for dummies,OL27015895M,0470618345,9780470618349
|
||||
Patrick Jones,Calculus: 1001 Practice Problems For Dummies (+ Free Online Practice) ,,,
|
||||
Mike Pauken,Thermodynamics For Dummies ,,,
|
||||
Mary Jane Sterling,Algebra II Essentials For Dummies ,,,
|
||||
Steven Holzner,"Physics I For Dummies, 3rd Edition ",,,
|
||||
Steven Holzner,Physics II for dummies ,OL25370848M,0470538066,9780470538067
|
||||
Steven Holzner,Physics essentials for dummies,OL27169436M,0470618418,9780470618417
|
||||
The Experts at Dummies,Physics I: 501 Practice Problems For Dummies (+ Free Online Practice) ,,,
|
||||
Barry Schoenborn,Math For Real Life For Dummies ,,,
|
||||
The Experts at Dummies,"Physics I Workbook For Dummies with Online Practice, 3rd Edition ",,,
|
||||
Mary Jane Sterling,"Trigonometry For Dummies, 3rd Edition ",,,
|
||||
www.buildon.org,buildOn ,,,
|
||||
|
21
output/software-architecture.csv
Normal file
21
output/software-architecture.csv
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
author,title,openlibrary_key,isbn_10,isbn_13
|
||||
Zimmerman et al,Patterns for API Design ,,,
|
||||
Code.org,Code.org ,,,
|
||||
Vernon / Jaskula,Strategic Monoliths and Microservices Driving Innovation Using Purposeful Architecture,OL34773512M,,9780137355464
|
||||
Wiegers,Software Requirements Essentials Core Practices for Successful Business Analysis,OL48201406M,,9780138190224
|
||||
Emison,Serverless as a Game Changer ,,,
|
||||
Moghe,The Async-First Playbook ,,,
|
||||
Metz,"Practical Object-Oriented Design, 2/e ",,,
|
||||
Susanne Kaiser,Architecture for Flow ,,,
|
||||
Hofer / Schwentner,"Domain Storytelling A Collaborative, Visual, and Agile Way to Build Domain-Driven Software",OL34795902M,,9780137458912
|
||||
Higginbotham,Principles of Web API Design Delivering Value with APIs and Microservices,OL34773513M,,9780137355631
|
||||
Sites,Understanding Software Dynamics ,,,
|
||||
Oliver Goldman,Effective Software Architecture Building Better Software Faster,OL52558571M,,9780138249311
|
||||
Lowy,Righting Software ,OL29450091M,,9780136524038
|
||||
Charpentier,Functional and Concurrent Programming ,,,
|
||||
Farley,Modern Software Engineering Doing What Really Works to Build Better Software Faster,OL34779880M,,9780137314911
|
||||
Erder et al,Continuous Architecture in Practice ,,,
|
||||
Srinath Perera,Software Architecture and Decision-Making ,,,
|
||||
Bass et al,"Software Architecture in Practice, 4/e ",,,
|
||||
Vlad Khononov,Balancing Coupling in Software Design ,,,
|
||||
Vernon,Domain-Driven Design Distilled ,OL26836801M,0134434420,9780134434421
|
||||
|
1
output/software-architecture.nuon
Normal file
1
output/software-architecture.nuon
Normal file
|
|
@ -0,0 +1 @@
|
|||
[[author, title, openlibrary_key, "isbn_10", "isbn_13"]; ["Zimmerman et al", "Patterns for API Design ", "", null, null], ["Srinath Perera", "Software Architecture and Decision-Making ", "", null, null], ["Bass et al", "Software Architecture in Practice, 4/e ", "", null, null], ["Vlad Khononov", "Balancing Coupling in Software Design ", "", null, null], [Vernon, "Domain-Driven Design Distilled ", "OL26836801M", "0134434420", "9780134434421"], [Moghe, "The Async-First Playbook ", "", null, null], ["Code.org", "Code.org ", "", null, null], ["Susanne Kaiser", "Architecture for Flow ", "", null, null], ["Hofer / Schwentner", "Domain Storytelling A Collaborative, Visual, and Agile Way to Build Domain-Driven Software", "OL34795902M", null, "9780137458912"], [Charpentier, "Functional and Concurrent Programming ", "", null, null], [Emison, "Serverless as a Game Changer ", "", null, null], ["Erder et al", "Continuous Architecture in Practice ", "", null, null], [Metz, "Practical Object-Oriented Design, 2/e ", "", null, null], ["Vernon / Jaskula", "Strategic Monoliths and Microservices Driving Innovation Using Purposeful Architecture", "OL34773512M", null, "9780137355464"], [Wiegers, "Software Requirements Essentials Core Practices for Successful Business Analysis", "OL48201406M", null, "9780138190224"], [Farley, "Modern Software Engineering Doing What Really Works to Build Better Software Faster", "OL34779880M", null, "9780137314911"], [Sites, "Understanding Software Dynamics ", "", null, null], ["Oliver Goldman", "Effective Software Architecture Building Better Software Faster", "OL52558571M", null, "9780138249311"], [Lowy, "Righting Software ", "OL29450091M", null, "9780136524038"], [Higginbotham, "Principles of Web API Design Delivering Value with APIs and Microservices", "OL34773513M", null, "9780137355631"]]
|
||||
Loading…
Add table
Add a link
Reference in a new issue