Parse JSON Package downloads
This commit is contained in:
parent
97e4d256bb
commit
cbda8dca55
1 changed file with 34 additions and 3 deletions
37
popcorn.py
37
popcorn.py
|
|
@ -5,7 +5,9 @@ app = marimo.App(width="medium")
|
||||||
|
|
||||||
with app.setup:
|
with app.setup:
|
||||||
# Initialization code that runs before all other cells
|
# Initialization code that runs before all other cells
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import lets_plot as lp
|
import lets_plot as lp
|
||||||
import marimo as mo
|
import marimo as mo
|
||||||
|
|
@ -58,15 +60,31 @@ def _(sizes_df):
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(sizes_df):
|
def _():
|
||||||
|
df = (
|
||||||
|
pl.scan_ndjson("data/daily/*", include_file_paths="file")
|
||||||
|
.head(200) # FIXME: take out after debug
|
||||||
|
.with_columns(
|
||||||
|
pl.col("file")
|
||||||
|
.str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}")
|
||||||
|
.str.to_date()
|
||||||
|
.alias("date")
|
||||||
|
)
|
||||||
|
.select("date", pl.col("Packages").struct.unnest())
|
||||||
|
.fill_null(0)
|
||||||
|
.unpivot(index="date", variable_name="package", value_name="downloads")
|
||||||
|
.collect()
|
||||||
|
)
|
||||||
|
df
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
(
|
(
|
||||||
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
||||||
+ lp.geom_point()
|
+ lp.geom_point()
|
||||||
+ lp.geom_smooth(method="lowess")
|
+ lp.geom_smooth(method="lowess")
|
||||||
+ lp.labs(
|
+ lp.labs(
|
||||||
title="",
|
title="",
|
||||||
subtitle="",
|
|
||||||
caption="",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
@ -116,5 +134,18 @@ def _(sizes_df):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
# further ideas:
|
||||||
|
# - which kernels have been downloaded when? (simplified for semver)
|
||||||
|
# - when did specific kernels enter the repos?
|
||||||
|
#
|
||||||
|
# - which arches are/were most prevalent over time?
|
||||||
|
# - have the arches been mostly even relative to each other?
|
||||||
|
#
|
||||||
|
# - what does unique install mean?
|
||||||
|
#
|
||||||
|
# - which Packages had the most unique versions, least versions
|
||||||
|
# - which pkg had the most downloads of a single version?
|
||||||
|
# - for which pkg were the per-version downloads the most spread out?
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run()
|
app.run()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue