From cbda8dca5527028fe86a913d9091694bd49f29ba Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Sat, 27 Sep 2025 11:34:52 +0200 Subject: [PATCH] Parse JSON Package downloads --- popcorn.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/popcorn.py b/popcorn.py index 29e9192..e05abd6 100644 --- a/popcorn.py +++ b/popcorn.py @@ -5,7 +5,9 @@ app = marimo.App(width="medium") with app.setup: # Initialization code that runs beimpofore all other cells + import json import re + from pathlib import Path import lets_plot as lp import marimo as mo @@ -58,15 +60,31 @@ def _(sizes_df): @app.cell -def _(sizes_df): +def _(): + df = ( + pl.scan_ndjson("data/daily/*", include_file_paths="file") + .head(200) # FIXME: take out after debug + .with_columns( + pl.col("file") + .str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}") + .str.to_date() + .alias("date") + ) + .select("date", pl.col("Packages").struct.unnest()) + .fill_null(0) + .unpivot(index="date", variable_name="package", value_name="downloads") + .collect() + ) + df + return + + ( lp.ggplot(sizes_df, lp.aes(x="date", y="size")) + lp.geom_point() + lp.geom_smooth(method="lowess") + lp.labs( title="", - subtitle="", - caption="", ) ) return @@ -116,5 +134,18 @@ def _(sizes_df): return +# further ideas: +# - which kernels have been downloaded when? (simplified for semver) +# - when did specific kernels enter the repos? +# +# - which arches are/were most prevalent over time? +# - have the arches been mostly even relative to each other? +# +# - what does unique install mean? +# +# - which Packages had the most unique versions, fewest versions +# - which pkg had the most downloads of a single version? +# - for which pkg were the version downloads the most spread out? + if __name__ == "__main__": app.run()