diff --git a/popcorn.py b/popcorn.py index 29e9192..e05abd6 100644 --- a/popcorn.py +++ b/popcorn.py @@ -5,7 +5,9 @@ app = marimo.App(width="medium") with app.setup: # Initialization code that runs beimpofore all other cells + import json import re + from pathlib import Path import lets_plot as lp import marimo as mo @@ -58,15 +60,31 @@ def _(sizes_df): @app.cell -def _(sizes_df): +def _(): + df = ( + pl.scan_ndjson("data/daily/*", include_file_paths="file") + .head(200) # FIXME: take out after debug + .with_columns( + pl.col("file") + .str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}") + .str.to_date() + .alias("date") + ) + .select("date", pl.col("Packages").struct.unnest()) + .fill_null(0) + .unpivot(index="date", variable_name="package", value_name="downloads") + .collect() + ) + df + return + + ( lp.ggplot(sizes_df, lp.aes(x="date", y="size")) + lp.geom_point() + lp.geom_smooth(method="lowess") + lp.labs( title="", - subtitle="", - caption="", ) ) return @@ -116,5 +134,18 @@ def _(sizes_df): return +# further ideas: +# - which kernels have been downloaded when? (simplified for semver) +# - when did specific kernels enter the repos? +# +# - which arches are/were most prevalent over time? +# - have the arches been mostly even relative to each other? +# +# - what does unique install mean? +# +# - which Packages had the most unique versions, and which the fewest? +# - which pkg had the most downloads of a single version? +# - for which pkg were the version downloads the most spread out? + if __name__ == "__main__": app.run()