From 17016059393a6e6a8d4b9bcb94978dcc61807ff9 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Sat, 27 Sep 2025 11:27:29 +0200
Subject: [PATCH] Add basic size analysis

---
Review notes (this section is ignored by git-am):
* parse_size: the fraction is optional and the dot is literal, so "5 kB" parses.
* The first cell returns sizes_df and sizes_df_null for the cells that take them.
* Rows are split on size_num in both directions; the scatter plot maps y to size_num.
* The index line below still carries the blob hashes of the original submission.

 popcorn.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/popcorn.py b/popcorn.py
index 0f1a8da..f6add6c 100644
--- a/popcorn.py
+++ b/popcorn.py
@@ -16,3 +16,80 @@ def _():
     mo.md(r"""# Void Linux 'Popcorn' package repository stat analysis""")
     return
 
+
+@app.cell
+def _():
+    # NOTE(review): assumes `re` and `pl` are in scope; their imports are
+    # outside this hunk.
+    def parse_size(size_str):
+        """Return the figure in front of " kB" in *size_str* as a float.
+
+        Handles whole ("5 kB") and decimal ("12.5 kB") figures. Returns None
+        when *size_str* carries no kB figure; those rows become the
+        "missing days" frame below.
+        """
+        # Optional fraction with a literal dot, so "5 kB" and "12.5 kB"
+        # both parse.
+        match = re.search(r"(\d+(?:\.\d+)?) kB", size_str)
+        return float(match.group(1)) if match else None
+
+    # `date` comes from the file name, `size_num` from the size text.
+    sizes_df = (
+        pl.read_csv("data/file_sizes.csv")
+        .with_columns(
+            pl.col("name")
+            .str.replace(r"data/(\d{4}-\d{2}-\d{2})\.json", "${1}")
+            .str.to_date()
+            .alias("date"),
+            pl.col("size")
+            .map_elements(parse_size, return_dtype=pl.Float32)
+            .alias("size_num"),
+        )
+        .select(["date", "size_num", "size", "modified"])
+    )
+    # Split on the parsed value so the two frames are exact complements.
+    sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
+    sizes_df = sizes_df.filter(pl.col("size_num").is_not_null())
+    return sizes_df, sizes_df_null
+
+
+@app.cell
+def _(sizes_df_null):
+    # Table of the days whose size text held no kB figure.
+    sizes_df_null.select(["date", "size"]).style.tab_header(
+        title="Missing Days",
+        subtitle="Days with 0B size due to missing on the popcorn server.",
+    )
+
+
+@app.cell
+def _(sizes_df):
+    # Plot the parsed number; the raw `size` column is text.
+    (
+        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
+        + lp.geom_point()
+        + lp.labs(
+            title="File sizes",
+            subtitle="Size of daily popcorn files over time",
+            caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
+        )
+    )
+
+
+@app.cell
+def _(sizes_df):
+    # TODO(review): placeholder; labels are empty and y still maps the raw
+    # `size` text. Confirm the intended aesthetics before relying on it.
+    (
+        lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
+        + lp.geom_dotplot()
+        + lp.labs(
+            title="",
+            subtitle="",
+            caption="",
+        )
+    )
+
+
+if __name__ == "__main__":
+    app.run()