Add basic size analysis

This commit is contained in:
Marty Oehme 2025-09-27 11:27:29 +02:00
parent ae1895e47f
commit 1701605939
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -16,3 +16,64 @@ def _():
mo.md(r"""# Void Linux 'Popcorn' package repository stat analysis""")
return
@app.cell
def _():
    def parse_size(size_str):
        """Extract the numeric kilobyte value from a size string.

        Args:
            size_str: Text containing a size such as ``"12.5 kB"``.

        Returns:
            The number directly preceding `` kB`` as a float, or ``None``
            when the input is ``None`` or contains no kB figure.
        """
        if size_str is None:
            return None
        # The fractional part is optional and the dot is escaped, so
        # single-digit values ("5 kB") parse and only a literal decimal
        # point is accepted as the separator.
        found = re.search(r"(\d+(?:\.\d+)?) kB", size_str)
        return float(found.group(1)) if found else None

    sizes_df = (
        pl.read_csv("data/file_sizes.csv")
        .with_columns(
            # Reduce names matching "data/YYYY-MM-DD.json" to the date part
            # before converting them to a date column.
            pl.col("name")
            .str.replace(r"data/(\d{4}-\d{2}-\d{2})\.json", "${1}")
            .str.to_date()
            .alias("date"),
            pl.col("size")
            .map_elements(parse_size, return_dtype=pl.Float32)
            .alias("size_num"),
        )
        .select(["date", "size_num", "size", "modified"])
    )
    # Split on the parsed value so the two frames are complementary: rows
    # without a kB figure go to sizes_df_null, all others stay in sizes_df.
    sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
    sizes_df = sizes_df.filter(pl.col("size_num").is_not_null())
    return
@app.cell
def _(sizes_df_null):
    # Show date and raw size text of the rows in sizes_df_null as a titled
    # table; the styled table is the cell's final expression.
    _missing_days = sizes_df_null.select(["date", "size"])
    _missing_days.style.tab_header(
        title="Missing Days",
        subtitle="Days with 0B size due to missing on the popcorn server.",
    )
@app.cell
def _(sizes_df):
    # Scatter plot of file size per day.
    # y is mapped to the parsed numeric column "size_num"; the raw "size"
    # column holds text such as "12.5 kB" and would not yield a numeric axis.
    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
        + lp.geom_point()
        + lp.labs(
            title="File sizes",
            subtitle="Size of daily popcorn files over time",
            caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
            y="size (kB)",
        )
    )
@app.cell
def _(sizes_df):
    # Dot plot over the same date/size mapping; all labels are left blank.
    _blank_labels = lp.labs(
        title="",
        subtitle="",
        caption="",
    )
    _dots = lp.ggplot(sizes_df, lp.aes(x="date", y="size")) + lp.geom_dotplot()
    _dots + _blank_labels
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()