analysis-voidlinux-popcorn/popcorn.py

79 lines
1.9 KiB
Python

import marimo

# marimo notebook header: the version stamp and the App object that the
# cells below attach to through the ``@app.cell`` decorator.
__generated_with = "0.16.2"
app = marimo.App(width="medium")

with app.setup:
    # Initialization code that runs before all other cells
    import lets_plot as lp
    import marimo as mo
    import polars as pl
    import re
@app.cell(hide_code=True)
def _():
    # Notebook heading; the markdown source is kept in a cell-local name
    # (underscore-prefixed) and rendered through marimo's ``mo.md``.
    _heading = r"""# Void Linux 'Popcorn' package repository stat analysis"""
    mo.md(_heading)
    return
@app.cell
def _():
    def parse_size(size_str):
        """Return the kilobyte figure from a string such as ``"12.5 kB"``.

        Returns ``None`` when the string holds no ``<number> kB`` part, so
        the caller can treat the row as missing.
        """
        # The decimal point is escaped and the fractional part is optional,
        # so single-digit values like "5 kB" parse too.  The look-behind
        # stops the match from starting in the middle of a longer number
        # (e.g. "1,234 kB"), which would otherwise yield a wrong value.
        found = re.search(r"(?<![\d.,])(\d+(?:\.\d+)?) kB", size_str)
        return float(found.group(1)) if found else None

    # Reads the "name", "size" and "modified" columns of the CSV.  "name" is
    # expected to look like data/YYYY-MM-DD.json; the date part is extracted
    # and parsed into a "date" column, and "size" is parsed into "size_num".
    sizes_df = (
        pl.read_csv("data/file_sizes.csv")
        .with_columns(
            pl.col("name")
            .str.replace(r"data/(\d{4}-\d{2}-\d{2})\.json", "${1}")
            .str.to_date()
            .alias("date"),
            pl.col("size")
            .map_elements(parse_size, return_dtype=pl.Float32)
            .alias("size_num"),
        )
        .select(["date", "size_num", "size", "modified"])
    )

    # Split on the parsed column: rows whose size could not be parsed go to
    # sizes_df_null, everything else stays in sizes_df.
    sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
    sizes_df = sizes_df.filter(pl.col("size_num").is_not_null())
    # Both frames are taken as parameters by later cells.
    return sizes_df, sizes_df_null
@app.cell
def _(sizes_df_null):
    # Show the date and raw size text of the rows in sizes_df_null as a
    # styled table with a header.
    _missing_days = sizes_df_null.select(["date", "size"])
    _missing_days.style.tab_header(
        title="Missing Days",
        subtitle="Days with 0B size due to missing on the popcorn server.",
    )
@app.cell
def _(sizes_df):
    # Scatter plot of the daily file size over time.
    # y is mapped to the parsed numeric column "size_num"; the raw "size"
    # column holds text (e.g. "12.5 kB") and would produce a categorical
    # axis instead of a numeric one.
    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
        + lp.geom_point()
        + lp.labs(
            title="File sizes",
            subtitle="Size of daily popcorn files over time",
            caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
            y="Size (kB)",
        )
    )
@app.cell
def _(sizes_df):
    # Dot plot over the same date/size mapping, built step by step.
    # NOTE(review): title, subtitle and caption are empty strings, which look
    # like placeholders, and y is mapped to the raw "size" text column.
    # Confirm whether the parsed "size_num" column was intended.
    _base = lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
    _dots = _base + lp.geom_dotplot()
    _dots + lp.labs(title="", subtitle="", caption="")
# Run the notebook's app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()