79 lines
1.9 KiB
Python
79 lines
1.9 KiB
Python
import marimo
|
|
|
|
# Version stamp recorded in the notebook file (marimo file-format metadata).
__generated_with = "0.16.2"

# The notebook application object; the setup block and every cell below
# attach themselves to it via `app.setup` / `@app.cell`.
app = marimo.App(width="medium")
|
|
|
|
with app.setup:
    # Initialization code that runs before all other cells.
    # Standard library first, then third-party packages.
    import re

    import lets_plot as lp
    import marimo as mo
    import polars as pl
|
|
|
|
|
|
# Title cell: renders the notebook heading as markdown (its code is hidden).
@app.cell(hide_code=True)
def _():
    mo.md(
        r"""# Void Linux 'Popcorn' package repository stat analysis"""
    )
    return
|
|
|
|
|
|
@app.cell
def _():
    def parse_size(size_str):
        """Extract the numeric kilobyte value from a human-readable size.

        Args:
            size_str: Text expected to contain a number followed by " kB",
                for example "123.4 kB".

        Returns:
            The number as a float, or None when the input is not a string
            or contains no "<number> kB" token.
        """
        if not isinstance(size_str, str):
            return None
        # Integer or decimal number. The dot is escaped so it only matches a
        # literal decimal point, and the fractional part is optional so that
        # single-digit sizes such as "5 kB" are recognised as well.
        found = re.search(r"(\d+(?:\.\d+)?) kB", size_str)
        return float(found.group(1)) if found else None

    sizes_df = (
        pl.read_csv("data/file_sizes.csv")
        .with_columns(
            # "data/YYYY-MM-DD.json" -> "YYYY-MM-DD" -> Date
            pl.col("name")
            .str.replace(r"data/(\d{4}-\d{2}-\d{2})\.json", "${1}")
            .str.to_date()
            .alias("date"),
            # Numeric kB value parsed from the raw size text (null if absent).
            pl.col("size")
            .map_elements(parse_size, return_dtype=pl.Float32)
            .alias("size_num"),
        )
        .select(["date", "size_num", "size", "modified"])
    )

    # Split on the parsed column so the two frames are exact complements:
    # rows without a kB value go to `sizes_df_null`, the rest stay in
    # `sizes_df`.
    sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
    sizes_df = sizes_df.filter(pl.col("size_num").is_not_null())

    # Expose both frames to the cells that take them as parameters.
    return sizes_df, sizes_df_null
|
|
|
|
|
|
@app.cell
def _(sizes_df_null):
    # Show the date and raw size columns of `sizes_df_null` as a styled
    # table with a title and subtitle. The underscore-prefixed name keeps
    # the intermediate frame private to this cell.
    _missing_days = sizes_df_null.select(["date", "size"])
    _missing_days.style.tab_header(
        title="Missing Days",
        subtitle="Days with 0B size due to missing on the popcorn server.",
    )
|
|
|
|
|
|
@app.cell
def _(sizes_df):
    # Scatter plot of the daily file size over time.
    # y is mapped to the numeric "size_num" column: "size" holds the raw
    # text (e.g. "123.4 kB") and would be drawn as a categorical axis
    # rather than a numeric one.
    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
        + lp.geom_point()
        + lp.labs(
            title="File sizes",
            subtitle="Size of daily popcorn files over time",
            caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
            # Readable axis title instead of the internal column name.
            y="Size (kB)",
        )
    )
|
|
|
|
|
|
@app.cell
def _(sizes_df):
    # Dot plot over the same data; title, subtitle and caption are still
    # empty placeholders.
    # y is mapped to the numeric "size_num" column rather than the raw
    # "size" text, consistent with the scatter plot of the same frame.
    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
        + lp.geom_dotplot()
        + lp.labs(
            title="",
            subtitle="",
            caption="",
        )
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly as a script: run the notebook app.
    app.run()
|