Add basic size analysis
This commit is contained in:
parent
ae1895e47f
commit
1701605939
1 changed file with 61 additions and 0 deletions
61
popcorn.py
61
popcorn.py
|
|
@ -16,3 +16,64 @@ def _():
|
|||
mo.md(r"""# Void Linux 'Popcorn' package repository stat analysis""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _():
    def parse_size(size_str):
        """Extract the kilobyte figure from a human-readable size string.

        Args:
            size_str: Text containing a number followed by " kB".

        Returns:
            The number as a float, or None when the string holds no
            "<number> kB" figure.
        """
        # The dot is escaped and the fractional part is optional, so
        # whole-number and single-digit values such as "5 kB" parse, and
        # digits split by an arbitrary character are not treated as one
        # number.
        match = re.search(r"(\d+(?:\.\d+)?) kB", size_str)
        return float(match.group(1)) if match else None

    sizes_df = (
        pl.read_csv("data/file_sizes.csv")
        .with_columns(
            # Reduce "data/YYYY-MM-DD.json" to the date part and parse it.
            pl.col("name")
            .str.replace(r"data/(\d{4}-\d{2}-\d{2})\.json", "${1}")
            .str.to_date()
            .alias("date"),
            pl.col("size")
            .map_elements(parse_size, return_dtype=pl.Float32)
            .alias("size_num"),
        )
        .select(["date", "size_num", "size", "modified"])
    )
    # Split on the parsed column in both directions, so that rows whose
    # size could not be parsed land in sizes_df_null and nowhere else.
    sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
    sizes_df = sizes_df.filter(pl.col("size_num").is_not_null())
    # Later cells take both frames as parameters.
    return sizes_df, sizes_df_null
|
||||
|
||||
|
||||
@app.cell
def _(sizes_df_null):
    # Table of the rows whose size could not be parsed, showing only the
    # date and the raw size text. The underscore-prefixed name is intended
    # to stay local to this cell.
    _missing_days = sizes_df_null.select(["date", "size"])
    _missing_days.style.tab_header(
        title="Missing Days",
        subtitle="Days with 0B size due to missing on the popcorn server.",
    )
|
||||
|
||||
|
||||
@app.cell
def _(sizes_df):
    # Scatter plot of file size against date. The y axis uses the parsed
    # numeric column "size_num"; the raw "size" column holds text such as
    # "<number> kB" and would not give a continuous numeric axis.
    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size_num"))
        + lp.geom_point()
        + lp.labs(
            title="File sizes",
            subtitle="Size of daily popcorn files over time",
            caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
        )
    )
|
||||
|
||||
|
||||
@app.cell
def _(sizes_df):
    # Dot plot over the same frame; title, subtitle and caption are still
    # empty placeholders.
    # NOTE(review): y is mapped to the raw text column "size" — confirm
    # whether the numeric "size_num" column was intended here.
    _mapping = lp.aes(x="date", y="size")
    _labels = lp.labs(
        title="",
        subtitle="",
        caption="",
    )
    lp.ggplot(sizes_df, _mapping) + lp.geom_dotplot() + _labels
|
||||
|
||||
|
||||
# Script entry point: run the app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    app.run()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue