Add kernel analysis plots
This commit is contained in:
parent
4d5aa73de7
commit
ce38024569
1 changed files with 85 additions and 7 deletions
92
popcorn.py
92
popcorn.py
|
|
@ -305,21 +305,99 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
def _():
    # Section heading that introduces the kernel analysis cells below.
    mo.md(r""" ## Kernel Analysis """)
    return
|
||||
|
||||
|
||||
# Which kernel versions were downloaded when? (versions simplified to semver form)
|
||||
@app.cell
def _():
    # Scan the kernel CSV files and derive coarser version columns from the
    # raw kernel string.  The frame is kept lazy from start to finish: an eager
    # ``.collect()`` in the middle of this chain would produce a ``DataFrame``,
    # and every later ``.collect()`` on ``kernel_df_lazy`` (here and in the
    # cells that consume it as a ``pl.LazyFrame``) would then fail.
    kernel_df_lazy = (
        pl.scan_csv(
            f"{DATA_CLEAN_DIR}/kernels/*.csv",
            schema={
                "date": pl.Date,
                "kernel": pl.String,
                "downloads": pl.UInt16,
            },
        )
        .fill_null(0)
        # Drop everything that follows the first "major.minor.patch" match.
        .with_columns(pl.col("kernel").str.replace(r"(\d+\.\d+\.\d+).*", "${1}"))
        .with_columns(
            # Leading integer of the version string, e.g. "6" from "6.12.3".
            pl.col("kernel")
            .str.replace(r"(\d+).*", "${1}")
            .str.to_integer(dtype=pl.UInt8)
            .alias("major_ver"),
            # "major.minor" prefix of the version string, kept as text.
            pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
        )
        .head(LIMIT_ROWS)  # FIXME: take out after debug
    )

    # Dates of the rows that report major version 99.  Only the date column is
    # requested, and it is selected before collecting so that nothing else
    # needs to be materialised.
    kernel_df_v99 = (
        kernel_df_lazy.filter(pl.col("major_ver") == 99).select("date").collect()
    )

    # Everything downstream works on the frame without the version-99 rows.
    kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99)
    (
        lp.ggplot(
            # Cast to string so the major version is plotted as a category.
            kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String))
            .group_by("major_ver")
            .agg(pl.col("downloads").sum())
            .sort("major_ver")
            .collect(),
            lp.aes("major_ver", "downloads"),
        )
        + lp.geom_bar(stat="identity")
        + lp.labs(
            title="Kernel versions used",
            caption="For each daily download, add up the currently running kernel version",
        )
    )
    return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
def _(kernel_df_v99: pl.DataFrame):
    # Explain the excluded "major version 99" rows.
    #
    # kernel_df_v99 is expected to carry a "date" column with one row per
    # excluded record.  The first and last sighting are reported as the
    # minimum and maximum date instead of the first and last row, so the text
    # does not depend on row order, and an empty frame renders ``None``
    # instead of raising on an out-of-range row index.
    mo.md(
        rf"""

    When looking at the kernel versions used, we see a very strong jump between major kernel version
    4 and major kernel version 5.

    For this analysis we had to exclude {kernel_df_v99.height} rows which were
    apparently from the future, as they were running variations of major kernel version 99. In all
    likelihood there is a custom kernel version out there which reports its own major version as 99.
    The strange version starts appearing on {kernel_df_v99["date"].min()} and shows up
    all the way until {kernel_df_v99["date"].max()}.

    """
    )
    return
|
||||
|
||||
|
||||
@app.cell
def _(kernel_df_lazy: pl.LazyFrame):
    # Weekly download totals per major kernel version, drawn as one line per
    # version.  The intermediate frame uses a leading underscore so that it
    # stays private to this cell.
    _weekly_downloads = (
        kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String))
        .select(["date", "major_ver", "downloads"])
        # The frame is ordered by date before the windows are built.
        .sort("date")
        .group_by_dynamic("date", every="1w", group_by="major_ver")
        .agg(pl.col("downloads").sum())
        .collect()
    )
    (
        lp.ggplot(_weekly_downloads, lp.aes("date", "downloads", color="major_ver"))
        + lp.geom_line()
        + lp.labs(
            title="Kernels over time",
            caption="For each daily download, count used kernel versions",
        )
    )
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
mo.md(
|
||||
r"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue