Add relative package ownership plot

This commit is contained in:
Marty Oehme 2025-10-06 21:34:31 +02:00
parent 43bd80e30e
commit a5ca2a241f
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -158,8 +158,10 @@ def _():
@app.cell @app.cell
def _(df_pkg_lazy: pl.LazyFrame): def _(df_pkg_lazy: pl.LazyFrame):
pkg_per_day = df_pkg_lazy.group_by("date").agg(pl.col("count").sum()).sort("date")
def _(): def _():
weekly_packages = df_pkg_lazy.group_by_dynamic("date", every="1w").agg( weekly_packages = pkg_per_day.group_by_dynamic("date", every="1w").agg(
pl.col("count").sum() pl.col("count").sum()
) )
return ( return (
@ -179,6 +181,34 @@ def _(df_pkg_lazy: pl.LazyFrame):
return return
@app.cell
def plt_pkg_relative(pkg_per_day: pl.LazyFrame, df_unique_installs: pl.DataFrame):
    # Plot the weekly mean of "packages per unique installation":
    # each day's total package count divided by that day's number of
    # unique installations, averaged over one-week windows.
    def _():
        relative_packages = (
            # Pair each day's package total with the unique-install count of
            # the SAME day by joining on "date". Attaching the "unique"
            # Series directly aligns rows purely by position, which fails or
            # silently mismatches days whenever the two sources differ in
            # length, ordering or have gaps.
            pkg_per_day.join(
                df_unique_installs.lazy().select("date", "unique"),
                on="date",
                how="inner",
            )
            # group_by_dynamic needs its index column sorted; re-establish
            # the order explicitly after the join.
            .sort("date")
            .with_columns((pl.col("count") / pl.col("unique")).alias("relative"))
            .group_by_dynamic("date", every="1w")
            .agg(pl.col("relative").mean())
        )
        return (
            lp.ggplot(
                relative_packages.collect(engine="streaming"),
                lp.aes("date", "relative"),
            )
            + lp.geom_line()
            + lp.geom_smooth(method="loess")
            + lp.labs(
                title="Package ownership per user",
                subtitle="Average relative weekly package ownership",
                caption="Calculated by total amount of packages per day over unique installations",
                y="number of packages",
            )
        )

    _()
    return
@app.cell(hide_code=True) @app.cell(hide_code=True)
def _(): def _():
mo.md( mo.md(
@ -256,12 +286,13 @@ def _(df_pkg_lazy: pl.LazyFrame):
@app.cell @app.cell
def _(): def _():
( df_unique_installs = pl.read_csv(
lp.ggplot(
pl.read_csv(
f"{DATA_DIR}/unique_installs.csv", f"{DATA_DIR}/unique_installs.csv",
schema={"date": pl.Date, "unique": pl.UInt16}, schema={"date": pl.Date, "unique": pl.UInt16},
), )
(
lp.ggplot(
df_unique_installs,
lp.aes("date", "unique"), lp.aes("date", "unique"),
) )
+ lp.geom_line() + lp.geom_line()