diff --git a/notebooks/popcorn.py b/notebooks/popcorn.py index c0312c5..bf09e0e 100644 --- a/notebooks/popcorn.py +++ b/notebooks/popcorn.py @@ -158,8 +158,10 @@ def _(): @app.cell def _(df_pkg_lazy: pl.LazyFrame): + pkg_per_day = df_pkg_lazy.group_by("date").agg(pl.col("count").sum()).sort("date") + def _(): - weekly_packages = df_pkg_lazy.group_by_dynamic("date", every="1w").agg( + weekly_packages = pkg_per_day.group_by_dynamic("date", every="1w").agg( pl.col("count").sum() ) return ( @@ -179,6 +181,34 @@ def _(df_pkg_lazy: pl.LazyFrame): return +@app.cell +def plt_pkg_relative(pkg_per_day: pl.LazyFrame, df_unique_installs: pl.DataFrame): + def _(): + relative_packages = ( + pkg_per_day.with_columns(df_unique_installs["unique"]) + .with_columns((pl.col("count") / pl.col("unique")).alias("relative")) + .group_by_dynamic("date", every="1w") + .agg(pl.col("relative").mean()) + ) + return ( + lp.ggplot( + relative_packages.collect(engine="streaming"), + lp.aes("date", "relative"), + ) + + lp.geom_line() + + lp.geom_smooth(method="loess") + + lp.labs( + title="Package ownership per user", + subtitle="Average relative weekly package ownership", + caption="Calculated by total amount of packages per day over unique installations", + y="number of packages", + ) + ) + + _() + return + + @app.cell(hide_code=True) def _(): mo.md( @@ -256,12 +286,13 @@ def _(df_pkg_lazy: pl.LazyFrame): @app.cell def _(): + df_unique_installs = pl.read_csv( + f"{DATA_DIR}/unique_installs.csv", + schema={"date": pl.Date, "unique": pl.UInt16}, + ) ( lp.ggplot( - pl.read_csv( - f"{DATA_DIR}/unique_installs.csv", - schema={"date": pl.Date, "unique": pl.UInt16}, - ), + df_unique_installs, lp.aes("date", "unique"), ) + lp.geom_line()