Add relative package ownership plot
This commit is contained in:
parent
43bd80e30e
commit
a5ca2a241f
1 changed file with 36 additions and 5 deletions
|
|
@ -158,8 +158,10 @@ def _():
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(df_pkg_lazy: pl.LazyFrame):
|
def _(df_pkg_lazy: pl.LazyFrame):
|
||||||
|
pkg_per_day = df_pkg_lazy.group_by("date").agg(pl.col("count").sum()).sort("date")
|
||||||
|
|
||||||
def _():
|
def _():
|
||||||
weekly_packages = df_pkg_lazy.group_by_dynamic("date", every="1w").agg(
|
weekly_packages = pkg_per_day.group_by_dynamic("date", every="1w").agg(
|
||||||
pl.col("count").sum()
|
pl.col("count").sum()
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
|
|
@ -179,6 +181,34 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def plt_pkg_relative(pkg_per_day: pl.LazyFrame, df_unique_installs: pl.DataFrame):
|
||||||
|
def _():
|
||||||
|
relative_packages = (
|
||||||
|
pkg_per_day.with_columns(df_unique_installs["unique"])
|
||||||
|
.with_columns((pl.col("count") / pl.col("unique")).alias("relative"))
|
||||||
|
.group_by_dynamic("date", every="1w")
|
||||||
|
.agg(pl.col("relative").mean())
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
lp.ggplot(
|
||||||
|
relative_packages.collect(engine="streaming"),
|
||||||
|
lp.aes("date", "relative"),
|
||||||
|
)
|
||||||
|
+ lp.geom_line()
|
||||||
|
+ lp.geom_smooth(method="loess")
|
||||||
|
+ lp.labs(
|
||||||
|
title="Package ownership per user",
|
||||||
|
subtitle="Average relative weekly package ownership",
|
||||||
|
caption="Calculated by total amount of packages per day over unique installations",
|
||||||
|
y="number of packages",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
_()
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _():
|
def _():
|
||||||
mo.md(
|
mo.md(
|
||||||
|
|
@ -256,12 +286,13 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
|
df_unique_installs = pl.read_csv(
|
||||||
|
f"{DATA_DIR}/unique_installs.csv",
|
||||||
|
schema={"date": pl.Date, "unique": pl.UInt16},
|
||||||
|
)
|
||||||
(
|
(
|
||||||
lp.ggplot(
|
lp.ggplot(
|
||||||
pl.read_csv(
|
df_unique_installs,
|
||||||
f"{DATA_DIR}/unique_installs.csv",
|
|
||||||
schema={"date": pl.Date, "unique": pl.UInt16},
|
|
||||||
),
|
|
||||||
lp.aes("date", "unique"),
|
lp.aes("date", "unique"),
|
||||||
)
|
)
|
||||||
+ lp.geom_line()
|
+ lp.geom_line()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue