Update plot wording and defer LazyFrame collects to plot time

This commit is contained in:
Marty Oehme 2025-09-30 08:14:46 +02:00
parent 9d64e93486
commit e393768d30
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -154,7 +154,8 @@ def _():
.fill_null(0)
.head(LIMIT_ROWS) # FIXME: take out after debug
)
df_pkg_lazy.collect()
# give small df preview
df_pkg_lazy.head(100).collect()
return
@ -179,19 +180,18 @@ def _():
@app.cell
def _(df_pkg_lazy: pl.LazyFrame):
def _():
weekly_downloads = (
weekly_packages = (
df_pkg_lazy.sort("date")
.group_by_dynamic("date", every="1w")
.agg(pl.col("downloads").sum())
.sort("date")
.collect()
)
return (
lp.ggplot(weekly_downloads, lp.aes("date", "downloads"))
lp.ggplot(weekly_packages.collect(), lp.aes("date", "downloads"))
+ lp.geom_line()
+ lp.geom_smooth(method="loess")
+ lp.labs(
title="Weekly package installations",
title="Weekly package ownership",
caption="Count of all installed packages aggregated for each week",
)
)
@ -215,9 +215,7 @@ def _():
@app.cell
def _(df_pkg_lazy: pl.LazyFrame):
def _():
weekday_downloads = (
df_pkg_lazy.sort("date")
.with_columns(
weekday_downloads = df_pkg_lazy.sort("date").with_columns(
pl.col("date")
.dt.weekday()
.sort()
@ -234,11 +232,8 @@ def _(df_pkg_lazy: pl.LazyFrame):
)
.alias("weekday")
)
.collect()
)
return (
lp.ggplot(weekday_downloads, lp.aes("weekday", "downloads"))
lp.ggplot(weekday_downloads.collect(), lp.aes("weekday", "downloads"))
+ lp.geom_bar()
+ lp.labs(
title="Weekday downloads",
@ -253,13 +248,11 @@ def _(df_pkg_lazy: pl.LazyFrame):
@app.cell
def _(df_pkg_lazy: pl.LazyFrame):
def _():
month_agg_downloads = (
df_pkg_lazy.sort("date")
.with_columns(pl.col("date").dt.month().alias("month"))
.collect()
month_agg_downloads = df_pkg_lazy.sort("date").with_columns(
pl.col("date").dt.month().alias("month")
)
return (
lp.ggplot(month_agg_downloads, lp.aes("month", "downloads"))
lp.ggplot(month_agg_downloads.collect(), lp.aes("month", "downloads"))
+ lp.geom_bar()
+ lp.labs(
title="Monthwise downloads",