Update wordings and lazy collects

This commit is contained in:
Marty Oehme 2025-09-30 08:14:46 +02:00
parent 9d64e93486
commit e393768d30
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -154,7 +154,8 @@ def _():
.fill_null(0) .fill_null(0)
.head(LIMIT_ROWS) # FIXME: take out after debug .head(LIMIT_ROWS) # FIXME: take out after debug
) )
df_pkg_lazy.collect() # give small df preview
df_pkg_lazy.head(100).collect()
return return
@ -179,19 +180,18 @@ def _():
@app.cell @app.cell
def _(df_pkg_lazy: pl.LazyFrame): def _(df_pkg_lazy: pl.LazyFrame):
def _(): def _():
weekly_downloads = ( weekly_packages = (
df_pkg_lazy.sort("date") df_pkg_lazy.sort("date")
.group_by_dynamic("date", every="1w") .group_by_dynamic("date", every="1w")
.agg(pl.col("downloads").sum()) .agg(pl.col("downloads").sum())
.sort("date") .sort("date")
.collect()
) )
return ( return (
lp.ggplot(weekly_downloads, lp.aes("date", "downloads")) lp.ggplot(weekly_packages.collect(), lp.aes("date", "downloads"))
+ lp.geom_line() + lp.geom_line()
+ lp.geom_smooth(method="loess") + lp.geom_smooth(method="loess")
+ lp.labs( + lp.labs(
title="Weekly package installations", title="Weekly package ownership",
caption="Count of all installed packages aggregated for each week", caption="Count of all installed packages aggregated for each week",
) )
) )
@ -215,30 +215,25 @@ def _():
@app.cell @app.cell
def _(df_pkg_lazy: pl.LazyFrame): def _(df_pkg_lazy: pl.LazyFrame):
def _(): def _():
weekday_downloads = ( weekday_downloads = df_pkg_lazy.sort("date").with_columns(
df_pkg_lazy.sort("date") pl.col("date")
.with_columns( .dt.weekday()
pl.col("date") .sort()
.dt.weekday() .replace_strict(
.sort() {
.replace_strict( 1: "Mon",
{ 2: "Tue",
1: "Mon", 3: "Wed",
2: "Tue", 4: "Thu",
3: "Wed", 5: "Fri",
4: "Thu", 6: "Sat",
5: "Fri", 7: "Sun",
6: "Sat", }
7: "Sun",
}
)
.alias("weekday")
) )
.collect() .alias("weekday")
) )
return ( return (
lp.ggplot(weekday_downloads, lp.aes("weekday", "downloads")) lp.ggplot(weekday_downloads.collect(), lp.aes("weekday", "downloads"))
+ lp.geom_bar() + lp.geom_bar()
+ lp.labs( + lp.labs(
title="Weekday downloads", title="Weekday downloads",
@ -253,13 +248,11 @@ def _(df_pkg_lazy: pl.LazyFrame):
@app.cell @app.cell
def _(df_pkg_lazy: pl.LazyFrame): def _(df_pkg_lazy: pl.LazyFrame):
def _(): def _():
month_agg_downloads = ( month_agg_downloads = df_pkg_lazy.sort("date").with_columns(
df_pkg_lazy.sort("date") pl.col("date").dt.month().alias("month")
.with_columns(pl.col("date").dt.month().alias("month"))
.collect()
) )
return ( return (
lp.ggplot(month_agg_downloads, lp.aes("month", "downloads")) lp.ggplot(month_agg_downloads.collect(), lp.aes("month", "downloads"))
+ lp.geom_bar() + lp.geom_bar()
+ lp.labs( + lp.labs(
title="Monthwise downloads", title="Monthwise downloads",