Turn main dataframe into lazyframe

This commit is contained in:
Marty Oehme 2025-09-28 21:08:48 +02:00
parent 07c45ca205
commit 86b3659f0f
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -109,7 +109,7 @@ def _():
@app.cell
def _():
df = (
df_lazy = (
pl.scan_ndjson("data/daily/*", include_file_paths="file")
.head(LIMIT_ROWS) # FIXME: take out after debug
.with_columns(
@ -121,22 +121,21 @@ def _():
.select("date", pl.col("Packages").struct.unnest())
.fill_null(0)
.unpivot(index="date", variable_name="package", value_name="downloads")
.collect()
)
df
df_lazy
return
@app.cell
def _(df: pl.DataFrame):
def _(df_lazy: pl.LazyFrame):
def _():
weekly_downloads = (
df.sort("date")
df_lazy.sort("date")
.group_by_dynamic("date", every="1w")
.agg(pl.col("downloads").sum())
.sort("date")
.collect()
)
return (
lp.ggplot(weekly_downloads, lp.aes("date", "downloads"))
+ lp.geom_line()
@ -145,30 +144,33 @@ def _(df: pl.DataFrame):
title="Weekly downloads",
)
)
_()
return
@app.cell
def _(df: pl.DataFrame):
def _(df_lazy: pl.LazyFrame):
def _():
weekday_downloads = df.sort("date").with_columns(
pl.col("date")
.dt.weekday()
.sort()
.replace_strict(
{
1: "Mon",
2: "Tue",
3: "Wed",
4: "Thu",
5: "Fri",
6: "Sat",
7: "Sun",
}
weekday_downloads = (
df_lazy.sort("date")
.with_columns(
pl.col("date")
.dt.weekday()
.sort()
.replace_strict(
{
1: "Mon",
2: "Tue",
3: "Wed",
4: "Thu",
5: "Fri",
6: "Sat",
7: "Sun",
}
)
.alias("weekday")
)
.alias("weekday")
.collect()
)
return (
@ -179,7 +181,6 @@ def _(df: pl.DataFrame):
caption="Downloads aggregated per day of the week they took place.",
)
)
_()
return