Turn main dataframe into lazyframe
This commit is contained in:
parent
07c45ca205
commit
86b3659f0f
1 changed file with 25 additions and 24 deletions
49
popcorn.py
49
popcorn.py
|
|
@ -109,7 +109,7 @@ def _():
|
|||
|
||||
@app.cell
|
||||
def _():
|
||||
df = (
|
||||
df_lazy = (
|
||||
pl.scan_ndjson("data/daily/*", include_file_paths="file")
|
||||
.head(LIMIT_ROWS) # FIXME: take out after debug
|
||||
.with_columns(
|
||||
|
|
@ -121,22 +121,21 @@ def _():
|
|||
.select("date", pl.col("Packages").struct.unnest())
|
||||
.fill_null(0)
|
||||
.unpivot(index="date", variable_name="package", value_name="downloads")
|
||||
.collect()
|
||||
)
|
||||
df
|
||||
df_lazy
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(df: pl.DataFrame):
|
||||
def _(df_lazy: pl.LazyFrame):
|
||||
def _():
|
||||
weekly_downloads = (
|
||||
df.sort("date")
|
||||
df_lazy.sort("date")
|
||||
.group_by_dynamic("date", every="1w")
|
||||
.agg(pl.col("downloads").sum())
|
||||
.sort("date")
|
||||
.collect()
|
||||
)
|
||||
|
||||
return (
|
||||
lp.ggplot(weekly_downloads, lp.aes("date", "downloads"))
|
||||
+ lp.geom_line()
|
||||
|
|
@ -145,30 +144,33 @@ def _(df: pl.DataFrame):
|
|||
title="Weekly downloads",
|
||||
)
|
||||
)
|
||||
|
||||
_()
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(df: pl.DataFrame):
|
||||
def _(df_lazy: pl.LazyFrame):
|
||||
def _():
|
||||
weekday_downloads = df.sort("date").with_columns(
|
||||
pl.col("date")
|
||||
.dt.weekday()
|
||||
.sort()
|
||||
.replace_strict(
|
||||
{
|
||||
1: "Mon",
|
||||
2: "Tue",
|
||||
3: "Wed",
|
||||
4: "Thu",
|
||||
5: "Fri",
|
||||
6: "Sat",
|
||||
7: "Sun",
|
||||
}
|
||||
weekday_downloads = (
|
||||
df_lazy.sort("date")
|
||||
.with_columns(
|
||||
pl.col("date")
|
||||
.dt.weekday()
|
||||
.sort()
|
||||
.replace_strict(
|
||||
{
|
||||
1: "Mon",
|
||||
2: "Tue",
|
||||
3: "Wed",
|
||||
4: "Thu",
|
||||
5: "Fri",
|
||||
6: "Sat",
|
||||
7: "Sun",
|
||||
}
|
||||
)
|
||||
.alias("weekday")
|
||||
)
|
||||
.alias("weekday")
|
||||
.collect()
|
||||
)
|
||||
|
||||
return (
|
||||
|
|
@ -179,7 +181,6 @@ def _(df: pl.DataFrame):
|
|||
caption="Downloads aggregated per day of the week they took place.",
|
||||
)
|
||||
)
|
||||
|
||||
_()
|
||||
return
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue