Turn the main DataFrame into a LazyFrame
This commit is contained in:
parent
07c45ca205
commit
86b3659f0f
1 changed file with 25 additions and 24 deletions
49
popcorn.py
49
popcorn.py
|
|
@ -109,7 +109,7 @@ def _():
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
df = (
|
df_lazy = (
|
||||||
pl.scan_ndjson("data/daily/*", include_file_paths="file")
|
pl.scan_ndjson("data/daily/*", include_file_paths="file")
|
||||||
.head(LIMIT_ROWS) # FIXME: take out after debug
|
.head(LIMIT_ROWS) # FIXME: take out after debug
|
||||||
.with_columns(
|
.with_columns(
|
||||||
|
|
@ -121,22 +121,21 @@ def _():
|
||||||
.select("date", pl.col("Packages").struct.unnest())
|
.select("date", pl.col("Packages").struct.unnest())
|
||||||
.fill_null(0)
|
.fill_null(0)
|
||||||
.unpivot(index="date", variable_name="package", value_name="downloads")
|
.unpivot(index="date", variable_name="package", value_name="downloads")
|
||||||
.collect()
|
|
||||||
)
|
)
|
||||||
df
|
df_lazy
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(df: pl.DataFrame):
|
def _(df_lazy: pl.LazyFrame):
|
||||||
def _():
|
def _():
|
||||||
weekly_downloads = (
|
weekly_downloads = (
|
||||||
df.sort("date")
|
df_lazy.sort("date")
|
||||||
.group_by_dynamic("date", every="1w")
|
.group_by_dynamic("date", every="1w")
|
||||||
.agg(pl.col("downloads").sum())
|
.agg(pl.col("downloads").sum())
|
||||||
.sort("date")
|
.sort("date")
|
||||||
|
.collect()
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
lp.ggplot(weekly_downloads, lp.aes("date", "downloads"))
|
lp.ggplot(weekly_downloads, lp.aes("date", "downloads"))
|
||||||
+ lp.geom_line()
|
+ lp.geom_line()
|
||||||
|
|
@ -145,30 +144,33 @@ def _(df: pl.DataFrame):
|
||||||
title="Weekly downloads",
|
title="Weekly downloads",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
_()
|
_()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(df: pl.DataFrame):
|
def _(df_lazy: pl.LazyFrame):
|
||||||
def _():
|
def _():
|
||||||
weekday_downloads = df.sort("date").with_columns(
|
weekday_downloads = (
|
||||||
pl.col("date")
|
df_lazy.sort("date")
|
||||||
.dt.weekday()
|
.with_columns(
|
||||||
.sort()
|
pl.col("date")
|
||||||
.replace_strict(
|
.dt.weekday()
|
||||||
{
|
.sort()
|
||||||
1: "Mon",
|
.replace_strict(
|
||||||
2: "Tue",
|
{
|
||||||
3: "Wed",
|
1: "Mon",
|
||||||
4: "Thu",
|
2: "Tue",
|
||||||
5: "Fri",
|
3: "Wed",
|
||||||
6: "Sat",
|
4: "Thu",
|
||||||
7: "Sun",
|
5: "Fri",
|
||||||
}
|
6: "Sat",
|
||||||
|
7: "Sun",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
.alias("weekday")
|
||||||
)
|
)
|
||||||
.alias("weekday")
|
.collect()
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
@ -179,7 +181,6 @@ def _(df: pl.DataFrame):
|
||||||
caption="Downloads aggregated per day of the week they took place.",
|
caption="Downloads aggregated per day of the week they took place.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
_()
|
_()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue