Provide raw lazyframe
This commit is contained in:
parent
499a819c90
commit
4984289f69
1 changed file with 11 additions and 20 deletions
31
popcorn.py
31
popcorn.py
|
|
@@ -118,7 +118,9 @@ def _():
|
|||
.str.to_date()
|
||||
.alias("date")
|
||||
)
|
||||
.select("date", pl.col("Packages").struct.unnest())
|
||||
)
|
||||
df_pkg_lazy = (
|
||||
df_lazy.select("date", pl.col("Packages").struct.unnest())
|
||||
.fill_null(0)
|
||||
.unpivot(index="date", variable_name="package", value_name="downloads")
|
||||
)
|
||||
|
|
@@ -126,10 +128,10 @@ def _():
|
|||
|
||||
|
||||
@app.cell
|
||||
def _(df_lazy: pl.LazyFrame):
|
||||
def _(df_pkg_lazy: pl.LazyFrame):
|
||||
def _():
|
||||
weekly_downloads = (
|
||||
df_lazy.sort("date")
|
||||
df_pkg_lazy.sort("date")
|
||||
.group_by_dynamic("date", every="1w")
|
||||
.agg(pl.col("downloads").sum())
|
||||
.sort("date")
|
||||
|
|
@@ -149,10 +151,10 @@ def _(df_lazy: pl.LazyFrame):
|
|||
|
||||
|
||||
@app.cell
|
||||
def _(df_lazy: pl.LazyFrame):
|
||||
def _(df_pkg_lazy: pl.LazyFrame):
|
||||
def _():
|
||||
weekday_downloads = (
|
||||
df_lazy.sort("date")
|
||||
df_pkg_lazy.sort("date")
|
||||
.with_columns(
|
||||
pl.col("date")
|
||||
.dt.weekday()
|
||||
|
|
@@ -187,10 +189,10 @@ def _(df_lazy: pl.LazyFrame):
|
|||
|
||||
|
||||
@app.cell
|
||||
def _(df_lazy: pl.LazyFrame):
|
||||
def _(df_pkg_lazy: pl.LazyFrame):
|
||||
def _():
|
||||
month_agg_downloads = (
|
||||
df_lazy.sort("date")
|
||||
df_pkg_lazy.sort("date")
|
||||
.with_columns(pl.col("date").dt.month().alias("month"))
|
||||
.collect()
|
||||
)
|
||||
|
|
@@ -208,19 +210,8 @@ def _(df_lazy: pl.LazyFrame):
|
|||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
df_unique_downloads = (
|
||||
pl.scan_ndjson("data/daily/*", include_file_paths="file")
|
||||
.head(LIMIT_ROWS) # FIXME: take out after debug
|
||||
.with_columns(
|
||||
pl.col("file")
|
||||
.str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}")
|
||||
.str.to_date()
|
||||
.alias("date")
|
||||
)
|
||||
.select(["date", "UniqueInstalls"])
|
||||
.collect()
|
||||
)
|
||||
def _(df_lazy:pl.LazyFrame):
|
||||
df_unique_downloads = df_lazy.select(["date", "UniqueInstalls"]).collect()
|
||||
(
|
||||
lp.ggplot(df_unique_downloads, lp.aes("date", "UniqueInstalls"))
|
||||
+ lp.geom_line()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue