Change to streaming engine for lazy operations
This commit is contained in:
parent
e393768d30
commit
07bd122aaf
3 changed files with 286 additions and 11 deletions
22
popcorn.py
22
popcorn.py
|
|
@@ -155,7 +155,7 @@ def _():
|
|||
.head(LIMIT_ROWS) # FIXME: take out after debug
|
||||
)
|
||||
# give small df preview
|
||||
df_pkg_lazy.head(100).collect()
|
||||
df_pkg_lazy.head(100).collect(engine="streaming")
|
||||
return
|
||||
|
||||
|
||||
|
|
@@ -187,7 +187,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
.sort("date")
|
||||
)
|
||||
return (
|
||||
lp.ggplot(weekly_packages.collect(), lp.aes("date", "downloads"))
|
||||
lp.ggplot(weekly_packages.collect(engine="streaming"), lp.aes("date", "downloads"))
|
||||
+ lp.geom_line()
|
||||
+ lp.geom_smooth(method="loess")
|
||||
+ lp.labs(
|
||||
|
|
@@ -233,7 +233,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
.alias("weekday")
|
||||
)
|
||||
return (
|
||||
lp.ggplot(weekday_downloads.collect(), lp.aes("weekday", "downloads"))
|
||||
lp.ggplot(weekday_downloads.collect(engine="streaming"), lp.aes("weekday", "downloads"))
|
||||
+ lp.geom_bar()
|
||||
+ lp.labs(
|
||||
title="Weekday downloads",
|
||||
|
|
@@ -252,7 +252,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
pl.col("date").dt.month().alias("month")
|
||||
)
|
||||
return (
|
||||
lp.ggplot(month_agg_downloads.collect(), lp.aes("month", "downloads"))
|
||||
lp.ggplot(month_agg_downloads.collect(engine="streaming"), lp.aes("month", "downloads"))
|
||||
+ lp.geom_bar()
|
||||
+ lp.labs(
|
||||
title="Monthwise downloads",
|
||||
|
|
@@ -296,7 +296,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
lp.ggplot(
|
||||
df_pkg_dl.sort("downloads", descending=True)
|
||||
.head(DISPLAY_LIMIT)
|
||||
.collect(),
|
||||
.collect(engine="streaming"),
|
||||
lp.aes("package", "downloads"),
|
||||
)
|
||||
+ lp.geom_bar(stat="identity")
|
||||
|
|
@@ -308,7 +308,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
df_pkg_dl.sort("downloads", descending=False)
|
||||
# this seems arbitrary but gives a better result?
|
||||
.head(DISPLAY_LIMIT)
|
||||
.collect(),
|
||||
.collect(engine="streaming"),
|
||||
lp.aes("package", "downloads"),
|
||||
)
|
||||
+ lp.geom_bar(stat="identity")
|
||||
|
|
@@ -328,7 +328,7 @@ def _(df_pkg_lazy: pl.LazyFrame):
|
|||
def _(df_pkg_dl: pl.LazyFrame):
|
||||
def _():
|
||||
return (
|
||||
lp.ggplot(df_pkg_dl.collect(), lp.aes("downloads"))
|
||||
lp.ggplot(df_pkg_dl.collect(engine="streaming"), lp.aes("downloads"))
|
||||
+ lp.geom_freqpoly(stat="bin")
|
||||
+ lp.labs(
|
||||
title="Package installation count distribution",
|
||||
|
|
@@ -343,7 +343,7 @@ def _(df_pkg_dl: pl.LazyFrame):
|
|||
def _(df_pkg_dl: pl.LazyFrame):
|
||||
def _():
|
||||
def get_num(df: pl.LazyFrame) -> int:
|
||||
return df.count().collect().item(0, 0)
|
||||
return df.count().collect(engine="streaming").item(0, 0)
|
||||
|
||||
one_install = df_pkg_dl.sort("downloads", descending=False).filter(
|
||||
pl.col("downloads") == 1
|
||||
|
|
@@ -399,7 +399,7 @@ def _():
|
|||
)
|
||||
|
||||
kernel_df_v99 = (
|
||||
kernel_df_lazy.filter(pl.col("major_ver") == 99).collect().select("date")
|
||||
kernel_df_lazy.filter(pl.col("major_ver") == 99).collect(engine="streaming").select("date")
|
||||
)
|
||||
|
||||
kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99)
|
||||
|
|
@@ -409,7 +409,7 @@ def _():
|
|||
.group_by("major_ver")
|
||||
.agg(pl.col("downloads").sum())
|
||||
.sort("major_ver")
|
||||
.collect(),
|
||||
.collect(engine="streaming"),
|
||||
lp.aes("major_ver", "downloads"),
|
||||
)
|
||||
+ lp.geom_bar(stat="identity")
|
||||
|
|
@@ -448,7 +448,7 @@ def _(kernel_df_lazy: pl.LazyFrame):
|
|||
.sort("date")
|
||||
.group_by_dynamic("date", every="1w", group_by="major_ver")
|
||||
.agg(pl.col("downloads").sum())
|
||||
.collect()
|
||||
.collect(engine="streaming")
|
||||
)
|
||||
|
||||
(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue