diff --git a/popcorn.py b/popcorn.py index 6ad3057..0e66df6 100644 --- a/popcorn.py +++ b/popcorn.py @@ -154,7 +154,8 @@ def _(): .fill_null(0) .head(LIMIT_ROWS) # FIXME: take out after debug ) - df_pkg_lazy.collect() + # give small df preview + df_pkg_lazy.head(100).collect() return @@ -179,19 +180,18 @@ def _(): @app.cell def _(df_pkg_lazy: pl.LazyFrame): def _(): - weekly_downloads = ( + weekly_packages = ( df_pkg_lazy.sort("date") .group_by_dynamic("date", every="1w") .agg(pl.col("downloads").sum()) .sort("date") - .collect() ) return ( - lp.ggplot(weekly_downloads, lp.aes("date", "downloads")) + lp.ggplot(weekly_packages.collect(), lp.aes("date", "downloads")) + lp.geom_line() + lp.geom_smooth(method="loess") + lp.labs( - title="Weekly package installations", + title="Weekly package ownership", caption="Count of all installed packages aggregated for each week", ) ) @@ -215,30 +215,25 @@ def _(): @app.cell def _(df_pkg_lazy: pl.LazyFrame): def _(): - weekday_downloads = ( - df_pkg_lazy.sort("date") - .with_columns( - pl.col("date") - .dt.weekday() - .sort() - .replace_strict( - { - 1: "Mon", - 2: "Tue", - 3: "Wed", - 4: "Thu", - 5: "Fri", - 6: "Sat", - 7: "Sun", - } - ) - .alias("weekday") + weekday_downloads = df_pkg_lazy.sort("date").with_columns( + pl.col("date") + .dt.weekday() + .sort() + .replace_strict( + { + 1: "Mon", + 2: "Tue", + 3: "Wed", + 4: "Thu", + 5: "Fri", + 6: "Sat", + 7: "Sun", + } ) - .collect() + .alias("weekday") ) - return ( - lp.ggplot(weekday_downloads, lp.aes("weekday", "downloads")) + lp.ggplot(weekday_downloads.collect(), lp.aes("weekday", "downloads")) + lp.geom_bar() + lp.labs( title="Weekday downloads", @@ -253,13 +248,11 @@ def _(df_pkg_lazy: pl.LazyFrame): @app.cell def _(df_pkg_lazy: pl.LazyFrame): def _(): - month_agg_downloads = ( - df_pkg_lazy.sort("date") - .with_columns(pl.col("date").dt.month().alias("month")) - .collect() + month_agg_downloads = df_pkg_lazy.sort("date").with_columns( + pl.col("date").dt.month().alias("month") ) return ( - lp.ggplot(month_agg_downloads, lp.aes("month", "downloads")) + lp.ggplot(month_agg_downloads.collect(), lp.aes("month", "downloads")) + lp.geom_bar() + lp.labs( title="Monthwise downloads",