Make limits for quick iteration toggleable

This commit is contained in:
Marty Oehme 2025-09-30 11:25:15 +02:00
parent 63ae4a1982
commit 0618814c49
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -12,7 +12,7 @@ with app.setup:
import marimo as mo
import polars as pl
LIMIT_ROWS = 500_000
LIMIT_ROWS = 50_000
DATA_RAW_DIR = "data/raw"
DATA_CLEAN_DIR = "data/cleaned"
DATA_PARQUET_DIR = "data/parquet"
@ -153,8 +153,9 @@ def _():
)
.drop("file")
.fill_null(0)
.head(LIMIT_ROWS) # FIXME: take out after debug
)
if LIMIT_ROWS: # NOTE: this is only for debugging purposes
df_pkg_lazy = df_pkg_lazy.head(LIMIT_ROWS)
# give small df preview
df_pkg_lazy.head(100).collect(engine="streaming")
return
@ -396,14 +397,13 @@ def _():
.alias("major_ver"),
pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
)
.head(LIMIT_ROWS) # FIXME: take out after debug
)
kernel_df_v99 = (
kernel_df_lazy.filter(pl.col("major_ver") == 99).collect(engine="streaming").select("date")
)
kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99)
(
lp.ggplot(
kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String))