From c9e12252fd0e57741703d51d2578ebb212ff60b2 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Wed, 8 Oct 2025 21:14:01 +0200 Subject: [PATCH] Re-enable in-text computations --- _quarto.yml | 2 +- notebooks/popcorn.py | 48 +++++++++++++++++++++++--------------------- popcorn.qmd | 21 +++++++++++++------ 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 497a539..a8ec81f 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -5,7 +5,7 @@ project: - popcorn.qmd execute: - cache: true + cache: false theme: # light: flatly diff --git a/notebooks/popcorn.py b/notebooks/popcorn.py index df90527..bfc39dd 100644 --- a/notebooks/popcorn.py +++ b/notebooks/popcorn.py @@ -455,15 +455,9 @@ def plt_kernel_versions(): .alias("major_ver"), pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"), ) + .filter(pl.col("major_ver") != 99) ) - kernel_df_v99 = ( - kernel_df_lazy.filter(pl.col("major_ver") == 99) - .collect(engine="streaming") - .select("date") - ) - kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99) - ( lp.ggplot( kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String)) @@ -482,23 +476,31 @@ def plt_kernel_versions(): return -@app.cell(hide_code=True) -def _(kernel_df_v99: pl.DataFrame): - mo.md( - rf""" - - When looking at the kernel versions used, we see a very strong jump between major kernel version - 4 and major kernel version 5. - - For this analysis we had to exclude {kernel_df_v99.select(pl.len()).item()} rows which were - apparently from the future, as they were running variations of major kernel version 99. In all - likelihood there is a custom kernel version out there which reports its own major version as 99. - The strange version starts appearing on {kernel_df_v99.select("date").row(0)[0]} and shows up - all the way until {kernel_df_v99.select("date").row(-1)[0]}. 
- """ +@app.cell +def df_kernel_v99(): # notebook cell: collects the dates of all rows whose kernel reports major version 99 + kernel_df_v99 = ( + pl.scan_csv( + f"{DATA_DIR}/kernels.csv", + schema={ + "date": pl.Date, + "kernel": pl.String, + "downloads": pl.UInt16, + }, + ) + .fill_null(0) + .with_columns(pl.col("kernel").str.replace(r"(\d+\.\d+\.\d+).*", "${1}")) + .with_columns( + pl.col("kernel") + .str.replace(r"(\d+).*", "${1}") + .str.to_integer(dtype=pl.UInt8) + .alias("major_ver"), + pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"), + ) + .filter(pl.col("major_ver") == 99) + .collect(engine="streaming") + .select("date") ) - return + kernel_df_v99 @app.cell diff --git a/popcorn.qmd b/popcorn.qmd index 30aa8cf..f2b7893 100644 --- a/popcorn.qmd +++ b/popcorn.qmd @@ -321,11 +321,11 @@ thirty_plus = df_pkg_dl.sort("count", descending=False).filter((pl.col("count") ``` To be more precise with the numbers: -There are `python f"{get_num(one_ten_installs):,}"` packages which have between one +There are `{python} f"{get_num(one_ten_installs):,}"` packages which have between one -and ten installations in the data, `python f"{get_num(ten_twenty_installs):,}"` packages +and ten installations in the data, `{python} f"{get_num(ten_twenty_installs):,}"` packages between eleven and 20 installations, and -`python f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations. -`python f"{get_num(thirty_plus):,}"` packages have over 30 installations. +`{python} f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations. +`{python} f"{get_num(thirty_plus):,}"` packages have over 30 installations. For now, these are the explorations I have done for the package data collected. I think it is interesting to see, especially the evolution of package installations over time, @@ -384,13 +384,22 @@ and kernel 6 right after the report slump happened. This, in all likelihood, accounts for the slight imbalance between the numbers, and will shift over the coming months. 
+```{python} +from notebooks.popcorn import df_kernel_v99 +_, defs = df_kernel_v99.run() # pyright: ignore +kernel_df_v99 = defs["kernel_df_v99"] +v99_num_rows = str(kernel_df_v99.select(pl.len()).item()) +v99_start_date = str(kernel_df_v99.select("date").row(0)[0]) # str(), not a nested-quote f-string: that form needs Python >= 3.12 +v99_end_date = str(kernel_df_v99.select("date").row(-1)[0]) +``` + Just like with kernel suffixes, for this analysis we also had to exclude -{kernel_df_v99.select(pl.len()).item()} rows which were apparently from the +`{python} v99_num_rows` rows which were apparently from the future --- as they were running variations of major kernel version 99. In all likelihood there is a custom compiled kernel version out there which reports its own major version as 99. The strange version starts appearing on -{kernel_df_v99.select("date").row(0)0} and shows up all the way until -{kernel_df_v99.select("date").row(-1)[0]}. +`{python} v99_start_date` and shows up all the way until +`{python} v99_end_date`. Let's turn to the actual adoption of kernels over time in the next visualization.