Re-enable in-text computations

This commit is contained in:
Marty Oehme 2025-10-08 21:14:01 +02:00
parent 2c5cf37b2c
commit c9e12252fd
Signed by: Marty
GPG key ID: 4E535BC19C61886E
3 changed files with 41 additions and 30 deletions

View file

@ -5,7 +5,7 @@ project:
- popcorn.qmd - popcorn.qmd
execute: execute:
cache: true cache: false
theme: theme:
# light: flatly # light: flatly

View file

@ -455,15 +455,9 @@ def plt_kernel_versions():
.alias("major_ver"), .alias("major_ver"),
pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"), pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
) )
.filter(pl.col("major_ver") != 99)
) )
kernel_df_v99 = (
kernel_df_lazy.filter(pl.col("major_ver") == 99)
.collect(engine="streaming")
.select("date")
)
kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99)
( (
lp.ggplot( lp.ggplot(
kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String)) kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String))
@ -482,23 +476,31 @@ def plt_kernel_versions():
return return
@app.cell(hide_code=True) @app.cell
def _(kernel_df_v99: pl.DataFrame): def df_kernel_v99():
mo.md( kernel_df_v99 = (
rf""" pl.scan_csv(
f"{DATA_DIR}/kernels.csv",
When looking at the kernel versions used, we see a very strong jump between major kernel version schema={
4 and major kernel version 5. "date": pl.Date,
"kernel": pl.String,
For this analysis we had to exclude {kernel_df_v99.select(pl.len()).item()} rows which were "downloads": pl.UInt16,
apparently from the future, as they were running variations of major kernel version 99. In all },
likelihood there is a custom kernel version out there which reports its own major version as 99.
The strange version starts appearing on {kernel_df_v99.select("date").row(0)[0]} and shows up
all the way until {kernel_df_v99.select("date").row(-1)[0]}.
"""
) )
return .fill_null(0)
.with_columns(pl.col("kernel").str.replace(r"(\d+\.\d+\.\d+).*", "${1}"))
.with_columns(
pl.col("kernel")
.str.replace(r"(\d+).*", "${1}")
.str.to_integer(dtype=pl.UInt8)
.alias("major_ver"),
pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
)
.filter(pl.col("major_ver") == 99)
.collect(engine="streaming")
.select("date")
)
kernel_df_v99
@app.cell @app.cell

View file

@ -321,11 +321,11 @@ thirty_plus = df_pkg_dl.sort("count", descending=False).filter((pl.col("count")
``` ```
To be more precise with the numbers: To be more precise with the numbers:
There are `python f"{get_num(one_ten_installs):,}"` packages which have between one There are `{python} f"{get_num(one_ten_installs):,}"` packages which have between one
and ten installations in the data, `python f"{get_num(ten_twenty_installs):,}"` and ten installations in the data, `{python} f"{get_num(ten_twenty_installs):,}"`
packages between eleven and 20 installations, and packages between eleven and 20 installations, and
`python f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations. `{python} f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations.
`python f"{get_num(thirty_plus):,}"` packages have over 30 installations. `{python} f"{get_num(thirty_plus):,}"` packages have over 30 installations.
For now, these are the explorations I have done for the package data collected. For now, these are the explorations I have done for the package data collected.
I think it is interesting to see, especially the evolution of package installations over time, I think it is interesting to see, especially the evolution of package installations over time,
@ -384,13 +384,22 @@ and kernel 6 right after the report slump happened.
This, in all likelihood, accounts for the slight imbalance between the numbers, This, in all likelihood, accounts for the slight imbalance between the numbers,
and will shift over the coming months. and will shift over the coming months.
```{python}
from notebooks.popcorn import df_kernel_v99
_, defs = df_kernel_v99.run() # pyright: ignore
kernel_df_v99 = defs["kernel_df_v99"]
v99_num_rows = f"{kernel_df_v99.select(pl.len()).item()}"
v99_start_date = f"{kernel_df_v99.select("date").row(0)[0]}"
v99_end_date = f"{kernel_df_v99.select("date").row(-1)[0]}"
```
Just like with kernel suffixes, for this analysis we also had to exclude Just like with kernel suffixes, for this analysis we also had to exclude
{kernel_df_v99.select(pl.len()).item()} rows which were apparently from the `{python} v99_num_rows` rows which were apparently from the
future --- as they were running variations of major kernel version 99. In all future --- as they were running variations of major kernel version 99. In all
likelihood there is a custom compiled kernel version out there which reports its own likelihood there is a custom compiled kernel version out there which reports its own
major version as 99. The strange version starts appearing on major version as 99. The strange version starts appearing on
{kernel_df_v99.select("date").row(0)[0]} and shows up all the way until `{python} v99_start_date` and shows up all the way until
{kernel_df_v99.select("date").row(-1)[0]}. `{python} v99_end_date`.
Let's turn to the actual adoption of kernels over time in the next visualization. Let's turn to the actual adoption of kernels over time in the next visualization.