Re-enable in-text computations

This commit is contained in:
Marty Oehme 2025-10-08 21:14:01 +02:00
parent 2c5cf37b2c
commit c9e12252fd
Signed by: Marty
GPG key ID: 4E535BC19C61886E
3 changed files with 41 additions and 30 deletions

View file

@ -5,7 +5,7 @@ project:
- popcorn.qmd
execute:
cache: true
cache: false
theme:
# light: flatly

View file

@ -455,15 +455,9 @@ def plt_kernel_versions():
.alias("major_ver"),
pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
)
.filter(pl.col("major_ver") != 99)
)
kernel_df_v99 = (
kernel_df_lazy.filter(pl.col("major_ver") == 99)
.collect(engine="streaming")
.select("date")
)
kernel_df_lazy = kernel_df_lazy.filter(pl.col("major_ver") != 99)
(
lp.ggplot(
kernel_df_lazy.with_columns(pl.col("major_ver").cast(pl.String))
@ -482,23 +476,31 @@ def plt_kernel_versions():
return
@app.cell(hide_code=True)
def _(kernel_df_v99: pl.DataFrame):
mo.md(
rf"""
When looking at the kernel versions used, we see a very strong jump between major kernel version
4 and major kernel version 5.
For this analysis we had to exclude {kernel_df_v99.select(pl.len()).item()} rows which were
apparently from the future, as they were running variations of major kernel version 99. In all
likelihood there is a custom kernel version out there which reports its own major version as 99.
The strange version starts appearing on {kernel_df_v99.select("date").row(0)[0]} and shows up
all the way until {kernel_df_v99.select("date").row(-1)[0]}.
"""
@app.cell
def df_kernel_v99():
kernel_df_v99 = (
pl.scan_csv(
f"{DATA_DIR}/kernels.csv",
schema={
"date": pl.Date,
"kernel": pl.String,
"downloads": pl.UInt16,
},
)
return
.fill_null(0)
.with_columns(pl.col("kernel").str.replace(r"(\d+\.\d+\.\d+).*", "${1}"))
.with_columns(
pl.col("kernel")
.str.replace(r"(\d+).*", "${1}")
.str.to_integer(dtype=pl.UInt8)
.alias("major_ver"),
pl.col("kernel").str.replace(r"(\d+\.\d+).*", "${1}").alias("minor_ver"),
)
.filter(pl.col("major_ver") == 99)
.collect(engine="streaming")
.select("date")
)
kernel_df_v99
@app.cell

View file

@ -321,11 +321,11 @@ thirty_plus = df_pkg_dl.sort("count", descending=False).filter((pl.col("count")
```
To be more precise with the numbers:
There are `python f"{get_num(one_ten_installs):,}"` packages which have between one
There are `{python} f"{get_num(one_ten_installs):,}"` packages which have between one
and ten installations in the data, `{python} f"{get_num(ten_twenty_installs):,}"`
packages between eleven and 20 installations, and
`python f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations.
`python f"{get_num(thirty_plus):,}"` packages have over 30 installations.
`{python} f"{get_num(twenty_thirty):,}"` packages between 21 and 30 installations.
`{python} f"{get_num(thirty_plus):,}"` packages have over 30 installations.
For now, these are the explorations I have done for the package data collected.
I think it is interesting to see, especially the evolution of package installations over time,
@ -384,13 +384,22 @@ and kernel 6 right after the report slump happened.
This, in all likelihood, accounts for the slight imbalance between the numbers,
and will shift over the coming months.
```{python}
# Execute the `df_kernel_v99` notebook cell and pull the resulting frame out of
# the definitions it hands back (second element of the returned pair).
from notebooks.popcorn import df_kernel_v99

_, defs = df_kernel_v99.run()  # pyright: ignore
kernel_df_v99 = defs["kernel_df_v99"]

# Pre-render the values as plain strings for the inline `{python}` expressions
# in the paragraph that follows. str() yields the same text as a lone f-string
# placeholder, and avoids reusing double quotes inside a double-quoted
# f-string, which only parses on Python 3.12 and newer.
v99_num_rows = str(kernel_df_v99.select(pl.len()).item())
v99_start_date = str(kernel_df_v99.select("date").row(0)[0])
v99_end_date = str(kernel_df_v99.select("date").row(-1)[0])
```
Just like with kernel suffixes, for this analysis we also had to exclude
{kernel_df_v99.select(pl.len()).item()} rows which were apparently from the
`{python} v99_num_rows` rows which were apparently from the
future --- as they were running variations of major kernel version 99. In all
likelihood there is a custom compiled kernel version out there which reports its own
major version as 99. The strange version starts appearing on
{kernel_df_v99.select("date").row(0)[0]} and shows up all the way until
{kernel_df_v99.select("date").row(-1)[0]}.
`{python} v99_start_date` and shows up all the way until
`{python} v99_end_date`.
Let's turn to the actual adoption of kernels over time in the next visualization.