diff --git a/popcorn.py b/popcorn.py index 5d945ec..d095f4d 100644 --- a/popcorn.py +++ b/popcorn.py @@ -16,7 +16,12 @@ with app.setup: @app.cell(hide_code=True) def _(): - mo.md(r"""# Void Linux 'Popcorn' package repository stat analysis""") + mo.md(r"""# Void Linux 'Popcorn' package repository stat analysis + + This notebook analyses the daily package repository statistics files, + colloquially known as 'popcorn' files, that are generated by the Void Linux + package manager `xbps` and uploaded by users who have opted in to share. + """) return @@ -102,6 +107,10 @@ def _(): between 2019 and the start of 2021, the amount of different stuff downloaded grew rapidly, with the pace picking up again starting 2023. + There are a few outliers with a size of 0 kB, which we will remove from the + data. There are also a few days where the modification date of the file + does not correspond to the represented statistical date. + """ ) return @@ -127,6 +136,23 @@ def _(): return +@app.cell(hide_code=True) +def _(): + mo.md( + r""" + ## Download statistics + + Now that we have an idea of how the overall interest in the distribution + has changed over time, let's look at the actual download statistics. + + The popcorn files contain two main pieces of information: the number of + unique installs (i.e. unique machines downloading packages) and the number + of downloads per package. We will look at both of these in turn. + """ + ) + return + + @app.cell def _(df_pkg_lazy: pl.LazyFrame): def _(): @@ -299,6 +325,12 @@ def _(sizes_df): return +@app.cell +def _(df_lazy): + kernel_df = df_lazy.select("date", pl.col("Kernels").struct.unnest()) + kernel_df + + # further ideas: # # - daily download habits: