diff --git a/popcorn.py b/popcorn.py index e05abd6..f5f3fca 100644 --- a/popcorn.py +++ b/popcorn.py @@ -45,6 +45,39 @@ def _(): return sizes_df, sizes_df_raw +@app.cell(hide_code=True) +def _(): + mo.md( + r""" + ## Daily statistics file size + + The simplest operation we can do is look at the overall file size for each + of the daily statistics files over time. The files consist of a long list + of packages which have been downloaded from the repositories that day, + along with the number of downloads. They also contain the same list + broken down by the specific package versions downloaded, so if somebody + downloads v0.9.1 and somebody else downloads v0.9.3, both downloads are + counted separately. + + Another count is the number of different kernels that have been used to + download (or downloaded?) from the repositories. + + These are the major things that will lead to size increases in the file, + but not simply because of an increased number of downloads --- we will get to those shortly. + + No, an increase in file size here mainly suggests an increase in the + 'breadth' of files on offer in the repository, whether that be a wider + variety of program versions or more distinct packages that people are + interested in. + + So while the overall number of downloads gives a general estimate of the + interest in the distribution, this can show a more 'distributor'-aligned + view on how many different aisles of the buffet people are eating from. + """ + ) + return + + @app.cell def _(sizes_df): ( @@ -57,6 +90,21 @@ def _(sizes_df): caption="Raw json file size, without any formatting, removal of markers, characters or newlines.", ) ) + return + + +@app.cell(hide_code=True) +def _(): + mo.md( + r""" + + As we can see, the difference over time is massive. Especially early on, + between 2019 and the start of 2021, the variety of packages and versions + downloaded grew rapidly, with the pace picking up again starting in 2023. + + """ + ) + return @app.cell