Parse JSON Package downloads
This commit is contained in:
parent 97e4d256bb · commit cbda8dca55
1 changed file with 34 additions and 3 deletions
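Sketched below is the reshaping the new cell performs, reduced to a tiny in-memory frame instead of the data/daily/*.json files. The sample package names and download counts are made up for illustration, and it assumes polars is available as pl (the notebook imports it outside the hunks shown here). The date is recovered from each file name with a regex capture group, the per-package "Packages" struct is unnested into one column per package, and the wide table is unpivoted into one date/package/downloads row per entry.

import polars as pl

# Hypothetical sample rows standing in for two daily JSON dumps (made-up values).
raw = pl.LazyFrame(
    {
        "file": ["data/daily/2024-01-01.json", "data/daily/2024-01-02.json"],
        "Packages": [
            {"linux-lts": 120, "linux-zen": 30},
            {"linux-lts": 150, "linux-zen": None},
        ],
    }
)

long = (
    raw.with_columns(
        pl.col("file")
        .str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}")  # keep only the date part
        .str.to_date()
        .alias("date")
    )
    .select("date", pl.col("Packages").struct.unnest())  # one column per package
    .fill_null(0)  # days where a package is absent count as zero downloads
    .unpivot(index="date", variable_name="package", value_name="downloads")
    .collect()
)
print(long)  # long format: date | package | downloads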
popcorn.py (+34 −3)
@@ -5,7 +5,9 @@ app = marimo.App(width="medium")
with app.setup:
    # Initialization code that runs before all other cells
    import json
    import re
    from pathlib import Path

    import lets_plot as lp
    import marimo as mo

@@ -58,15 +60,31 @@ def _(sizes_df):
@app.cell
def _(sizes_df):
    def _():
        df = (
            pl.scan_ndjson("data/daily/*", include_file_paths="file")
            .head(200)  # FIXME: take out after debug
            .with_columns(
                pl.col("file")
                .str.replace(r"data/daily/(\d{4}-\d{2}-\d{2}).json", "${1}")
                .str.to_date()
                .alias("date")
            )
            .select("date", pl.col("Packages").struct.unnest())
            .fill_null(0)
            .unpivot(index="date", variable_name="package", value_name="downloads")
            .collect()
        )
        df
        return

    (
        lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
        + lp.geom_point()
        + lp.geom_smooth(method="lowess")
        + lp.labs(
            title="",
            subtitle="",
            caption="",
        )
    )
    return

@@ -116,5 +134,18 @@ def _(sizes_df):
    return


# further ideas:
# - which kernels have been DL when? (simplified for semver)
# - when did specific kernels enter the repos?
#
# - which arches are/were most prevalent over time?
# - have the arches been mostly even relative to each other?
#
# - what does unique install mean?
#
# - which Packages had the most unique versions, least versions
# - which pkg had the most download of a single version?
# - for which pkg were the version dls the most spread out?

if __name__ == "__main__":
    app.run()