Finish package stats section
This commit is contained in:
parent
707632fb7d
commit
9687eb662b
2 changed files with 255 additions and 32 deletions
|
|
@ -82,10 +82,31 @@ def plt_filesize(sizes_df):
|
|||
+ lp.geom_point()
|
||||
+ lp.geom_smooth(method="lm")
|
||||
+ lp.labs(
|
||||
title="Size growth",
|
||||
subtitle="Cumulative filesize of daily popcorn statistics over time",
|
||||
title="Report size",
|
||||
subtitle="Filesize of popcorn statistics reports each day",
|
||||
caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
|
||||
y="filesize in kB",
|
||||
y="filesize in KB",
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def plt_filesize_cumulative(sizes_df: pl.DataFrame):
    # Line plot of the running total of the "filesize" column over "date".
    #
    # The running total is divided by 1024 twice to match the "MB" axis label
    # (presumably the raw values are bytes -- confirm against the loader).
    # The sum follows the row order of sizes_df exactly as it is passed in.
    _running_total = (pl.col("filesize").cum_sum() / 1024 / 1024).alias("filesize_cum")
    _cumulative_df = sizes_df.with_columns(_running_total)

    _axes = lp.aes(x="date", y="filesize_cum")
    _labels = lp.labs(
        title="Report size growth",
        subtitle="Cumulative filesize of all popcorn statistics reports up to that day",
        caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
        y="filesize in MB",
    )

    # Kept as a bare trailing expression rather than assigned (presumably what
    # the notebook renders for this cell). A linear geom_smooth(method="lm")
    # layer is deliberately not part of the plot.
    lp.ggplot(_cumulative_df, _axes) + lp.geom_line() + _labels
    return
|
||||
|
|
@ -293,14 +314,17 @@ def plt_unique_installs():
|
|||
)
|
||||
(
|
||||
lp.ggplot(
|
||||
df_unique_installs,
|
||||
df_unique_installs.sort("date")
|
||||
.group_by_dynamic("date", every="1w")
|
||||
.agg(pl.col("unique").mean()),
|
||||
lp.aes("date", "unique"),
|
||||
)
|
||||
+ lp.geom_line()
|
||||
+ lp.geom_smooth()
|
||||
+ lp.geom_smooth(method="loess")
|
||||
+ lp.labs(
|
||||
title="Unique daily uploads",
|
||||
caption="Daily number of unique providers for package update statistics opting in to popcorn.",
|
||||
title="Unique installations",
|
||||
subtitle="Weekly statistics upload averages",
|
||||
caption="Daily number of unique providers for package update statistics opting in to data collection.",
|
||||
)
|
||||
)
|
||||
return
|
||||
|
|
@ -317,6 +341,7 @@ def plt_top_packages(df_pkg_lazy: pl.LazyFrame):
|
|||
[
|
||||
lp.ggplot(
|
||||
df_pkg_dl.sort("count", descending=True)
|
||||
.filter(pl.col("package") != "PopCorn")
|
||||
.head(DISPLAY_LIMIT)
|
||||
.collect(engine="streaming"),
|
||||
lp.aes("package", "count"),
|
||||
|
|
@ -346,6 +371,16 @@ def plt_top_packages(df_pkg_lazy: pl.LazyFrame):
|
|||
return
|
||||
|
||||
|
||||
@app.cell
def tab_rarest_packages(df_pkg_dl: pl.LazyFrame):
    # Table of the rows in df_pkg_dl whose "count" is exactly 1.
    #
    # Step 1: order the lazy frame by ascending "count".
    _ascending = df_pkg_dl.sort("count", descending=False)

    # Step 2: keep only rows with a count of one. The cut-off seems arbitrary
    # but was observed to give a better result.
    _seen_once = _ascending.filter(pl.col("count") == 1)

    # Step 3: materialise with the streaming engine. Left as a bare trailing
    # expression rather than assigned.
    _seen_once.collect(engine="streaming")
    return
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def plt_package_distribution(df_pkg_dl: pl.LazyFrame):
|
||||
def _():
|
||||
|
|
@ -360,6 +395,28 @@ def plt_package_distribution(df_pkg_dl: pl.LazyFrame):
|
|||
_()
|
||||
return
|
||||
|
||||
@app.cell
def tab_percentiles(df_pkg_dl: pl.LazyFrame):
    # Tabulate how many rows of df_pkg_dl fall into each "count" bucket.
    #
    # Buckets are half-open intervals [lower, upper); an upper bound of None
    # means the bucket is open-ended. The labels assume integer counts.
    _buckets = (
        ("1-9", 1, 10),
        ("10-19", 10, 20),
        ("20-29", 20, 30),
        ("30+", 30, None),
    )

    def _bucket_size(lower, upper):
        # Build an expression counting rows with lower <= count < upper.
        # Summing a boolean mask counts the rows where it is true; null
        # comparisons are ignored by the sum, just as a filter would drop them.
        in_bucket = pl.col("count") >= lower
        if upper is not None:
            in_bucket = in_bucket & (pl.col("count") < upper)
        return in_bucket.sum()

    # All bucket sizes come from one streaming pass. No sort is needed because
    # only the number of matching rows matters, not their order.
    _sizes = df_pkg_dl.select(
        [_bucket_size(lower, upper).alias(label) for label, lower, upper in _buckets]
    ).collect(engine="streaming")

    # Bare trailing expression: one labelled row per bucket, in bucket order.
    pl.DataFrame(
        {
            "installs": [label for label, _, _ in _buckets],
            "packages": [_sizes.item(0, label) for label, _, _ in _buckets],
        }
    )
    return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue