Add mod time checking
This commit is contained in:
parent
42fd0e5a24
commit
97e4d256bb
1 changed file with 58 additions and 17 deletions
75
popcorn.py
75
popcorn.py
|
|
@ -5,10 +5,11 @@ app = marimo.App(width="medium")
|
||||||
|
|
||||||
with app.setup:
|
with app.setup:
|
||||||
# Initialization code that runs before all other cells
|
# Initialization code that runs before all other cells
|
||||||
|
import re
|
||||||
|
|
||||||
import lets_plot as lp
|
import lets_plot as lp
|
||||||
import marimo as mo
|
import marimo as mo
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
|
|
@ -25,28 +26,21 @@ def _():
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
sizes_df = (
|
sizes_df_raw = (
|
||||||
pl.read_csv("data/file_sizes.csv")
|
pl.read_csv("data/file_sizes.csv")
|
||||||
.with_columns(
|
.with_columns(
|
||||||
pl.col("name")
|
pl.col("name")
|
||||||
.str.replace(r"data/(\d{4}-\d{2}-\d{2}).json", "${1}")
|
.str.replace(r"data/(\d{4}-\d{2}-\d{2}).json", "${1}")
|
||||||
.str.to_date()
|
.str.to_date()
|
||||||
.alias("date"),
|
.alias("date"),
|
||||||
pl.col("size").map_elements(lambda x: parse_size(x), return_dtype=pl.Float32).alias("size_num")
|
pl.col("size")
|
||||||
|
.map_elements(lambda x: parse_size(x), return_dtype=pl.Float32)
|
||||||
|
.alias("size_num"),
|
||||||
)
|
)
|
||||||
.select(["date", "size_num", "size", "modified"])
|
.select(["date", "size_num", "size", "modified"])
|
||||||
)
|
)
|
||||||
sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
|
sizes_df = sizes_df_raw.filter(pl.col("size_num").is_not_null())
|
||||||
sizes_df = sizes_df.filter(pl.col("size").is_not_null())
|
return sizes_df, sizes_df_raw
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
|
||||||
def _(sizes_df_null):
|
|
||||||
sizes_df_null.select(["date", "size"]).style.tab_header(
|
|
||||||
title="Missing Days",
|
|
||||||
subtitle="Days with 0B size due to missing on the popcorn server.",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
|
|
@ -54,9 +48,10 @@ def _(sizes_df):
|
||||||
(
|
(
|
||||||
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
||||||
+ lp.geom_point()
|
+ lp.geom_point()
|
||||||
|
+ lp.geom_smooth(method="lm")
|
||||||
+ lp.labs(
|
+ lp.labs(
|
||||||
title="File sizes",
|
title="Size growth",
|
||||||
subtitle="Size of daily popcorn files over time",
|
subtitle="Size of daily popcorn statistics files over time",
|
||||||
caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
|
caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -66,13 +61,59 @@ def _(sizes_df):
|
||||||
def _(sizes_df):
|
def _(sizes_df):
|
||||||
(
|
(
|
||||||
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
||||||
+ lp.geom_dotplot()
|
+ lp.geom_point()
|
||||||
|
+ lp.geom_smooth(method="lowess")
|
||||||
+ lp.labs(
|
+ lp.labs(
|
||||||
title="",
|
title="",
|
||||||
subtitle="",
|
subtitle="",
|
||||||
caption="",
|
caption="",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _():
|
||||||
|
mo.md(
|
||||||
|
r"""
|
||||||
|
## Odds and Ends
|
||||||
|
There are some missing days in the statistics.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(sizes_df_raw):
|
||||||
|
sizes_df_null = sizes_df_raw.filter(pl.col("size_num").is_null())
|
||||||
|
sizes_df_null.select(["date", "size"]).style.tab_header(
|
||||||
|
title="Missing Days",
|
||||||
|
subtitle="Days with 0B size due to missing on the popcorn server.",
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(sizes_df):
|
||||||
|
def _():
|
||||||
|
different_modification_date = sizes_df.with_columns(
|
||||||
|
pl.col("modified")
|
||||||
|
.str.to_datetime(format="%F %T %:z", strict=False)
|
||||||
|
.alias("modified_dt"),
|
||||||
|
).filter(pl.col("date") != pl.col("modified_dt").dt.date())
|
||||||
|
# This does not work well: what are we showing?
|
||||||
|
# 'true' capture date on X but then what on Y - the
|
||||||
|
# same date for each? the difference in dt?
|
||||||
|
return (
|
||||||
|
lp.ggplot(
|
||||||
|
different_modification_date,
|
||||||
|
lp.aes("date", "modified_dt"),
|
||||||
|
)
|
||||||
|
+ lp.geom_freqpoly()
|
||||||
|
)
|
||||||
|
|
||||||
|
_()
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue