Add mod time checking

This commit is contained in:
Marty Oehme 2025-09-27 11:34:52 +02:00
parent 42fd0e5a24
commit 97e4d256bb
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -5,10 +5,11 @@ app = marimo.App(width="medium")
with app.setup: with app.setup:
# Initialization code that runs before all other cells # Initialization code that runs before all other cells
import re
import lets_plot as lp import lets_plot as lp
import marimo as mo import marimo as mo
import polars as pl import polars as pl
import re
@app.cell(hide_code=True) @app.cell(hide_code=True)
@ -25,28 +26,21 @@ def _():
except AttributeError: except AttributeError:
return None return None
sizes_df = ( sizes_df_raw = (
pl.read_csv("data/file_sizes.csv") pl.read_csv("data/file_sizes.csv")
.with_columns( .with_columns(
pl.col("name") pl.col("name")
.str.replace(r"data/(\d{4}-\d{2}-\d{2}).json", "${1}") .str.replace(r"data/(\d{4}-\d{2}-\d{2}).json", "${1}")
.str.to_date() .str.to_date()
.alias("date"), .alias("date"),
pl.col("size").map_elements(lambda x: parse_size(x), return_dtype=pl.Float32).alias("size_num") pl.col("size")
.map_elements(lambda x: parse_size(x), return_dtype=pl.Float32)
.alias("size_num"),
) )
.select(["date", "size_num", "size", "modified"]) .select(["date", "size_num", "size", "modified"])
) )
sizes_df_null = sizes_df.filter(pl.col("size_num").is_null()) sizes_df = sizes_df_raw.filter(pl.col("size_num").is_not_null())
sizes_df = sizes_df.filter(pl.col("size").is_not_null()) return sizes_df, sizes_df_raw
return
@app.cell
def _(sizes_df_null):
sizes_df_null.select(["date", "size"]).style.tab_header(
title="Missing Days",
subtitle="Days with 0B size due to missing on the popcorn server.",
)
@app.cell @app.cell
@ -54,9 +48,10 @@ def _(sizes_df):
( (
lp.ggplot(sizes_df, lp.aes(x="date", y="size")) lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
+ lp.geom_point() + lp.geom_point()
+ lp.geom_smooth(method="lm")
+ lp.labs( + lp.labs(
title="File sizes", title="Size growth",
subtitle="Size of daily popcorn files over time", subtitle="Size of daily popcorn statistics files over time",
caption="Raw json file size, without any formatting, removal of markers, characters or newlines.", caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
) )
) )
@ -66,13 +61,59 @@ def _(sizes_df):
def _(sizes_df): def _(sizes_df):
( (
lp.ggplot(sizes_df, lp.aes(x="date", y="size")) lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
+ lp.geom_dotplot() + lp.geom_point()
+ lp.geom_smooth(method="lowess")
+ lp.labs( + lp.labs(
title="", title="",
subtitle="", subtitle="",
caption="", caption="",
) )
) )
return
# Notebook cell with no inputs from other cells: passes a raw Markdown string
# (an "Odds and Ends" heading plus a one-line note about missing days) to
# mo.md().
# NOTE(review): presumably the mo.md(...) result is what the cell displays --
# indentation is not preserved in this view, confirm against the notebook.
@app.cell
def _():
mo.md(
r"""
## Odds and Ends
There are some missing days in the statistics.
"""
)
return
# Notebook cell: tabulates the rows of sizes_df_raw that have no numeric size.
# Takes sizes_df_raw (the frame before null sizes are filtered out) as input
# and returns nothing to other cells.
@app.cell
def _(sizes_df_raw):
# Keep only the rows whose "size_num" value is null.
sizes_df_null = sizes_df_raw.filter(pl.col("size_num").is_null())
# Reduce to the "date" and raw "size" text columns and attach a table header
# (title and subtitle) through the frame's .style accessor.
sizes_df_null.select(["date", "size"]).style.tab_header(
title="Missing Days",
subtitle="Days with 0B size due to missing on the popcorn server.",
)
return
# Notebook cell: finds rows of sizes_df whose "modified" timestamp falls on a
# different calendar date than their "date" column, and builds a plot of them.
# Takes sizes_df as input and returns nothing to other cells.
@app.cell
def _(sizes_df):
# The work lives in an inner function that is called once at the end, so the
# intermediate frame stays local to that function.
def _():
# Parse the "modified" text into a datetime column "modified_dt". With
# strict=False, values that do not match the format are not raised as errors
# (presumably they become null -- confirm against the polars docs). Then keep
# only the rows where "date" differs from the date part of "modified_dt".
# NOTE(review): the format includes a UTC offset (%:z); if polars normalizes
# such values to UTC, .dt.date() is the UTC date and timestamps near midnight
# could be reported as "different" -- confirm.
different_modification_date = sizes_df.with_columns(
pl.col("modified")
.str.to_datetime(format="%F %T %:z", strict=False)
.alias("modified_dt"),
).filter(pl.col("date") != pl.col("modified_dt").dt.date())
# TODO: this plot does not work well yet -- what are we actually showing?
# The 'true' capture date goes on X, but what belongs on Y: the same date
# for each row, or the difference between the two datetimes?
return (
lp.ggplot(
different_modification_date,
lp.aes("date", "modified_dt"),
)
+ lp.geom_freqpoly()
)
# Call the inner function once.
_()
return
if __name__ == "__main__": if __name__ == "__main__":