Add mod time checking
This commit is contained in:
parent
42fd0e5a24
commit
97e4d256bb
1 changed files with 58 additions and 17 deletions
75
popcorn.py
75
popcorn.py
|
|
@ -5,10 +5,11 @@ app = marimo.App(width="medium")
|
|||
|
||||
with app.setup:
|
||||
# Initialization code that runs before all other cells
|
||||
import re
|
||||
|
||||
import lets_plot as lp
|
||||
import marimo as mo
|
||||
import polars as pl
|
||||
import re
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
|
|
@ -25,28 +26,21 @@ def _():
|
|||
except AttributeError:
|
||||
return None
|
||||
|
||||
sizes_df = (
|
||||
sizes_df_raw = (
|
||||
pl.read_csv("data/file_sizes.csv")
|
||||
.with_columns(
|
||||
pl.col("name")
|
||||
.str.replace(r"data/(\d{4}-\d{2}-\d{2}).json", "${1}")
|
||||
.str.to_date()
|
||||
.alias("date"),
|
||||
pl.col("size").map_elements(lambda x: parse_size(x), return_dtype=pl.Float32).alias("size_num")
|
||||
pl.col("size")
|
||||
.map_elements(lambda x: parse_size(x), return_dtype=pl.Float32)
|
||||
.alias("size_num"),
|
||||
)
|
||||
.select(["date", "size_num", "size", "modified"])
|
||||
)
|
||||
sizes_df_null = sizes_df.filter(pl.col("size_num").is_null())
|
||||
sizes_df = sizes_df.filter(pl.col("size").is_not_null())
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(sizes_df_null):
|
||||
sizes_df_null.select(["date", "size"]).style.tab_header(
|
||||
title="Missing Days",
|
||||
subtitle="Days with 0B size due to missing on the popcorn server.",
|
||||
)
|
||||
sizes_df = sizes_df_raw.filter(pl.col("size_num").is_not_null())
|
||||
return sizes_df, sizes_df_raw
|
||||
|
||||
|
||||
@app.cell
|
||||
|
|
@ -54,9 +48,10 @@ def _(sizes_df):
|
|||
(
|
||||
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
||||
+ lp.geom_point()
|
||||
+ lp.geom_smooth(method="lm")
|
||||
+ lp.labs(
|
||||
title="File sizes",
|
||||
subtitle="Size of daily popcorn files over time",
|
||||
title="Size growth",
|
||||
subtitle="Size of daily popcorn statistics files over time",
|
||||
caption="Raw json file size, without any formatting, removal of markers, characters or newlines.",
|
||||
)
|
||||
)
|
||||
|
|
@ -66,13 +61,59 @@ def _(sizes_df):
|
|||
def _(sizes_df):
|
||||
(
|
||||
lp.ggplot(sizes_df, lp.aes(x="date", y="size"))
|
||||
+ lp.geom_dotplot()
|
||||
+ lp.geom_point()
|
||||
+ lp.geom_smooth(method="lowess")
|
||||
+ lp.labs(
|
||||
title="",
|
||||
subtitle="",
|
||||
caption="",
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(
|
||||
r"""
|
||||
## Odds and Ends
|
||||
There are some missing days in the statistics.
|
||||
"""
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(sizes_df_raw):
|
||||
sizes_df_null = sizes_df_raw.filter(pl.col("size_num").is_null())
|
||||
sizes_df_null.select(["date", "size"]).style.tab_header(
|
||||
title="Missing Days",
|
||||
subtitle="Days with 0B size due to missing on the popcorn server.",
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(sizes_df):
|
||||
def _():
|
||||
different_modification_date = sizes_df.with_columns(
|
||||
pl.col("modified")
|
||||
.str.to_datetime(format="%F %T %:z", strict=False)
|
||||
.alias("modified_dt"),
|
||||
).filter(pl.col("date") != pl.col("modified_dt").dt.date())
|
||||
# This does not work well: what are we showing?
|
||||
# 'true' capture date on X but then what on Y - the
|
||||
# same date for each? the difference in dt?
|
||||
return (
|
||||
lp.ggplot(
|
||||
different_modification_date,
|
||||
lp.aes("date", "modified_dt"),
|
||||
)
|
||||
+ lp.geom_freqpoly()
|
||||
)
|
||||
|
||||
_()
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue