diff --git a/clean.py b/clean.py index 1cbecbf..3cb6707 100644 --- a/clean.py +++ b/clean.py @@ -2,33 +2,19 @@ import csv import json from datetime import datetime from pathlib import Path +from typing import Generator -def json_to_daily_pkg( - input_dir: Path, output_dir: Path, force: bool = False -) -> int | None: +def json_to_daily_pkg(input_dir: Path, output_dir: Path, force: bool = False) -> None: if not input_dir.is_dir(): raise ValueError output_dir.mkdir(exist_ok=True, parents=True) # only clean if raw files have been updated since last time - if not force: - last_raw_update: datetime = datetime(1, 1, 1) - oldest_clean: datetime = datetime.now() - for file in input_dir.glob("*.json"): - mdate = datetime.fromtimestamp(file.stat().st_mtime) - if mdate > last_raw_update: - last_raw_update = mdate - for file in output_dir.glob("*.csv"): - mdate = datetime.fromtimestamp(file.stat().st_mtime) - if mdate < oldest_clean: - oldest_clean = mdate - if oldest_clean > last_raw_update: - print( - "INFO: Skip creating cleaned data," - f" last processing {oldest_clean} newer than {last_raw_update}" - ) - return + if not force and not newer_than_last_clean( + input_dir.glob("*.json"), output_dir.glob("*.csv"), desc="daily packages" + ): + return for j in input_dir.glob("*.json"): with open(j) as fr: @@ -55,6 +41,29 @@ def json_to_daily_pkg( # print(output_file, p_date, p_name, p_count) +def newer_than_last_clean( + input_glob: Generator, output_glob: Generator, desc: str | None = None +) -> bool: + last_raw_update: datetime = datetime(1, 1, 1) + oldest_clean: datetime = datetime.now() + for file in input_glob: + mdate = datetime.fromtimestamp(file.stat().st_mtime) + if mdate > last_raw_update: + last_raw_update = mdate + for file in output_glob: + mdate = datetime.fromtimestamp(file.stat().st_mtime) + if mdate < oldest_clean: + oldest_clean = mdate + if oldest_clean > last_raw_update: + print( + "INFO: Skip creating cleaned data" + f"{f' for {desc}' if desc else ''}" + f", last processing {oldest_clean} newer than {last_raw_update}" + ) + return False + return True + + def main(input: str, output: str) -> None: json_to_daily_pkg(Path(input) / "daily", Path(output) / "daily", force=True)