"""Collect per-file ``XuKernel`` download counts from JSON files into one CSV."""

import csv
import json
from collections.abc import Generator
from datetime import datetime
from pathlib import Path
from typing import Any, cast


def _iter_kernel_rows(json_file: Path) -> Generator[tuple[str, str, int], None, None]:
    """Yield one ``(date, kernel, downloads)`` row per ``XuKernel`` entry.

    The ``date`` value is simply the file name without its suffix
    (presumably an ISO date -- it is not validated here).  Files that cannot
    be decoded, or that do not hold a JSON object with a dict-valued
    ``"XuKernel"`` member, produce a warning on stdout and no rows.
    """
    try:
        with json_file.open(encoding="utf-8") as fr:
            data = json.load(fr)
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"WARN: Could not decode JSON data for file {json_file}")
        return

    # Check the top-level type first: a membership test on a scalar/null, or
    # string-indexing a list, would raise TypeError instead of being skipped.
    kernels = data.get("XuKernel") if isinstance(data, dict) else None
    if not isinstance(kernels, dict):
        print(
            f"WARN: No correct json structure containing 'XuKernel' field in file {json_file}"
        )
        return

    date = json_file.stem
    for name, count in cast(dict[str, Any], kernels).items():
        # Counts are written as found; they are not checked to be integers.
        yield date, cast(str, name), cast(int, count)


def package_kernel_csv(input_dir: Path, output_dir: Path) -> None:
    """Write ``output_dir / "kernels.csv"`` from the ``*.json`` files in *input_dir*.

    The CSV has the header ``date,kernel,downloads`` followed by one row per
    entry of each file's ``"XuKernel"`` object.  Files are processed in sorted
    path order so the output is reproducible; malformed files are skipped
    with a warning.  Only the top level of *input_dir* is searched.

    Args:
        input_dir: Directory containing the JSON files.
        output_dir: Existing directory that receives ``kernels.csv``
            (an existing file of that name is overwritten).
    """
    output_file = output_dir / "kernels.csv"
    # newline="" is required by the csv module; without it, platforms that
    # translate "\n" would emit "\r\r\n" line endings.
    with output_file.open("w", newline="", encoding="utf-8") as fw:
        writer = csv.writer(fw)
        writer.writerow(["date", "kernel", "downloads"])

        for json_file in sorted(input_dir.glob("*.json")):
            writer.writerows(_iter_kernel_rows(json_file))


def ensure_dirs(input_dir: Path, output_dir: Path) -> None:
    """Validate *input_dir* and create *output_dir* (with parents) if needed.

    Raises:
        ValueError: If *input_dir* is not an existing directory.
    """
    if not input_dir.is_dir():
        raise ValueError(
            f"Input directory does not exist or is not a directory: {input_dir}"
        )
    output_dir.mkdir(exist_ok=True, parents=True)


def main(input: str, output: str) -> None:
    """Build ``kernels.csv`` in *output* from the JSON files found in *input*.

    Args:
        input: Path of the directory holding the JSON files.
        output: Path of the directory to write to; created if missing.

    Raises:
        ValueError: If *input* is not an existing directory.
    """
    # NOTE: ``input`` shadows the builtin; the name is kept so that existing
    # keyword callers keep working.
    input_dir = Path(input)
    output_dir = Path(output)
    ensure_dirs(input_dir, output_dir)

    package_kernel_csv(input_dir, output_dir)


if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        print(
            "Please provide exactly one input directory and one output directory.",
            file=sys.stderr,
        )
        sys.exit(1)
    inp = sys.argv[1]
    out = sys.argv[2]
    main(inp, out)