"""Flatten the ``Versions`` mapping of every JSON file in a directory into ``packages.csv``."""

import csv
import json
from collections.abc import Iterator
from pathlib import Path
from typing import cast


def _iter_rows(json_path: Path) -> Iterator[list[object]]:
    """Yield one ``[date, package, version, count]`` row per version entry.

    The "date" value is the file name of *json_path* without its suffix.
    Files that cannot be decoded, or whose structure is not
    ``{"Versions": {package: {version: count}}}``, produce a warning on
    stdout and contribute no rows (or only the rows of their valid packages).
    """
    date = json_path.stem

    try:
        with json_path.open(encoding="utf-8") as fr:
            data: object = json.load(fr)
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"WARN: Could not decode JSON data for file {json_path}")
        return

    # The top-level JSON value may be a list or a scalar; a membership test on
    # a scalar would raise TypeError, so verify it is a dict before looking up.
    if not isinstance(data, dict) or not isinstance(data.get("Versions"), dict):
        print(
            f"WARN: No correct json structure containing 'Versions' field in file {json_path}"
        )
        return

    data_versions = cast(dict[str, dict[str, int]], data["Versions"])
    for package_name, package_versions in data_versions.items():
        if not isinstance(package_versions, dict):
            print(
                f"WARN: No correct json version structure containing versions in the Version field in file {json_path}"
            )
            continue
        for version, count in package_versions.items():
            yield [date, package_name, version, count]


def packages_csv(input_dir: Path, output_dir: Path) -> None:
    """Write ``output_dir / "packages.csv"`` from all ``*.json`` files in *input_dir*.

    The CSV starts with the header ``date,package,version,count`` followed by
    one row per (file, package, version) combination. Input files are
    processed in sorted path order so the output is deterministic.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.json`` files.
        output_dir: Existing directory that receives ``packages.csv``.
    """
    output_file = output_dir / "packages.csv"
    # newline="" is required by the csv module; without it every row is
    # followed by a blank line on platforms that translate "\n" to "\r\n".
    with output_file.open("w", newline="", encoding="utf-8") as fw:
        writer = csv.writer(fw)
        writer.writerow(["date", "package", "version", "count"])
        for json_path in sorted(input_dir.glob("*.json")):
            writer.writerows(_iter_rows(json_path))


def ensure_dirs(input_dir: Path, output_dir: Path) -> None:
    """Validate the input directory and create the output directory.

    Args:
        input_dir: Must be an existing directory.
        output_dir: Created, including missing parents, if it does not exist.

    Raises:
        ValueError: If *input_dir* is not an existing directory.
    """
    if not input_dir.is_dir():
        raise ValueError(f"Input directory does not exist or is not a directory: {input_dir}")
    output_dir.mkdir(exist_ok=True, parents=True)


def main(input: str, output: str) -> None:
    """Convert the JSON files in directory *input* into ``packages.csv`` in *output*.

    Raises:
        ValueError: If *input* is not an existing directory.
    """
    input_dir = Path(input)
    output_dir = Path(output)
    ensure_dirs(input_dir, output_dir)
    packages_csv(input_dir, output_dir)


if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        print("Please provide exactly one input directory and one output directory.")
        sys.exit(1)
    inp = sys.argv[1]
    out = sys.argv[2]
    main(inp, out)