70 lines
2.3 KiB
Python
70 lines
2.3 KiB
Python
import csv
|
|
import json
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
|
|
def json_to_daily_pkg(
|
|
input_dir: Path, output_dir: Path, force: bool = False
|
|
) -> int | None:
|
|
if not input_dir.is_dir():
|
|
raise ValueError
|
|
output_dir.mkdir(exist_ok=True, parents=True)
|
|
|
|
# only clean if raw files have been updated since last time
|
|
if not force:
|
|
last_raw_update: datetime = datetime(1, 1, 1)
|
|
oldest_clean: datetime = datetime.now()
|
|
for file in input_dir.glob("*.json"):
|
|
mdate = datetime.fromtimestamp(file.stat().st_mtime)
|
|
if mdate > last_raw_update:
|
|
last_raw_update = mdate
|
|
for file in output_dir.glob("*.csv"):
|
|
mdate = datetime.fromtimestamp(file.stat().st_mtime)
|
|
if mdate < oldest_clean:
|
|
oldest_clean = mdate
|
|
if oldest_clean > last_raw_update:
|
|
print(
|
|
"INFO: Skip creating cleaned data,"
|
|
f" last processing {oldest_clean} newer than {last_raw_update}"
|
|
)
|
|
return
|
|
|
|
for j in input_dir.glob("*.json"):
|
|
with open(j) as fr:
|
|
date = j.stem
|
|
output_file = output_dir / f"{date}.csv"
|
|
try:
|
|
data = json.load(fr)
|
|
except json.JSONDecodeError:
|
|
print(f"WARN: Could not decode JSON data for file {j}")
|
|
|
|
if "Packages" not in data:
|
|
print(
|
|
f"WARN: No correct json structure containing 'Packages' field in file {j}"
|
|
)
|
|
|
|
with open(output_file, "w") as fw:
|
|
writer = csv.writer(fw)
|
|
writer.writerow(["date", "package", "downloads"])
|
|
for entry in data["Packages"]:
|
|
p_name = entry
|
|
p_count = data["Packages"][entry]
|
|
p_date = date
|
|
writer.writerow([p_date, p_name, p_count])
|
|
# print(output_file, p_date, p_name, p_count)
|
|
|
|
|
|
def main(input: str, output: str) -> None:
    """Regenerate the daily CSV files from the daily JSON files.

    The ``daily`` sub-directory of *input* is read and the ``daily``
    sub-directory of *output* is written; regeneration is always forced.
    """
    daily_source = Path(input) / "daily"
    daily_target = Path(output) / "daily"
    json_to_daily_pkg(daily_source, daily_target, force=True)
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # argv[0] is the script itself, so two user arguments make three entries.
    if len(sys.argv) != 3:
        print("Please provide exactly one input directory and one output directory.")
        sys.exit(1)
    inp, out = sys.argv[1:3]
    main(inp, out)
|