Validate CSV output schemas

Also moved code dir to src.
There are good reasons to do standard things in standard ways. While it is
possible to get the `code/` directory to work and have it recognized as a
package path, doing so requires wrangling the pyproject.toml file.
Additionally, any import from the `code.something` path automatically
shadows the Python stdlib `code` module. While avoiding this may not be
strictly necessary, it is still good practice not to shadow standard library modules.
This commit is contained in:
Marty Oehme 2025-09-30 22:14:30 +02:00
parent de96b67fac
commit 2faeda87c3
Signed by: Marty
GPG key ID: 4E535BC19C61886E
14 changed files with 111 additions and 7 deletions

View file

@ -1,56 +0,0 @@
import csv
import json
from pathlib import Path
def unique_install_csv(input_dir: Path, output_dir: Path) -> None:
    """Collect the unique-install counts of all JSON files into one CSV file.

    Every ``*.json`` file directly inside ``input_dir`` is expected to hold a
    JSON object with an integer ``UniqueInstalls`` field. For each such file
    one row is written to ``<output_dir>/unique_installs.csv``, consisting of
    the file name without its suffix (used as the date) and the count. Files
    that cannot be decoded or that lack the expected structure are reported
    with a warning on stdout and skipped.

    Args:
        input_dir: Directory containing the JSON files to read.
        output_dir: Existing directory in which the CSV file is written.
    """
    output_file = output_dir / "unique_installs.csv"
    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" on write end up with an extra "\r" per row.
    with open(output_file, "w", newline="", encoding="utf-8") as fw:
        writer = csv.writer(fw)
        writer.writerow(["date", "unique"])

        # glob() yields files in arbitrary order; sort so the row order is
        # reproducible between runs and machines.
        for j in sorted(input_dir.glob("*.json")):
            try:
                with open(j, encoding="utf-8") as fr:
                    data: object = json.load(fr)
            except (json.JSONDecodeError, UnicodeDecodeError):
                print(f"WARN: Could not decode JSON data for file {j}")
                continue

            # The top-level JSON value may be a list, number, string, ... in
            # which case there is no field to look up at all.
            count = data.get("UniqueInstalls") if isinstance(data, dict) else None
            # bool is a subclass of int, but `true`/`false` is not a count.
            if not isinstance(count, int) or isinstance(count, bool):
                print(
                    f"WARN: No correct json structure containing 'UniqueInstalls' field in file {j}"
                )
                continue

            writer.writerow([j.stem, count])
def ensure_dirs(input_dir: Path, output_dir: Path) -> None:
    """Check that the input directory exists and create the output directory.

    Args:
        input_dir: Directory that must already exist.
        output_dir: Directory to create, including any missing parents. An
            already existing directory is left untouched.

    Raises:
        ValueError: If ``input_dir`` is not an existing directory.
    """
    if not input_dir.is_dir():
        # Name the offending path so the user can tell what went wrong.
        raise ValueError(f"Input path is not an existing directory: {input_dir}")
    output_dir.mkdir(exist_ok=True, parents=True)
def main(input: str, output: str) -> None:
    """Generate the CSV output for the given input and output directories.

    Args:
        input: Path of the directory holding the JSON source files.
        output: Path of the directory that receives the generated CSV.
    """
    source_dir, target_dir = Path(input), Path(output)
    # Validate/prepare the directories first, then produce the CSV.
    ensure_dirs(source_dir, target_dir)
    unique_install_csv(source_dir, target_dir)
if __name__ == "__main__":
    import sys

    # Expect the script name plus exactly two arguments: input and output.
    if len(sys.argv) != 3:
        print("Please provide exactly one input directory and one output directory.")
        sys.exit(1)

    inp, out = sys.argv[1:]
    main(inp, out)