Move data cleaning scripts to clean module

This commit is contained in:
Marty Oehme 2025-10-09 16:19:52 +02:00
parent 87be49f30f
commit 3b78a84782
Signed by: Marty
GPG key ID: 4E535BC19C61886E
7 changed files with 5 additions and 5 deletions

41
code/clean/files.py Normal file
View file

@ -0,0 +1,41 @@
import csv
from pathlib import Path
def filesize_csv(input_dir: Path, output_dir: Path) -> None:
    """Write a CSV inventory of the JSON files directly inside *input_dir*.

    The inventory is written to ``files.csv`` in *output_dir*, replacing any
    existing file of that name. After the header row
    ``date, filename, mtime, filesize`` there is one row per ``*.json`` file
    containing the file stem, the file name, ``st_mtime`` and ``st_size``.

    Args:
        input_dir: Directory whose top-level ``*.json`` files are listed
            (sub-directories are not searched).
        output_dir: Existing directory that receives ``files.csv``.
    """
    output_file = output_dir / "files.csv"
    # The csv module emits its own "\r\n" row terminators; newline="" stops
    # the text layer from translating them again (blank rows on Windows).
    # A fixed encoding keeps the output independent of the machine's locale.
    with output_file.open("w", newline="", encoding="utf-8") as fw:
        writer = csv.writer(fw)
        writer.writerow(["date", "filename", "mtime", "filesize"])
        # glob() yields entries in arbitrary order; sort for reproducible output.
        for json_file in sorted(input_dir.glob("*.json")):
            stat = json_file.stat()
            # The stem fills the "date" column -- presumably the files are
            # named after their date; verify against the data source.
            writer.writerow(
                [json_file.stem, json_file.name, stat.st_mtime, stat.st_size]
            )
def ensure_dirs(input_dir: Path, output_dir: Path) -> None:
    """Check that *input_dir* exists and make sure *output_dir* exists.

    Args:
        input_dir: Path that must already be a directory.
        output_dir: Directory to create (including missing parents) if it
            does not exist yet; an existing directory is left untouched.

    Raises:
        ValueError: If *input_dir* does not exist or is not a directory.
    """
    if not input_dir.is_dir():
        # Name the offending path so the failure is actionable for the caller.
        raise ValueError(f"Input path is not an existing directory: {input_dir}")
    output_dir.mkdir(exist_ok=True, parents=True)
def main(input: str, output: str) -> None:
    """Run the file-inventory step for the given directories.

    Converts both arguments to paths, validates/creates the directories via
    ``ensure_dirs`` and then writes the inventory via ``filesize_csv``.

    Args:
        input: Path of the directory to read from.
        output: Path of the directory to write to.
    """
    source, target = Path(input), Path(output)
    ensure_dirs(source, target)
    filesize_csv(source, target)
if __name__ == "__main__":
    import sys

    # Exactly two arguments are expected after the script name:
    # the input directory and the output directory.
    if len(sys.argv) != 3:
        # Report usage errors on stderr so they never mix with piped stdout.
        print(
            "Please provide exactly one input directory and one output directory.",
            file=sys.stderr,
        )
        sys.exit(1)
    inp = sys.argv[1]
    out = sys.argv[2]
    main(inp, out)