From 42e706080adc0c65c8ed85296b510c665e2fc672 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Tue, 30 Sep 2025 21:39:22 +0200 Subject: [PATCH] Add code README and automatic CHANGELOG entries --- code/README.md | 15 ++++++++++++--- code/files.py | 3 --- code/kernels.py | 6 ------ justfile | 1 + 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/code/README.md b/code/README.md index 1ed9d2f..b1f9126 100644 --- a/code/README.md +++ b/code/README.md @@ -1,3 +1,12 @@ -All custom code goes into this directory. All scripts should be written such -that they can be executed from the root of the dataset, and are only using -relative paths for portability. +# Popcorn dataset code + +Each script can be run stand-alone like `python code/files.py INPUT_DIR OUTPUT_DIR`, +exchanging the script file for the one intended. + +It is suggested, however, to run the scripts using the `just` command runner from the +dataset root, such as `just files` for the same effect as above. +This will automatically populate the correct input and output directories. + +To create new `datalad` versioned output data, run `just versioned` or `just` without any arguments. +A new commit containing the updated data will be created, +and an entry will automatically be added to the CHANGELOG. 
diff --git a/code/files.py b/code/files.py index 8a6776b..26a8526 100644 --- a/code/files.py +++ b/code/files.py @@ -1,6 +1,4 @@ import csv -from collections.abc import Generator -from datetime import datetime from pathlib import Path @@ -29,7 +27,6 @@ def main(input: str, output: str) -> None: input_dir = Path(input) output_dir = Path(output) ensure_dirs(input_dir, output_dir) - filesize_csv(input_dir, output_dir) diff --git a/code/kernels.py b/code/kernels.py index 389f575..0c9b5fa 100644 --- a/code/kernels.py +++ b/code/kernels.py @@ -1,7 +1,5 @@ import csv import json -from collections.abc import Generator -from datetime import datetime from pathlib import Path from typing import Any, cast @@ -33,7 +31,6 @@ def package_kernel_csv(input_dir: Path, output_dir: Path) -> None: p_count = cast(int, data["XuKernel"][entry]) p_date = date writer.writerow([p_date, p_name, p_count]) - # print(output_file, p_date, p_name, p_count) def ensure_dirs(input_dir: Path, output_dir: Path): @@ -46,9 +43,6 @@ def main(input: str, output: str) -> None: input_dir = Path(input) output_dir = Path(output) ensure_dirs(input_dir, output_dir) - # json_to_daily_pkg(Path(input) / "daily", Path(output) / "daily", force=True) - # json_to_unique_csv(Path(input) / "daily", Path(output), force=True) - package_kernel_csv(input_dir, output_dir) diff --git a/justfile b/justfile index 6e0fa58..ba1d05a 100644 --- a/justfile +++ b/justfile @@ -16,3 +16,4 @@ packages: versioned: datalad run -m "Create updated output data" -i input/ -o output/ just all + printf "\n## %s\n\n- auto generated\n" $(date +%F) | sed -i '/CHANGELOG/r /dev/stdin' CHANGELOG.md