chore(code): Refactor data processing into module

This commit is contained in:
Marty Oehme 2024-07-15 20:06:48 +02:00
parent 4012ea55f0
commit 740350eacd
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
4 changed files with 8 additions and 5 deletions

View file

@@ -43,7 +43,7 @@ cmd = "nvim"
[tool.poe.tasks.extract]
help = "Extract the csv data from raw yaml files"
shell = """
-python src/matrix.py > 02-data/processed/extracted.csv
+python src/extract/raw_to_extracted_csv.py > 02-data/processed/extracted.csv
"""
[tool.poe.tasks.milestone]
help = "Extract, render, commit and version a finished artifact"

View file

@@ -5,7 +5,7 @@ from pathlib import Path
from pandas import DataFrame, read_csv
try:
-import src.yml as yaml # for quarto document scripts
+import src.extract.yml as yaml # for quarto document scripts
except ModuleNotFoundError:
import yml as yaml # for directly running the package

View file

@@ -8,6 +8,10 @@ try:
except ModuleNotFoundError:
from model import validity # for directly running the package
+try:
+import src.process.load_data as load
+except ModuleNotFoundError:
+import load_data as load
def extract(df: DataFrame, file: Path | StringIO) -> None:
(
@@ -21,12 +25,11 @@ if __name__ == "__main__":
import os
import sys
-import load_data
if len(sys.argv) == 2:
-df = load_data.from_yml(Path(sys.argv[1]))
+df = load.from_yml(Path(sys.argv[1]))
else:
-df = load_data.from_yml()
+df = load.from_yml()
output = StringIO()
extract(df, output)