chore(code): Refactor data processing into module
This commit is contained in:
parent 4012ea55f0
commit 740350eacd
4 changed files with 8 additions and 5 deletions
|
@ -43,7 +43,7 @@ cmd = "nvim"
|
||||||
[tool.poe.tasks.extract]
|
[tool.poe.tasks.extract]
|
||||||
help = "Extract the csv data from raw yaml files"
|
help = "Extract the csv data from raw yaml files"
|
||||||
shell = """
|
shell = """
|
||||||
python src/matrix.py > 02-data/processed/extracted.csv
|
python src/extract/raw_to_extracted_csv.py > 02-data/processed/extracted.csv
|
||||||
"""
|
"""
|
||||||
[tool.poe.tasks.milestone]
|
[tool.poe.tasks.milestone]
|
||||||
help = "Extract, render, commit and version a finished artifact"
|
help = "Extract, render, commit and version a finished artifact"
|
||||||
|
|
|
@ -5,7 +5,7 @@ from pathlib import Path
|
||||||
from pandas import DataFrame, read_csv
|
from pandas import DataFrame, read_csv
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import src.yml as yaml # for quarto document scripts
|
import src.extract.yml as yaml # for quarto document scripts
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
import yml as yaml # for directly running the package
|
import yml as yaml # for directly running the package
|
||||||
|
|
|
@ -8,6 +8,10 @@ try:
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
from model import validity # for directly running the package
|
from model import validity # for directly running the package
|
||||||
|
|
||||||
|
try:
|
||||||
|
import src.process.load_data as load
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
import load_data as load
|
||||||
|
|
||||||
def extract(df: DataFrame, file: Path | StringIO) -> None:
|
def extract(df: DataFrame, file: Path | StringIO) -> None:
|
||||||
(
|
(
|
||||||
|
@ -21,12 +25,11 @@ if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import load_data
|
|
||||||
|
|
||||||
if len(sys.argv) == 2:
|
if len(sys.argv) == 2:
|
||||||
df = load_data.from_yml(Path(sys.argv[1]))
|
df = load.from_yml(Path(sys.argv[1]))
|
||||||
else:
|
else:
|
||||||
df = load_data.from_yml()
|
df = load.from_yml()
|
||||||
|
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
extract(df, output)
|
extract(df, output)
|
Loading…
Reference in a new issue