Marty Oehme
c7533e01d6
Ingests all YAML files in the directory and loads the data for each study. Currently creates one dataframe row for each *observation* in the pool (there can be multiple per study, if a study has multiple analyses for different independent/dependent variables). This follows the tidy data paradigm of one observation per row.
22 lines
499 B
Python
22 lines
499 B
Python
import io
|
|
from pathlib import Path
|
|
import sys
|
|
import load_yaml
|
|
from pandas import DataFrame, read_csv
|
|
|
|
# Path that load() falls back to when the caller does not supply one.
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory at call time — confirm how load_yaml handles it.
DEFAULT_YAML_PATH = Path("../02-data/intermediate/relevant")
|
|
|
|
|
|
def load(yml_path: Path | str = DEFAULT_YAML_PATH) -> DataFrame:
    """Load the studies found at *yml_path* into a single DataFrame.

    The studies are read with ``load_yaml.from_yml``, rendered to
    tab-separated text with ``load_yaml.to_tsv``, and that text is then
    parsed by pandas.

    Args:
        yml_path: Location handed to ``load_yaml.from_yml``. Defaults to
            ``DEFAULT_YAML_PATH``.

    Returns:
        The DataFrame pandas builds from the tab-separated text.
    """
    tsv_text = load_yaml.to_tsv(load_yaml.from_yml(yml_path))
    # Wrap the string in an in-memory text buffer so pandas can read it
    # like a file.
    buffer = io.StringIO(tsv_text)
    return read_csv(buffer, sep="\t")
|
|
|
|
|
|
if __name__ == "__main__":
    # With exactly one command-line argument, treat it as the path to load;
    # in every other case fall back to load()'s default path.
    cli_args = sys.argv[1:]
    res = load(Path(cli_args[0])) if len(cli_args) == 1 else load()

    print(res)
|