60 lines
1.5 KiB
Python
60 lines
1.5 KiB
Python
|
import sys
|
||
|
from typing import cast
|
||
|
import yaml
|
||
|
from pathlib import Path
|
||
|
|
||
|
DEFAULT_YAML_PATH = Path("../02-data/intermediate/relevant")
|
||
|
|
||
|
|
||
|
def _get_all_yml(path: Path) -> list:
|
||
|
"""Returns list of all yml files."""
|
||
|
return list(path.rglob(r"*.y*ml"))
|
||
|
|
||
|
|
||
|
def _read_yml(path: Path) -> dict | None:
|
||
|
try:
|
||
|
with open(path, "r") as f:
|
||
|
return yaml.safe_load(f)
|
||
|
except FileNotFoundError as e:
|
||
|
print(e)
|
||
|
return None
|
||
|
|
||
|
|
||
|
def from_yml(
    yml_path: Path | str = DEFAULT_YAML_PATH,
) -> list[dict]:
    """Load every YAML study below ``yml_path`` without its annotation.

    Each YAML file found by ``_get_all_yml`` is parsed with ``_read_yml``.
    Documents that could not be read, or whose top-level value is not a
    mapping, are skipped. The ``"annotation"`` entry is removed from every
    remaining study when present.

    Args:
        yml_path: Directory searched recursively for YAML files.

    Returns:
        The loaded studies; an empty list when no usable file was found.
    """
    loaded = (_read_yml(source) for source in _get_all_yml(Path(yml_path)))
    # _read_yml yields None for unreadable files; keep real mappings only.
    studies = [document for document in loaded if isinstance(document, dict)]
    for study in studies:
        # pop() with a default tolerates studies that carry no annotation.
        study.pop("annotation", None)
    return studies
|
||
|
|
||
|
|
||
|
def to_tsv(studies: list[dict]) -> str:
    """Render studies as tab-separated text, one row per observation.

    The header consists of the keys of the first study followed by the keys
    of the first observation found in any study. Every data row repeats the
    stringified values of a study (including its ``"observation"`` value)
    followed by the stringified values of one of its observations. Studies
    without observations produce no rows.

    Newlines and carriage returns are removed from cell values and tabs are
    replaced by a space, so a value can never break the row/column layout.

    Args:
        studies: Study mappings; each may hold a list of observation
            mappings under the ``"observation"`` key.

    Returns:
        The TSV text, or an empty string when ``studies`` is empty.
    """
    if not studies:
        return ""

    tab = "\t"
    # Characters that would otherwise corrupt the TSV structure.
    cell_cleanup = str.maketrans({"\n": "", "\r": "", "\t": " "})

    def as_cells(record: dict) -> list[str]:
        return [str(value).translate(cell_cleanup) for value in record.values()]

    # Header columns come from the first observation available anywhere, so
    # a leading study without observations does not raise IndexError.
    first_obs = next(
        (
            obs
            for study in studies
            for obs in (study.get("observation") or ())
        ),
        {},
    )
    lines = [
        f"{tab.join(studies[0].keys())}{tab}{tab.join(first_obs.keys())}\n"
    ]
    for study in studies:
        study_cells = tab.join(as_cells(study))
        lines.extend(
            f"{study_cells}{tab}{tab.join(as_cells(obs))}\n"
            for obs in (study.get("observation") or ())
        )
    return "".join(lines)
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Exactly one command-line argument selects the directory to scan;
    # any other argument count falls back to the default location.
    cli_args = sys.argv[1:]
    studies = from_yml(Path(cli_args[0])) if len(cli_args) == 1 else from_yml()

    print(to_tsv(studies))
|