chore(repo): Move scripts to src directory
This commit is contained in:
parent
76ff71765c
commit
a854794a43
4 changed files with 5 additions and 5 deletions
44
src/data.py
Normal file
44
src/data.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import io
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import load_yaml
|
||||
from pandas import DataFrame, read_csv
|
||||
|
||||
# Directory searched for study YAML files when the caller gives no path.
# It is a relative path; `from_yml` resolves it before use.
DEFAULT_YAML_PATH = Path("02-data/processed/relevant")
|
||||
|
||||
|
||||
def to_tsv(studies: list[dict]) -> str:
    """Flatten studies and their observations into a TSV string.

    The header row consists of the keys of the first study followed by the
    keys of the first observation found in any study. One data row is emitted
    per observation: the stringified values of the owning study followed by
    the stringified values of that observation.

    Args:
        studies: Study mappings. Each one must have an ``"observation"`` key
            holding a list of observation mappings.

    Returns:
        The TSV text with every row terminated by a newline, or ``""`` when
        ``studies`` is empty.

    Raises:
        KeyError: If a study has no ``"observation"`` key.
    """
    if not studies:
        return ""
    tab = "\t"

    def _cells(record: dict) -> list[str]:
        # Newlines are dropped and tabs become spaces so that a value can
        # never spill into a neighbouring row or column.
        return [
            str(val).replace("\n", "").replace(tab, " ")
            for val in record.values()
        ]

    # Take the observation column names from the first observation that
    # exists anywhere, so an empty observation list on the first study does
    # not raise IndexError.
    first_obs = next(
        (obs for study in studies for obs in study["observation"]), None
    )
    header = tab.join(studies[0].keys())
    if first_obs is not None:
        header = f"{header}{tab}{tab.join(first_obs.keys())}"

    lines = [header]
    for study in studies:
        study_part = tab.join(_cells(study))
        lines.extend(
            f"{study_part}{tab}{tab.join(_cells(obs))}"
            for obs in study["observation"]
        )
    # Collect rows and join once instead of growing a string with `+=`.
    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def from_yml(yml_path: Path | str = DEFAULT_YAML_PATH) -> DataFrame:
    """Build a DataFrame from the study YAML files under ``yml_path``.

    The path is resolved, the studies are loaded with ``load_yaml.load``,
    rendered to TSV by ``to_tsv`` and parsed back with pandas.

    Args:
        yml_path: Directory to load studies from. Defaults to
            ``DEFAULT_YAML_PATH``.

    Returns:
        The DataFrame parsed from the tab-separated rendering of the studies.

    Raises:
        ValueError: If no studies were loaded from the directory.
    """
    yml_path = Path(yml_path).resolve()
    studies = load_yaml.load(yml_path)
    if studies:
        # Round-trip through an in-memory TSV so pandas does the parsing.
        buffer = io.StringIO(to_tsv(studies))
        return read_csv(buffer, sep="\t")
    raise ValueError(f"No studies found in directory {yml_path.resolve()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # A single command-line argument overrides the default YAML directory;
    # any other argument count falls back to the default.
    cli_args = sys.argv[1:]
    res = from_yml(Path(cli_args[0])) if len(cli_args) == 1 else from_yml()

    print(res)
    # print out tsv file instead
    # print(to_tsv(load_yaml.load(DEFAULT_YAML_PATH)))
|
||||
39
src/load_yaml.py
Normal file
39
src/load_yaml.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import sys
|
||||
from typing import cast
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _get_all_yml(path: Path) -> list[Path]:
    """Return every YAML file below ``path``, sorted by path.

    Args:
        path: Directory to search recursively.

    Returns:
        The matching regular files in sorted order. Directories whose names
        happen to match the pattern are excluded.
    """
    # `rglob` already searches recursively, so no `**/` prefix is needed.
    # `*.y*ml` covers both the `.yml` and `.yaml` spellings.
    # Sorting makes the order independent of the filesystem.
    return sorted(p for p in path.rglob("*.y*ml") if p.is_file())
|
||||
|
||||
|
||||
def _read_yml(path: Path) -> dict | None:
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
return yaml.safe_load(f)
|
||||
except FileNotFoundError as e:
|
||||
print(e)
|
||||
return None
|
||||
|
||||
|
||||
def load(yml_path: Path | str) -> list[dict]:
    """Main data process routine.

    Reads every YAML file found under ``yml_path`` and returns the parsed
    studies with their ``"annotation"`` entry removed.

    Args:
        yml_path: Directory searched recursively for YAML files.

    Returns:
        One mapping per successfully read file, or an empty list when nothing
        was found. Files for which ``_read_yml`` returns ``None`` are skipped.
    """
    # Drop `None` results here; previously they were cast to dicts and then
    # crashed on the annotation removal below.
    studies = [
        doc
        for doc in (_read_yml(source) for source in _get_all_yml(Path(yml_path)))
        if doc is not None
    ]
    for study in studies:
        # `pop` with a default tolerates studies that carry no annotation.
        study.pop("annotation", None)
    return studies
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the YAML directory.
    if len(sys.argv) != 2:
        print("Please provide path to yml files.")
    else:
        res = load(Path(sys.argv[1]))
        print(res)
|
||||
2513
src/pandoc-to-zotero-live.lua
Normal file
2513
src/pandoc-to-zotero-live.lua
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue