chore(repo): Rename data loading module

This commit is contained in:
Marty Oehme 2024-01-10 17:27:48 +01:00
parent de5bc68836
commit 4dd8e13d6b
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
4 changed files with 5 additions and 5 deletions

View file

@@ -41,7 +41,7 @@ cmd = "nvim"
[tool.poe.tasks.extract] [tool.poe.tasks.extract]
help = "Extract the csv data from raw yaml files" help = "Extract the csv data from raw yaml files"
shell = """ shell = """
python src/data.py > 02-data/processed/extracted.csv python src/load_data.py > 02-data/processed/extracted.csv
""" """
[tool.poe.tasks.milestone] [tool.poe.tasks.milestone]
help = "Extract, render, commit and version a finished artifact" help = "Extract, render, commit and version a finished artifact"

View file

@@ -57,7 +57,7 @@ bib_sample = bibtexparser.parse_string(bib_string)
```{python} ```{python}
# load relevant studies # load relevant studies
from src import data from src import load_data
# load zotero-based metadata: citations and uses # load zotero-based metadata: citations and uses
zot_df = pd.DataFrame([ zot_df = pd.DataFrame([
@@ -91,7 +91,7 @@ def countries_to_income_groups(countries:str):
return ";".join(res) return ";".join(res)
bib_df = (data.from_yml(f"{PROCESSED_DATA}") bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
.assign( .assign(
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False), doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]), zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),

View file

@@ -2,9 +2,9 @@ import io
from pathlib import Path from pathlib import Path
import sys import sys
try: try:
import src.load_yaml as yaml # for quarto document scripts import src.yaml as yaml # for quarto document scripts
except ModuleNotFoundError: except ModuleNotFoundError:
import load_yaml as yaml # for directly running the package import yaml as yaml # for directly running the package
from pandas import DataFrame, read_csv from pandas import DataFrame, read_csv
DEFAULT_YAML_PATH = Path("02-data/processed") DEFAULT_YAML_PATH = Path("02-data/processed")