chore(repo): Rename data loading module
This commit is contained in:
parent
de5bc68836
commit
4dd8e13d6b
4 changed files with 5 additions and 5 deletions
|
@ -41,7 +41,7 @@ cmd = "nvim"
|
||||||
[tool.poe.tasks.extract]
|
[tool.poe.tasks.extract]
|
||||||
help = "Extract the csv data from raw yaml files"
|
help = "Extract the csv data from raw yaml files"
|
||||||
shell = """
|
shell = """
|
||||||
python src/data.py > 02-data/processed/extracted.csv
|
python src/load_data.py > 02-data/processed/extracted.csv
|
||||||
"""
|
"""
|
||||||
[tool.poe.tasks.milestone]
|
[tool.poe.tasks.milestone]
|
||||||
help = "Extract, render, commit and version a finished artifact"
|
help = "Extract, render, commit and version a finished artifact"
|
||||||
|
|
|
@ -57,7 +57,7 @@ bib_sample = bibtexparser.parse_string(bib_string)
|
||||||
|
|
||||||
```{python}
|
```{python}
|
||||||
# load relevant studies
|
# load relevant studies
|
||||||
from src import data
|
from src import load_data
|
||||||
|
|
||||||
# load zotero-based metadata: citations and uses
|
# load zotero-based metadata: citations and uses
|
||||||
zot_df = pd.DataFrame([
|
zot_df = pd.DataFrame([
|
||||||
|
@ -91,7 +91,7 @@ def countries_to_income_groups(countries:str):
|
||||||
return ";".join(res)
|
return ";".join(res)
|
||||||
|
|
||||||
|
|
||||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}")
|
bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
|
||||||
.assign(
|
.assign(
|
||||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||||
|
|
|
@ -2,9 +2,9 @@ import io
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
try:
|
try:
|
||||||
import src.load_yaml as yaml # for quarto document scripts
|
import src.yaml as yaml # for quarto document scripts
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
import load_yaml as yaml # for directly running the package
|
import yaml as yaml # for directly running the package
|
||||||
from pandas import DataFrame, read_csv
|
from pandas import DataFrame, read_csv
|
||||||
|
|
||||||
DEFAULT_YAML_PATH = Path("02-data/processed")
|
DEFAULT_YAML_PATH = Path("02-data/processed")
|
Loading…
Reference in a new issue