chore(repo): Move references to reference data dir

Begin restructuring data dir by separating out references into their own
data sub-dir containing only references and bibtex files.
This commit is contained in:
Marty Oehme 2024-07-16 15:59:41 +02:00
parent 2a1fc9611f
commit 4f9acd0816
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
177 changed files with 77679 additions and 40 deletions

18
notebooks/README.md Normal file
View file

@ -0,0 +1,18 @@
# Notebooks
The notebooks in this directory contain a variety of exploratory data analyses.
They may be outdated and are not guaranteed to work as the project around them continues to change.
However, they are kept here for the moment to serve as reminders and inspiration for further data analysis.
Each notebook looked into a different issue:
explore.qmd is a fundamental data exploration of the extracted data-set.
test-magma.qmd explores the interaction with ipynb notebooks through the neovim plugin magma-nvim.
bibmanip.qmd explores the parsing and analysis of .bib bibtex files through python.
main-findings.qmd is an attempt to distill the variety of findings into a short-form table format.
rank_validities.qmd attempts to create a map of the validity of each study utilizing the
internal/external validity method used in the main paper.

View file

@ -22,7 +22,8 @@ subtitle: Addressing inequalities in the World of Work
```{python}
#| echo: false
from pathlib import Path
data_dir=Path("../data")
import src.globals as g
data_dir = g.DATA_DIR
## standard imports
from IPython.core.display import Markdown as md
@ -43,8 +44,9 @@ sns.set_style("whitegrid")
import bibtexparser
bib_string=""
print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
print(f"path: {sample_dir.absolute()}")
for partial_bib in sample_dir.glob("*.bib"):
with open(partial_bib) as f:
bib_string+="\n".join(f.readlines())
sample = bibtexparser.parse_string(bib_string)

View file

@ -184,23 +184,18 @@ from matplotlib import pyplot as plt
import seaborn as sns
from tabulate import tabulate
import bibtexparser
import src.globals as g
sns.set_style("whitegrid")
DATA_DIR=Path("./data")
RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
bib_string=""
for partial_bib in RAW_DATA.glob("**/*.bib"):
for partial_bib in g.REFERENCE_DATA.glob("**/*.bib"):
with open(partial_bib) as f:
bib_string+="\n".join(f.readlines())
bib_sample_raw_db = bibtexparser.parse_string(bib_string)
bib_string=""
for partial_bib in WORKING_DATA.glob("**/*.bib"):
# joinpath() yields a single path, and a Path object is not iterable, so
# looping over it raises TypeError. Read the one bibtex file directly instead.
with open(g.REFERENCE_DATA.joinpath("zotero-library.bib")) as f:
    bib_string+="\n".join(f.readlines())
bib_sample = bibtexparser.parse_string(bib_string)
@ -222,10 +217,10 @@ zot_df = pd.DataFrame([
], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
# Add WB country grouping definitions (income group, world region)
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}")
.assign(
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),

View file

@ -28,8 +28,10 @@ zotero:
```{python}
#| echo: false
from pathlib import Path
DATA_DIR=Path("./data")
BIB_PATH = DATA_DIR.joinpath("raw/01_wos-sample_2023-11-02")
import src.globals as g
DATA_DIR = g.DATA_DIR
RAW_DATA = g.RAW_DATA
BIB_PATH = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
## standard imports
from IPython.core.display import Markdown as md