chore(repo): Move references to reference data dir

Begin restructuring data dir by separating out references into their own data sub-dir containing only references and bibtex files.
2024-07-16 15:59:41 +02:00 · 2024-07-16 15:59:41 +02:00 · 4f9acd0816
commit 4f9acd0816
parent 2a1fc9611f
177 changed files with 77679 additions and 40 deletions
--- a/notebooks/README.md
+++ b/notebooks/README.md
@ -0,0 +1,18 @@
+# Notebooks
+
+The notebooks in this directory were used for a variety of exploratory data analyses.
+They may be outdated and are not guaranteed to work as the project around them continues to change.
+However, they are kept here for the moment to serve as reminders and inspiration for further data analysis.
+
+Each notebook looked into a different issue:
+
+explore.qmd performs a fundamental exploration of the extracted dataset.
+
+test-magma.qmd explores the interaction with ipynb notebooks through the neovim plugin magma-nvim.
+
+bibmanip.qmd explores the parsing and analysis of .bib bibtex files through python.
+
+main-findings.qmd is an attempt to distill the variety of findings into a short-form table format.
+
+rank_validities.qmd attempts to create a map of the validity of each study utilizing the
+    internal/external validity method used in the main paper.
--- a/notebooks/bibmanip.qmd
+++ b/notebooks/bibmanip.qmd
@ -22,7 +22,8 @@ subtitle: Addressing inequalities in the World of Work
 ```{python}
 #| echo: false
 from pathlib import Path
-data_dir=Path("../data")
+import src.globals as g
+data_dir = g.DATA_DIR

 ## standard imports
 from IPython.core.display import Markdown as md
@ -43,8 +44,9 @@ sns.set_style("whitegrid")
 import bibtexparser

 bib_string=""
-print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
-for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
+sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
+print(f"path: {sample_dir.absolute()}")
+for partial_bib in sample_dir.glob("*.bib"):
    with open(partial_bib) as f:
        bib_string+="\n".join(f.readlines())
 sample = bibtexparser.parse_string(bib_string)
--- a/notebooks/explore.qmd
+++ b/notebooks/explore.qmd
@ -184,23 +184,18 @@ from matplotlib import pyplot as plt
 import seaborn as sns
 from tabulate import tabulate
 import bibtexparser
+import src.globals as g

 sns.set_style("whitegrid")

-DATA_DIR=Path("./data")
-RAW_DATA=DATA_DIR.joinpath("raw")
-WORKING_DATA=DATA_DIR.joinpath("intermediate")
-PROCESSED_DATA=DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
-
 bib_string=""
-for partial_bib in RAW_DATA.glob("**/*.bib"):
+for partial_bib in g.REFERENCE_DATA.glob("**/*.bib"):
    with open(partial_bib) as f:
        bib_string+="\n".join(f.readlines())
 bib_sample_raw_db = bibtexparser.parse_string(bib_string)

 bib_string=""
-for partial_bib in WORKING_DATA.glob("**/*.bib"):
+for partial_bib in [g.REFERENCE_DATA.joinpath("zotero-library.bib")]:  # single file: wrap in a list, a Path itself is not iterable
    with open(partial_bib) as f:
        bib_string+="\n".join(f.readlines())
 bib_sample = bibtexparser.parse_string(bib_string)
@ -222,10 +217,10 @@ zot_df = pd.DataFrame([
 ], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")

 # Add WB country grouping definitions (income group, world region)
-WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
+WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
 df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")

-bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
+bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}")
    .assign(
        doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
        zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
--- a/notebooks/test-magma.qmd
+++ b/notebooks/test-magma.qmd
@ -28,8 +28,10 @@ zotero:
 ```{python}
 #| echo: false
 from pathlib import Path
-DATA_DIR=Path("./data")
-BIB_PATH = DATA_DIR.joinpath("raw/01_wos-sample_2023-11-02")
+import src.globals as g
+DATA_DIR = g.DATA_DIR
+RAW_DATA = g.RAW_DATA
+BIB_PATH = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")

 ## standard imports
 from IPython.core.display import Markdown as md