chore(repo): Move references to reference data dir
Begin restructuring data dir by separating out references into their own data sub-dir containing only references and bibtex files.
This commit is contained in:
parent
2a1fc9611f
commit
4f9acd0816
177 changed files with 77679 additions and 40 deletions
18
notebooks/README.md
Normal file
18
notebooks/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Notebooks
|
||||
|
||||
The notebooks in this directory contain a variety of exploratory data analyses.
|
||||
They may be outdated and are not guaranteed to work as the project around them continues to change.
|
||||
However, they are kept here for the moment to serve as reminders and inspiration for further data analysis.
|
||||
|
||||
Each notebook looked into a different issue:
|
||||
|
||||
explore.qmd is a fundamental data exploration of the extracted data-set.
|
||||
|
||||
test-magma.qmd explores the interaction with ipynb notebooks through the neovim plugin magma-nvim.
|
||||
|
||||
bibmanip.qmd explores the parsing and analysis of .bib bibtex files through python.
|
||||
|
||||
main-findings.qmd is an attempt to distill the variety of findings into a short-form table format.
|
||||
|
||||
rank_validities.qmd attempts to create a map of the validity of each study utilizing the
|
||||
internal/external validity method used in the main paper.
|
||||
|
|
@ -22,7 +22,8 @@ subtitle: Addressing inequalities in the World of Work
|
|||
```{python}
|
||||
#| echo: false
|
||||
from pathlib import Path
|
||||
data_dir=Path("../data")
|
||||
import src.globals as g
|
||||
data_dir = g.DATA_DIR
|
||||
|
||||
## standard imports
|
||||
from IPython.core.display import Markdown as md
|
||||
|
|
@ -43,8 +44,9 @@ sns.set_style("whitegrid")
|
|||
import bibtexparser
|
||||
|
||||
bib_string=""
|
||||
print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
|
||||
for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
|
||||
sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
|
||||
print(f"path: {sample_dir.absolute()}")
|
||||
for partial_bib in sample_dir.glob("*.bib"):
|
||||
with open(partial_bib) as f:
|
||||
bib_string+="\n".join(f.readlines())
|
||||
sample = bibtexparser.parse_string(bib_string)
|
||||
|
|
|
|||
|
|
@ -184,23 +184,18 @@ from matplotlib import pyplot as plt
|
|||
import seaborn as sns
|
||||
from tabulate import tabulate
|
||||
import bibtexparser
|
||||
import src.globals as g
|
||||
|
||||
sns.set_style("whitegrid")
|
||||
|
||||
DATA_DIR=Path("./data")
|
||||
RAW_DATA=DATA_DIR.joinpath("raw")
|
||||
WORKING_DATA=DATA_DIR.joinpath("intermediate")
|
||||
PROCESSED_DATA=DATA_DIR.joinpath("processed")
|
||||
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
|
||||
|
||||
bib_string=""
|
||||
for partial_bib in RAW_DATA.glob("**/*.bib"):
|
||||
for partial_bib in g.REFERENCE_DATA.glob("**/*.bib"):
|
||||
with open(partial_bib) as f:
|
||||
bib_string+="\n".join(f.readlines())
|
||||
bib_sample_raw_db = bibtexparser.parse_string(bib_string)
|
||||
|
||||
bib_string=""
|
||||
for partial_bib in WORKING_DATA.glob("**/*.bib"):
|
||||
# Load the single curated Zotero library export into bib_string.
# joinpath() returns one Path, and a Path is not iterable, so looping over it
# directly raises TypeError. Wrap it in a list to keep the loop shape used for
# the glob-based sample above while reading exactly this one file.
for partial_bib in [g.REFERENCE_DATA.joinpath("zotero-library.bib")]:
    with open(partial_bib) as f:
        bib_string+="\n".join(f.readlines())
|
||||
bib_sample = bibtexparser.parse_string(bib_string)
|
||||
|
|
@ -222,10 +217,10 @@ zot_df = pd.DataFrame([
|
|||
], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
|
||||
|
||||
# Add WB country grouping definitions (income group, world region)
|
||||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||
|
||||
bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
|
||||
bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}")
|
||||
.assign(
|
||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||
|
|
|
|||
|
|
@ -28,8 +28,10 @@ zotero:
|
|||
```{python}
|
||||
#| echo: false
|
||||
from pathlib import Path
|
||||
DATA_DIR=Path("./data")
|
||||
BIB_PATH = DATA_DIR.joinpath("raw/01_wos-sample_2023-11-02")
|
||||
import src.globals as g
|
||||
DATA_DIR = g.DATA_DIR
|
||||
RAW_DATA = g.RAW_DATA
|
||||
BIB_PATH = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
|
||||
|
||||
## standard imports
|
||||
from IPython.core.display import Markdown as md
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue