Compare commits

..

No commits in common. "2b7bb5c4e7cee59f4768c2364f09be27024e07c7" and "8f2d361e5fa65befb78750683fac81c1f20f71ee" have entirely different histories.

315 changed files with 960 additions and 7887 deletions

14
.gitignore vendored
View file

@ -1,14 +1,6 @@
/documentation/DB /03-documentation/DB
/documentation/library /03-documentation/library
/output /04-outputs
/manuscript/*_files/
# Elsevier journal target:
# Ignore bibliography style, spl (for natbib?) and latex class
# since they don't get deleted after compilation
*.spl
*.bst
*.cls
# Created by https://www.toptal.com/developers/gitignore/api/-f,python,linux,vim,quarto,markdown,jupyternotebooks # Created by https://www.toptal.com/developers/gitignore/api/-f,python,linux,vim,quarto,markdown,jupyternotebooks
# Edit at https://www.toptal.com/developers/gitignore?templates=-f,python,linux,vim,quarto,markdown,jupyternotebooks # Edit at https://www.toptal.com/developers/gitignore?templates=-f,python,linux,vim,quarto,markdown,jupyternotebooks

View file

@ -1,5 +1,5 @@
--- ---
bibliography: ../data/intermediate/zotero-library.bib bibliography: ../02-data/intermediate/zotero-library.bib
csl: /home/marty/documents/library/utilities/styles/APA-7.csl csl: /home/marty/documents/library/utilities/styles/APA-7.csl
papersize: A4 papersize: A4
linestretch: 1.5 linestretch: 1.5
@ -22,8 +22,7 @@ subtitle: Addressing inequalities in the World of Work
```{python} ```{python}
#| echo: false #| echo: false
from pathlib import Path from pathlib import Path
import src.globals as g data_dir=Path("../02-data")
data_dir = g.DATA_DIR
## standard imports ## standard imports
from IPython.core.display import Markdown as md from IPython.core.display import Markdown as md
@ -44,9 +43,8 @@ sns.set_style("whitegrid")
import bibtexparser import bibtexparser
bib_string="" bib_string=""
sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02") print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
print(f"path: {sample_dir.absolute()}") for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
for partial_bib in sample_dir.glob("*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
sample = bibtexparser.parse_string(bib_string) sample = bibtexparser.parse_string(bib_string)

View file

@ -1,5 +1,5 @@
--- ---
bibliography: data/intermediate/zotero-library.bib bibliography: 02-data/intermediate/zotero-library.bib
title: Grab yml title: Grab yml
--- ---
@ -184,18 +184,23 @@ from matplotlib import pyplot as plt
import seaborn as sns import seaborn as sns
from tabulate import tabulate from tabulate import tabulate
import bibtexparser import bibtexparser
import src.globals as g
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
bib_string="" bib_string=""
for partial_bib in g.REFERENCE_DATA.glob("**/*.bib"): for partial_bib in RAW_DATA.glob("**/*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
bib_sample_raw_db = bibtexparser.parse_string(bib_string) bib_sample_raw_db = bibtexparser.parse_string(bib_string)
bib_string="" bib_string=""
for partial_bib in g.REFERENCE_DATA.joinpath("zotero-library.bib"): for partial_bib in WORKING_DATA.glob("**/*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
bib_sample = bibtexparser.parse_string(bib_string) bib_sample = bibtexparser.parse_string(bib_string)
@ -217,10 +222,10 @@ zot_df = pd.DataFrame([
], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi") ], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
# Add WB country grouping definitions (income group, world region) # Add WB country grouping definitions (income group, world region)
WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve() WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy") df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}") bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
.assign( .assign(
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False), doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]), zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),

View file

@ -15,7 +15,7 @@ import bibtexparser
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./data") DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw") RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate") WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed") PROCESSED_DATA=DATA_DIR.joinpath("processed")
@ -120,7 +120,7 @@ datavis:
```{python} ```{python}
findings_institutional = pd.read_csv("data/supplementary/findings-institutional.csv") findings_institutional = pd.read_csv("02-data/supplementary/findings-institutional.csv")
findings_institutional findings_institutional
from src.model import validity from src.model import validity
import math import math

View file

@ -16,7 +16,7 @@ import bibtexparser
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./data") DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw") RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate") WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed") PROCESSED_DATA=DATA_DIR.joinpath("processed")

View file

@ -1,5 +1,5 @@
--- ---
bibliography: data/intermediate/zotero-library.bib bibliography: 02-data/intermediate/zotero-library.bib
csl: /home/marty/documents/library/utilities/styles/APA-7.csl csl: /home/marty/documents/library/utilities/styles/APA-7.csl
papersize: A4 papersize: A4
linestretch: 1.5 linestretch: 1.5
@ -28,10 +28,8 @@ zotero:
```{python} ```{python}
#| echo: false #| echo: false
from pathlib import Path from pathlib import Path
import src.globals as g DATA_DIR=Path("./02-data")
DATA_DIR = g.DATA_DIR BIB_PATH = DATA_DIR.joinpath("raw/01_wos-sample_2023-11-02")
RAW_DATA = g.RAW_DATA
BIB_PATH = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
## standard imports ## standard imports
from IPython.core.display import Markdown as md from IPython.core.display import Markdown as md

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show more