Compare commits

..

No commits in common. "main" and "0.5.1" have entirely different histories.
main ... 0.5.1

316 changed files with 2167 additions and 9937 deletions
.gitignore
00-notebooks
02-data

14
.gitignore vendored
View file

@ -1,14 +1,6 @@
/documentation/DB /03-documentation/DB
/documentation/library /03-documentation/library
/output /04-outputs
/manuscript/*_files/
# Elsevier journal target:
# Ignore bibliography style, spl (for natbib?) and latex class
# since they don't get deleted after compilation
*.spl
*.bst
*.cls
# Created by https://www.toptal.com/developers/gitignore/api/-f,python,linux,vim,quarto,markdown,jupyternotebooks # Created by https://www.toptal.com/developers/gitignore/api/-f,python,linux,vim,quarto,markdown,jupyternotebooks
# Edit at https://www.toptal.com/developers/gitignore?templates=-f,python,linux,vim,quarto,markdown,jupyternotebooks # Edit at https://www.toptal.com/developers/gitignore?templates=-f,python,linux,vim,quarto,markdown,jupyternotebooks

View file

@ -1,5 +1,5 @@
--- ---
bibliography: ../data/intermediate/zotero-library.bib bibliography: ../02-data/intermediate/zotero-library.bib
csl: /home/marty/documents/library/utilities/styles/APA-7.csl csl: /home/marty/documents/library/utilities/styles/APA-7.csl
papersize: A4 papersize: A4
linestretch: 1.5 linestretch: 1.5
@ -22,8 +22,7 @@ subtitle: Addressing inequalities in the World of Work
```{python} ```{python}
#| echo: false #| echo: false
from pathlib import Path from pathlib import Path
import src.globals as g data_dir=Path("../02-data")
data_dir = g.DATA_DIR
## standard imports ## standard imports
from IPython.core.display import Markdown as md from IPython.core.display import Markdown as md
@ -44,9 +43,8 @@ sns.set_style("whitegrid")
import bibtexparser import bibtexparser
bib_string="" bib_string=""
sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02") print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
print(f"path: {sample_dir.absolute()}") for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
for partial_bib in sample_dir.glob("*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
sample = bibtexparser.parse_string(bib_string) sample = bibtexparser.parse_string(bib_string)

View file

@ -1,5 +1,5 @@
--- ---
bibliography: data/intermediate/zotero-library.bib bibliography: 02-data/intermediate/zotero-library.bib
title: Grab yml title: Grab yml
--- ---
@ -184,18 +184,23 @@ from matplotlib import pyplot as plt
import seaborn as sns import seaborn as sns
from tabulate import tabulate from tabulate import tabulate
import bibtexparser import bibtexparser
import src.globals as g
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
bib_string="" bib_string=""
for partial_bib in g.REFERENCE_DATA.glob("**/*.bib"): for partial_bib in RAW_DATA.glob("**/*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
bib_sample_raw_db = bibtexparser.parse_string(bib_string) bib_sample_raw_db = bibtexparser.parse_string(bib_string)
bib_string="" bib_string=""
for partial_bib in g.REFERENCE_DATA.joinpath("zotero-library.bib"): for partial_bib in WORKING_DATA.glob("**/*.bib"):
with open(partial_bib) as f: with open(partial_bib) as f:
bib_string+="\n".join(f.readlines()) bib_string+="\n".join(f.readlines())
bib_sample = bibtexparser.parse_string(bib_string) bib_sample = bibtexparser.parse_string(bib_string)
@ -217,10 +222,10 @@ zot_df = pd.DataFrame([
], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi") ], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
# Add WB country grouping definitions (income group, world region) # Add WB country grouping definitions (income group, world region)
WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve() WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy") df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}") bib_df = (load_data.from_yml(f"{PROCESSED_DATA}")
.assign( .assign(
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False), doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]), zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),

View file

@ -15,7 +15,7 @@ import bibtexparser
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./data") DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw") RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate") WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed") PROCESSED_DATA=DATA_DIR.joinpath("processed")
@ -120,7 +120,7 @@ datavis:
```{python} ```{python}
findings_institutional = pd.read_csv("data/supplementary/findings-institutional.csv") findings_institutional = pd.read_csv("02-data/supplementary/findings-institutional.csv")
findings_institutional findings_institutional
from src.model import validity from src.model import validity
import math import math

View file

@ -16,7 +16,7 @@ import bibtexparser
sns.set_style("whitegrid") sns.set_style("whitegrid")
DATA_DIR=Path("./data") DATA_DIR=Path("./02-data")
RAW_DATA=DATA_DIR.joinpath("raw") RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate") WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed") PROCESSED_DATA=DATA_DIR.joinpath("processed")

View file

@ -1,5 +1,5 @@
--- ---
bibliography: data/intermediate/zotero-library.bib bibliography: 02-data/intermediate/zotero-library.bib
csl: /home/marty/documents/library/utilities/styles/APA-7.csl csl: /home/marty/documents/library/utilities/styles/APA-7.csl
papersize: A4 papersize: A4
linestretch: 1.5 linestretch: 1.5
@ -28,10 +28,8 @@ zotero:
```{python} ```{python}
#| echo: false #| echo: false
from pathlib import Path from pathlib import Path
import src.globals as g DATA_DIR=Path("./02-data")
DATA_DIR = g.DATA_DIR BIB_PATH = DATA_DIR.joinpath("raw/01_wos-sample_2023-11-02")
RAW_DATA = g.RAW_DATA
BIB_PATH = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
## standard imports ## standard imports
from IPython.core.display import Markdown as md from IPython.core.display import Markdown as md

File diff suppressed because one or more lines are too long

View file

@ -15,7 +15,7 @@ group: working mothers
data: national administrative Social Security Records (1975-2008) data: national administrative Social Security Records (1975-2008)
design: quasi-experimental design: quasi-experimental
method: difference-in-difference analysis; regression discontinuity method: difference-in-difference analysis
sample: 13000 sample: 13000
unit: individual unit: individual
representativeness: national, census representativeness: national, census

Some files were not shown because too many files have changed in this diff Show more