chore(notebooks): Update exploration nb
This commit is contained in:
parent
3deb30b5b3
commit
3bb3602f90
1 changed files with 53 additions and 2 deletions
|
@ -3,6 +3,8 @@ bibliography: 02-data/supplementary/lib.bib
|
||||||
title: Grab yml
|
title: Grab yml
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Separate data acquisition
|
||||||
|
|
||||||
```{python}
|
```{python}
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from src import data
|
from src import data
|
||||||
|
@ -166,6 +168,8 @@ gender_df = temp_df.loc[temp_df["Inequality"] == "gender"]
|
||||||
income_df = temp_df.loc[temp_df["Inequality"] == "income"]
|
income_df = temp_df.loc[temp_df["Inequality"] == "income"]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Complete data replication from scoping
|
||||||
|
|
||||||
prep full data set:
|
prep full data set:
|
||||||
|
|
||||||
```{python}
|
```{python}
|
||||||
|
@ -221,7 +225,7 @@ zot_df = pd.DataFrame([
|
||||||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||||
|
|
||||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
bib_df = (data.from_yml(f"{PROCESSED_DATA}")
|
||||||
.assign(
|
.assign(
|
||||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||||
|
@ -239,7 +243,7 @@ df_country_groups = None
|
||||||
```
|
```
|
||||||
|
|
||||||
```{python}
|
```{python}
|
||||||
df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['World'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
|
df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['Europe & Central Asia;South Asia;North America;East Asia & Pacific;Sub-Saharan Africa;Europe & Central Asia;Latin America & Caribbean'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
|
||||||
|
|
||||||
def countries_to_regions(countries:str):
|
def countries_to_regions(countries:str):
|
||||||
res = set()
|
res = set()
|
||||||
|
@ -251,6 +255,7 @@ def countries_to_regions(countries:str):
|
||||||
|
|
||||||
# countries_to_regions("India; Nicaragua")
|
# countries_to_regions("India; Nicaragua")
|
||||||
bib_df['region'] = bib_df['country'].map(countries_to_regions)
|
bib_df['region'] = bib_df['country'].map(countries_to_regions)
|
||||||
|
bib_df['region'].value_counts().plot.bar()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{python}
|
```{python}
|
||||||
|
@ -269,3 +274,49 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
||||||
rotation_mode="anchor")
|
rotation_mode="anchor")
|
||||||
plt.show()
|
plt.show()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
def _split_terms(column):
    """Turn a ';'-separated string column into a column of term lists.

    Parenthesised notes are removed first, then the optional single space
    around each ';' is dropped, the whole string is stripped and finally
    split on ';'.
    """
    return (column
        # `[^)]*` stops at the first closing parenthesis. A greedy `.+` would
        # run from the first "(" to the last ")" of the cell and swallow every
        # term listed between two parenthetical notes.
        .str.replace(r"\([^)]*\)", "", regex=True)
        .str.replace(r" ?; ?", ";", regex=True)
        .str.strip()
        .str.split(";")
    )


# Long-format frame: one row per (Intervention, Inequality) combination of
# each original row, with `region` carried along.
df_inequality = (bib_df[["region", "intervention", "inequality"]]
    .assign(
        Intervention=lambda _df: _split_terms(_df["intervention"]),
        Inequality=lambda _df: _split_terms(_df["inequality"]),
    )
    # Exploding both list columns in sequence yields the cross product of
    # interventions and inequalities per original row.
    .explode("Intervention")
    .explode("Inequality")
    .reset_index(drop=True)
)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
def crosstab_inequality(df, inequality: str, *, baseline: str = "income", **kwargs):
    """Cross-tabulate interventions against one inequality and a baseline.

    Args:
        df: Frame with an "Intervention" and an "Inequality" column.
        inequality: Value of "Inequality" to tabulate.
        baseline: Second "Inequality" value kept as comparison column
            (defaults to "income").
        **kwargs: Forwarded unchanged to ``pd.crosstab``.

    Returns:
        The crosstab of "Intervention" by "Inequality", restricted to the
        two selected inequality values, without the interventions whose
        entry in the ``inequality`` column is 0.
    """
    selected = df.loc[df["Inequality"].isin([inequality, baseline])]
    tab = pd.crosstab(selected["Intervention"], selected["Inequality"], **kwargs)
    if inequality not in tab.columns:
        # No row carries the requested inequality: every intervention would
        # have a zero entry, so the result is an empty table. Adding the
        # column keeps callers that sort or select by it working.
        tab[inequality] = 0
    return tab.loc[tab[inequality] != 0]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Gender inequality
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
#| label: tbl-gender-crosstab
|
||||||
|
#| tbl-cap: Interventions targeting gender inequality
|
||||||
|
|
||||||
|
crosstab_inequality(df_inequality, "gender", normalize=False).sort_values("gender", ascending=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
def region_vis_inequality(df, inequality: str):
    """Draw a count plot of regions for one inequality.

    Args:
        df: Frame with an "Inequality" and a "region" column.
        inequality: Value of "Inequality" whose rows are plotted.

    Returns:
        Whatever ``sns.countplot`` returns for the filtered rows, with the
        bars ordered by descending region frequency.
    """
    subset = df.loc[df["Inequality"] == inequality]
    # value_counts() sorts by frequency, so its index gives the bar order.
    region_order = subset["region"].value_counts().index
    # Pass the frame by keyword: seaborn's first positional parameter has not
    # always been `data`, so the keyword form does not depend on the version.
    return sns.countplot(data=subset, x="region", order=region_order)


region_vis_inequality(df_inequality, "spatial")
|
||||||
|
```
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue