chore(notebooks): Update exploration nb
This commit is contained in:
parent
3deb30b5b3
commit
3bb3602f90
1 changed files with 53 additions and 2 deletions
|
@ -3,6 +3,8 @@ bibliography: 02-data/supplementary/lib.bib
|
|||
title: Grab yml
|
||||
---
|
||||
|
||||
## Separate data acquisition
|
||||
|
||||
```{python}
|
||||
import pandas as pd
|
||||
from src import data
|
||||
|
@ -166,6 +168,8 @@ gender_df = temp_df.loc[temp_df["Inequality"] == "gender"]
|
|||
income_df = temp_df.loc[temp_df["Inequality"] == "income"]
|
||||
```
|
||||
|
||||
## Complete data replication from scoping
|
||||
|
||||
Prepare the full data set:
|
||||
|
||||
```{python}
|
||||
|
@ -221,7 +225,7 @@ zot_df = pd.DataFrame([
|
|||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||
|
||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}")
|
||||
.assign(
|
||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||
|
@ -239,7 +243,7 @@ df_country_groups = None
|
|||
```
|
||||
|
||||
```{python}
|
||||
df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['World'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
|
||||
df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['Europe & Central Asia;South Asia;North America;East Asia & Pacific;Sub-Saharan Africa;Europe & Central Asia;Latin America & Caribbean'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
|
||||
|
||||
def countries_to_regions(countries:str):
|
||||
res = set()
|
||||
|
@ -251,6 +255,7 @@ def countries_to_regions(countries:str):
|
|||
|
||||
# countries_to_regions("India; Nicaragua")
|
||||
bib_df['region'] = bib_df['country'].map(countries_to_regions)
|
||||
bib_df['region'].value_counts().plot.bar()
|
||||
```
|
||||
|
||||
```{python}
|
||||
|
@ -269,3 +274,49 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
|||
rotation_mode="anchor")
|
||||
plt.show()
|
||||
```
|
||||
|
||||
```{python}
|
||||
def _split_terms(column):
    """Turn a ';'-separated string column into a column of term lists.

    Parenthesised annotations are removed first, then the optional single
    space on either side of each ';' is dropped, the whole string is
    stripped, and the result is split on ';'.

    The annotation pattern is deliberately non-greedy: a greedy `.+` would
    match from the first "(" to the last ")" in an entry such as
    "a (x); b (y)" and swallow the term "b" together with the annotations.
    """
    return (column
        .str.replace(r"\(.+?\)", "", regex=True)
        .str.replace(r" ?; ?", ";", regex=True)
        .str.strip()
        .str.split(";")
    )


# Long-format table: one row per (Intervention, Inequality) pair of each
# bib_df row. Exploding both list columns in turn yields every combination
# of the two term lists for a given source row.
df_inequality = (bib_df[["region", "intervention", "inequality"]]
    .assign(
        Intervention=lambda _df: _split_terms(_df["intervention"]),
        Inequality=lambda _df: _split_terms(_df["inequality"]),
    )
    .explode("Intervention")
    .explode("Inequality")
    .reset_index(drop=True)
)
|
||||
```
|
||||
|
||||
```{python}
|
||||
def crosstab_inequality(df, inequality:str, **kwargs):
    """Cross-tabulate interventions against one inequality and "income".

    Only rows whose `Inequality` value is `inequality` or "income" are
    counted. Interventions whose cell in the `inequality` column is 0 are
    removed from the result.

    Args:
        df: frame with `Intervention` and `Inequality` columns.
        inequality: inequality value to tabulate next to "income".
        **kwargs: forwarded unchanged to `pd.crosstab`.

    Returns:
        The filtered crosstab, indexed by intervention.

    Raises:
        KeyError: if no selected row carries `inequality`, because the
            crosstab then has no column of that name.
    """
    selected = df.loc[df["Inequality"].isin([inequality, "income"])]
    table = pd.crosstab(selected["Intervention"], selected["Inequality"], **kwargs)
    # Keep only interventions that occur at least once with `inequality`.
    has_target = table[inequality] != 0
    return table.loc[has_target]
|
||||
```
|
||||
|
||||
## Gender inequality
|
||||
|
||||
```{python}
|
||||
#| label: tbl-gender-crosstab
|
||||
#| tbl-cap: Interventions targeting gender inequality
|
||||
|
||||
crosstab_inequality(df_inequality, "gender", normalize=False).sort_values("gender", ascending=False)
|
||||
```
|
||||
|
||||
```{python}
|
||||
def region_vis_inequality(df, inequality:str):
    """Plot how often each region occurs for one inequality.

    Args:
        df: frame with `Inequality` and `region` columns.
        inequality: value of `Inequality` whose rows are plotted.

    Returns:
        Whatever `sns.countplot` returns for the selected rows.
    """
    subset = df.loc[df["Inequality"] == inequality]
    # value_counts() sorts by frequency, so its index fixes the bar order.
    region_order = subset["region"].value_counts().index
    return sns.countplot(subset, x="region", order=region_order)
|
||||
region_vis_inequality(df_inequality, "spatial")
|
||||
```
|
||||
|
||||
|
|
Loading…
Reference in a new issue