chore(code): Rename ymlgrab to explore notebook

This commit is contained in:
Marty Oehme 2023-12-21 11:40:05 +01:00
parent b3a96a4a4f
commit fb032508f1
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@ -94,6 +94,8 @@ by_study = (
```{python} ```{python}
import re import re
from matplotlib import pyplot as plt
import seaborn as sns
by_intervention = ( by_intervention = (
df.groupby(["author", "year", "title"]) df.groupby(["author", "year", "title"])
.agg( .agg(
@ -120,3 +122,50 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
plt.show() plt.show()
by_intervention = None by_intervention = None
``` ```
```{python}
#| label: fig-publications-per-year
#| fig-cap: Publications per year
# Collapse the data to one row per (author, year, title) combination so that
# every study is counted once, no matter how many rows it spans in `df`.
df_study_years = df.groupby(["author", "year", "title"]).first()
df_study_years = df_study_years.reset_index().drop_duplicates()

# TODO decide if we want to distinguish by literature type/region/etc as hue
# FIXME should be timeseries plot so no years are missing
ax = sns.countplot(data=df_study_years, x="year", native_scale=True)
ax.set_xlabel("")
ax.tick_params(axis="x", rotation=45)
plt.tight_layout()
plt.show()

# Drop the reference to the temporary frame once the figure is rendered.
df_study_years = None
```
```{python}
#| label: tbl-income-crosstab
#| tbl-cap: Interventions targeting income inequality
# Build a long-format frame with one row per (study row, inequality) pair.
# Exploding the DataFrame itself (rather than exploding a Series and assigning
# it back) keeps every split value attached to the row it came from; assigning
# an exploded Series with a renumbered index would align on index labels and
# pair values with the wrong studies.
df_income = df.assign(Inequality=df["inequality"].str.split(";")).explode(
    "Inequality", ignore_index=True
)
df_income["Inequality"] = df_income["Inequality"].str.strip()

# Keep only the rows that target income inequality.
df_income = df_income.loc[df_income["Inequality"] == "income"]

# Same treatment for interventions: one row per (study row, intervention),
# with any parenthesised detail removed from the intervention name.
df_income = df_income.assign(
    Intervention=df_income["intervention"].str.split(";")
).explode("Intervention", ignore_index=True)
df_income["Intervention"] = (
    df_income["Intervention"].str.replace(r"\(.+\)", "", regex=True).str.strip()
)

# Last expression of the cell: the rendered table.
pd.crosstab(df_income["Intervention"], df_income["Inequality"])
```
```{python}
#| label: tbl-income-crosstab
#| tbl-cap: Interventions targeting income inequality
def inequality_crosstab(df: pd.DataFrame, inequality: str) -> pd.DataFrame:
    """Cross-tabulate interventions against a single inequality category.

    The ``inequality`` and ``intervention`` columns of ``df`` hold
    ``;``-separated lists. Each row is expanded so that every
    (inequality, intervention) combination of that row becomes its own row;
    only rows whose inequality equals ``inequality`` are counted.

    Args:
        df: Frame with string columns ``inequality`` and ``intervention``.
            It is not modified.
        inequality: The inequality category to keep, compared after
            stripping surrounding whitespace (e.g. ``"income"``).

    Returns:
        A crosstab with one row per intervention name and a single column
        for the requested inequality, holding occurrence counts.
    """
    # Explode the DataFrame rows instead of exploding a Series and assigning
    # it back: a Series exploded with ignore_index=True is renumbered 0..k-1,
    # and column assignment aligns on index labels, which would attach split
    # values to the wrong rows and silently drop the surplus ones.
    temp_df = df.assign(Inequality=df["inequality"].str.split(";")).explode(
        "Inequality", ignore_index=True
    )
    temp_df["Inequality"] = temp_df["Inequality"].str.strip()
    temp_df = temp_df.loc[temp_df["Inequality"] == inequality]

    # One row per intervention; parenthesised detail is removed from the name.
    temp_df = temp_df.assign(
        Intervention=temp_df["intervention"].str.split(";")
    ).explode("Intervention", ignore_index=True)
    temp_df["Intervention"] = (
        temp_df["Intervention"].str.replace(r"\(.+\)", "", regex=True).str.strip()
    )

    return pd.crosstab(temp_df["Intervention"], temp_df["Inequality"])
# Cell output: the crosstab of interventions for the "income" category.
inequality_crosstab(df, "income")
```