diff --git a/00-notebooks/yml-grab.qmd b/00-notebooks/explore.qmd
similarity index 64%
rename from 00-notebooks/yml-grab.qmd
rename to 00-notebooks/explore.qmd
index eebf5eb..b2c9aa6 100644
--- a/00-notebooks/yml-grab.qmd
+++ b/00-notebooks/explore.qmd
@@ -94,6 +94,8 @@ by_study = (
 
 ```{python}
 import re
+from matplotlib import pyplot as plt
+import seaborn as sns
 by_intervention = (
     df.groupby(["author", "year", "title"])
     .agg(
@@ -120,3 +122,74 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
 plt.show()
 by_intervention = None
 ```
+
+```{python}
+#| label: fig-publications-per-year
+#| fig-cap: Publications per year
+
+df_study_years = (
+    df.groupby(["author", "year", "title"])
+    .first()
+    .reset_index()
+    .drop_duplicates()
+)
+# plot by year TODO decide if we want to distinguish by literature type/region/etc as hue
+# FIXME should be timeseries plot so no years are missing
+ax = sns.countplot(df_study_years, x="year", native_scale=True)
+ax.tick_params(axis='x', rotation=45)
+ax.set_xlabel("")
+plt.tight_layout()
+plt.show()
+df_study_years = None
+```
+
+```{python}
+#| label: tbl-income-crosstab
+#| tbl-cap: Interventions targeting income inequality
+
+# Explode on the DataFrame so each split value stays on its source row;
+# Series.explode(ignore_index=True) assigned back would misalign rows.
+df_income = (
+    df.assign(Inequality=df['inequality'].str.split(";"))
+    .explode('Inequality', ignore_index=True)
+)
+df_income['Inequality'] = df_income['Inequality'].str.strip()
+df_income = df_income.loc[df_income['Inequality'] == "income"]
+df_income = (
+    df_income.assign(Intervention=df_income['intervention'].str.split(";"))
+    .explode('Intervention', ignore_index=True)
+)
+df_income['Intervention'] = df_income['Intervention'].str.replace(r"\(.+\)", "", regex=True).str.strip()
+pd.crosstab(df_income["Intervention"], df_income["Inequality"])
+```
+
+```{python}
+#| label: tbl-income-crosstab-fn
+#| tbl-cap: Interventions targeting income inequality
+
+def inequality_crosstab(df, inequality: str):
+    """Cross-tabulate interventions for rows tagged with one inequality.
+
+    The `inequality` and `intervention` columns hold ";"-separated values.
+    Each is exploded on the DataFrame so a split value stays on the row it
+    came from; exploding the Series with ignore_index=True and assigning it
+    back aligns on a fresh 0..n index and pairs values with the wrong rows.
+
+    Returns the crosstab of intervention (parenthesised detail stripped)
+    against the selected inequality.
+    """
+    temp_df = (
+        df.assign(Inequality=df['inequality'].str.split(";"))
+        .explode('Inequality', ignore_index=True)
+    )
+    temp_df['Inequality'] = temp_df['Inequality'].str.strip()
+    temp_df = temp_df.loc[temp_df['Inequality'] == inequality]
+    temp_df = (
+        temp_df.assign(Intervention=temp_df['intervention'].str.split(";"))
+        .explode('Intervention', ignore_index=True)
+    )
+    temp_df['Intervention'] = temp_df['Intervention'].str.replace(r"\(.+\)", "", regex=True).str.strip()
+    return pd.crosstab(temp_df["Intervention"], temp_df["Inequality"])
+
+inequality_crosstab(df, "income")
+```