chore(code): Rename ymlgrab to explore notebook

2023-12-21 11:40:05 +01:00 · 2023-12-21 11:40:05 +01:00 · fb032508f1
commit fb032508f1
parent b3a96a4a4f
1 changed files with 49 additions and 0 deletions
--- a/00-notebooks/yml-grab.qmd
+++ b/00-notebooks/yml-grab.qmd
@ -94,6 +94,8 @@ by_study = (
 ```{python}
 import re
 from matplotlib import pyplot as plt
 import seaborn as sns
 by_intervention = (
    df.groupby(["author", "year", "title"])
    .agg(
@ -120,3 +122,50 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
 plt.show()
 by_intervention = None
 ```
 ```{python}
 #| label: fig-publications-per-year
 #| fig-cap: Publications per year
 # Collapse the extraction rows to a single record per study, keyed on
 # (author, year, title), so each publication is counted once.
 df_study_years = df.groupby(["author", "year", "title"]).first()
 df_study_years = df_study_years.reset_index().drop_duplicates()

 # TODO decide if we want to distinguish by literature type/region/etc as hue
 # FIXME should be timeseries plot so no years are missing
 ax = sns.countplot(data=df_study_years, x="year", native_scale=True)
 ax.set_xlabel("")
 ax.tick_params(axis="x", rotation=45)
 plt.tight_layout()
 plt.show()

 # Drop the reference to the intermediate frame once the figure is rendered.
 df_study_years = None
 ```
 ```{python}
 #| label: tbl-income-crosstab
 #| tbl-cap: Interventions targeting income inequality
 # Explode at the DataFrame level so every split token stays attached to the
 # row it came from. Assigning an exploded Series (renumbered 0..m-1) back to a
 # column aligns on index and would pair rows with tokens of other rows.
 df_income = df.assign(Inequality=df["inequality"].str.split(";")).explode(
     "Inequality", ignore_index=True
 )
 df_income["Inequality"] = df_income["Inequality"].str.strip()
 # Keep only the rows whose (single) inequality token is "income".
 df_income = df_income.loc[df_income["Inequality"] == "income"].copy()
 # Same treatment for the ";"-separated interventions: one row per token,
 # with any parenthesised annotation removed before trimming whitespace.
 df_income = df_income.assign(
     Intervention=df_income["intervention"].str.split(";")
 ).explode("Intervention", ignore_index=True)
 df_income["Intervention"] = (
     df_income["Intervention"].str.replace(r"\(.+\)", "", regex=True).str.strip()
 )
 # Last expression of the cell: the crosstab is rendered as the table output.
 pd.crosstab(df_income["Intervention"], df_income["Inequality"])
 ```
 ```{python}
 #| label: tbl-income-crosstab-func
 #| tbl-cap: Interventions targeting income inequality
 def inequality_crosstab(df, inequality:str):
     """Cross-tabulate interventions against a single inequality type.

     The ``inequality`` and ``intervention`` columns of ``df`` hold
     ";"-separated lists. Both are split into one row per token, the rows
     whose inequality token equals ``inequality`` are kept, and the
     interventions (with parenthesised annotations removed and whitespace
     trimmed) are counted.

     Args:
         df: DataFrame with string columns ``inequality`` and
             ``intervention``. It is not modified.
         inequality: The inequality token to filter on, e.g. ``"income"``.

     Returns:
         The ``pd.crosstab`` of ``Intervention`` (rows) by ``Inequality``
         (columns) for the matching rows.
     """
     # Explode at the DataFrame level so every token stays on the row it came
     # from. Assigning an exploded Series (renumbered 0..m-1) back to a column
     # aligns on index and would pair rows with tokens belonging to other rows.
     exploded = df.assign(Inequality=df["inequality"].str.split(";")).explode(
         "Inequality", ignore_index=True
     )
     exploded["Inequality"] = exploded["Inequality"].str.strip()
     matching = exploded.loc[exploded["Inequality"] == inequality]
     matching = matching.assign(
         Intervention=matching["intervention"].str.split(";")
     ).explode("Intervention", ignore_index=True)
     matching["Intervention"] = (
         matching["Intervention"].str.replace(r"\(.+\)", "", regex=True).str.strip()
     )
     return pd.crosstab(matching["Intervention"], matching["Inequality"])
 # Last expression of the cell: the income crosstab is rendered as the cell output.
 inequality_crosstab(df, "income")
 ```