chore(code): Rename ymlgrab to explore notebook

2023-12-21 11:40:05 +01:00 · 2023-12-21 11:40:05 +01:00 · fb032508f1
commit fb032508f1
parent b3a96a4a4f
1 changed files with 49 additions and 0 deletions
--- a/00-notebooks/yml-grab.qmd
+++ b/00-notebooks/yml-grab.qmd
@ -94,6 +94,8 @@ by_study = (

 ```{python}
 import re
+from matplotlib import pyplot as plt
+import seaborn as sns
 by_intervention = (
    df.groupby(["author", "year", "title"])
    .agg(
@ -120,3 +122,50 @@ plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
 plt.show()
 by_intervention = None
 ```
+
+```{python}
+#| label: fig-publications-per-year
+#| fig-cap: Publications per year
+
+df_study_years = (  # one row per study, keyed by (author, year, title)
+    df.groupby(["author", "year", "title"])
+    .first()  # first entry of each remaining column within every group
+    .reset_index()  # group keys become ordinary columns again, so "year" can be used as x below
+    .drop_duplicates()
+)
+# Count plot of studies per year. TODO: decide whether to distinguish by literature type, region, etc. via hue.
+# FIXME: should be a time-series plot so that years without any publication are not missing from the axis.
+ax = sns.countplot(df_study_years, x="year", native_scale=True)
+ax.tick_params(axis='x', rotation=45)  # rotate the x tick labels by 45 degrees
+ax.set_xlabel("")  # no x-axis title
+plt.tight_layout()
+plt.show()
+df_study_years = None  # drop the reference to the temporary frame once the figure is shown
+```
+
+```{python}
+#| label: tbl-income-crosstab
+#| tbl-cap: Interventions targeting income inequality
+
+df_income = df.copy()  # work on a copy; the columns added below do not touch df
+df_income['Inequality'] = df_income['inequality'].str.split(";").explode(ignore_index=True).str.strip()  # NOTE(review): explode(ignore_index=True) renumbers to 0..n-1, so this index-aligned assignment may misplace values when a cell holds several ";"-separated entries -- confirm
+df_income = df_income.loc[df_income['Inequality'] == "income"].copy()  # keep only rows whose Inequality equals "income"
+df_income['Intervention'] = df_income['intervention'].str.split(";").explode(ignore_index=True).str.replace(r"\(.+\)", "", regex=True).str.strip()  # same split/explode pattern (same review note applies); the regex removes "(...)" text before stripping whitespace
+pd.crosstab(df_income["Intervention"], df_income["Inequality"])  # rows: Intervention values, columns: Inequality values; last expression of the cell
+```
+
+```{python}
+#| label: tbl-income-crosstab-func
+#| tbl-cap: Interventions targeting income inequality
+
+def inequality_crosstab(df, inequality:str):  # cross-tabulate Intervention values against the one selected Inequality value
+    temp_df = df.copy()  # copy so the columns added below do not modify the caller's frame
+    temp_df['Inequality'] = temp_df['inequality'].str.split(";").explode(ignore_index=True).str.strip()  # NOTE(review): explode(ignore_index=True) renumbers to 0..n-1, so this index-aligned assignment may misplace values when a cell holds several ";"-separated entries -- confirm
+    temp_df = temp_df.loc[temp_df['Inequality'] == inequality].copy()  # keep only rows whose Inequality equals the requested value
+    temp_df['Intervention'] = temp_df['intervention'].str.split(";").explode(ignore_index=True).str.replace(r"\(.+\)", "", regex=True).str.strip()  # same split/explode pattern (same review note applies); the regex removes "(...)" text before stripping whitespace
+    tab = pd.crosstab(temp_df["Intervention"], temp_df["Inequality"])  # rows: Intervention values, columns: Inequality values
+    temp_df=None  # drop the local reference to the working copy before returning
+    return tab
+
+inequality_crosstab(df, "income")  # build the table for "income"; last expression of the cell
+```