From e3315d402f4a01cf15d24bc49be8fa744ca0a768 Mon Sep 17 00:00:00 2001
From: Marty Oehme <marty.oehme@gmail.com>
Date: Fri, 22 Dec 2023 19:27:20 +0100
Subject: [PATCH] chore(script): Move inequality and region breakdowns to
 discussion

---
 scoping_review.qmd | 114 ++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 64 deletions(-)

diff --git a/scoping_review.qmd b/scoping_review.qmd
index bf38784..6f1647d 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -604,63 +604,6 @@ by_intervention = None
 
 {{++ TODO: describe intervention types with complete dataset ++}}
 
-```{python}
-#| label: fig-inequality-types
-#| fig-cap: Types of inequality analyzed
-
-by_inequality = (
-    bib_df.groupby(["author", "year", "title"])
-    .agg(
-        {
-            "inequality": lambda _col: "; ".join(_col),
-        }
-    )
-    .reset_index()
-    .drop_duplicates()
-    .assign(
-        inequality=lambda _df: _df["inequality"].apply(
-            lambda _cell: set([x.strip() for x in _cell.split(";")])
-        ),
-    )
-    .explode("inequality")
-)
-sort_order = by_inequality["inequality"].value_counts().index
-
-fig = plt.figure()
-fig.set_size_inches(6, 3)
-ax = sns.countplot(by_inequality, x="inequality", order=by_inequality["inequality"].value_counts().index)
-plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
-         rotation_mode="anchor")
-plt.show()
-by_inequality = None
-```
-
-Income inequality is the primary type of inequality interrogated in most of the relevant studies.
-This follows the identified lens income inequality can provide through which to understand other inequalities ---
-many studies use income measurements and changes in income or income inequality over time as indicators to understand a variety of other inequalities' linkages through.
-
-{{++ TODO: describe inequality type distribution for overall dataset ++}}
-
-```{python}
-#| label: fig-region-counts
-#| fig-cap: Studies by regions analysed
-
-bib_df = (bib_df
-    .assign(
-        # create de-duplicated joins for all observations
-        region=lambda _df: _df["region"].apply(
-            lambda _cell: set([x.strip() for x in _cell.split(";")])
-        ),
-     )
-    .explode("region")
-)
-# bib_df["region"] = bib_df["region"].str.split(";").explode().str.strip()
-ax = sns.countplot(bib_df, x="region", order=bib_df["region"].value_counts().index)
-plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
-         rotation_mode="anchor")
-plt.show()
-```
-
 # Synthesis of Evidence --- a multitude of lenses
 
 This section will present a synthesis of evidence from the scoping review.
@@ -1048,19 +991,62 @@ The authors suggest the primary channel is the newly increased bargaining power
 
 [^1]: The Mahatma Gandhi National Rural Employment Guarantee Scheme, one of the largest redistribution programmes on the household level in the world, entitling each household to up to 100 days of work per year.
 
-# Results for horizontal inequalities
+# Discussion
 
 ```{python}
 # dataframe containing each intervention inequality pair
-df_inequality = bib_df[["intervention", "inequality"]].copy().reset_index(drop=True)
-df_inequality['Inequality'] = df_inequality['inequality'].str.split(";").explode(ignore_index=True).str.strip()
-df_inequality['Intervention'] = df_inequality['intervention'].str.split(";").explode(ignore_index=True).str.replace(r"\(.+\)", "", regex=True).str.strip()
+df_inequality = (  # one row per intervention/inequality combination of every bib_df row; "region" is carried along unchanged
+    bib_df[["region", "intervention", "inequality"]]
+    .assign(
+        Intervention = lambda _df: (_df["intervention"]
+            .str.replace(r"\(.+?\)", "", regex=True)  # non-greedy: a greedy match would span from the first "(" to the last ")" and swallow the entries in between
+            .str.replace(r" ?; ?", ";", regex=True)  # normalise the separator before splitting
+            .str.strip()
+            .str.split(";")
+        ),
+        inequality = lambda _df: (_df["inequality"]
+            .str.replace(r"\(.+?\)", "", regex=True)  # non-greedy so each "(...)" note is removed on its own
+            .str.replace(r" ?; ?", ";", regex=True)  # normalise the separator before splitting
+            .str.strip()
+            .str.split(";")
+        )
+    )
+    .explode("Intervention")  # list cells become separate rows
+    .explode("inequality")  # second explode pairs every intervention with every inequality of the same source row
+    .reset_index(drop=True)
+)
 
-def crosstab_inequality(df, inequality:str):
-    df_temp = df.loc[(df["Inequality"] == inequality) | (df["Inequality"] == "income")]
-    tab = pd.crosstab(df_temp["Intervention"], df_temp["Inequality"])
+def crosstab_inequality(df, inequality:str, **kwargs):  # cross-tabulate interventions against `inequality` and "income"; **kwargs are forwarded to pd.crosstab
+    df_temp = df.loc[(df["inequality"] == inequality) | (df["inequality"] == "income")]  # keep rows of the requested type plus the "income" rows
+    tab = pd.crosstab(df_temp["Intervention"], df_temp["inequality"], **kwargs)  # rows: "Intervention" values, columns: "inequality" values
     return tab.drop(tab[tab[inequality] == 0].index)
+```
 
+
+```{python}
+#| label: fig-region-counts
+#| fig-cap: Studies by regions analysed
+
+by_region = (  # one row per region entry of every bib_df row; bib_df itself is not modified
+    bib_df[["region"]]
+    .assign(
+        region = lambda _df: (_df["region"]
+            .str.replace(r" ?; ?", ";", regex=True)  # drop the optional single space on either side of each ";"
+            .str.strip()
+            .str.split(";")
+        )
+    )
+    .explode("region")  # list cells become separate rows
+    .reset_index(drop=True)
+)
+ax = sns.countplot(by_region, x="region", order=by_region["region"].value_counts().index)  # bar order follows value_counts()
+plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
+         rotation_mode="anchor")
+plt.show()
+
+def regions_for_inequality(df, inequality:str):  # return a seaborn count plot of the "region" column for one inequality type
+    df_temp = df.loc[(df["inequality"] == inequality)]  # rows whose "inequality" equals the requested value
+    return sns.countplot(df_temp, x="region", order=df_temp["region"].value_counts().index)  # NOTE(review): df_inequality keeps "region" unsplit, unlike by_region; confirm that is intended
 ```
 
 ## Gender inequality