From ff4af556a5da7caf98b7af6af40f67fa34f60c48 Mon Sep 17 00:00:00 2001
From: Marty Oehme <marty.oehme@gmail.com>
Date: Wed, 6 Dec 2023 23:49:14 +0100
Subject: [PATCH] feat(scripts): Add figure for intervention and inequality
 types

---
 scoping_review.qmd | 57 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/scoping_review.qmd b/scoping_review.qmd
index f555193..e6b242f 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -476,9 +476,10 @@ for e in sample_relevant:
                         ed.get("type", Field(key="type", value=None)).value,
                         ed.get("times-cited", Field(key="times-cited", value=None)).value,
                         ed.get("usage-count-since-2013", Field(key="usage-count-since-2013", value=None)).value,
+                        ed.get("keywords", Field(key="keywords", value=None)).value,
                         ])
 # FIXME do not just drop missing values
-bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage"])
+bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage", "Keywords"])
 bib_df = bib_df.dropna(how="any")
 bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="mixed")
 bib_df["Year"] = bib_df["Date"].dt.year
@@ -538,6 +539,60 @@ This is because, as @fig-publications-per-year showed, the overall output was no
 In all of these cases, such outliers should provide clear points of interest during the screening process for possible re-evaluation of current term clusters for scoping.
 Should they point towards gaps (or over-optimization) of sepcific areas of interest during those time-frames or more generally, they may provide an impetus for tweaking the identification query terms to better align with the prevailing literature output.
 
+```{python}
+#| label: fig-intervention-types
+#| fig-cap: Predominant type of intervention
+#| fig-width: 18cm
+#| column: page
+
+interv_type_df = (  # one row per "type::<name>" tag found in the Keywords column
+    bib_df["Keywords"]
+    .str.replace(r"\_", " ")  # NOTE(review): on pandas >= 2.0 regex defaults to False, so this swaps the literal two characters `\_` (not a bare `_`) for a space -- confirm that is intended
+    .str.extractall(r"type::([\w ]+)")  # captures the run of word characters/spaces following each "type::" prefix
+    .reset_index(drop=True)  # discard the (row, match) MultiIndex that extractall produces
+    .rename(columns = {0:"Intervention type"})  # the unnamed capture group arrives as column 0
+)
+
+sort_order = interv_type_df["Intervention type"].value_counts(ascending=False).index  # categories ordered most frequent first
+fig = plt.figure()
+fig.set_size_inches(12, 4)  # wide, short canvas for the bar chart
+ax = sns.countplot(interv_type_df, x="Intervention type", order=sort_order)
+plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
+         rotation_mode="anchor")  # tilt the x tick labels 45 degrees, anchored at their right end
+plt.show()
+```
+
+{{++ TODO: describe intervention types with complete dataset ++}}
+
+```{python}
+#| label: fig-inequality-types
+#| fig-cap: Types of inequality analyzed
+#| fig-width: 18cm
+#| column: page
+
+inequ_type_df = (  # one row per "inequality::<name>" tag found in the Keywords column
+    bib_df["Keywords"]
+    .str.replace(r"\_", " ")  # NOTE(review): on pandas >= 2.0 regex defaults to False, so this swaps the literal two characters `\_` (not a bare `_`) for a space -- confirm that is intended
+    .str.extractall(r"inequality::([\w ]+)")  # captures the run of word characters/spaces following each "inequality::" prefix
+    .reset_index(drop=True)  # discard the (row, match) MultiIndex that extractall produces
+    .rename(columns = {0:"Inequality type"})  # the unnamed capture group arrives as column 0
+)
+
+sort_order = inequ_type_df["Inequality type"].value_counts(ascending=False).index  # categories ordered most frequent first
+fig = plt.figure()
+fig.set_size_inches(12, 4)  # wide, short canvas for the bar chart
+ax = sns.countplot(inequ_type_df, x="Inequality type", order=sort_order)
+plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
+         rotation_mode="anchor")  # tilt the x tick labels 45 degrees, anchored at their right end
+plt.show()
+```
+
+Income inequality is the primary type of inequality interrogated in most of the relevant studies.
+This follows from the identified lens that income inequality can provide for understanding other inequalities ---
+many studies use income measurements and changes in income or income inequality over time as indicators through which to understand the linkages of a variety of other inequalities.
+
+{{++ TODO: describe inequality types with complete dataset ++}}
+
 # Synthesis of Evidence
 
 This section will present a synthesis of evidence from the scoping review.