From 6c28fd70b21534a38bc8f73d1552b573bf55184d Mon Sep 17 00:00:00 2001
From: Marty Oehme <marty.oehme@gmail.com>
Date: Tue, 13 Feb 2024 20:55:50 +0100
Subject: [PATCH] feat(script): Add simple validity data prep

---
 scoping_review.qmd | 61 +++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/scoping_review.qmd b/scoping_review.qmd
index d131560..5245630 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -566,7 +566,7 @@ Should they point towards gaps (or over-optimization) of specific areas of inter
 #| fig-cap: Predominant type of intervention
 
 by_intervention = (
-    bib_df.groupby(["author", "year", "title"])
+    bib_df.groupby(["author", "year", "title", "design", "method", "representativeness"])
     .agg(
         {
             "intervention": lambda _col: "; ".join(_col),
@@ -589,7 +589,6 @@ ax = sns.countplot(by_intervention, x="intervention", order=by_intervention["int
 plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
          rotation_mode="anchor")
 plt.show()
-by_intervention = None
 ```
 
 @fig-intervention-types shows the most often analysed interventions for the literature reviewed.
@@ -605,44 +604,46 @@ This section will present a synthesis of evidence from the scoping review.
 The section will also present a discussion on the implications of the current evidence base for policy and underscore key knowledge gaps.
 
 One of the primary lenses through which policy interventions to reduce inequalities in the world of work are viewed is that of income inequality, often measured for all people throughout a country or subsets thereof.
-At the same time, the primacy of income should not be overstated as disregarding the intersectional nature of inequalities may lead to adverse targeting or intervention outcomes, as can be seen in the following studies on policies to increase overall income equality.
+At the same time, the primacy of income should not be overstated as disregarding the intersectional nature of inequalities may lead to adverse targeting or intervention outcomes.
+In the following synthesis each reviewed study will be analysed through the primary policies it concerns itself with.
 
 Since policies employed in the pursuit of increased equality can take a wide form of actors, strategy approaches and implementation details,
 the following synthesis will first categorize between the main thematic area and its associated interventions,
-which are then distinguished between for their primary outcome inequalities.
+which are then descriptively distinguished by their primary outcome inequalities.
 
-Strength of Evidence
+Each main thematic area will be preceded by a table presenting the overall inequalities reviewed,
+main findings and accompanying channels that could be identified.
+Afterwards, the analytical lens will be inverted for the discussion (Section 5)
+and the reviewed studies discussed from a perspective of their analysed inequalities,
+to better identify areas of strong analytical lenses or areas of more limited analyses.
 
 ```{python}
-# Create a dictionary with the data for the dataframe
-data = {
-    'strong_internal_validity': [
-        'weak', 'strong', 'strong', 'strong', 'strong', 'weak', 'strong', #minimum wage
-        'strong', 'weak', 'strong', 'strong', # paid leave
-        'strong', 'weak' # protective env policies
-    ],
-    'strong_external_validity': [
-        'strong', 'strong', 'weak', 'weak', 'strong', 'strong', 'strong', #minimum wage
-        'strong', 'strong', 'weak', 'weak', # paid leave
-        'strong', 'weak' # prot env policies
-    ],
-    'policy': [
-        'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage',
-        'paid leave', 'paid leave', 'paid leave', 'paid leave',
-        'environmental infrastructure','environmental infrastructure'
-    ]
-}
+vd = by_intervention.loc[by_intervention['design'].isin(['quasi-experimental', 'experimental'])]  # keep only (quasi-)experimental designs
+vd = vd.assign(valid_ext=0)  # default external-validity score for rows no rule below matches
 
-# Create the dataframe
-test_df = pd.DataFrame(data)
+# assign external validities; a later match overwrites an earlier one, so "national" must run before "subnational" (which contains it)
+vd["representativeness"] = vd["representativeness"].fillna("")  # str.contains needs strings; missing values become "" and match nothing
+vd.loc[vd['representativeness'].str.contains("national"), 'valid_ext'] = 4.0  # also hits "subnational"; corrected on the next line
+vd.loc[vd['representativeness'].str.contains("subnational"), 'valid_ext'] = 5.0
+vd.loc[vd['representativeness'].str.contains("regional"), 'valid_ext'] = 3.0
+vd.loc[vd['representativeness'].str.contains("local"), 'valid_ext'] = 2.0
+
+# assign internal validities; each pattern is a regex ('.' matches any one character) and a later match overwrites an earlier one
+vd = vd.assign(valid_int=0.0)  # float default: fractional scores (4.5, 3.5) then need no int->float upcast on assignment
+vd["method"] = vd["method"].fillna("")  # str.contains needs strings; missing values become "" and match nothing
+vd.loc[vd['method'].str.contains("RCT"), 'valid_int'] = 5.0
+vd.loc[vd['method'].str.contains("|".join(["RD","regression.discontinuity"])), 'valid_int'] = 4.5
+vd.loc[vd['method'].str.contains("|".join(["IV","instrumental.variable"])), 'valid_int'] = 4.0
+vd.loc[vd['method'].str.contains("|".join(["PSM","propensity.score.matching"])), 'valid_int'] = 3.5
+vd.loc[vd['method'].str.contains("|".join(["DM","discontinuity.matching"])), 'valid_int'] = 3.0
+vd.loc[vd['method'].str.contains("|".join(["DID","difference.in.difference", "triple.diff"])), 'valid_int'] = 3.0
+vd.loc[vd['method'].str.contains("|".join(["OLS","ordinary.least.square"])), 'valid_int'] = 2.0
 
-# Assuming df is your dataframe
 # Melt the dataframe to long format for plotting
-melted_df = test_df.melt(value_vars=['strong_internal_validity', 'strong_external_validity'], id_vars
-='policy', var_name='Validity')
-
+melted_df = vd.melt(id_vars='intervention',  # one output row per (study row, validity column)
+                    value_vars=['valid_int', 'valid_ext'], var_name='Validity')
 # Create a stacked histplot using Seaborn
-sns.histplot(data=melted_df, y='policy', hue='Validity', multiple='stack')
+sns.histplot(data=melted_df, y='intervention', hue='Validity', multiple='stack')  # NOTE(review): counts rows per intervention; the melted score column is not passed, so scores do not affect the plot - confirm intent
 ```
 
 ## Institutional