feat(script): Add simple validity data prep

2024-02-13 20:55:50 +01:00 · 2024-02-13 20:55:50 +01:00 · 6c28fd70b2
commit 6c28fd70b2
parent e69f741197
1 changed files with 31 additions and 30 deletions
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@ -566,7 +566,7 @@ Should they point towards gaps (or over-optimization) of specific areas of inter
 #| fig-cap: Predominant type of intervention
 by_intervention = (
-    bib_df.groupby(["author", "year", "title"])
+    bib_df.groupby(["author", "year", "title", "design", "method", "representativeness"])
    .agg(
        {
            "intervention": lambda _col: "; ".join(_col),
@ -589,7 +589,6 @@ ax = sns.countplot(by_intervention, x="intervention", order=by_intervention["int
 plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
 plt.show()
 by_intervention = None
 ```
@fig-intervention-types shows the most often analysed interventions for the literature reviewed.
@ -605,44 +604,46 @@ This section will present a synthesis of evidence from the scoping review.
 The section will also present a discussion on the implications of the current evidence base for policy and underscore key knowledge gaps.
 One of the primary lenses through which policy interventions to reduce inequalities in the world of work are viewed is that of income inequality, often measured for all people throughout a country or subsets thereof.
-At the same time, the primacy of income should not be overstated as disregarding the intersectional nature of inequalities may lead to adverse targeting or intervention outcomes, as can be seen in the following studies on policies to increase overall income equality.
+At the same time, the primacy of income should not be overstated as disregarding the intersectional nature of inequalities may lead to adverse targeting or intervention outcomes.
 In the following synthesis each reviewed study will be analysed through the primary policies they concern themselves with.
 Since policies employed in the pursuit of increased equality can involve a wide range of actors, strategic approaches and implementation details,
 the following synthesis will first categorize between the main thematic area and its associated interventions,
-which are then distinguished between for their primary outcome inequalities.
+which are then descriptively distinguished by their primary outcome inequalities.
-Strength of Evidence
+Each main thematic area will be preceded by a table presenting the overall inequalities reviewed,
 main findings and accompanying channels that could be identified.
 Afterwards, the analytical lens will be inverted for the discussion (Section 5)
 and the reviewed studies discussed from a perspective of their analysed inequalities,
 to better identify areas of strong analytical lenses or areas of more limited analyses.
 ```{python}
-# Create a dictionary with the data for the dataframe
+vd = by_intervention[(by_intervention['design'] == 'quasi-experimental') | (by_intervention['design'] == 'experimental')]
-data = {
+vd = vd.assign(valid_ext=0)
    'strong_internal_validity': [
        'weak', 'strong', 'strong', 'strong', 'strong', 'weak', 'strong', #minimum wage
        'strong', 'weak', 'strong', 'strong', # paid leave
        'strong', 'weak' # protective env policies
    ],
    'strong_external_validity': [
        'strong', 'strong', 'weak', 'weak', 'strong', 'strong', 'strong', #minimum wage
        'strong', 'strong', 'weak', 'weak', # paid leave
        'strong', 'weak' # prot env policies
    ],
    'policy': [
        'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage', 'minimum wage',
        'paid leave', 'paid leave', 'paid leave', 'paid leave',
        'environmental infrastructure','environmental infrastructure'
    ]
 }
-# Create the dataframe
+# assign external validities
-test_df = pd.DataFrame(data)
+vd["representativeness"] = vd["representativeness"].fillna("")
 # Score external validity from the free-text representativeness label.
 # Assignments run top to bottom and a later match overwrites an earlier one,
 # so the plain "national" pattern has to run BEFORE "subnational":
 # "subnational" contains "national" as a substring and would otherwise be
 # reset from 5.0 to 4.0, leaving the subnational score unreachable.
 # NOTE(review): confirm that subnational (5.0) is meant to outrank national (4.0).
 vd.loc[vd['representativeness'].str.contains("national"), 'valid_ext'] = 4.0
 vd.loc[vd['representativeness'].str.contains("subnational"), 'valid_ext'] = 5.0
 vd.loc[vd['representativeness'].str.contains("regional"), 'valid_ext'] = 3.0
 vd.loc[vd['representativeness'].str.contains("local"), 'valid_ext'] = 2.0
 # assign internal validities
 # Start from a float column: the scores below include fractional values
 # (4.5, 3.5), and writing those into an integer column relies on silent
 # upcasting, which recent pandas versions deprecate.
 vd = vd.assign(valid_int=0.0)
 vd["method"] = vd["method"].fillna("")
 # (regex, score) pairs applied in order; when a method string matches
 # several patterns, the LAST matching entry determines the score.
 # The "." in the long names matches any single character (space, hyphen, ...).
 _method_scores = [
     ("RCT", 5.0),
     ("RD|regression.discontinuity", 4.5),
     ("IV|instrumental.variable", 4.0),
     ("PSM|propensity.score.matching", 3.5),
     ("DM|discontinuity.matching", 3.0),
     ("DID|difference.in.difference|triple.diff", 3.0),
     ("OLS|ordinary.least.square", 2.0),
 ]
 for _pattern, _score in _method_scores:
     vd.loc[vd['method'].str.contains(_pattern), 'valid_int'] = _score
 # vd is the dataframe of (quasi-)experimental studies scored above
 # Melt the dataframe to long format for plotting
-melted_df = test_df.melt(value_vars=['strong_internal_validity', 'strong_external_validity'], id_vars
+melted_df = vd.melt(value_vars=['valid_int', 'valid_ext'], id_vars
-='policy', var_name='Validity')
+='intervention', var_name='Validity')
 # Create a stacked histplot using Seaborn
-sns.histplot(data=melted_df, y='policy', hue='Validity', multiple='stack')
+sns.histplot(data=melted_df, y='intervention', hue='Validity', multiple='stack')
 ```
 ## Institutional