diff --git a/pyproject.toml b/pyproject.toml index a5a08ad..cff54a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ cmd = "nvim" [tool.poe.tasks.extract] help = "Extract the csv data from raw yaml files" shell = """ -python src/load_data.py > 02-data/processed/extracted.csv +python src/prep_data.py > 02-data/processed/extracted.csv """ [tool.poe.tasks.milestone] help = "Extract, render, commit and version a finished artifact" diff --git a/scoping_review.qmd b/scoping_review.qmd index ac08201..66611dc 100644 --- a/scoping_review.qmd +++ b/scoping_review.qmd @@ -608,36 +608,19 @@ which are then descriptively distinguished between for their primary outcome ine Each main thematic area will be preceded by a table prsenting the overall inequalities reviewed, main findings and accompanying channels that could be identified. Afterwards, the analytical lens will be inverted for the discussion (Section 5) -and the reviewed studies discussed from a perspective of their analysed inequalities, +and the reviewed studies discussed from a perspective of their analysed inequalities and limitations, to better identify areas of strong analytical lenses or areas of more limited analyses. ```{python} -vd = by_intervention[(by_intervention['design'] == 'quasi-experimental') | (by_intervention['design'] == 'experimental')] -vd = vd.assign(valid_ext=0) +from src import prep_data -# assign external validities -vd["representativeness"] = vd["representativeness"].fillna("") -vd.loc[vd['representativeness'].str.contains("subnational"), 'valid_ext'] = 5.0 -vd.loc[vd['representativeness'].str.contains("national"), 'valid_ext'] = 4.0 -vd.loc[vd['representativeness'].str.contains("regional"), 'valid_ext'] = 3.0 -vd.loc[vd['representativeness'].str.contains("local"), 'valid_ext'] = 2.0 - -# assign internal validities -vd = vd.assign(valid_int=0) -vd["method"] = vd["method"].fillna("") -vd.loc[vd['method'].str.contains("RCT"), 'valid_int'] = 5.0 -vd.loc[vd['method'].str.contains("|".join(["RD","regression.discontinuity"])), 'valid_int'] = 4.5 -vd.loc[vd['method'].str.contains("|".join(["IV","instrumental.variable"])), 'valid_int'] = 4.0 -vd.loc[vd['method'].str.contains("|".join(["PSM","propensity.score.matching"])), 'valid_int'] = 3.5 -vd.loc[vd['method'].str.contains("|".join(["DM","discontinuity.matching"])), 'valid_int'] = 3.0 -vd.loc[vd['method'].str.contains("|".join(["DID","difference.in.difference", "triple.diff"])), 'valid_int'] = 3.0 -vd.loc[vd['method'].str.contains("|".join(["OLS","ordinary.least.square"])), 'valid_int'] = 2.0 +validities = prep_data.calculate_validities(by_intervention) # Melt the dataframe to long format for plotting -melted_df = vd.melt(value_vars=['valid_int', 'valid_ext'], id_vars -='intervention', var_name='Validity') +# melted_validities = validities.melt(value_vars=['valid_int', 'valid_ext'], id_vars +# ='intervention', var_name='Validity') # Create a stacked histplot using Seaborn -sns.histplot(data=melted_df, y='intervention', hue='Validity', multiple='stack') +sns.scatterplot(data=validities, x='external_validity', y='internal_validity', hue='intervention') ``` ## Institutional diff --git a/src/calculate_validities.py b/src/prep_data.py similarity index 97% rename from src/calculate_validities.py rename to src/prep_data.py index ba619d7..619b58e 100644 --- a/src/calculate_validities.py +++ b/src/prep_data.py @@ -8,7 +8,7 @@ def calculate_validities( ) -> DataFrame: EXT_COL_NAME: str = "external_validity" INT_COL_NAME: str = "internal_validity" - cols = {EXT_COL_NAME: 0, INT_COL_NAME: 0} + cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0} vd = df[ (df["design"] == "quasi-experimental") | (df["design"] == "experimental")