feat(script): Begin using validities for visualization

This commit is contained in:
Marty Oehme 2024-02-14 17:30:04 +01:00
parent 41b2d651a6
commit 227adb33f8
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
3 changed files with 8 additions and 25 deletions

View file

@ -41,7 +41,7 @@ cmd = "nvim"
[tool.poe.tasks.extract]
help = "Extract the csv data from raw yaml files"
shell = """
python src/load_data.py > 02-data/processed/extracted.csv
python src/prep_data.py > 02-data/processed/extracted.csv
"""
[tool.poe.tasks.milestone]
help = "Extract, render, commit and version a finished artifact"

View file

@ -608,36 +608,19 @@ which are then descriptively distinguished between for their primary outcome ine
Each main thematic area will be preceded by a table presenting the overall inequalities reviewed,
main findings and accompanying channels that could be identified.
Afterwards, the analytical lens will be inverted for the discussion (Section 5)
and the reviewed studies discussed from a perspective of their analysed inequalities,
and the reviewed studies discussed from a perspective of their analysed inequalities and limitations,
to better identify areas of strong analytical lenses or areas of more limited analyses.
```{python}
vd = by_intervention[(by_intervention['design'] == 'quasi-experimental') | (by_intervention['design'] == 'experimental')]
vd = vd.assign(valid_ext=0)
from src import prep_data
# assign external validities
vd["representativeness"] = vd["representativeness"].fillna("")
vd.loc[vd['representativeness'].str.contains("subnational"), 'valid_ext'] = 5.0
vd.loc[vd['representativeness'].str.contains("national"), 'valid_ext'] = 4.0
vd.loc[vd['representativeness'].str.contains("regional"), 'valid_ext'] = 3.0
vd.loc[vd['representativeness'].str.contains("local"), 'valid_ext'] = 2.0
# assign internal validities
vd = vd.assign(valid_int=0)
vd["method"] = vd["method"].fillna("")
vd.loc[vd['method'].str.contains("RCT"), 'valid_int'] = 5.0
vd.loc[vd['method'].str.contains("|".join(["RD","regression.discontinuity"])), 'valid_int'] = 4.5
vd.loc[vd['method'].str.contains("|".join(["IV","instrumental.variable"])), 'valid_int'] = 4.0
vd.loc[vd['method'].str.contains("|".join(["PSM","propensity.score.matching"])), 'valid_int'] = 3.5
vd.loc[vd['method'].str.contains("|".join(["DM","discontinuity.matching"])), 'valid_int'] = 3.0
vd.loc[vd['method'].str.contains("|".join(["DID","difference.in.difference", "triple.diff"])), 'valid_int'] = 3.0
vd.loc[vd['method'].str.contains("|".join(["OLS","ordinary.least.square"])), 'valid_int'] = 2.0
validities = prep_data.calculate_validities(by_intervention)
# Melt the dataframe to long format for plotting
melted_df = vd.melt(value_vars=['valid_int', 'valid_ext'], id_vars
='intervention', var_name='Validity')
# melted_validities = validities.melt(value_vars=['valid_int', 'valid_ext'], id_vars
# ='intervention', var_name='Validity')
# Create a stacked histplot using Seaborn
sns.histplot(data=melted_df, y='intervention', hue='Validity', multiple='stack')
sns.scatterplot(data=validities, x='external_validity', y='internal_validity', hue='intervention')
```
## Institutional

View file

@ -8,7 +8,7 @@ def calculate_validities(
) -> DataFrame:
EXT_COL_NAME: str = "external_validity"
INT_COL_NAME: str = "internal_validity"
cols = {EXT_COL_NAME: 0, INT_COL_NAME: 0}
cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
vd = df[
(df["design"] == "quasi-experimental") | (df["design"] == "experimental")