feat(script): Begin using validities for visualization

This commit is contained in:
Marty Oehme 2024-02-14 17:30:04 +01:00
parent 41b2d651a6
commit 227adb33f8
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
3 changed files with 8 additions and 25 deletions

View file

@ -41,7 +41,7 @@ cmd = "nvim"
[tool.poe.tasks.extract] [tool.poe.tasks.extract]
help = "Extract the csv data from raw yaml files" help = "Extract the csv data from raw yaml files"
shell = """ shell = """
python src/load_data.py > 02-data/processed/extracted.csv python src/prep_data.py > 02-data/processed/extracted.csv
""" """
[tool.poe.tasks.milestone] [tool.poe.tasks.milestone]
help = "Extract, render, commit and version a finished artifact" help = "Extract, render, commit and version a finished artifact"

View file

@ -608,36 +608,19 @@ which are then descriptively distinguished between for their primary outcome ine
Each main thematic area will be preceded by a table presenting the overall inequalities reviewed, Each main thematic area will be preceded by a table presenting the overall inequalities reviewed,
main findings and accompanying channels that could be identified. main findings and accompanying channels that could be identified.
Afterwards, the analytical lens will be inverted for the discussion (Section 5) Afterwards, the analytical lens will be inverted for the discussion (Section 5)
and the reviewed studies discussed from a perspective of their analysed inequalities, and the reviewed studies discussed from a perspective of their analysed inequalities and limitations,
to better identify areas of strong analytical lenses or areas of more limited analyses. to better identify areas of strong analytical lenses or areas of more limited analyses.
```{python} ```{python}
vd = by_intervention[(by_intervention['design'] == 'quasi-experimental') | (by_intervention['design'] == 'experimental')] from src import prep_data
vd = vd.assign(valid_ext=0)
# assign external validities validities = prep_data.calculate_validities(by_intervention)
vd["representativeness"] = vd["representativeness"].fillna("")
vd.loc[vd['representativeness'].str.contains("subnational"), 'valid_ext'] = 5.0
vd.loc[vd['representativeness'].str.contains("national"), 'valid_ext'] = 4.0
vd.loc[vd['representativeness'].str.contains("regional"), 'valid_ext'] = 3.0
vd.loc[vd['representativeness'].str.contains("local"), 'valid_ext'] = 2.0
# assign internal validities
vd = vd.assign(valid_int=0)
vd["method"] = vd["method"].fillna("")
vd.loc[vd['method'].str.contains("RCT"), 'valid_int'] = 5.0
vd.loc[vd['method'].str.contains("|".join(["RD","regression.discontinuity"])), 'valid_int'] = 4.5
vd.loc[vd['method'].str.contains("|".join(["IV","instrumental.variable"])), 'valid_int'] = 4.0
vd.loc[vd['method'].str.contains("|".join(["PSM","propensity.score.matching"])), 'valid_int'] = 3.5
vd.loc[vd['method'].str.contains("|".join(["DM","discontinuity.matching"])), 'valid_int'] = 3.0
vd.loc[vd['method'].str.contains("|".join(["DID","difference.in.difference", "triple.diff"])), 'valid_int'] = 3.0
vd.loc[vd['method'].str.contains("|".join(["OLS","ordinary.least.square"])), 'valid_int'] = 2.0
# Melt the dataframe to long format for plotting # Melt the dataframe to long format for plotting
melted_df = vd.melt(value_vars=['valid_int', 'valid_ext'], id_vars # melted_validities = validities.melt(value_vars=['valid_int', 'valid_ext'], id_vars
='intervention', var_name='Validity') # ='intervention', var_name='Validity')
# Create a stacked histplot using Seaborn # Create a stacked histplot using Seaborn
sns.histplot(data=melted_df, y='intervention', hue='Validity', multiple='stack') sns.scatterplot(data=validities, x='external_validity', y='internal_validity', hue='intervention')
``` ```
## Institutional ## Institutional

View file

@ -8,7 +8,7 @@ def calculate_validities(
) -> DataFrame: ) -> DataFrame:
EXT_COL_NAME: str = "external_validity" EXT_COL_NAME: str = "external_validity"
INT_COL_NAME: str = "internal_validity" INT_COL_NAME: str = "internal_validity"
cols = {EXT_COL_NAME: 0, INT_COL_NAME: 0} cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
vd = df[ vd = df[
(df["design"] == "quasi-experimental") | (df["design"] == "experimental") (df["design"] == "quasi-experimental") | (df["design"] == "experimental")