feat(code): Add validity calculation
This commit is contained in:
parent
0c596e2f43
commit
a0794c6d09
1 changed files with 71 additions and 0 deletions
71
src/calculate_validities.py
Normal file
71
src/calculate_validities.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
from typing import cast
|
||||||
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_validities(
|
||||||
|
df: DataFrame, repr_col: str = "representativeness", method_col: str = "method"
|
||||||
|
) -> DataFrame:
|
||||||
|
EXT_COL_NAME: str = "external_validity"
|
||||||
|
INT_COL_NAME: str = "internal_validity"
|
||||||
|
vd = df[(df["design"] == "quasi-experimental") | (df["design"] == "experimental")]
|
||||||
|
|
||||||
|
vd[EXT_COL_NAME] = 0
|
||||||
|
vd[INT_COL_NAME] = 0
|
||||||
|
vd = cast(DataFrame, vd)
|
||||||
|
|
||||||
|
vd[repr_col] = vd[repr_col].fillna("")
|
||||||
|
# needs to check national before subnational, subnational before local
|
||||||
|
vd.loc[vd[repr_col].str.contains("national"), EXT_COL_NAME] = 5.0
|
||||||
|
vd.loc[vd[repr_col].str.contains("regional"), EXT_COL_NAME] = 4.0
|
||||||
|
vd.loc[vd[repr_col].str.contains("subnational"), EXT_COL_NAME] = 3.0
|
||||||
|
vd.loc[vd[repr_col].str.contains("local"), EXT_COL_NAME] = 2.0
|
||||||
|
|
||||||
|
vd[method_col] = vd[method_col].fillna("")
|
||||||
|
# needs to go lowest to highest in case of multiple mentioned approaches
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains("|".join(["OLS", "ordinary.least.square"])),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 2.0
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains("|".join(["DM", "discontinuity.matching"])),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 3.0
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains(
|
||||||
|
"|".join(["DID", "difference.in.diff", "diff.in.diff", "triple.diff"])
|
||||||
|
),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 3.0
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains(
|
||||||
|
"|".join(["PSM", "propensity.score.matching", "score.matching"])
|
||||||
|
),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 3.5
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains("|".join(["IV", "instrumental.variable"])),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 4.0
|
||||||
|
vd.loc[
|
||||||
|
vd[method_col].str.contains("|".join(["RD", "regression.discontinuity"])),
|
||||||
|
INT_COL_NAME,
|
||||||
|
] = 4.5
|
||||||
|
vd.loc[vd[method_col].str.contains("RCT"), INT_COL_NAME] = 5.0
|
||||||
|
|
||||||
|
return vd
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys
|
||||||
|
import load_data
|
||||||
|
from pathlib import Path
|
||||||
|
from io import StringIO
|
||||||
|
if len(sys.argv) == 2:
|
||||||
|
df = load_data.from_yml(Path(sys.argv[1]))
|
||||||
|
else:
|
||||||
|
df = load_data.from_yml()
|
||||||
|
|
||||||
|
df = calculate_validities(df)
|
||||||
|
output = StringIO()
|
||||||
|
df.to_csv(output)
|
||||||
|
output.seek(0)
|
||||||
|
print(output.read())
|
Loading…
Reference in a new issue