refactor(code): Extract internal validity ranking keywords
Extracted the keywords into a dict that can be extended with additional keywords or rankings as needed, providing more flexibility.
This commit is contained in:
parent
4e865ee2b5
commit
e032a5c192
1 changed file with 46 additions and 36 deletions
|
@ -33,17 +33,53 @@ def _combined_validities(
|
||||||
return r"\-"
|
return r"\-"
|
||||||
|
|
||||||
|
|
||||||
|
METHOD_RANKINGS = {
|
||||||
|
2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
|
||||||
|
3.0: [
|
||||||
|
"DM",
|
||||||
|
"discontinuity.matching",
|
||||||
|
"DID",
|
||||||
|
"difference.in.diff",
|
||||||
|
"diff.in.diff",
|
||||||
|
"triple.diff",
|
||||||
|
],
|
||||||
|
3.5: ["PSM", "propensity.score.matching", "score.matching"],
|
||||||
|
4.0: ["IV", "instrumental.variable"],
|
||||||
|
4.5: ["RD", "regression.discontinuity"],
|
||||||
|
5.0: ["RCT", "randomi(?:s|z)ed.control.trial"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# TODO do not filter by quasi-/experimental, but analyse the whole df passed in
|
||||||
|
# This allows filtering to happen where it's needed but otherwise validity
|
||||||
|
# is given for all studies passed in.
|
||||||
def calculate(
|
def calculate(
|
||||||
df: DataFrame, repr_col: str = "representativeness", method_col: str = "method"
|
df: DataFrame,
|
||||||
|
repr_col: str = "representativeness",
|
||||||
|
design_col: str = "design",
|
||||||
|
method_col: str = "method",
|
||||||
) -> DataFrame:
|
) -> DataFrame:
|
||||||
|
"""Add internal and external validities to a dataframe.
|
||||||
|
|
||||||
|
Requires a dataframe containing a study or observation per row, with a
|
||||||
|
single column describing the study design, method and representativeness
|
||||||
|
each respectively.
|
||||||
|
|
||||||
|
Takes a combination of study design (simulation/observational/
|
||||||
|
quasi-experimental/experimental/..) and its method (OLS/DID/RD/...) to
|
||||||
|
calculate an internal validity.
|
||||||
|
|
||||||
|
Takes a study representativeness (local/subnational/national/regional/
|
||||||
|
census) to calculate the external validity.
|
||||||
|
"""
|
||||||
EXT_COL_NAME: str = "external_validity"
|
EXT_COL_NAME: str = "external_validity"
|
||||||
INT_COL_NAME: str = "internal_validity"
|
INT_COL_NAME: str = "internal_validity"
|
||||||
cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
|
cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
|
||||||
|
|
||||||
vd = df[
|
# vd = df[
|
||||||
(df["design"] == "quasi-experimental") | (df["design"] == "experimental")
|
# (df[design_col] == "quasi-experimental") | (df[design_col] == "experimental")
|
||||||
].copy()
|
# ].copy()
|
||||||
vd.assign(**cols)
|
vd = df.assign(**cols)
|
||||||
vd = cast(DataFrame, vd)
|
vd = cast(DataFrame, vd)
|
||||||
|
|
||||||
vd[repr_col] = vd[repr_col].fillna("")
|
vd[repr_col] = vd[repr_col].fillna("")
|
||||||
|
@ -57,37 +93,11 @@ def calculate(
|
||||||
vd.loc[vd[repr_col].str.contains("local"), EXT_COL_NAME] = 2.0
|
vd.loc[vd[repr_col].str.contains("local"), EXT_COL_NAME] = 2.0
|
||||||
|
|
||||||
# needs to go lowest to highest in case of multiple mentioned approaches
|
# needs to go lowest to highest in case of multiple mentioned approaches
|
||||||
|
for rank, methods in METHOD_RANKINGS.items():
|
||||||
vd.loc[
|
vd.loc[
|
||||||
vd[method_col].str.contains(
|
vd[method_col].str.contains("|".join(methods)),
|
||||||
"|".join(["OLS", "ordinary.least.square", "logistic.regression"])
|
|
||||||
),
|
|
||||||
INT_COL_NAME,
|
INT_COL_NAME,
|
||||||
] = 2.0
|
] = rank
|
||||||
vd.loc[
|
|
||||||
vd[method_col].str.contains("|".join(["DM", "discontinuity.matching"])),
|
|
||||||
INT_COL_NAME,
|
|
||||||
] = 3.0
|
|
||||||
vd.loc[
|
|
||||||
vd[method_col].str.contains(
|
|
||||||
"|".join(["DID", "difference.in.diff", "diff.in.diff", "triple.diff"])
|
|
||||||
),
|
|
||||||
INT_COL_NAME,
|
|
||||||
] = 3.0
|
|
||||||
vd.loc[
|
|
||||||
vd[method_col].str.contains(
|
|
||||||
"|".join(["PSM", "propensity.score.matching", "score.matching"])
|
|
||||||
),
|
|
||||||
INT_COL_NAME,
|
|
||||||
] = 3.5
|
|
||||||
vd.loc[
|
|
||||||
vd[method_col].str.contains("|".join(["IV", "instrumental.variable"])),
|
|
||||||
INT_COL_NAME,
|
|
||||||
] = 4.0
|
|
||||||
vd.loc[
|
|
||||||
vd[method_col].str.contains("|".join(["RD", "regression.discontinuity"])),
|
|
||||||
INT_COL_NAME,
|
|
||||||
] = 4.5
|
|
||||||
vd.loc[vd[method_col].str.contains("RCT"), INT_COL_NAME] = 5.0
|
|
||||||
|
|
||||||
return vd
|
return vd
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue