From cc9749a0117c5529179618ad595581aadf298dce Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Wed, 21 Feb 2024 11:30:38 +0100 Subject: [PATCH] feat(code): Update correct internal validity ranking regexes --- src/model/validity.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/model/validity.py b/src/model/validity.py index 9f5788c..401ed29 100644 --- a/src/model/validity.py +++ b/src/model/validity.py @@ -4,7 +4,7 @@ from typing import cast from pandas import DataFrame METHOD_RANKINGS = { - 2.0: ["OLS", "ordinary.least.square", "logistic.regression"], + 2.0: ["OLS", "ordinary.least.square", "logistic.regression", "fixed.effect"], 3.0: [ "DM", "discontinuity.matching", @@ -14,19 +14,24 @@ METHOD_RANKINGS = { "triple.diff", ], 3.5: ["PSM", "propensity.score.matching", "score.matching"], - 4.0: ["IV", "instrumental.variable"], + 4.0: [ + "IV", + "instrumental.variable", + "method.of.moment", + "GMM", + "GEE", + "generali(?:s|z)ed.estimating", + ], 4.5: ["RD", "regression.discontinuity"], 5.0: ["RCT", "randomi(?:s|z)ed.control.trial"], } -# TODO do not filter by quasi-/experimental, but analyse the whole df passed in -# This allows filtering to happen where it's needed but otherwise validity -# given for all studies passed in. def calculate( df: DataFrame, repr_col: str = "representativeness", method_col: str = "method", + valid_design_col: str | None = None, ) -> DataFrame: """Add internal and external validities to a dataframe. @@ -36,15 +41,27 @@ def calculate( Takes a combination of study design (simulation/observational/ quasi-experimental/experimental/..) and its method (OLS/DID/RD/...) to - calculate an internal validity. + calculate an internal validity. The column to use can be passed in as + method_col. Takes a study representativeness (local/subnational/national/regional/ - census) to calculate the external validity. + census) to calculate the external validity. The column to use can be + optionally passed in as repr_col. + + Takes an optional valid_design_col to decide if it should only + include quasi-experimental/experimental designs in the calculation. + Pass in the column name of the design column to only include those + in calculations. Leave as None to calculate for all studies passed in. """ EXT_COL_NAME: str = "external_validity" INT_COL_NAME: str = "internal_validity" cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0} + if valid_design_col: + vd = df[ + (df[valid_design_col] == "quasi-experimental") + | (df[valid_design_col] == "experimental") + ].copy() vd = df.assign(**cols) vd = cast(DataFrame, vd)