feat(code): Update and correct internal validity ranking regexes

2024-02-21 11:30:38 +01:00 · 2024-02-21 11:30:38 +01:00 · cc9749a011
commit cc9749a011
parent c2d20e46ec
1 changed files with 24 additions and 7 deletions
--- a/src/model/validity.py
+++ b/src/model/validity.py
@@ -4,7 +4,7 @@ from typing import cast
 from pandas import DataFrame

 METHOD_RANKINGS = {
-    2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
+    2.0: ["OLS", "ordinary.least.square", "logistic.regression", "fixed.effect"],
    3.0: [
        "DM",
        "discontinuity.matching",
@@ -14,19 +14,24 @@ METHOD_RANKINGS = {
        "triple.diff",
    ],
    3.5: ["PSM", "propensity.score.matching", "score.matching"],
-    4.0: ["IV", "instrumental.variable"],
+    4.0: [
+        "IV",
+        "instrumental.variable",
+        "method.of.moment",
+        "GMM",
+        "GEE",
+        "generali(?:s|z)ed.estimating",
+    ],
    4.5: ["RD", "regression.discontinuity"],
    5.0: ["RCT", "randomi(?:s|z)ed.control.trial"],
 }


-# TODO do not filter by quasi-/experimental, but analyse the whole df passed in
-#      This allows filtering to happen where it's needed but otherwise validity
-#      given for all studies passed in.
 def calculate(
    df: DataFrame,
    repr_col: str = "representativeness",
    method_col: str = "method",
+    valid_design_col: str | None = None,
 ) -> DataFrame:
    """Add internal and external validities to a dataframe.

@@ -36,15 +41,27 @@ def calculate(

    Takes a combination of study design (simulation/observational/
    quasi-experimental/experimental/..) and its method (OLS/DID/RD/...) to
-    calculate an internal validity.
+    calculate an internal validity. The column to use can be passed in as
+    method_col.

    Takes a study representativeness (local/subnational/national/regional/
-    census) to calculate the external validity.
+    census) to calculate the external validity. The column to use can be
+    optionally passed in as repr_col.
+
+    Takes an optional valid_design_col to decide if it should only
+    include quasi-experimental/experimental designs in the calculation.
+    Pass in the column name of the design column to only include those
+    in calculations. Leave as None to calculate for all studies passed in.
    """
    EXT_COL_NAME: str = "external_validity"
    INT_COL_NAME: str = "internal_validity"
    cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}

+    if valid_design_col:
+        vd = df[
+            (df[valid_design_col] == "quasi-experimental")
+            | (df[valid_design_col] == "experimental")
+        ].copy()
    vd = df.assign(**cols)
    vd = cast(DataFrame, vd)