feat(code): Update and correct internal validity ranking regexes

2024-02-21 11:30:38 +01:00 · 2024-02-21 11:30:38 +01:00 · cc9749a011
commit cc9749a011
parent c2d20e46ec
1 changed files with 24 additions and 7 deletions
--- a/src/model/validity.py
+++ b/src/model/validity.py
@ -4,7 +4,7 @@ from typing import cast
 from pandas import DataFrame
 METHOD_RANKINGS = {
-    2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
+    2.0: ["OLS", "ordinary.least.square", "logistic.regression", "fixed.effect"],
    3.0: [
        "DM",
        "discontinuity.matching",
@ -14,19 +14,24 @@ METHOD_RANKINGS = {
        "triple.diff",
    ],
    3.5: ["PSM", "propensity.score.matching", "score.matching"],
-    4.0: ["IV", "instrumental.variable"],
+    4.0: [
        "IV",
        "instrumental.variable",
        "method.of.moment",
        "GMM",
        "GEE",
        "generali(?:s|z)ed.estimating",
    ],
    4.5: ["RD", "regression.discontinuity"],
    5.0: ["RCT", "randomi(?:s|z)ed.control.trial"],
 }
 # TODO: do not filter by quasi-experimental/experimental design here, but
 #       analyse the whole df passed in. This allows filtering to happen where
 #       it is needed, while validity is otherwise given for all studies passed in.
 def calculate(
    df: DataFrame,
    repr_col: str = "representativeness",
    method_col: str = "method",
    valid_design_col: str | None = None,
 ) -> DataFrame:
    """Add internal and external validities to a dataframe.
@ -36,15 +41,27 @@ def calculate(
    Takes a combination of study design (simulation/observational/
    quasi-experimental/experimental/..) and its method (OLS/DID/RD/...) to
-    calculate an internal validity.
+    calculate an internal validity. The column to use can be passed in as
    method_col.
    Takes a study representativeness (local/subnational/national/regional/
-    census) to calculate the external validity.
+    census) to calculate the external validity. The column to use can be
    optionally passed in as repr_col.
    Takes an optional valid_design_col to decide if it should only
    include quasi-experimental/experimental designs in the calculation.
    Pass in the column name of the design column to only include those
    in calculations. Leave as None to calculate for all studies passed in.
    """
    EXT_COL_NAME: str = "external_validity"
    INT_COL_NAME: str = "internal_validity"
    cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
    if valid_design_col:
        vd = df[
            (df[valid_design_col] == "quasi-experimental")
            | (df[valid_design_col] == "experimental")
        ].copy()
    vd = df.assign(**cols)
    vd = cast(DataFrame, vd)