feat(code): Update correct internal validity ranking regexes

This commit is contained in:
Marty Oehme 2024-02-21 11:30:38 +01:00
parent c2d20e46ec
commit cc9749a011
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@ -4,7 +4,7 @@ from typing import cast
from pandas import DataFrame
METHOD_RANKINGS = {
2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
2.0: ["OLS", "ordinary.least.square", "logistic.regression", "fixed.effect"],
3.0: [
"DM",
"discontinuity.matching",
@ -14,19 +14,24 @@ METHOD_RANKINGS = {
"triple.diff",
],
3.5: ["PSM", "propensity.score.matching", "score.matching"],
4.0: ["IV", "instrumental.variable"],
4.0: [
"IV",
"instrumental.variable",
"method.of.moment",
"GMM",
"GEE",
"generali(?:s|z)ed.estimating",
],
4.5: ["RD", "regression.discontinuity"],
5.0: ["RCT", "randomi(?:s|z)ed.control.trial"],
}
# TODO do not filter by quasi-/experimental, but analyse the whole df passed in
# This allows filtering to happen where it's needed, but otherwise validity is
# given for all studies passed in.
def calculate(
df: DataFrame,
repr_col: str = "representativeness",
method_col: str = "method",
valid_design_col: str | None = None,
) -> DataFrame:
"""Add internal and external validities to a dataframe.
@ -36,15 +41,27 @@ def calculate(
Takes a combination of study design (simulation/observational/
quasi-experimental/experimental/..) and its method (OLS/DID/RD/...) to
calculate an internal validity.
calculate an internal validity. The column to use can be passed in as
method_col.
Takes a study representativeness (local/subnational/national/regional/
census) to calculate the external validity.
census) to calculate the external validity. The column to use can be
optionally passed in as repr_col.
Takes an optional valid_design_col to decide if it should only
include quasi-experimental/experimental designs in the calculation.
Pass in the column name of the design column to only include those
in calculations. Leave as None to calculate for all studies passed in.
"""
EXT_COL_NAME: str = "external_validity"
INT_COL_NAME: str = "internal_validity"
cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
if valid_design_col:
vd = df[
(df[valid_design_col] == "quasi-experimental")
| (df[valid_design_col] == "experimental")
].copy()
vd = df.assign(**cols)
vd = cast(DataFrame, vd)