feat(code): Allow custom strength of evidence bins

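For display in findings summaries, we now allow arbitrary strength-of-evidence binning. Simply pass in a dict whose keys are the lower bounds of the bins (as floats) and whose values are the string representations that should appear in the table; a combined strength receives the label of the highest bound it meets or exceeds. If no dict is passed, the default bins are used.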
2024-02-18 16:57:39 +01:00 · 2024-02-18 16:57:39 +01:00 · b453afd112
commit b453afd112
parent 2b0fa5db7c
2 changed files with 74 additions and 39 deletions
--- a/src/model/validity.py
+++ b/src/model/validity.py
@ -3,36 +3,6 @@ from typing import cast

 from pandas import DataFrame

-
-def _binned_strength(strength: float) -> str:
-    if strength < 3.0:
-        return r"\-"
-    if strength < 6.0:
-        return r"\+"
-    return r"\++"
-
-
-def _combined_validities(
-    apply_to: DataFrame, by_intervention: DataFrame, column: str = "internal_validity"
-):
-    if not isinstance(apply_to, str):
-        return
-    combined = 0.0
-    for study in apply_to.split(";"):
-        if study not in by_intervention["citation"].unique():
-            print(
-                f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
-            )
-        new = by_intervention.loc[by_intervention["citation"] == study, column]
-        if len(new) == 0 or math.isnan(new.iat[0]):
-            continue
-        combined += new.iat[0]
-
-    if combined:
-        return _binned_strength(combined)
-    return r"\-"
-
-
 METHOD_RANKINGS = {
    2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
    3.0: [
@ -56,7 +26,6 @@ METHOD_RANKINGS = {
 def calculate(
    df: DataFrame,
    repr_col: str = "representativeness",
-    design_col: str = "design",
    method_col: str = "method",
 ) -> DataFrame:
    """Add internal and external validities to a dataframe.
@ -76,9 +45,6 @@ def calculate(
    INT_COL_NAME: str = "internal_validity"
    cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}

-    # vd = df[
-    #     (df[design_col] == "quasi-experimental") | (df[design_col] == "experimental")
-    # ].copy()
    vd = df.assign(**cols)
    vd = cast(DataFrame, vd)

@ -103,8 +69,23 @@ def calculate(


 def add_to_findings(
-    findings_df: DataFrame, studies_by_intervention: DataFrame
+    findings_df: DataFrame,
+    studies_by_intervention: DataFrame,
+    strength_bins: dict[float, str] | None = None,
 ) -> DataFrame:
+    """Returns summary of findings with validities added.
+
+    Requires a 'findings' dataframe with at least a 'studies' column,
+    containing the bibtex keys of studies containing the findings
+    (semicolon-separated without spaces).
+    Then, another dataframe with a row per study is required containing
+    the study's 'citation' (its bibtex key), 'method' and
+    'representativeness', in columns named accordingly.
+
+    Returns the correct bin to put the validity in, using the
+    optionally passed in bins dictionary or, if none is given, the
+    module-level DEFAULT_BINS (thresholds 0.0 -> 1.0 -> 1.5).
+    """
    valid_subset = (
        calculate(studies_by_intervention)[
            ["internal_validity", "external_validity", "citation"]
@ -115,11 +96,54 @@ def add_to_findings(
    )

    def apply_internal(df):
-        return _combined_validities(df, valid_subset, "internal_validity")
+        return _combined_validities(
+            df, valid_subset, "internal_validity", strength_bins
+        )

    def apply_external(df):
-        return _combined_validities(df, valid_subset, "external_validity")
+        return _combined_validities(
+            df, valid_subset, "external_validity", strength_bins
+        )

    findings_df["internal_validity"] = findings_df["studies"].apply(apply_internal)
    findings_df["external_validity"] = findings_df["studies"].apply(apply_external)
    return findings_df
+
+
+DEFAULT_BINS = {
+    0.0: r"\-",
+    1.0: r"\+",
+    1.5: r"\++",
+}
+
+
+def _combined_validities(
+    apply_to: DataFrame,
+    by_intervention: DataFrame,
+    column: str = "internal_validity",
+    strength_bins: dict[float, str] | None = None,
+):
+    if not isinstance(apply_to, str):
+        return
+    combined = 0.0
+    for study in apply_to.split(";"):
+        if study not in by_intervention["citation"].unique():
+            print(
+                f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
+            )
+        new = by_intervention.loc[by_intervention["citation"] == study, column]
+        if len(new) == 0 or math.isnan(new.iat[0]):
+            continue
+        combined += new.iat[0]
+
+    if combined:
+        return _binned_strength(combined, bins=strength_bins or DEFAULT_BINS)
+    return r"\-"
+
+
+def _binned_strength(strength: float, bins: dict[float, str]) -> str:
+    bin = ""
+    for val, txt in sorted(bins.items()):
+        if strength >= val:
+            bin = txt
+    return bin