feat(code): Allow custom strength of evidence bins

For display in findings summaries, we now allow arbitrary
strength-of-evidence binning. We simply pass in a dict with the strength
(as float) as the key and the string representation that should appear
in the table as the value.
This commit is contained in:
Marty Oehme 2024-02-18 16:57:39 +01:00
parent 2b0fa5db7c
commit b453afd112
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
2 changed files with 74 additions and 39 deletions

View file

@ -3,36 +3,6 @@ from typing import cast
from pandas import DataFrame
def _binned_strength(strength: float) -> str:
    r"""Translate a summed validity strength into its table symbol.

    Strengths below 3.0 map to ``\-``, strengths below 6.0 map to ``\+``
    and every other value maps to ``\++``.
    """
    # Exclusive upper bounds in ascending order, paired with their symbol.
    upper_bounds = ((3.0, r"\-"), (6.0, r"\+"))
    for upper_bound, symbol in upper_bounds:
        if strength < upper_bound:
            return symbol
    return r"\++"
def _combined_validities(
apply_to: DataFrame, by_intervention: DataFrame, column: str = "internal_validity"
):
if not isinstance(apply_to, str):
return
combined = 0.0
for study in apply_to.split(";"):
if study not in by_intervention["citation"].unique():
print(
f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
)
new = by_intervention.loc[by_intervention["citation"] == study, column]
if len(new) == 0 or math.isnan(new.iat[0]):
continue
combined += new.iat[0]
if combined:
return _binned_strength(combined)
return r"\-"
METHOD_RANKINGS = {
2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
3.0: [
@ -56,7 +26,6 @@ METHOD_RANKINGS = {
def calculate(
df: DataFrame,
repr_col: str = "representativeness",
design_col: str = "design",
method_col: str = "method",
) -> DataFrame:
"""Add internal and external validities to a dataframe.
@ -76,9 +45,6 @@ def calculate(
INT_COL_NAME: str = "internal_validity"
cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}
# vd = df[
# (df[design_col] == "quasi-experimental") | (df[design_col] == "experimental")
# ].copy()
vd = df.assign(**cols)
vd = cast(DataFrame, vd)
@ -103,8 +69,23 @@ def calculate(
def add_to_findings(
findings_df: DataFrame, studies_by_intervention: DataFrame
findings_df: DataFrame,
studies_by_intervention: DataFrame,
strength_bins: dict[float, str] | None = None,
) -> DataFrame:
"""Returns summary of findings with validities added.
Requires a 'findings' dataframe with at least a 'citation' column,
containing the bibtex keys of studies containing the findings
(semicolon-separated without spaces).
Then, another dataframe with a row per study is required containing
the study's 'method' and 'representativeness', in columns
named respectively.
The validities are binned using the optionally passed in bins
dictionary (strength threshold -> label) or, if none is given,
the module-level DEFAULT_BINS.
"""
valid_subset = (
calculate(studies_by_intervention)[
["internal_validity", "external_validity", "citation"]
@ -115,11 +96,54 @@ def add_to_findings(
)
def apply_internal(df):
return _combined_validities(df, valid_subset, "internal_validity")
return _combined_validities(
df, valid_subset, "internal_validity", strength_bins
)
def apply_external(df):
return _combined_validities(df, valid_subset, "external_validity")
return _combined_validities(
df, valid_subset, "external_validity", strength_bins
)
findings_df["internal_validity"] = findings_df["studies"].apply(apply_internal)
findings_df["external_validity"] = findings_df["studies"].apply(apply_external)
return findings_df
# Fallback bins used when no custom strength bins are supplied. Each key is
# the lowest strength at which its label applies.
DEFAULT_BINS: dict[float, str] = {
    0.0: r"\-",
    1.0: r"\+",
    1.5: r"\++",
}
def _combined_validities(
apply_to: DataFrame,
by_intervention: DataFrame,
column: str = "internal_validity",
strength_bins: dict[float, str] | None = None,
):
if not isinstance(apply_to, str):
return
combined = 0.0
for study in apply_to.split(";"):
if study not in by_intervention["citation"].unique():
print(
f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
)
new = by_intervention.loc[by_intervention["citation"] == study, column]
if len(new) == 0 or math.isnan(new.iat[0]):
continue
combined += new.iat[0]
if combined:
return _binned_strength(combined, bins=strength_bins or DEFAULT_BINS)
return r"\-"
def _binned_strength(strength: float, bins: dict[float, str]) -> str:
    """Pick the label of the highest bin threshold that strength reaches.

    The bins are visited in ascending key order and the label of the last
    key that is less than or equal to ``strength`` wins. An empty string is
    returned when ``strength`` lies below every key or ``bins`` is empty.
    """
    reached = [
        label for threshold, label in sorted(bins.items()) if strength >= threshold
    ]
    return reached[-1] if reached else ""