feat(code): Allow custom strength of evidence bins
Findings summaries can now use arbitrary strength-of-evidence binning for display: pass a dict whose keys are the strength thresholds (as floats) and whose values are the string representations that should appear in the table.
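As a usage sketch (the dataframes here stand in for the caller's own findings and studies-by-intervention data, and the thresholds simply mirror the 0 / 5 / 10 split mentioned in the docstring below), the new strength_bins argument of add_to_findings could be supplied like this:

# Hypothetical custom binning: each threshold (float) maps to the symbol shown in the summary table.
custom_bins = {
    0.0: r"\-",    # weak evidence
    5.0: r"\+",    # evidence
    10.0: r"\++",  # strong evidence
}

summary = add_to_findings(findings_df, studies_by_intervention, strength_bins=custom_bins)

Omitting strength_bins falls back to the module's DEFAULT_BINS.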
parent 2b0fa5db7c
commit b453afd112
2 changed files with 74 additions and 39 deletions
@@ -3,36 +3,6 @@ from typing import cast
 from pandas import DataFrame


-def _binned_strength(strength: float) -> str:
-    if strength < 3.0:
-        return r"\-"
-    if strength < 6.0:
-        return r"\+"
-    return r"\++"
-
-
-def _combined_validities(
-    apply_to: DataFrame, by_intervention: DataFrame, column: str = "internal_validity"
-):
-    if not isinstance(apply_to, str):
-        return
-    combined = 0.0
-    for study in apply_to.split(";"):
-        if study not in by_intervention["citation"].unique():
-            print(
-                f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
-            )
-        new = by_intervention.loc[by_intervention["citation"] == study, column]
-        if len(new) == 0 or math.isnan(new.iat[0]):
-            continue
-        combined += new.iat[0]
-
-    if combined:
-        return _binned_strength(combined)
-    return r"\-"
-
-
 METHOD_RANKINGS = {
     2.0: ["OLS", "ordinary.least.square", "logistic.regression"],
     3.0: [
@@ -56,7 +26,6 @@ METHOD_RANKINGS = {
 def calculate(
     df: DataFrame,
     repr_col: str = "representativeness",
-    design_col: str = "design",
     method_col: str = "method",
 ) -> DataFrame:
     """Add internal and external validities to a dataframe.
@@ -76,9 +45,6 @@ def calculate(
     INT_COL_NAME: str = "internal_validity"
     cols = {EXT_COL_NAME: 0.0, INT_COL_NAME: 0.0}

-    # vd = df[
-    #     (df[design_col] == "quasi-experimental") | (df[design_col] == "experimental")
-    # ].copy()
     vd = df.assign(**cols)
     vd = cast(DataFrame, vd)

@@ -103,8 +69,23 @@ def calculate(


 def add_to_findings(
-    findings_df: DataFrame, studies_by_intervention: DataFrame
+    findings_df: DataFrame,
+    studies_by_intervention: DataFrame,
+    strength_bins: dict[float, str] | None = None,
 ) -> DataFrame:
     """Returns summary of findings with validities added.
+
+    Requires a 'findings' dataframe with at least a 'citation' column,
+    containing the bibtex keys of studies containing the findings
+    (semicolon-separated without spaces).
+    Then, another dataframe with a row per study is required containing
+    the study's 'method' and 'representativeness', in columns
+    named respectively.
+
+    Returns the correct bin to put the validity in, using the
+    optionally passed in bins dictionary or the default 0 (weak) ->
+    5 (evidence) -> 10 (strong) bins.
+    """
     valid_subset = (
         calculate(studies_by_intervention)[
             ["internal_validity", "external_validity", "citation"]
@@ -115,11 +96,54 @@ def add_to_findings(
     )

     def apply_internal(df):
-        return _combined_validities(df, valid_subset, "internal_validity")
+        return _combined_validities(
+            df, valid_subset, "internal_validity", strength_bins
+        )

     def apply_external(df):
-        return _combined_validities(df, valid_subset, "external_validity")
+        return _combined_validities(
+            df, valid_subset, "external_validity", strength_bins
+        )

     findings_df["internal_validity"] = findings_df["studies"].apply(apply_internal)
     findings_df["external_validity"] = findings_df["studies"].apply(apply_external)
     return findings_df
+
+
+DEFAULT_BINS = {
+    0.0: r"\-",
+    1.0: r"\+",
+    1.5: r"\++",
+}
+
+
+def _combined_validities(
+    apply_to: DataFrame,
+    by_intervention: DataFrame,
+    column: str = "internal_validity",
+    strength_bins: dict[float, str] | None = None,
+):
+    if not isinstance(apply_to, str):
+        return
+    combined = 0.0
+    for study in apply_to.split(";"):
+        if study not in by_intervention["citation"].unique():
+            print(
+                f"WARNING: Findings table {study} study did not match any study in interventions dataframe!"
+            )
+        new = by_intervention.loc[by_intervention["citation"] == study, column]
+        if len(new) == 0 or math.isnan(new.iat[0]):
+            continue
+        combined += new.iat[0]
+
+    if combined:
+        return _binned_strength(combined, bins=strength_bins or DEFAULT_BINS)
+    return r"\-"
+
+
+def _binned_strength(strength: float, bins: dict[float, str]) -> str:
+    bin = ""
+    for val, txt in sorted(bins.items()):
+        if strength >= val:
+            bin = txt
+    return bin
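As a quick check of the lookup above: _binned_strength keeps the label of the highest threshold the combined strength reaches, and returns an empty string when the strength lies below every threshold. A standalone sketch with made-up strengths and the DEFAULT_BINS values from this commit:

# Standalone restatement of the threshold lookup (hypothetical inputs, same logic as _binned_strength above).
DEFAULT_BINS = {0.0: r"\-", 1.0: r"\+", 1.5: r"\++"}

def binned_strength(strength: float, bins: dict[float, str]) -> str:
    label = ""
    for threshold, text in sorted(bins.items()):
        if strength >= threshold:
            label = text  # the highest threshold reached wins
    return label

print(binned_strength(0.5, DEFAULT_BINS))  # \-
print(binned_strength(1.2, DEFAULT_BINS))  # \+
print(binned_strength(4.0, DEFAULT_BINS))  # \++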