From 953720ce5473f4953881c4c7e4b68f0bc192944d Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Thu, 15 Feb 2024 16:32:46 +0100
Subject: [PATCH] refactor(code): Move finding table validities into model module

All findings tables can use the validities functionality to add
strength of evidence (internal/external) to themselves. Generalized the
function to work for any main findings csv (to dataframe) table, not
just institutional findings.
---
 scoping_review.qmd                | 26 +++-------------------
 src/model/strength_of_findings.py | 88 +++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 22 deletions(-)
 create mode 100644 src/model/strength_of_findings.py

diff --git a/scoping_review.qmd b/scoping_review.qmd
index 7b41996..dfb3c54 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -631,30 +631,12 @@ g = sns.PairGrid(validities[["internal_validity", "external_validity", "identifi
 ```{python}
 #| label: tbl-findings-institutional
 #| tbl-cap: Main findings summary institutional policies
+from src.model import strength_of_findings as findings
+
 findings_institutional = pd.read_csv("02-data/supplementary/findings-institutional.csv")
-from src import prep_data
-import math
+fd_df = findings.add_validities(findings_institutional, by_intervention)
 
-EVIDENCE_STRENGH=["\-","\-","\-","\+","\+","\+","++","++","++","++","++","++","++","++","++","++"]
-
-valid_subset = prep_data.calculate_validities(by_intervention)[["internal_validity", "external_validity", "citation"]].fillna(1.0).drop_duplicates(subset=["citation"]).sort_values("internal_validity")
-def combined_validities(df_in, column: str = "internal_validity"):
-    if not isinstance(df_in, str):
-        return
-    combined = 0.0
-    for study in df_in.split(";"):
-        new = valid_subset.loc[valid_subset["citation"] == study, column]
-        if len(new) > 0 and not math.isnan(new.iat[0]):
-            combined += new.iat[0]
-    if combined:
-        return EVIDENCE_STRENGH[int(combined)] + f" ({str(combined)})"
-    return "\-"
-def combined_external(df_in, column: str = "external_validity"):
-    return combined_validities(df_in, column)
-
-findings_institutional["internal_validity"] = findings_institutional["studies"].apply(combined_validities)
-findings_institutional["external_validity"] = findings_institutional["studies"].apply(combined_external)
-md(tabulate(findings_institutional[["area of policy", "internal_validity", "external_validity", "findings", "channels"]].fillna(""), showindex=False, headers="keys", tablefmt="grid"))
+md(tabulate(fd_df[["area of policy", "internal_validity", "external_validity", "findings", "channels"]].fillna(""), showindex=False, headers="keys", tablefmt="grid"))
 ```
 
 {{< landscape >}}
diff --git a/src/model/strength_of_findings.py b/src/model/strength_of_findings.py
new file mode 100644
index 0000000..f078cd6
--- /dev/null
+++ b/src/model/strength_of_findings.py
@@ -0,0 +1,88 @@
+"""Attach strength-of-evidence ratings to findings tables."""
+
+import math
+from typing import Optional
+
+from pandas import DataFrame
+
+from src import prep_data
+
+
+def _binned_strength(strength: float) -> str:
+    """Return the evidence marker for a summed validity score.
+
+    Sums below 3 get the lowest marker, sums below 6 the middle one and
+    everything else the highest.
+    """
+    if strength < 3.0:
+        return r"\-"
+    if strength < 6.0:
+        return r"\+"
+    return r"\++"
+
+
+def _combined_validities(
+    apply_to: object, by_intervention: DataFrame, column: str = "internal_validity"
+) -> Optional[str]:
+    """Sum the validity scores of all studies cited in one table cell.
+
+    Args:
+        apply_to: One value of the ``studies`` column: citation keys
+            separated by semicolons. Non-string values (such as a
+            missing cell) are not rated.
+        by_intervention: Table with a ``citation`` column and the
+            validity column named by ``column``.
+        column: Name of the validity column to sum up.
+
+    Returns:
+        The binned evidence marker, or ``None`` for non-string values.
+    """
+    if not isinstance(apply_to, str):
+        return None
+    combined = 0.0
+    for study in apply_to.split(";"):
+        # Tolerate padded lists such as "a; b" when matching citation keys.
+        key = study.strip()
+        new = by_intervention.loc[by_intervention["citation"] == key, column]
+        if len(new) > 0 and not math.isnan(new.iat[0]):
+            combined += new.iat[0]
+    # A sum of zero lands in the lowest bin, so it needs no special case.
+    return _binned_strength(combined)
+
+
+def add_validities(
+    findings_df: DataFrame, studies_by_intervention: DataFrame
+) -> DataFrame:
+    """Return a copy of a findings table with evidence-strength columns.
+
+    Args:
+        findings_df: Findings table whose ``studies`` column holds
+            semicolon-separated citation keys.
+        studies_by_intervention: Study table passed on to
+            ``prep_data.calculate_validities``.
+
+    Returns:
+        A new DataFrame with ``internal_validity`` and
+        ``external_validity`` columns added; ``findings_df`` itself is
+        not modified.
+    """
+    valid_subset = (
+        prep_data.calculate_validities(studies_by_intervention)[
+            ["internal_validity", "external_validity", "citation"]
+        ]
+        .fillna(1.0)
+        .drop_duplicates(subset=["citation"])
+        .sort_values("internal_validity")
+    )
+
+    def apply_internal(studies):
+        return _combined_validities(studies, valid_subset, "internal_validity")
+
+    def apply_external(studies):
+        return _combined_validities(studies, valid_subset, "external_validity")
+
+    # Work on a copy so the caller's table is not changed as a side effect.
+    rated = findings_df.copy()
+    rated["internal_validity"] = rated["studies"].apply(apply_internal)
+    rated["external_validity"] = rated["studies"].apply(apply_external)
+    return rated