from pathlib import Path
import os
import re

## standard imports
from IPython.display import display, Markdown, HTML
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from tabulate import tabulate
import bibtexparser

sns.set_style("whitegrid")

# Data directories, resolved relative to the Quarto project root; falls back
# to the current working directory when QUARTO_PROJECT_DIR is not set.
PROJECT_DIR = Path(os.getenv("QUARTO_PROJECT_DIR", "."))
DATA_DIR = PROJECT_DIR.joinpath("02-data")
RAW_DATA = DATA_DIR.joinpath("raw")
WORKING_DATA = DATA_DIR.joinpath("intermediate")
PROCESSED_DATA = DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")

from src import prep_data

# raw database-search results
bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
# the complete library of sampled (and working) literature
bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)

# load relevant studies
from src import load_data

# each observation in a single dataframe
df = prep_data.observations_with_metadata_df(
    raw_observations=load_data.from_yml(PROCESSED_DATA),
    study_metadata=prep_data.bib_metadata_df(bib_sample),
    country_groups=prep_data.country_groups_df(
        SUPPLEMENTARY_DATA.joinpath("wb-country-groupings.xlsx")
    ),
)

# Columns that together identify one study; observations sharing all of them
# are merged before their interventions are split apart again.
_STUDY_KEYS = [
    "author",
    "year",
    "title",
    "design",
    "method",
    "representativeness",
    "citation",
]


def _unique_interventions(cell: str) -> list:
    """Split a ';'-separated intervention string into unique, stripped names.

    Parenthetical notes are removed first. The match is non-greedy so that
    each "(...)" is removed on its own: a greedy ``\\(.*\\)`` would delete
    everything between the first "(" and the last ")" of the joined string,
    e.g. turn "a (x); b (y)" into just "a" and lose "b".

    Returns the names as a list in first-seen order (duplicates dropped), so
    that exploding the result yields the same row order on every run; a
    ``set`` would make the order depend on string hashing.
    """
    without_notes = re.sub(r"\(.*?\)", "", cell)
    return list(dict.fromkeys(part.strip() for part in without_notes.split(";")))


# all observations but split per individual intervention
df_by_intervention = (
    df.fillna("")
    .groupby(_STUDY_KEYS)
    # concatenate the intervention strings of all observations of a study
    .agg({"intervention": "; ".join})
    .reset_index()
    .drop_duplicates()
    .assign(
        intervention=lambda _df: _df["intervention"].apply(_unique_interventions),
    )
    # one row per (study, intervention)
    .explode("intervention")
)

# Calc study validities (internal & external separated)
from src.model import validity

validities = validity.calculate(df_by_intervention)
# Short label: author text up to the first comma, followed by "(year)".
validities["identifier"] = (
    validities["author"].str.replace(r",.*$", "", regex=True)
    + " ("
    + validities["year"].astype(str)
    + ")"
)
# Keep only (quasi-)experimental designs; copy so the filtered frame is
# independent of the unfiltered one.
validities = validities.loc[
    validities["design"].isin(["quasi-experimental", "experimental"])
].copy()
# The categorical cast for external_validity is currently disabled:
# validities["external_validity"] = validities["external_validity"].astype('category')

# Cast internal_validity to a categorical dtype and expose both validity
# columns under additional display-style names.
_internal_as_category = validities["internal_validity"].astype("category")
validities = validities.assign(
    internal_validity=_internal_as_category,
    **{
        "External Validity": validities["external_validity"],
        "Internal Validity": _internal_as_category,
    },
)