import re

import pandas as pd
from bibtexparser import Library

import src.globals as g
from src.extract import load_data as load
from src.model import validity
from src.process import add_metadata as meta

## Creates 3 important data structures:
# df: The main dataframe containing all final sample studies
# df_by_intervention: The same dataframe but split up by individual interventions per study
# validities: The studies with their validities, containing only quasi-/experimental studies

# Columns that df_by_intervention() groups on before collapsing and
# re-splitting the "intervention" column.
_STUDY_COLUMNS = [
    "author",
    "year",
    "title",
    "design",
    "method",
    "representativeness",
    "citation",
]

# Matches one parenthesised group that contains no further parentheses.
_INNERMOST_PARENTHETICAL = re.compile(r"\([^()]*\)")


def _strip_parentheticals(text: str) -> str:
    """Remove every balanced ``(...)`` group from *text*, nested ones included."""
    # Innermost groups are removed repeatedly instead of using one greedy
    # ``\(.*\)`` match: a greedy match spans from the first "(" to the last
    # ")" and would swallow whatever lies between two separate annotations.
    previous = None
    while previous != text:
        previous = text
        text = _INNERMOST_PARENTHETICAL.sub("", text)
    return text


def _split_interventions(cell: str) -> list[str]:
    """Split a ``;``-separated cell into unique, stripped intervention names."""
    # Parentheticals are stripped before splitting so that a ";" inside an
    # annotation does not create a spurious entry.
    names = (part.strip() for part in _strip_parentheticals(cell).split(";"))
    # dict.fromkeys de-duplicates while keeping first-seen order; exploding a
    # list (unlike a set) gives a deterministic row order.
    return list(dict.fromkeys(names))


# the complete library of sampled (and working) literature
def bib_sample() -> Library:
    """Load the bibliography from ``zotero-library.bib`` in ``g.REFERENCE_DATA``."""
    return meta.bib_library_from_file(g.REFERENCE_DATA.joinpath("zotero-library.bib"))


# raw database-search results
def bib_sample_db_raw() -> Library:
    """Load the bibliography from the ``db`` directory in ``g.REFERENCE_DATA``."""
    return meta.bib_library_from_dir(g.REFERENCE_DATA.joinpath("db"))


def df_main() -> pd.DataFrame:
    """Build the main dataframe of observations joined with their metadata.

    Combines the observations loaded from ``g.EXTRACTED_DATA``, the
    bibliographic metadata derived from :func:`bib_sample`, and the country
    groupings read from ``wb-country-groupings.xlsx``.
    """
    return meta.observations_with_metadata_df(
        raw_observations=load.from_yml(g.EXTRACTED_DATA),
        study_metadata=meta.bib_metadata_df(bib_sample()),
        country_groups=meta.country_groups_df(
            g.SUPPLEMENTARY_DATA.joinpath("wb-country-groupings.xlsx")
        ),
    )


def df_by_intervention() -> pd.DataFrame:
    """Return the main dataframe with one row per study and intervention.

    Rows of :func:`df_main` are grouped by the study-level columns, their
    ``intervention`` strings are joined with ``"; "``, parenthesised
    annotations are removed, and the result is split on ``;`` into unique
    names that are exploded into separate rows. Only the grouping columns
    and ``intervention`` are kept.
    """
    by_study = (
        df_main()
        # NaN group keys would be dropped by groupby, and NaN interventions
        # cannot be joined as strings, so blank them out first.
        .fillna("")
        .groupby(_STUDY_COLUMNS)
        .agg({"intervention": lambda _col: "; ".join(_col)})
        .reset_index()
        .drop_duplicates()
    )
    return by_study.assign(
        intervention=lambda _df: _df["intervention"].apply(_split_interventions),
    ).explode("intervention")


def df_validities() -> pd.DataFrame:
    """Return validity scores for the experimental and quasi-experimental studies.

    Runs ``validity.calculate`` on :func:`df_by_intervention`, adds an
    ``identifier`` column built from the author text before the first comma
    plus the year in parentheses, keeps only rows whose ``design`` is
    ``"experimental"`` or ``"quasi-experimental"``, and adds the
    ``"External Validity"`` / ``"Internal Validity"`` columns as copies of
    their snake_case counterparts.
    """
    validities = validity.calculate(df_by_intervention())
    validities["identifier"] = (
        validities["author"].str.replace(r",.*$", "", regex=True)
        + " ("
        + validities["year"].astype(str)
        + ")"
    )
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the unfiltered one (SettingWithCopyWarning).
    validities = validities.loc[
        validities["design"].isin(["quasi-experimental", "experimental"])
    ].copy()
    # NOTE(review): the matching cast for "external_validity" was commented
    # out in the original and is left disabled here -- confirm that is intended.
    validities["internal_validity"] = validities["internal_validity"].astype("category")
    validities["External Validity"] = validities["external_validity"]
    validities["Internal Validity"] = validities["internal_validity"]
    return validities