From d2c25a90339b3d7adb509849f656c4fecf5cb3ed Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Mon, 15 Jul 2024 20:42:11 +0200 Subject: [PATCH] chore(code): Refactor data preparation into process module --- 01-codechunks/_prep-data.py | 11 +++++++---- src/{prep_data.py => process/add_metadata.py} | 0 2 files changed, 7 insertions(+), 4 deletions(-) rename src/{prep_data.py => process/add_metadata.py} (100%) diff --git a/01-codechunks/_prep-data.py b/01-codechunks/_prep-data.py index 0ad0d61..a2ea1d8 100644 --- a/01-codechunks/_prep-data.py +++ b/01-codechunks/_prep-data.py @@ -18,8 +18,12 @@ RAW_DATA=DATA_DIR.joinpath("raw") WORKING_DATA=DATA_DIR.joinpath("intermediate") PROCESSED_DATA=DATA_DIR.joinpath("processed") SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary") +## Creates 3 important data structures: +# df: The main dataframe containing all final sample studies +# df_by_intervention: The same dataframe but split up by individual interventions per study +# validities: The studies with their validities, containing only quasi-/experimental studies -from src import prep_data +from src.process import add_metadata as meta # raw database-search results bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA) @@ -27,13 +31,13 @@ bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA) bib_sample = prep_data.bib_library_from_dir(WORKING_DATA) # load relevant studies -from src import load_data +from src.extract import load_data as load # each observation in a single dataframe -df = prep_data.observations_with_metadata_df( raw_observations = load_data.from_yml(PROCESSED_DATA), study_metadata = prep_data.bib_metadata_df(bib_sample), country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")), +df = meta.observations_with_metadata_df( ) # all observations but split per individual intervention @@ -66,4 +70,3 @@ validities = validities.loc[(validities["design"] == "quasi-experimental") | (va validities["internal_validity"] = validities["internal_validity"].astype('category') validities["External Validity"] = validities["external_validity"] validities["Internal Validity"] = validities["internal_validity"] - diff --git a/src/prep_data.py b/src/process/add_metadata.py similarity index 100% rename from src/prep_data.py rename to src/process/add_metadata.py