chore(code): Refactor data preparation into process module
This commit is contained in:
parent
562b1eb6a0
commit
d2c25a9033
2 changed files with 7 additions and 4 deletions
|
@@ -18,8 +18,12 @@ RAW_DATA=DATA_DIR.joinpath("raw")
|
||||||
WORKING_DATA=DATA_DIR.joinpath("intermediate")
|
WORKING_DATA=DATA_DIR.joinpath("intermediate")
|
||||||
PROCESSED_DATA=DATA_DIR.joinpath("processed")
|
PROCESSED_DATA=DATA_DIR.joinpath("processed")
|
||||||
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
|
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
|
||||||
|
## Creates 3 important data structures:
|
||||||
|
# df: The main dataframe containing all final sample studies
|
||||||
|
# df_by_intervention: The same dataframe but split up by individual interventions per study
|
||||||
|
# validities: The studies with their validities, containing only quasi-/experimental studies
|
||||||
|
|
||||||
from src import prep_data
|
from src.process import add_metadata as meta
|
||||||
|
|
||||||
# raw database-search results
|
# raw database-search results
|
||||||
bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
|
bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
|
||||||
|
@@ -27,13 +31,13 @@ bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
|
||||||
bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
|
bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
|
||||||
|
|
||||||
# load relevant studies
|
# load relevant studies
|
||||||
from src import load_data
|
from src.extract import load_data as load
|
||||||
|
|
||||||
# each observation in a single dataframe
|
# each observation in a single dataframe
|
||||||
df = prep_data.observations_with_metadata_df(
|
|
||||||
raw_observations = load_data.from_yml(PROCESSED_DATA),
|
raw_observations = load_data.from_yml(PROCESSED_DATA),
|
||||||
study_metadata = prep_data.bib_metadata_df(bib_sample),
|
study_metadata = prep_data.bib_metadata_df(bib_sample),
|
||||||
country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
|
country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
|
||||||
|
df = meta.observations_with_metadata_df(
|
||||||
)
|
)
|
||||||
|
|
||||||
# all observations but split per individual intervention
|
# all observations but split per individual intervention
|
||||||
|
@@ -66,4 +70,3 @@ validities = validities.loc[(validities["design"] == "quasi-experimental") | (va
|
||||||
validities["internal_validity"] = validities["internal_validity"].astype('category')
|
validities["internal_validity"] = validities["internal_validity"].astype('category')
|
||||||
validities["External Validity"] = validities["external_validity"]
|
validities["External Validity"] = validities["external_validity"]
|
||||||
validities["Internal Validity"] = validities["internal_validity"]
|
validities["Internal Validity"] = validities["internal_validity"]
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue