feat(script): Move all data transformations to single chunk
This commit is contained in:
parent
ed6c8550b6
commit
ad71859ded
2 changed files with 84 additions and 51 deletions
|
|
@@ -27,11 +27,41 @@ bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
|
|||
# load relevant studies
|
||||
from src import load_data
|
||||
|
||||
bib_df = prep_data.observations_with_metadata_df(
|
||||
# Every observation in a single dataframe, built from the raw observations
# together with the bibliography metadata and the country groupings sheet.
df = prep_data.observations_with_metadata_df(
    raw_observations=load_data.from_yml(PROCESSED_DATA),
    study_metadata=prep_data.bib_metadata_df(bib_sample),
    country_groups=prep_data.country_groups_df(
        Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")
    ),
)

# Reset the intermediate names in one statement.
# NOTE(review): presumably these held large intermediates in an earlier
# revision and are cleared to free memory -- confirm they are still needed.
raw_observations = zot_df = df_country_groups = None
|
||||
|
||||
# All observations, but split into one row per individual intervention.

# Matches one innermost "(...)" group, i.e. one without nested parentheses.
_PAREN_GROUP = re.compile(r"\([^()]*\)")


def _strip_parentheticals(text):
    """Return *text* with every balanced parenthesised group removed.

    Innermost groups are removed repeatedly, so nested groups disappear as
    well. Unbalanced parentheses are left untouched. A single greedy
    ``\\(.*\\)`` would instead delete everything between the first "(" and
    the last ")" -- including whole interventions listed in between.
    """
    previous = None
    while previous != text:
        previous, text = text, _PAREN_GROUP.sub("", text)
    return text


def _split_interventions(cell):
    """Split a ";"-separated cell into unique, stripped intervention names.

    The order of first appearance is kept so that the exploded rows come out
    in the same order on every run (a ``set`` would not guarantee that).
    """
    names = (part.strip() for part in _strip_parentheticals(cell).split(";"))
    return list(dict.fromkeys(names))


df_by_intervention = (
    df.fillna("")  # empty strings keep the str.join below from failing on NaN
    .groupby(
        [
            "author",
            "year",
            "title",
            "design",
            "method",
            "representativeness",
            "citation",
        ]
    )
    # Collapse all intervention cells of one study into a single string.
    .agg({"intervention": lambda _col: "; ".join(_col)})
    .reset_index()
    .drop_duplicates()
    # Turn the joined string into a list of unique names ...
    .assign(
        intervention=lambda _df: _df["intervention"].apply(_split_interventions),
    )
    # ... and give every name its own row.
    .explode("intervention")
)
|
||||
|
||||
# Calc study validities (internal & external separated)
|
||||
from src.model import validity
|
||||
|
||||
validities = validity.calculate(df_by_intervention)

# Short label of the form "<author text before the first comma> (<year>)".
validities["identifier"] = (
    validities["author"].str.replace(r",.*$", "", regex=True)
    + " ("
    + validities["year"].astype(str)
    + ")"
)

# Keep only (quasi-)experimental designs. The explicit copy makes the column
# assignments below operate on an independent frame instead of a slice of the
# unfiltered one (avoids SettingWithCopyWarning / lost writes).
validities = validities.loc[
    validities["design"].isin(["quasi-experimental", "experimental"])
].copy()

# NOTE(review): only internal_validity is cast to a categorical; the same cast
# for external_validity was commented out in the original -- confirm that
# leaving it with the dtype returned by validity.calculate is intentional.
validities["internal_validity"] = validities["internal_validity"].astype("category")

# Duplicate both columns under new names.
validities["External Validity"] = validities["external_validity"]
validities["Internal Validity"] = validities["internal_validity"]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue