From 3f44b0d7108dc74843010168cdfb68796252d397 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Thu, 21 Dec 2023 16:13:25 +0100
Subject: [PATCH] fix(script): Correctly map regions to studies

Correctly mapping multiple countries to multiple regions or income groups.
---
Revision notes: both mappers now share one lookup helper, sort their
groups so the output is stable between runs, skip blank/NaN group values
and tolerate non-string country cells. The blob id after ".." on the
index line predates these changes.

 scoping_review.qmd | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/scoping_review.qmd b/scoping_review.qmd
index cb95cb6..f7bab5c 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -72,7 +72,36 @@ zot_df = pd.DataFrame([
 
 # Add WB country grouping definitions (income group, world region)
 WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
-df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
+# The synthetic 'global' row lets studies tagged 'global' resolve to the 'World' region.
+df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['World'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
+
+def _countries_to_groups(countries, column:str):
+    """Map a ';'-separated country string to the ';'-joined unique values of `column`.
+
+    Countries missing from df_country_groups are skipped, as are blank or
+    non-string group values. Groups are sorted so the output is stable
+    between runs (iteration order of a set of strings is not).
+    """
+    # A missing country cell reaches Series.map as NaN (a float), not a string.
+    if not isinstance(countries, str):
+        return ""
+    res = set()
+    for c in (part.strip() for part in countries.split(";")):
+        if c in df_country_groups.index:
+            group = df_country_groups.at[c, column]
+            # Skip blank cells (e.g. the empty income group of 'global') and NaN.
+            if isinstance(group, str) and group:
+                res.add(group)
+    return ";".join(sorted(res))
+
+def countries_to_regions(countries:str):
+    """Return the unique WB regions for a ';'-separated country string."""
+    return _countries_to_groups(countries, 'Region')
+
+def countries_to_income_groups(countries:str):
+    """Return the unique WB income groups for a ';'-separated country string."""
+    return _countries_to_groups(countries, 'Income group')
+
 
 bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
     .assign(
@@ -82,8 +111,8 @@ bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
         zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
         date = lambda _df: pd.to_datetime(_df["year"], format="%Y"),
         year = lambda _df: _df["date"].dt.year,
-        region = lambda _df: _df["country"].map(df_country_groups["Region"]),
-        income_group = lambda _df: _df["country"].map(df_country_groups["Income group"]),
+        region = lambda _df: _df["country"].map(countries_to_regions),
+        income_group = lambda _df: _df["country"].map(countries_to_income_groups),
     )
     .query("year >= 2000")
 )