fix(script): Correctly map regions to studies
Correctly map entries that list multiple countries to multiple regions or income groups.
This commit is contained in:
parent
8bab7256e1
commit
3f44b0d710
1 changed file with 20 additions and 3 deletions
|
@ -72,7 +72,24 @@ zot_df = pd.DataFrame([
|
|||
|
||||
# Add WB country grouping definitions (income group, world region)
|
||||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||
# Load the country-grouping sheet and append one synthetic row named "global"
# (code "WLD", region "World", empty income group and lending category), so
# that a country value of "global" is found in the lookups below. The result
# is indexed by the "Economy" column, i.e. looked up by economy name.
df_country_groups = pd.concat([pd.read_excel(WB_COUNTRY_GROUPS_FILE), pd.DataFrame(data={'Economy':['global'],'Code':['WLD'],'Region':['World'], 'Income group':[''], 'Lending category':['']})]).set_index("Economy")
|
||||
|
||||
def countries_to_regions(countries: str, groups=None) -> str:
    """Map a ``;``-separated list of country names to their regions.

    Args:
        countries: Country names separated by ``;``. Whitespace around each
            name is ignored. Anything that is not a string (for example the
            NaN that ``Series.map`` passes for a missing value) yields ``""``.
        groups: Optional lookup table indexed by country name with a
            ``"Region"`` column. Defaults to the module-level
            ``df_country_groups``.

    Returns:
        The distinct regions of all recognised countries, sorted and joined
        with ``;``. Names that are not in the table, and table entries whose
        region is missing or empty, are skipped. Returns ``""`` when nothing
        matches.
    """
    if not isinstance(countries, str):
        return ""
    table = df_country_groups if groups is None else groups
    regions = set()
    for name in countries.split(";"):
        name = name.strip()
        if name not in table.index:
            continue
        region = table.at[name, "Region"]
        # Skip NaN / empty cells: they would break or pollute the join below.
        if isinstance(region, str) and region:
            regions.add(region)
    # Sort so the label does not depend on set iteration order, which varies
    # between interpreter runs for strings.
    return ";".join(sorted(regions))
|
||||
|
||||
def countries_to_income_groups(countries: str, groups=None) -> str:
    """Map a ``;``-separated list of country names to their income groups.

    Args:
        countries: Country names separated by ``;``. Whitespace around each
            name is ignored. Anything that is not a string (for example the
            NaN that ``Series.map`` passes for a missing value) yields ``""``.
        groups: Optional lookup table indexed by country name with an
            ``"Income group"`` column. Defaults to the module-level
            ``df_country_groups``.

    Returns:
        The distinct income groups of all recognised countries, sorted and
        joined with ``;``. Names that are not in the table, and table entries
        whose income group is missing or empty (such as the synthetic
        ``"global"`` row), are skipped. Returns ``""`` when nothing matches.
    """
    if not isinstance(countries, str):
        return ""
    table = df_country_groups if groups is None else groups
    income_groups = set()
    for name in countries.split(";"):
        name = name.strip()
        if name not in table.index:
            continue
        income_group = table.at[name, "Income group"]
        # Skip NaN / empty cells: they would break the join or leave a stray
        # ";" in the label (the "global" row has an empty income group).
        if isinstance(income_group, str) and income_group:
            income_groups.add(income_group)
    # Sort so the label does not depend on set iteration order, which varies
    # between interpreter runs for strings.
    return ";".join(sorted(income_groups))
|
||||
|
||||
|
||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
||||
.assign(
|
||||
|
@ -82,8 +99,8 @@ bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
|||
zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
|
||||
date = lambda _df: pd.to_datetime(_df["year"], format="%Y"),
|
||||
year = lambda _df: _df["date"].dt.year,
|
||||
region = lambda _df: _df["country"].map(df_country_groups["Region"]),
|
||||
income_group = lambda _df: _df["country"].map(df_country_groups["Income group"]),
|
||||
region = lambda _df: _df["country"].map(countries_to_regions),
|
||||
income_group = lambda _df: _df["country"].map(countries_to_income_groups),
|
||||
)
|
||||
.query("year >= 2000")
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue