fix(script): Correctly map regions to studies
Correctly map studies that list multiple countries to multiple regions or income groups.
This commit is contained in:
parent
8bab7256e1
commit
3f44b0d710
1 changed files with 20 additions and 3 deletions
|
@ -72,7 +72,24 @@ zot_df = pd.DataFrame([
|
||||||
|
|
||||||
# Add WB country grouping definitions (income group, world region)
|
# Add WB country grouping definitions (income group, world region)
|
||||||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
# Country-group lookup table, indexed by economy name: the rows read from the
# WB spreadsheet plus one synthetic "global" economy (code WLD, region World,
# empty income group and lending category).
df_country_groups = pd.concat(
    [
        pd.read_excel(WB_COUNTRY_GROUPS_FILE),
        pd.DataFrame(
            data={
                "Economy": ["global"],
                "Code": ["WLD"],
                "Region": ["World"],
                "Income group": [""],
                "Lending category": [""],
            }
        ),
    ]
).set_index("Economy")
|
||||||
|
|
||||||
|
def countries_to_regions(countries: str) -> str:
    """Map a semicolon-separated list of countries to their regions.

    Each country name is looked up in the module-level ``df_country_groups``
    table (indexed by economy name) and its ``Region`` value is collected.
    Names that are not present in the table are ignored.

    Args:
        countries: Country names separated by ``;``. Whitespace around each
            name is ignored. Non-string input (e.g. NaN for a missing value)
            is treated as "no countries".

    Returns:
        The distinct regions joined by ``;`` in alphabetical order, or an
        empty string when no country could be matched.
    """
    if not isinstance(countries, str):
        # Series.map passes missing values through as float NaN.
        return ""

    regions = set()
    for name in countries.split(";"):
        name = name.strip()
        if name not in df_country_groups.index:
            continue
        region = df_country_groups.at[name, "Region"]
        # Skip blank/missing cells so they cannot break or pollute the join.
        if isinstance(region, str) and region:
            regions.add(region)

    # Sort so the result does not depend on set iteration order, which
    # varies between interpreter runs for strings.
    return ";".join(sorted(regions))
|
||||||
|
|
||||||
|
def countries_to_income_groups(countries: str) -> str:
    """Map a semicolon-separated list of countries to their income groups.

    Each country name is looked up in the module-level ``df_country_groups``
    table (indexed by economy name) and its ``Income group`` value is
    collected. Names that are not present in the table are ignored, as are
    entries whose income group is blank or missing.

    Args:
        countries: Country names separated by ``;``. Whitespace around each
            name is ignored. Non-string input (e.g. NaN for a missing value)
            is treated as "no countries".

    Returns:
        The distinct income groups joined by ``;`` in alphabetical order, or
        an empty string when no country could be matched.
    """
    if not isinstance(countries, str):
        # Series.map passes missing values through as float NaN.
        return ""

    income_groups = set()
    for name in countries.split(";"):
        name = name.strip()
        if name not in df_country_groups.index:
            continue
        group = df_country_groups.at[name, "Income group"]
        # Blank groups would otherwise show up as a stray ";" in the result.
        if isinstance(group, str) and group:
            income_groups.add(group)

    # Sort so the result does not depend on set iteration order, which
    # varies between interpreter runs for strings.
    return ";".join(sorted(income_groups))
|
||||||
|
|
||||||
|
|
||||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
||||||
.assign(
|
.assign(
|
||||||
|
@ -82,8 +99,8 @@ bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
||||||
zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
|
zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
|
||||||
date = lambda _df: pd.to_datetime(_df["year"], format="%Y"),
|
date = lambda _df: pd.to_datetime(_df["year"], format="%Y"),
|
||||||
year = lambda _df: _df["date"].dt.year,
|
year = lambda _df: _df["date"].dt.year,
|
||||||
region = lambda _df: _df["country"].map(df_country_groups["Region"]),
|
region = lambda _df: _df["country"].map(countries_to_regions),
|
||||||
income_group = lambda _df: _df["country"].map(df_country_groups["Income group"]),
|
income_group = lambda _df: _df["country"].map(countries_to_income_groups),
|
||||||
)
|
)
|
||||||
.query("year >= 2000")
|
.query("year >= 2000")
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue