diff --git a/02-data/supplementary/wb-country-groupings.xlsx b/02-data/supplementary/wb-country-groupings.xlsx new file mode 100644 index 0000000..35703c3 Binary files /dev/null and b/02-data/supplementary/wb-country-groupings.xlsx differ diff --git a/poetry.lock b/poetry.lock index d2578d8..20d8100 100644 --- a/poetry.lock +++ b/poetry.lock @@ -569,6 +569,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "executing" version = "2.0.1" @@ -1747,6 +1758,20 @@ files = [ {file = "numpy-1.26.1.tar.gz", hash = "sha256:c8c6c72d4a9f831f328efb1312642a1cafafaa88981d9ab76368d50d07d93cbe"}, ] +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, + {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "overrides" version = "7.4.0" @@ -3021,4 +3046,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "<3.13,>=3.11" -content-hash = "e02d73a52358bb9ed8da5e6d5cc8c55992ee449a429c820105610f6c484066e3" +content-hash = "f6f60ec28f3f1e61377114f1b58e6117b45fb290f362ad471790611505e95dfc" diff --git a/pyproject.toml b/pyproject.toml index ede6765..01fb1e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ bibtexparser = {version = ">=2.0.0b1", allow-prereleases = true} jupyter = "^1.0.0" jupyter-cache = "^0.6.1" tabulate = "^0.9.0" +openpyxl = "^3.1.2" [tool.poetry.group.dev.dependencies] pynvim = "^0.4.3" diff --git a/scoping_review.qmd b/scoping_review.qmd index 29fd7d9..9ce1858 100644 --- a/scoping_review.qmd +++ b/scoping_review.qmd @@ -463,8 +463,7 @@ as can be seen in @fig-publications-per-year. Keeping in mind that these results are not yet screened for their full relevance to the topic at hand, so far only being *potentially* relevant in falling into the requirements of the search pattern, an increased results output does not necessarily mean a clearly rising amount of relevant literature. ```{python} -#| label: fig-publications-per-year -#| fig-cap: Publications per year +# load relevant studies reformatted = [] for e in sample_relevant: ed = e.fields_dict @@ -476,6 +475,7 @@ for e in sample_relevant: ed.get("usage-count-since-2013", Field(key="usage-count-since-2013", value=None)).value, ed.get("keywords", Field(key="keywords", value=None)).value, ]) + # FIXME do not just drop missing values bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage", "Keywords"]) bib_df = bib_df.dropna(how="any") @@ -485,6 +485,18 @@ bib_df["Year"] = bib_df["Date"].dt.year # only keep newer entries bib_df = bib_df[bib_df["Year"] >= 2000] +# Add WB country grouping definitions (income group, world region) +# TODO Re-enable for processed study pool +# WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve() +# df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE) +# bib_df["income group"] = bib_df["country"].map(df_country_groups.set_index("Economy")["Income group"]) +# bib_df["region"] = bib_df["country"].map(df_country_groups.set_index("Economy")["Region"]) +``` + +```{python} +#| label: fig-publications-per-year +#| fig-cap: Publications per year + # create dummy category for white or gray lit type (based on 'article' appearing in type) bib_df["Type"].value_counts() bib_df["Literature"] = np.where(bib_df["Type"].str.contains("article", case=False, regex=False), "white", "gray")