diff --git a/_quarto.yml b/_quarto.yml
index 10b4210..0fede24 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -4,6 +4,7 @@
 project:
   render:
     - presentation_summary.md
     - notes.qmd
+    - meeting_eoy.qmd
     - scoping_review.qmd
 toc: true
diff --git a/meeting_eoy.qmd b/meeting_eoy.qmd
new file mode 100644
index 0000000..ca645cc
--- /dev/null
+++ b/meeting_eoy.qmd
@@ -0,0 +1,369 @@
+---
+bibliography: 02-data/supplementary/lib.bib
+csl: /home/marty/documents/library/utilities/styles/APA-7.csl
+papersize: A4
+linestretch: 1.5
+fontfamily: lmodern
+fontsize: "12"
+geometry:
+  - left=2.2cm
+  - right=3.5cm
+  - top=2.5cm
+  - bottom=2.5cm
+lang: en
+title: "Scoping Review: Preliminary findings"
+subtitle: Addressing inequalities in the World of Work
+---

```{python}
#| echo: false
from pathlib import Path
import re

# standard imports
from IPython.core.display import Markdown as md
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from tabulate import tabulate
import bibtexparser

sns.set_style("whitegrid")

DATA_DIR = Path("./02-data")
RAW_DATA = DATA_DIR.joinpath("raw")
WORKING_DATA = DATA_DIR.joinpath("intermediate")
PROCESSED_DATA = DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")

# concatenate the partial bibliographies: raw (undeduplicated) sample first
bib_string = ""
for partial_bib in RAW_DATA.glob("**/*.bib"):
    with open(partial_bib) as f:
        bib_string += f.read() + "\n"
bib_sample_raw_db = bibtexparser.parse_string(bib_string)

# then the working (deduplicated) sample
bib_string = ""
for partial_bib in WORKING_DATA.glob("**/*.bib"):
    with open(partial_bib) as f:
        bib_string += f.read() + "\n"
bib_sample = bibtexparser.parse_string(bib_string)

# load relevant studies
from src import data

# load zotero-based metadata: citations and uses
zot_df = pd.DataFrame([
    [
        entry["doi"] if "doi" in entry.fields_dict else None,
        entry["times-cited"] if "times-cited" in entry.fields_dict else None,
        entry["usage"] if "usage" in entry.fields_dict else None,
        entry["keywords"] if "keywords" in entry.fields_dict else None,
    ]
    for entry in bib_sample.entries
], columns=["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")

# add WB country grouping definitions (income group, world region)
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")

bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
    .assign(
        doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
        zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
        zot_usage=lambda _df: _df["doi"].map(zot_df["usage"]),
        zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
        date=lambda _df: pd.to_datetime(_df["year"], format="%Y"),
        year=lambda _df: _df["date"].dt.year,
        region=lambda _df: _df["country"].map(df_country_groups["Region"]),
        income_group=lambda _df: _df["country"].map(df_country_groups["Income group"]),
    )
    .query("year >= 2000")
)
# free the intermediate lookup tables
zot_df = None
df_country_groups = None
```

# The data sample

```{python}
#| echo: false
#| output: asis

FULL_RAW_SAMPLE_NOTHING_REMOVED = 2396
nr_database_query_raw = len(bib_sample_raw_db.entries)
nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - len(bib_sample.entries)
nr_other_sources = (len(bib_sample.entries) + nr_out_duplicates) - nr_database_query_raw

all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict]
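# each entry carries screening keywords such as "out::title" or
# "done::extracted"; counting them reconstructs the PRISMA flow below.
# NOTE (assumption): the hard-coded offsets added to the first three counts
# appear to stand in for records screened before keyword tagging began
# (see the FIXME further down).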
nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + 400
nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + 400
nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + 300
nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])

# mermaid-from-python trick, from: https://github.com/quarto-dev/quarto-cli/discussions/6508
t3 = "`" * 3
# FIXME: use 02-data/supplementary undeduplicated counts to get database starting and snowballing counts
print(f"""
```{{mermaid}}
%%| label: fig-prisma
%%| fig-cap: "Sample sorting process through identification and screening"
%%| fig-width: 6
flowchart TD;
    search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
    search_prev["Records identified through other sources (n={nr_other_sources})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];

    starting_sample -- "Duplicate removal ({nr_out_duplicates} removed)" --> dedup["Records after duplicates removed (n={len(bib_sample.entries)})"];

    dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={len(bib_sample.entries) - nr_out_title})"];

    title_screened -- "Abstract screening ({nr_out_abstract} excluded)" --> abstract_screened["Records after abstracts screened (n={len(bib_sample.entries) - nr_out_title - nr_out_abstract})"];

    abstract_screened -- "Language screening ({nr_out_language} excluded)" --> language_screened["Records after language screened (n={len(bib_sample.entries) - nr_out_title - nr_out_abstract - nr_out_language})"];

    language_screened -- "Full-text screening ({nr_out_fulltext} excluded)" --> fulltext_screened["Full-text articles assessed for eligibility (n={nr_extraction_done}) STILL OUTSTANDING: {len(bib_sample.entries) - nr_out_title - nr_out_abstract - nr_out_language - nr_extraction_done}"];
{t3}
""")
```

- strongest focus on income inequality (vertical), with many horizontal inequality studies also covering aspects of income inequality
- horizontal inequalities: strongest focus on gender-income inequalities
- interventions:
    - strongest research base on labour rights protection interventions
    - second on infrastructural interventions
    - third on agency-strengthening ones: training, financial access, education programmes

- formalization & social protection research rarely goes into inequality outcomes beyond 'income' effects; most such studies were excluded for that reason

```{python}
#| echo: false
#| label: fig-inequality-types-whole-sample
#| fig-cap: Overall inequality types in sample

# project inequality types from the `inequality::<type>` screening keywords
# of the whole deduplicated sample
pi = (pd.DataFrame([
    [
        entry["doi"] if "doi" in entry.fields_dict else None,
        entry["times-cited"] if "times-cited" in entry.fields_dict else None,
        entry["usage"] if "usage" in entry.fields_dict else None,
        entry["keywords"] if "keywords" in entry.fields_dict else None,
    ]
    for entry in bib_sample.entries
], columns=["doi", "cited", "usage", "keywords"])
    .drop_duplicates("doi")
    .assign(
        inequality=lambda _df: _df["keywords"].str.replace("\\", "").str.extract(r"inequality::([\w_]+),?")
    )
    .dropna(subset="inequality")
    .assign(
        inequality=lambda _df: _df["inequality"].str.replace("_", " "),
        projected=1,
    )
    .reset_index()
)
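# merge hand-extracted inequality tags from the relevant studies
# (projected=0) with the keyword-projected tags above (projected=1),
# here for the whole screened sample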
"; ".join(_col), + } + ) + .assign( + projected=0 + + ) + .reset_index() + .drop_duplicates() , pi]) + .assign( inequality=lambda _df: _df["inequality"].apply( + lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")]) + ), + ) + .explode("inequality") +.drop_duplicates() +) + +sort_order = inequality["inequality"].value_counts().index +i = inequality[inequality["inequality"].str.contains(r"(?:structural|institutional|agency)") == False] +fig = plt.figure() +fig.set_size_inches(6, 3) +ax = sns.countplot(i, x="inequality", hue="projected" ,order=i["inequality"].value_counts().index) +plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") +plt.show() +``` + +# Preliminary findings + +```{python} +#| echo: false +#| label: fig-inequality-types +#| fig-cap: Finished and projected inequality types +inequality = (pd.concat([ + bib_df.groupby(["author", "year", "title"]) + .agg( + { + "inequality": lambda _col: "; ".join(_col), + } + ) + .assign( + projected=0 + + ) + .reset_index() + .drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]]) + .assign( inequality=lambda _df: _df["inequality"].apply( + lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")]) + ), + ) + .explode("inequality") +.drop_duplicates() +) + +sort_order = inequality["inequality"].value_counts().index +i = inequality[inequality["inequality"].str.contains(r"(?:structural|institutional|agency)") == False] +fig = plt.figure() +fig.set_size_inches(6, 3) +ax = sns.countplot(i, x="inequality", hue="projected" ,order=i["inequality"].value_counts().index) +plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") +plt.show() +``` + +- interventions most strongly target gender-income divide + - most studies here recommend further scale-integration between agency/structural approaches + - most studies also only focus on analysing a single scale however +- interventions often have intersectional impacts even if not targeted at them + - most visible for institutional/structural interventions and spatial inequalities + - studies analysing intersectional inequalities near unanimously recommend intersectional targeting + +- individual agency-based interventions (training, subsidies, maternity benefits, transfers, microcredit, etc): + - seem most effective for targeting WoW outcomes of disability inequalities + - seem marginally effective for targeting WoW outcomes of gender inequalities + - require additional mediating scales for other inequalities +- more structural interventions (education, infrastructural, ubi, trade liberalization, collective action): + - seem most effective for spatial, income, education-generational inequalities + - often show longer-term impacts, requiring longer periods of analyses + - can work without additional agency-based interventions, few studies analyse both at same time + +# Preliminary limitations + +```{python} +#| echo: false +#| label: fig-intervention-types +#| fig-cap: Finished and projected intervention types + +# load zotero-based metadata: citations and uses +pi = (pd.DataFrame([ + [ + entry["doi"] if "doi" in entry.fields_dict else None, + entry["times-cited"] if "times-cited" in entry.fields_dict else None, + entry["usage"] if "usage" in entry.fields_dict else None, + entry["keywords"] if "keywords" in entry.fields_dict else None, + ] + for entry in bib_sample.entries +], columns = ["doi", "cited", "usage", "keywords"]) + .drop_duplicates("doi") + .assign( + intervention=lambda _df: 
_df["keywords"].str.replace("\\", "").str.extract('type::([\w\_]+),?') + ).dropna(subset="intervention") + .assign( + intervention=lambda _df: _df["intervention"].str.replace("_", " "), + projected = 1 + ).reset_index() + ) +pi + +by_intervention = (pd.concat([ + bib_df.groupby(["author", "year", "title"]) + .agg( + { + "intervention": lambda _col: "; ".join(_col), + } + ) + .assign( + projected=0 + + ) + .reset_index() + .drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]]) + .assign( intervention=lambda _df: _df["intervention"].apply( + lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")]) + ), + ) + .explode("intervention") +.drop_duplicates() +) + +sort_order = by_intervention["intervention"].value_counts().index +i = by_intervention[by_intervention["intervention"].str.contains(r"(?:structural|institutional|agency)") == False] +fig = plt.figure() +fig.set_size_inches(6, 3) +ax = sns.countplot(i, x="intervention", hue="projected" ,order=i["intervention"].value_counts().index) +plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") +plt.show() +``` + +- stronger institutional-structural research focus in developed countries, with more structural-agency based in developing countries +- employment creation as a category is often subsumed in other structural/institutional analyses +- little evidence-based research on effect of interventions targeting education on world of work outcomes +- spatial inequality most evenly geographically spread evidence base +- empirical base on interventions targeting disability inequalities strongly restricted on developed countries, especially United States + +```{python} +#| echo: false +#| label: fig-countries +#| fig-cap: Country spread +#| column: screen + +# load zotero-based metadata: citations and uses +pi = (pd.DataFrame([ + [ + entry["doi"] if "doi" in entry.fields_dict else None, + entry["times-cited"] if "times-cited" in entry.fields_dict else None, + entry["usage"] if "usage" in entry.fields_dict else None, + entry["keywords"] if "keywords" in entry.fields_dict else None, + ] + for entry in bib_sample.entries +], columns = ["doi", "cited", "usage", "keywords"]) + .drop_duplicates("doi") + .assign( + country=lambda _df: _df["keywords"].str.replace("\\", "").str.extract('country::([\w\_]+),?') + ).dropna(subset="country") + .assign( + country=lambda _df: _df["country"].str.replace("_", " ").str.replace("US", "United States").str.replace("Britain", "United Kingdom"), + projected = 1 + ).reset_index() + ) +pi + +by_country = (pd.concat([ + bib_df.groupby(["author", "year", "title"]) + .agg( + { + "country": lambda _col: "; ".join(_col), + } + ) + .assign( + projected=0 + + ) + .reset_index() + .drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]]) + .assign( country=lambda _df: _df["country"].apply( + lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")]) + ), + ) + .explode("country") +.drop_duplicates() +) + +sort_order = by_country["country"].value_counts().index +i = by_country[by_country["country"].str.contains(r"(?:structural|institutional|agency)") == False] +fig = plt.figure() +fig.set_size_inches(12, 5) +ax = sns.countplot(i, x="country", hue="projected" ,order=i["country"].value_counts().index) +plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") +plt.show() +```