feat(notes): Add interim findings and progress
Created small up-to-date quick glance document for findings and data set.
This commit is contained in:
parent
b5e467e016
commit
9855256b00
2 changed files with 370 additions and 0 deletions
|
@ -4,6 +4,7 @@ project:
|
|||
render:
|
||||
- presentation_summary.md
|
||||
- notes.qmd
|
||||
- meeting_eoy.qmd
|
||||
- scoping_review.qmd
|
||||
|
||||
toc: true
|
||||
|
|
369
meeting_eoy.qmd
Normal file
369
meeting_eoy.qmd
Normal file
|
@ -0,0 +1,369 @@
|
|||
---
|
||||
bibliography: 02-data/supplementary/lib.bib
|
||||
csl: /home/marty/documents/library/utilities/styles/APA-7.csl
|
||||
papersize: A4
|
||||
linestretch: 1.5
|
||||
fontfamily: lmodern
|
||||
fontsize: "12"
|
||||
geometry:
|
||||
- left=2.2cm
|
||||
- right=3.5cm
|
||||
- top=2.5cm
|
||||
- bottom=2.5cm
|
||||
lang: en
|
||||
title: "Scoping Review: Preliminary findings"
|
||||
subtitle: Addressing inequalities in the World of Work
|
||||
---
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
from pathlib import Path
|
||||
import re
|
||||
## standard imports
|
||||
from IPython.core.display import Markdown as md
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
import seaborn as sns
|
||||
from tabulate import tabulate
|
||||
import bibtexparser
|
||||
|
||||
sns.set_style("whitegrid")
|
||||
|
||||
DATA_DIR=Path("./02-data")
|
||||
RAW_DATA=DATA_DIR.joinpath("raw")
|
||||
WORKING_DATA=DATA_DIR.joinpath("intermediate")
|
||||
PROCESSED_DATA=DATA_DIR.joinpath("processed")
|
||||
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
|
||||
|
||||
bib_string=""
|
||||
for partial_bib in RAW_DATA.glob("**/*.bib"):
|
||||
with open(partial_bib) as f:
|
||||
bib_string+="\n".join(f.readlines())
|
||||
bib_sample_raw_db = bibtexparser.parse_string(bib_string)
|
||||
|
||||
bib_string=""
|
||||
for partial_bib in WORKING_DATA.glob("**/*.bib"):
|
||||
with open(partial_bib) as f:
|
||||
bib_string+="\n".join(f.readlines())
|
||||
bib_sample = bibtexparser.parse_string(bib_string)
|
||||
|
||||
# load relevant studies
|
||||
from src import data
|
||||
|
||||
# load zotero-based metadata: citations and uses
|
||||
zot_df = pd.DataFrame([
|
||||
[
|
||||
entry["doi"] if "doi" in entry.fields_dict else None,
|
||||
entry["times-cited"] if "times-cited" in entry.fields_dict else None,
|
||||
entry["usage"] if "usage" in entry.fields_dict else None,
|
||||
entry["keywords"] if "keywords" in entry.fields_dict else None,
|
||||
]
|
||||
for entry in bib_sample.entries
|
||||
], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
|
||||
|
||||
# Add WB country grouping definitions (income group, world region)
|
||||
WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||
|
||||
bib_df = (data.from_yml(f"{PROCESSED_DATA}/relevant")
|
||||
.assign(
|
||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||
zot_usage=lambda _df: _df["doi"].map(zot_df["usage"]),
|
||||
zot_keywords=lambda _df: _df["doi"].map(zot_df["keywords"]),
|
||||
date = lambda _df: pd.to_datetime(_df["year"], format="%Y"),
|
||||
year = lambda _df: _df["date"].dt.year,
|
||||
region = lambda _df: _df["country"].map(df_country_groups["Region"]),
|
||||
income_group = lambda _df: _df["country"].map(df_country_groups["Income group"]),
|
||||
)
|
||||
.query("year >= 2000")
|
||||
)
|
||||
zot_df = None
|
||||
df_country_groups = None
|
||||
```
|
||||
|
||||
# The data sample
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
#| output: asis
|
||||
|
||||
FULL_RAW_SAMPLE_NOTHING_REMOVED = 2396
|
||||
nr_database_query_raw = len(bib_sample_raw_db.entries)
|
||||
nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - len(bib_sample.entries)
|
||||
nr_other_sources = (len(bib_sample.entries) + nr_out_duplicates) - nr_database_query_raw
|
||||
|
||||
all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict.keys()]
|
||||
nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + 400
|
||||
nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + 400
|
||||
nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + 300
|
||||
nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
|
||||
nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])
|
||||
|
||||
t3 = "`" * 3
|
||||
# FIXME use 02-data/supplementary undeduplciated counts to get database starting and snowballing counts
|
||||
# from: https://github.com/quarto-dev/quarto-cli/discussions/6508
|
||||
print(f"""
|
||||
```{{mermaid}}
|
||||
%%| label: fig-prisma
|
||||
%%| fig-cap: "Sample sorting process through identification and screening"
|
||||
%%| fig-width: 6
|
||||
flowchart TD;
|
||||
search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
|
||||
search_prev["Records identified through other sources (n={nr_other_sources})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
|
||||
|
||||
starting_sample -- "Duplicate removal ({nr_out_duplicates} removed) "--> dedup["Records after duplicates removed (n={len(bib_sample.entries)})"];
|
||||
|
||||
dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={len(bib_sample.entries) - nr_out_title})"];
|
||||
|
||||
title_screened -- "Abstract screening ({nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={len(bib_sample.entries)-nr_out_title-nr_out_abstract})"];
|
||||
|
||||
abstract_screened -- " Language screening ({nr_out_language} excluded) "--> language_screened["Records after language screened (n={len(bib_sample.entries)-nr_out_title-nr_out_abstract-nr_out_language})"];
|
||||
|
||||
language_screened -- " Full-text screening ({nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr_extraction_done}) STILL OUTSTANDING: {len(bib_sample.entries)-nr_out_title-nr_out_abstract-nr_out_language - nr_extraction_done}"];
|
||||
{t3}
|
||||
""")
|
||||
```
|
||||
|
||||
- strongest focus on income inequality (vertical), with many horizontal inequality studies including aspect of income inequality
|
||||
- horizontal inequalities: strongest focus on income - gender inequalities (horizontal)
|
||||
- interventions:
|
||||
- strongest research base on labour rights protection interventions
|
||||
- second on infrastructural interventions
|
||||
- third on agency-strengthening ones: training, financial access, education programmes
|
||||
|
||||
- formalization & social protection research rarely goes into inequality outcomes beyond 'income' effects; most excluded for that reason
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
#| label: fig-inequality-types-whole-sample
|
||||
#| fig-cap: Overall inequality types in sample
|
||||
|
||||
# load zotero-based metadata: citations and uses
|
||||
pi = (pd.DataFrame([
|
||||
[
|
||||
entry["doi"] if "doi" in entry.fields_dict else None,
|
||||
entry["times-cited"] if "times-cited" in entry.fields_dict else None,
|
||||
entry["usage"] if "usage" in entry.fields_dict else None,
|
||||
entry["keywords"] if "keywords" in entry.fields_dict else None,
|
||||
]
|
||||
for entry in bib_sample.entries
|
||||
], columns = ["doi", "cited", "usage", "keywords"])
|
||||
.drop_duplicates("doi")
|
||||
.assign(
|
||||
inequality=lambda _df: _df["keywords"].str.replace("\\", "").str.extract('inequality::([\w\_]+),?')
|
||||
).dropna(subset="inequality")
|
||||
.assign(
|
||||
inequality=lambda _df: _df["inequality"].str.replace("_", " "),
|
||||
projected = 1
|
||||
).reset_index()
|
||||
)
|
||||
pi
|
||||
|
||||
inequality = (pd.concat([
|
||||
bib_df.groupby(["author", "year", "title"])
|
||||
.agg(
|
||||
{
|
||||
"inequality": lambda _col: "; ".join(_col),
|
||||
}
|
||||
)
|
||||
.assign(
|
||||
projected=0
|
||||
|
||||
)
|
||||
.reset_index()
|
||||
.drop_duplicates() , pi])
|
||||
.assign( inequality=lambda _df: _df["inequality"].apply(
|
||||
lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")])
|
||||
),
|
||||
)
|
||||
.explode("inequality")
|
||||
.drop_duplicates()
|
||||
)
|
||||
|
||||
sort_order = inequality["inequality"].value_counts().index
|
||||
i = inequality[inequality["inequality"].str.contains(r"(?:structural|institutional|agency)") == False]
|
||||
fig = plt.figure()
|
||||
fig.set_size_inches(6, 3)
|
||||
ax = sns.countplot(i, x="inequality", hue="projected" ,order=i["inequality"].value_counts().index)
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
||||
rotation_mode="anchor")
|
||||
plt.show()
|
||||
```
|
||||
|
||||
# Preliminary findings
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
#| label: fig-inequality-types
|
||||
#| fig-cap: Finished and projected inequality types
|
||||
inequality = (pd.concat([
|
||||
bib_df.groupby(["author", "year", "title"])
|
||||
.agg(
|
||||
{
|
||||
"inequality": lambda _col: "; ".join(_col),
|
||||
}
|
||||
)
|
||||
.assign(
|
||||
projected=0
|
||||
|
||||
)
|
||||
.reset_index()
|
||||
.drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]])
|
||||
.assign( inequality=lambda _df: _df["inequality"].apply(
|
||||
lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")])
|
||||
),
|
||||
)
|
||||
.explode("inequality")
|
||||
.drop_duplicates()
|
||||
)
|
||||
|
||||
sort_order = inequality["inequality"].value_counts().index
|
||||
i = inequality[inequality["inequality"].str.contains(r"(?:structural|institutional|agency)") == False]
|
||||
fig = plt.figure()
|
||||
fig.set_size_inches(6, 3)
|
||||
ax = sns.countplot(i, x="inequality", hue="projected" ,order=i["inequality"].value_counts().index)
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
||||
rotation_mode="anchor")
|
||||
plt.show()
|
||||
```
|
||||
|
||||
- interventions most strongly target gender-income divide
|
||||
- most studies here recommend further scale-integration between agency/structural approaches
|
||||
- most studies also only focus on analysing a single scale however
|
||||
- interventions often have intersectional impacts even if not targeted at them
|
||||
- most visible for institutional/structural interventions and spatial inequalities
|
||||
- studies analysing intersectional inequalities near unanimously recommend intersectional targeting
|
||||
|
||||
- individual agency-based interventions (training, subsidies, maternity benefits, transfers, microcredit, etc):
|
||||
- seem most effective for targeting WoW outcomes of disability inequalities
|
||||
- seem marginally effective for targeting WoW outcomes of gender inequalities
|
||||
- require additional mediating scales for other inequalities
|
||||
- more structural interventions (education, infrastructural, ubi, trade liberalization, collective action):
|
||||
- seem most effective for spatial, income, education-generational inequalities
|
||||
- often show longer-term impacts, requiring longer periods of analyses
|
||||
- can work without additional agency-based interventions, few studies analyse both at same time
|
||||
|
||||
# Preliminary limitations
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
#| label: fig-intervention-types
|
||||
#| fig-cap: Finished and projected intervention types
|
||||
|
||||
# load zotero-based metadata: citations and uses
|
||||
pi = (pd.DataFrame([
|
||||
[
|
||||
entry["doi"] if "doi" in entry.fields_dict else None,
|
||||
entry["times-cited"] if "times-cited" in entry.fields_dict else None,
|
||||
entry["usage"] if "usage" in entry.fields_dict else None,
|
||||
entry["keywords"] if "keywords" in entry.fields_dict else None,
|
||||
]
|
||||
for entry in bib_sample.entries
|
||||
], columns = ["doi", "cited", "usage", "keywords"])
|
||||
.drop_duplicates("doi")
|
||||
.assign(
|
||||
intervention=lambda _df: _df["keywords"].str.replace("\\", "").str.extract('type::([\w\_]+),?')
|
||||
).dropna(subset="intervention")
|
||||
.assign(
|
||||
intervention=lambda _df: _df["intervention"].str.replace("_", " "),
|
||||
projected = 1
|
||||
).reset_index()
|
||||
)
|
||||
pi
|
||||
|
||||
by_intervention = (pd.concat([
|
||||
bib_df.groupby(["author", "year", "title"])
|
||||
.agg(
|
||||
{
|
||||
"intervention": lambda _col: "; ".join(_col),
|
||||
}
|
||||
)
|
||||
.assign(
|
||||
projected=0
|
||||
|
||||
)
|
||||
.reset_index()
|
||||
.drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]])
|
||||
.assign( intervention=lambda _df: _df["intervention"].apply(
|
||||
lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")])
|
||||
),
|
||||
)
|
||||
.explode("intervention")
|
||||
.drop_duplicates()
|
||||
)
|
||||
|
||||
sort_order = by_intervention["intervention"].value_counts().index
|
||||
i = by_intervention[by_intervention["intervention"].str.contains(r"(?:structural|institutional|agency)") == False]
|
||||
fig = plt.figure()
|
||||
fig.set_size_inches(6, 3)
|
||||
ax = sns.countplot(i, x="intervention", hue="projected" ,order=i["intervention"].value_counts().index)
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
||||
rotation_mode="anchor")
|
||||
plt.show()
|
||||
```
|
||||
|
||||
- stronger institutional-structural research focus in developed countries, with more structural-agency based in developing countries
|
||||
- employment creation as a category is often subsumed in other structural/institutional analyses
|
||||
- little evidence-based research on effect of interventions targeting education on world of work outcomes
|
||||
- spatial inequality has the most geographically even spread in its evidence base
|
||||
- empirical base on interventions targeting disability inequalities is strongly restricted to developed countries, especially the United States
|
||||
|
||||
```{python}
|
||||
#| echo: false
|
||||
#| label: fig-countries
|
||||
#| fig-cap: Country spread
|
||||
#| column: screen
|
||||
|
||||
# load zotero-based metadata: citations and uses
|
||||
pi = (pd.DataFrame([
|
||||
[
|
||||
entry["doi"] if "doi" in entry.fields_dict else None,
|
||||
entry["times-cited"] if "times-cited" in entry.fields_dict else None,
|
||||
entry["usage"] if "usage" in entry.fields_dict else None,
|
||||
entry["keywords"] if "keywords" in entry.fields_dict else None,
|
||||
]
|
||||
for entry in bib_sample.entries
|
||||
], columns = ["doi", "cited", "usage", "keywords"])
|
||||
.drop_duplicates("doi")
|
||||
.assign(
|
||||
country=lambda _df: _df["keywords"].str.replace("\\", "").str.extract('country::([\w\_]+),?')
|
||||
).dropna(subset="country")
|
||||
.assign(
|
||||
country=lambda _df: _df["country"].str.replace("_", " ").str.replace("US", "United States").str.replace("Britain", "United Kingdom"),
|
||||
projected = 1
|
||||
).reset_index()
|
||||
)
|
||||
pi
|
||||
|
||||
by_country = (pd.concat([
|
||||
bib_df.groupby(["author", "year", "title"])
|
||||
.agg(
|
||||
{
|
||||
"country": lambda _col: "; ".join(_col),
|
||||
}
|
||||
)
|
||||
.assign(
|
||||
projected=0
|
||||
|
||||
)
|
||||
.reset_index()
|
||||
.drop_duplicates() , pi[pi["keywords"].str.contains("relevant") == True]])
|
||||
.assign( country=lambda _df: _df["country"].apply(
|
||||
lambda _cell: set([x.strip() for x in re.sub(r"\(.*\)", "", _cell).split(";")])
|
||||
),
|
||||
)
|
||||
.explode("country")
|
||||
.drop_duplicates()
|
||||
)
|
||||
|
||||
sort_order = by_country["country"].value_counts().index
|
||||
i = by_country[by_country["country"].str.contains(r"(?:structural|institutional|agency)") == False]
|
||||
fig = plt.figure()
|
||||
fig.set_size_inches(12, 5)
|
||||
ax = sns.countplot(i, x="country", hue="projected" ,order=i["country"].value_counts().index)
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
|
||||
rotation_mode="anchor")
|
||||
plt.show()
|
||||
```
|
Loading…
Reference in a new issue