feat(script): Switch term tables to data-driven design
Moved the term tables to the data directory as CSV files and load them in the main script from there, so that the CSV files function as the single source of truth.
This commit is contained in:
parent
92a1162dce
commit
6020d122b6
5 changed files with 59 additions and 156 deletions
21
02-data/supplementary/terms_inequality.csv
Normal file
21
02-data/supplementary/terms_inequality.csv
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
General,Vertical,Horizontal
|
||||||
|
inequality,income,identity
|
||||||
|
barrier,Palma ratio [@DFI2023],demographic
|
||||||
|
advantaged,Gini coefficient [@DFI2023],gender
|
||||||
|
disadvantaged,Log deviation,colour
|
||||||
|
discriminated,Theil,beliefs
|
||||||
|
disparity,Atkinson,racial
|
||||||
|
horizontal inequality,class [@Kalasa2021],ethnic
|
||||||
|
vertical inequality,fertility [@Kalasa2021],migrant
|
||||||
|
,bottom percentile,spatial
|
||||||
|
,top percentile,rural
|
||||||
|
,,urban
|
||||||
|
,,mega-cities
|
||||||
|
,,small cities
|
||||||
|
,,peripheral cities
|
||||||
|
,,age
|
||||||
|
,,nationality
|
||||||
|
,,ethnicity
|
||||||
|
,,health status
|
||||||
|
,,disability
|
||||||
|
,,characteristics
|
|
16
02-data/supplementary/terms_policy.csv
Normal file
16
02-data/supplementary/terms_policy.csv
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
General,Institutional,Structural,Agency
|
||||||
|
intervention,support for childcare [@Perez2022],cash benefits,credit programs [@Perez2022]
|
||||||
|
policy,labour rights,services in kind,career guidance
|
||||||
|
participation,minimum wage,green transition,vocational guidance [@Nevala2015]
|
||||||
|
targeting/targeted,collective bargaining,infrastructure,vocational counselling [@Nevala2015]
|
||||||
|
distributive,business sustainability promotion,digital infrastructure,counteracting of stereotypes
|
||||||
|
redistributive,work-life balance promotion,quality of education,commuting subsidies [@Perez2022]
|
||||||
|
,equal pay for work of equal value,public service improvement,housing mobility programs [@Perez2022]
|
||||||
|
,removal of (discriminatory) law,lowering of gender segregation,encouraging re-situation/migration [@Perez2022]
|
||||||
|
,law reformation,price stability intervention,encouraging self-advocacy [@Nevala2015]
|
||||||
|
,social dialogue,extended social protection scheme,cognitive behavioural therapy [@Lettieri2017]
|
||||||
|
,guaranteed income [@Perez2022],comprehensive social protection,computer-assisted therapy [@Lettieri2017]
|
||||||
|
,universal basic income [@Perez2022],sustainable social protection,work organization [@Nevala2015]
|
||||||
|
,provision of living wage [@Perez2022],supported employment [@Lettieri2017],special transportation [@Nevala2015]
|
||||||
|
,maternity leave [@Chang2021],"vocational rehabilitation [@Silvaggi2020, @Lettieri2017]",collective action
|
||||||
|
,,unionization,
|
|
13
02-data/supplementary/terms_wow.csv
Normal file
13
02-data/supplementary/terms_wow.csv
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
General,Forms of work,Labour market outcomes
|
||||||
|
work,own-use,employment outcomes
|
||||||
|
labour,employment,labour rights
|
||||||
|
production of goods,unpaid trainee,equality of opportunity
|
||||||
|
provision of services,volunteer,equality of outcome
|
||||||
|
own-use,other work activities,labour force participation [@Pinto2021]
|
||||||
|
use by others,wage-employed,labour force exit [@Silvaggi2020]
|
||||||
|
of working age,self-employed,job quality [@Finlay2021]
|
||||||
|
for pay,formal work,career advancement [@Finlay2021]
|
||||||
|
for profit,informal work,hours worked [@Finlay2021]
|
||||||
|
remuneration,domestic work,wage
|
||||||
|
market transactions,care work,salary
|
||||||
|
,unpaid work,return to work [@Silvaggi2020]
|
|
3
notes.md
3
notes.md
|
@ -457,6 +457,9 @@ to extraction metadata sheet.
|
||||||
|
|
||||||
## Search Term clusters
|
## Search Term clusters
|
||||||
|
|
||||||
|
These lists have been used to create data-driven term cluster files in the supplementary data directory.
|
||||||
|
The lists have been kept here for historical documentation but should not be used for up-to-date term changes; use the CSV files instead.
|
||||||
|
|
||||||
### World-of-work cluster
|
### World-of-work cluster
|
||||||
|
|
||||||
- ILO:
|
- ILO:
|
||||||
|
|
|
@ -323,52 +323,8 @@ with the search query requiring a term from the general column and one other col
|
||||||
```{python}
|
```{python}
|
||||||
#| label: tbl-wow-terms
|
#| label: tbl-wow-terms
|
||||||
#| tbl-cap: World of work term cluster
|
#| tbl-cap: World of work term cluster
|
||||||
wow_terms_cluster = {
|
terms_wow = pd.read_csv("02-data/supplementary/terms_wow.csv")
|
||||||
"General": pd.Series([
|
md(tabulate(terms_wow.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
|
||||||
"work",
|
|
||||||
"labour",
|
|
||||||
"production of goods",
|
|
||||||
"provision of services",
|
|
||||||
"own-use",
|
|
||||||
"use by others",
|
|
||||||
"of working age",
|
|
||||||
"for pay",
|
|
||||||
"for profit",
|
|
||||||
"remuneration",
|
|
||||||
"market transactions"
|
|
||||||
]),
|
|
||||||
"Forms of work": pd.Series([
|
|
||||||
"own-use",
|
|
||||||
"employment",
|
|
||||||
"unpaid trainee",
|
|
||||||
"volunteer",
|
|
||||||
"other work activities",
|
|
||||||
"wage-employed",
|
|
||||||
"self-employed",
|
|
||||||
"formal work",
|
|
||||||
"informal work",
|
|
||||||
"domestic work",
|
|
||||||
"care work",
|
|
||||||
"unpaid work",
|
|
||||||
]),
|
|
||||||
"Labour market outcomes": pd.Series([
|
|
||||||
"employment outcomes",
|
|
||||||
"labour rights",
|
|
||||||
"equality of opportunity",
|
|
||||||
"equality of outcome",
|
|
||||||
"labour force participation [@Pinto2021]",
|
|
||||||
"labour force exit [@Silvaggi2020]",
|
|
||||||
"job quality [@Finlay2021]",
|
|
||||||
"career advancement [@Finlay2021]",
|
|
||||||
"hours worked [@Finlay2021]",
|
|
||||||
"wage",
|
|
||||||
"salary",
|
|
||||||
"return to work [@Silvaggi2020]",
|
|
||||||
])
|
|
||||||
}
|
|
||||||
|
|
||||||
df = pd.DataFrame(wow_terms_cluster)
|
|
||||||
md(tabulate(df.fillna(""), headers=[wow_terms_cluster.keys()], showindex=False, tablefmt="grid"))
|
|
||||||
```
|
```
|
||||||
|
|
||||||
The world of work cluster, like the inequality and policy intervention clusters below, is made up of a general signifier (such as "work", "inequality" or "intervention") which has to be labelled in a study to form part of the sample,
|
The world of work cluster, like the inequality and policy intervention clusters below, is made up of a general signifier (such as "work", "inequality" or "intervention") which has to be labelled in a study to form part of the sample,
|
||||||
|
@ -383,69 +339,10 @@ For the database query, a single term from the general category is required to b
|
||||||
```{python}
|
```{python}
|
||||||
#| label: tbl-intervention-terms
|
#| label: tbl-intervention-terms
|
||||||
#| tbl-cap: Policy intervention term cluster
|
#| tbl-cap: Policy intervention term cluster
|
||||||
policy_terms_cluster = {
|
terms_policy = pd.read_csv("02-data/supplementary/terms_policy.csv")
|
||||||
"General" : pd.Series([
|
|
||||||
"intervention",
|
|
||||||
"policy",
|
|
||||||
"participation",
|
|
||||||
"targeting/targeted",
|
|
||||||
"distributive",
|
|
||||||
"redistributive",
|
|
||||||
]),
|
|
||||||
"Institutional" : pd.Series([
|
|
||||||
"support for childcare [@Perez2022]",
|
|
||||||
"labour rights",
|
|
||||||
"minimum wage",
|
|
||||||
"collective bargaining",
|
|
||||||
"business sustainability promotion",
|
|
||||||
"work-life balance promotion",
|
|
||||||
"equal pay for work of equal value",
|
|
||||||
"removal of (discriminatory) law",
|
|
||||||
"law reformation",
|
|
||||||
"social dialogue",
|
|
||||||
"guaranteed income [@Perez2022]",
|
|
||||||
"universal basic income [@Perez2022]",
|
|
||||||
"provision of living wage [@Perez2022]",
|
|
||||||
"maternity leave [@Chang2021]",
|
|
||||||
]),
|
|
||||||
"Structural" : pd.Series([
|
|
||||||
"cash benefits",
|
|
||||||
"services in kind",
|
|
||||||
"green transition",
|
|
||||||
"infrastructure",
|
|
||||||
"digital infrastructure",
|
|
||||||
"quality of education",
|
|
||||||
"public service improvement",
|
|
||||||
"lowering of gender segregation",
|
|
||||||
"price stability intervention",
|
|
||||||
"extended social protection scheme",
|
|
||||||
"comprehensive social protection",
|
|
||||||
"sustainable social protection",
|
|
||||||
"supported employment [@Lettieri2017]",
|
|
||||||
"vocational rehabilitation [@Silvaggi2020, @Lettieri2017]",
|
|
||||||
"unionization",
|
|
||||||
]),
|
|
||||||
"Agency" : pd.Series([
|
|
||||||
"credit programs [@Perez2022]",
|
|
||||||
"career guidance",
|
|
||||||
"vocational guidance [@Nevala2015]",
|
|
||||||
"vocational counselling [@Nevala2015]",
|
|
||||||
"counteracting of stereotypes",
|
|
||||||
"commuting subsidies [@Perez2022]",
|
|
||||||
"housing mobility programs [@Perez2022]",
|
|
||||||
"encouraging re-situation/migration [@Perez2022]",
|
|
||||||
"encouraging self-advocacy [@Nevala2015]",
|
|
||||||
"cognitive behavioural therapy [@Lettieri2017]",
|
|
||||||
"computer-assisted therapy [@Lettieri2017]",
|
|
||||||
"work organization [@Nevala2015]",
|
|
||||||
"special transportation [@Nevala2015]",
|
|
||||||
"collective action",
|
|
||||||
])
|
|
||||||
}
|
|
||||||
# different headers to include 'social norms'
|
# different headers to include 'social norms'
|
||||||
headers = ["General", "Institutional", "Structural", "Agency & social norms"]
|
headers = ["General", "Institutional", "Structural", "Agency & social norms"]
|
||||||
df = pd.DataFrame(policy_terms_cluster)
|
md(tabulate(terms_policy.fillna(""), showindex=False, headers=headers, tablefmt="grid"))
|
||||||
md(tabulate(df.fillna(""), headers=headers, showindex=False, tablefmt="grid"))
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Lastly, the inequality cluster is once again made up of a general term describing inequality which has to form part of the query results, as well as at least one term describing a specific vertical or horizontal inequality,
|
Lastly, the inequality cluster is once again made up of a general term describing inequality which has to form part of the query results, as well as at least one term describing a specific vertical or horizontal inequality,
|
||||||
|
@ -454,55 +351,8 @@ as seen in @tbl-inequality-terms.
|
||||||
```{python}
|
```{python}
|
||||||
#| label: tbl-inequality-terms
|
#| label: tbl-inequality-terms
|
||||||
#| tbl-cap: Inequality term cluster
|
#| tbl-cap: Inequality term cluster
|
||||||
inequality_terms_cluster = {
|
terms_inequality = pd.read_csv("02-data/supplementary/terms_inequality.csv")
|
||||||
"General": pd.Series([
|
md(tabulate(terms_inequality.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
|
||||||
"inequality",
|
|
||||||
"barrier",
|
|
||||||
"advantaged",
|
|
||||||
"disadvantaged",
|
|
||||||
"discriminated",
|
|
||||||
"disparity",
|
|
||||||
"horizontal inequality",
|
|
||||||
"vertical inequality",
|
|
||||||
]),
|
|
||||||
"Vertical": pd.Series([
|
|
||||||
"income",
|
|
||||||
"Palma ratio [@DFI2023]",
|
|
||||||
"Gini coefficient [@DFI2023]",
|
|
||||||
"Log deviation",
|
|
||||||
"Theil",
|
|
||||||
"Atkinson",
|
|
||||||
"class [@Kalasa2021]",
|
|
||||||
"fertility [@Kalasa2021]",
|
|
||||||
"bottom percentile",
|
|
||||||
"top percentile"
|
|
||||||
]),
|
|
||||||
"Horizontal": pd.Series([
|
|
||||||
"identity",
|
|
||||||
"demographic",
|
|
||||||
"gender",
|
|
||||||
"colour",
|
|
||||||
"beliefs",
|
|
||||||
"racial",
|
|
||||||
"ethnic",
|
|
||||||
"migrant",
|
|
||||||
"spatial",
|
|
||||||
"rural",
|
|
||||||
"urban",
|
|
||||||
"mega-cities",
|
|
||||||
"small cities",
|
|
||||||
"peripheral cities",
|
|
||||||
"age",
|
|
||||||
"nationality",
|
|
||||||
"ethnicity",
|
|
||||||
"health status",
|
|
||||||
"disability",
|
|
||||||
"characteristics",
|
|
||||||
])
|
|
||||||
}
|
|
||||||
|
|
||||||
df = pd.DataFrame(inequality_terms_cluster)
|
|
||||||
md(tabulate(df.fillna(""), headers=inequality_terms_cluster.keys(), showindex=False, tablefmt="grid"))
|
|
||||||
```
|
```
|
||||||
|
|
||||||
A general as well as category-specific term from each cluster will be required, using an intersection merge (Boolean 'AND'),
|
A general as well as category-specific term from each cluster will be required, using an intersection merge (Boolean 'AND'),
|
||||||
|
|
Loading…
Reference in a new issue