feat(code): Add prisma calculation
This commit is contained in:
parent
0d05ed981a
commit
38254d1605
7 changed files with 99 additions and 62 deletions
|
@ -1,45 +0,0 @@
|
||||||
# Quarto code chunk: derives the PRISMA stage counts and prints the Mermaid
# flowchart as raw markdown.  Relies on `bib_sample_raw_db` and `bib_sample`
# having been defined by the including document before this chunk runs.

# Record counts before any de-duplication or screening.
nr_database_query_raw = len(bib_sample_raw_db.entries)
nr_snowballing_raw = 2240

# Keyword field of every working-sample entry that carries one; all tag
# counts below are substring matches against these values.
all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict]
nr_database_deduplicated = sum("sample::database" in kw for kw in all_keywords)
nr_snowballing_deduplicated = sum("sample::snowballing" in kw for kw in all_keywords)
nr_out_superseded = sum("out::superseded" in kw for kw in all_keywords)

FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
# NOTE: this total still contains the superseded records.
FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded

# Hand-maintained offsets added on top of the keyword-derived exclusion counts.
NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348

nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
nr_out_title = sum("out::title" in kw for kw in all_keywords) + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
nr_out_abstract = sum("out::abstract" in kw for kw in all_keywords) + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
nr_out_fulltext = sum("out::full-text" in kw for kw in all_keywords) + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
nr_out_language = sum("out::language" in kw for kw in all_keywords)
nr_extraction_done = sum("done::extracted" in kw for kw in all_keywords)

# Running totals for the flowchart boxes.  Each box is the previous box minus
# exactly the number shown on the arrow leading into it, so the chart always
# adds up.  (Previously the de-duplication arrow reported duplicates plus
# superseded records as removed while the following boxes still counted the
# superseded ones.)
nr_removed_dedup = nr_out_duplicates + nr_out_superseded
nr_after_dedup = FULL_RAW_SAMPLE_NOTHING_REMOVED - nr_removed_dedup
nr_after_title = nr_after_dedup - nr_out_title
nr_after_abstract = nr_after_title - nr_out_abstract
nr_after_language = nr_after_abstract - nr_out_language

# The closing code fence is interpolated so that no literal fence line appears
# at the end of the printed block inside this chunk.
t3 = "`" * 3
# FIXME use 02-data/supplementary undeduplicated counts to get database starting and snowballing counts
# from: https://github.com/quarto-dev/quarto-cli/discussions/6508
print(f"""
```{{mermaid}}
%%| label: fig-prisma
%%| fig-cap: "Sample sorting process through identification and screening"
%%| fig-width: 6
flowchart TD;
search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
search_prev["Records identified through other sources (n={nr_snowballing_raw})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];

starting_sample -- "Duplicate removal ({nr_removed_dedup} removed) "--> dedup["Records after duplicates removed (n={nr_after_dedup})"];

dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={nr_after_title})"];

title_screened -- "Abstract screening ({nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={nr_after_abstract})"];

abstract_screened -- " Language screening ({nr_out_language} excluded) "--> language_screened["Records after language screened (n={nr_after_language})"];

language_screened -- " Full-text screening ({nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr_extraction_done})"];
{t3}
""")
|
|
15
02-data/processed/prisma.mmd
Normal file
15
02-data/processed/prisma.mmd
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
|
||||||
|
flowchart TD;
|
||||||
|
search_db["Records identified through database searching (n=1749)"] --> starting_sample;
|
||||||
|
search_prev["Records identified through other sources (n=2240)"] --> starting_sample["Starting sample (n=3989)"];
|
||||||
|
|
||||||
|
starting_sample -- "Duplicate removal (267 removed) "--> dedup["Records after duplicates removed (n=3723)"];
|
||||||
|
|
||||||
|
dedup -- "Title screening (1779 excluded)" --> title_screened["Records after titles screened (n=1944)"];
|
||||||
|
|
||||||
|
title_screened -- "Abstract screening (1506 excluded)"--> abstract_screened["Records after abstracts screened (n=438)"];
|
||||||
|
|
||||||
|
abstract_screened -- " Language screening (2 excluded) "--> language_screened["Records after language screened (n=436)"];
|
||||||
|
|
||||||
|
language_screened -- " Full-text screening (383 excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n=52)"];
|
||||||
|
|
3
Makefile
3
Makefile
|
@ -8,6 +8,9 @@ render: scoping_review.qmd
|
||||||
|
|
||||||
extract: scoping_review.qmd
|
extract: scoping_review.qmd
|
||||||
poetry extract
|
poetry extract
|
||||||
|
poetry prisma
|
||||||
|
|
||||||
release: scoping_review.qmd
|
release: scoping_review.qmd
|
||||||
|
poetry extract
|
||||||
|
poetry prisma
|
||||||
poetry milestone
|
poetry milestone
|
||||||
|
|
19
article.qmd
19
article.qmd
|
@ -136,20 +136,23 @@ they will in turn be crawled for cited sources in a 'snowballing' process.
|
||||||
The sources will be added to the sample to undergo the same screening process explained above,
|
The sources will be added to the sample to undergo the same screening process explained above,
|
||||||
ultimately resulting in the process represented in the PRISMA chart in @fig-prisma.
|
ultimately resulting in the process represented in the PRISMA chart in @fig-prisma.
|
||||||
|
|
||||||
```{python}
|
```{mermaid}
|
||||||
#| label: calculate-scoping-flowchart
|
%%| label: fig-prisma
|
||||||
#| echo: false
|
%%| fig-cap: PRISMA flowchart for scoping process
|
||||||
#| output: asis
|
%%| file: 02-data/processed/prisma.mmd
|
||||||
{{< include 01-codechunks/_prisma-flowchart.py >}}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
|
All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
|
||||||
The results to be identified in the matrix include a study's: i) key outcome measures (dependent variables), ii) main findings, iii) main policy interventions (independent variables), iv) study design and sample size, v) dataset and methods of evaluation, vi) direction of relation and level of representativeness, vii) level of statistical significance, viii) main limitations.
|
The results to be identified in the matrix include a study's: i) key outcome measures (dependent variables), ii) main findings, iii) main policy interventions (independent variables), iv) study design and sample size, v) dataset and methods of evaluation, vi) direction of relation and level of representativeness, vii) level of statistical significance, viii) main limitations.
|
||||||
|
|
||||||
The query execution results in an initial sample of `{python} nr_database_query_raw` potential studies identified from the database search as well as `{python} nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} FULL_RAW_SAMPLE_NOTHING_REMOVED`.
|
```{python}
|
||||||
|
from src.model import prisma
|
||||||
|
nr = prisma.PrismaNumbers()
|
||||||
|
```
|
||||||
|
|
||||||
|
The query execution results in an initial sample of `{python} nr.nr_database_query_raw` potential studies identified from the database search as well as `{python} nr.nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} nr.FULL_RAW_SAMPLE_NOTHING_REMOVED`.
|
||||||
This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
|
This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
|
||||||
Of these, `{python} FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract-nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
|
Of these, `{python} nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract-nr.nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
from which in turn `{python} nr.nr_extraction_done` have ultimately been extracted.
|
||||||
from which in turn `{python} nr_extraction_done` have ultimately been extracted.
|
|
||||||
|
|
||||||
@fig-intervention-types shows the predominant interventions contained in the reviewed literature.
|
@fig-intervention-types shows the predominant interventions contained in the reviewed literature.
|
||||||
Overall, there is a focus on measures of minimum wage, subsidisation, considerations of trade liberalisation and collective bargaining, education and training.
|
Overall, there is a focus on measures of minimum wage, subsidisation, considerations of trade liberalisation and collective bargaining, education and training.
|
||||||
|
|
|
@ -45,11 +45,15 @@ help = "Extract the csv data from raw yaml files"
|
||||||
shell = """
|
shell = """
|
||||||
python src/extract/raw_to_extracted_csv.py > 02-data/processed/extracted.csv
|
python src/extract/raw_to_extracted_csv.py > 02-data/processed/extracted.csv
|
||||||
"""
|
"""
|
||||||
|
[tool.poe.tasks.prisma]
|
||||||
|
help = "Update PRISMA flowchart numbers"
|
||||||
|
shell = """
|
||||||
|
python src/model/prisma.py > 02-data/processed/prisma.mmd
|
||||||
|
"""
|
||||||
[tool.poe.tasks.milestone]
|
[tool.poe.tasks.milestone]
|
||||||
help = "Extract, render, commit and version a finished artifact"
|
help = "Extract, render, commit and version a finished artifact"
|
||||||
shell = """
|
shell = """
|
||||||
quarto render --output-dir 05-final_paper
|
quarto render --output-dir 05-final_paper
|
||||||
poe extract
|
|
||||||
VERSION="$(poetry version -s minor)"
|
VERSION="$(poetry version -s minor)"
|
||||||
git add pyproject.toml 02-data 05-final_paper
|
git add pyproject.toml 02-data 05-final_paper
|
||||||
git commit -m "Publish version $VERSION" --no-gpg-sign
|
git commit -m "Publish version $VERSION" --no-gpg-sign
|
||||||
|
|
|
@ -363,12 +363,12 @@ Last, for extraction, studies are screened for their full-texts, irrelevant stud
|
||||||
Should any literature reviews be identified as relevant during this screening process,
|
Should any literature reviews be identified as relevant during this screening process,
|
||||||
they will in turn be crawled for cited sources in a 'snowballing' process,
|
they will in turn be crawled for cited sources in a 'snowballing' process,
|
||||||
and the sources will be added to the sample to undergo the same screening process explained above.
|
and the sources will be added to the sample to undergo the same screening process explained above.
|
||||||
|
The resulting process can be seen in @fig-prisma.
|
||||||
|
|
||||||
```{python}
|
```{mermaid}
|
||||||
#| label: calculate-scoping-flowchart
|
%%| label: fig-prisma
|
||||||
#| echo: false
|
%%| fig-cap: PRISMA flowchart for scoping process
|
||||||
#| output: asis
|
%%| file: 02-data/processed/prisma.mmd
|
||||||
{{< include 01-codechunks/_prisma-flowchart.py >}}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
|
All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
|
||||||
|
@ -386,10 +386,15 @@ For a full list of validity ranks, see @apptbl-validity-external and @apptbl-val
|
||||||
|
|
||||||
## Data
|
## Data
|
||||||
|
|
||||||
The query execution results in an initial sample of `{python} nr_database_query_raw` potential studies identified from the database search as well as `{python} nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} FULL_RAW_SAMPLE_NOTHING_REMOVED`.
|
```{python}
|
||||||
|
from src.model import prisma
|
||||||
|
nr = prisma.PrismaNumbers()
|
||||||
|
```
|
||||||
|
|
||||||
|
The query execution results in an initial sample of `{python} nr.nr_database_query_raw` potential studies identified from the database search as well as `{python} nr.nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} nr.FULL_RAW_SAMPLE_NOTHING_REMOVED`.
|
||||||
This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
|
This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
|
||||||
Of these, `{python} FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract-nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
|
Of these, `{python} nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract-nr.nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
|
||||||
from which in turn `{python} nr_extraction_done` have ultimately been extracted.
|
from which in turn `{python} nr.nr_extraction_done` have ultimately been extracted.
|
||||||
|
|
||||||
The currently identified literature rises somewhat in volume over time,
|
The currently identified literature rises somewhat in volume over time,
|
||||||
with first larger outputs identified from 2014,
|
with first larger outputs identified from 2014,
|
||||||
|
|
52
src/model/prisma.py
Normal file
52
src/model/prisma.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
from src.process import add_metadata as meta
|
||||||
|
from src import globals as g
|
||||||
|
|
||||||
|
# Library built from the g.RAW_DATA directory; its entry count is reported as
# the raw database-query total.
bib_sample_raw_db = meta.bib_library_from_dir(g.RAW_DATA)
|
||||||
|
# Library built from the g.WORKING_DATA directory; the keyword fields of its
# entries drive every tag-based count.
bib_sample = meta.bib_library_from_dir(g.WORKING_DATA)
|
||||||
|
|
||||||
|
class PrismaNumbers:
    """Record counts for the individual stages of the PRISMA flowchart.

    Every value is a class attribute, evaluated once while the class body
    executes, using the module-level ``bib_sample_raw_db`` and ``bib_sample``
    libraries.  Instantiating the class only provides a convenient handle on
    those attributes.
    """

    # Hand-maintained figures that are not derived from the libraries.
    nr_snowballing_raw = 2240
    NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
    NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
    NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348

    nr_database_query_raw = len(bib_sample_raw_db.entries)

    # Keyword field of each working-sample entry that has one; the tag counts
    # below are substring matches against these values.
    all_keywords = [
        entry["keywords"]
        for entry in bib_sample.entries
        if "keywords" in entry.fields_dict
    ]

    # Sample-origin and superseded tags.
    nr_database_deduplicated = sum("sample::database" in kw for kw in all_keywords)
    nr_snowballing_deduplicated = sum("sample::snowballing" in kw for kw in all_keywords)
    nr_out_superseded = sum("out::superseded" in kw for kw in all_keywords)

    # Sample totals; the de-duplicated total includes the superseded entries.
    FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
    FULL_SAMPLE_DUPLICATES_REMOVED = (
        nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
    )

    # Exclusions per stage: tag-derived counts, plus the hand-maintained
    # offsets for title, abstract and full-text.
    nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
    nr_out_title = (
        sum("out::title" in kw for kw in all_keywords)
        + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
    )
    nr_out_abstract = (
        sum("out::abstract" in kw for kw in all_keywords)
        + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
    )
    nr_out_fulltext = (
        sum("out::full-text" in kw for kw in all_keywords)
        + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
    )
    nr_out_language = sum("out::language" in kw for kw in all_keywords)
    nr_extraction_done = sum("done::extracted" in kw for kw in all_keywords)
|
||||||
|
|
||||||
|
|
||||||
|
# The counts are already stored on PrismaNumbers, so drop the module-level
# references to the two libraries.
del bib_sample, bib_sample_raw_db
|
||||||
|
|
||||||
|
def render_flowchart(nr):
    """Return the Mermaid source of the PRISMA flowchart for the counts in *nr*.

    *nr* is any object exposing the count attributes of ``PrismaNumbers``.

    Each box is derived from the previous box minus exactly the number shown
    on the arrow leading into it, so the chart is arithmetically consistent.
    Earlier the de-duplication arrow reported duplicates plus superseded
    records as removed, while the following boxes were computed from
    ``FULL_SAMPLE_DUPLICATES_REMOVED``, which still includes the superseded
    records; every later box was therefore too large by that amount.

    The final box is not derived from the chain; it shows
    ``nr.nr_extraction_done`` directly.
    """
    removed_dedup = nr.nr_out_duplicates + nr.nr_out_superseded
    after_dedup = nr.FULL_RAW_SAMPLE_NOTHING_REMOVED - removed_dedup
    after_title = after_dedup - nr.nr_out_title
    after_abstract = after_title - nr.nr_out_abstract
    after_language = after_abstract - nr.nr_out_language

    # Leading and trailing empty items reproduce the blank first line and the
    # final newline of the emitted text.
    lines = [
        "",
        "flowchart TD;",
        f'search_db["Records identified through database searching (n={nr.nr_database_query_raw})"] --> starting_sample;',
        f'search_prev["Records identified through other sources (n={nr.nr_snowballing_raw})"] --> starting_sample["Starting sample (n={nr.FULL_RAW_SAMPLE_NOTHING_REMOVED})"];',
        "",
        f'starting_sample -- "Duplicate removal ({removed_dedup} removed) "--> dedup["Records after duplicates removed (n={after_dedup})"];',
        "",
        f'dedup -- "Title screening ({nr.nr_out_title} excluded)" --> title_screened["Records after titles screened (n={after_title})"];',
        "",
        f'title_screened -- "Abstract screening ({nr.nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={after_abstract})"];',
        "",
        f'abstract_screened -- " Language screening ({nr.nr_out_language} excluded) "--> language_screened["Records after language screened (n={after_language})"];',
        "",
        f'language_screened -- " Full-text screening ({nr.nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr.nr_extraction_done})"];',
        "",
    ]
    return "\n".join(lines)


if __name__ == "__main__":
    # FIXME use 02-data/supplementary undeduplicated counts to get database starting and snowballing counts
    # Written to stdout so the caller can redirect it into the .mmd file.
    print(render_flowchart(PrismaNumbers()))
|
Loading…
Reference in a new issue