From 0d723dbfdf60fe8f0c48fd7146e754fcd80a67ea Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Tue, 16 Jul 2024 17:47:20 +0200
Subject: [PATCH] chore(code): Rename prisma calculation variables

Renamed intermediate calculation vars from long and redundant names to
slightly shorter and more coherent versions.
---
 manuscript/article.qmd        | 17 +++++++--
 manuscript/scoping_review.qmd | 18 +++++++---
 src/model/prisma.py           | 65 ++++++++++++++++++++++-------------
 3 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/manuscript/article.qmd b/manuscript/article.qmd
index 6f91f86..abaeedc 100644
--- a/manuscript/article.qmd
+++ b/manuscript/article.qmd
@@ -160,12 +160,23 @@ The results to be identified in the matrix include a study's: i) key outcome mea
 ```{python}
 from src.model import prisma
 
-nr = prisma.PrismaNumbers()
+p = prisma.PrismaNumbers()
 ```
 
-The query execution results in an initial sample of `{python} nr.nr_database_query_raw` potential studies identified from the database search as well as `{python} nr.nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} nr.FULL_RAW_SAMPLE_NOTHING_REMOVED`.
+The query execution results in an initial sample of
+`{python} p.raw_db`
+potential studies identified from the database search as well as
+`{python} p.raw_snowball`
+potential studies from other sources,
+leading to a total initial number of
+`{python} p.raw_full`.
 This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
 
-Of these, `{python} nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract-nr.nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
+Of these,
+`{python} p.dedup_full - p.out_title - p.out_abstract - p.out_language`
+have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
+from which in turn
+`{python} p.final_extracted`
+have ultimately been extracted.
 
 @fig-intervention-types shows the predominant interventions contained in the reviewed literature.
 Overall, there is a focus on measures of minimum wage, subsidisation, considerations of trade liberalisation and collective bargaining, education and training.
diff --git a/manuscript/scoping_review.qmd b/manuscript/scoping_review.qmd
index 08480b2..06d174f 100644
--- a/manuscript/scoping_review.qmd
+++ b/manuscript/scoping_review.qmd
@@ -403,13 +403,23 @@ For a full list of validity ranks, see @apptbl-validity-external and @apptbl-val
 ```{python}
 from src.model import prisma
 
-nr = prisma.PrismaNumbers()
+p = prisma.PrismaNumbers()
 ```
 
-The query execution results in an initial sample of `{python} nr.nr_database_query_raw` potential studies identified from the database search as well as `{python} nr.nr_snowballing_raw` potential studies from other sources, leading to a total initial number of `{python} nr.FULL_RAW_SAMPLE_NOTHING_REMOVED`.
+The query execution results in an initial sample of
+`{python} p.raw_db`
+potential studies identified from the database search as well as
+`{python} p.raw_snowball`
+potential studies from other sources,
+leading to a total initial number of
+`{python} p.raw_full`.
 This accounts for all identified studies without duplicate removal, without controlling for literature that has been superseded or applying any other screening criteria.
-Of these, `{python} nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract-nr.nr_out_language` have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
-from which in turn `{python} nr.nr_extraction_done` have ultimately been extracted.
+Of these,
+`{python} p.dedup_full - p.out_title - p.out_abstract - p.out_language`
+have been identified as potentially relevant studies for the purposes of this scoping review and selected for a full text review,
+from which in turn
+`{python} p.final_extracted`
+have ultimately been extracted.
 
 The currently identified literature rises somewhat in volume over time,
 with first larger outputs identified from 2014,
diff --git a/src/model/prisma.py b/src/model/prisma.py
index 01d35f2..fe29532 100644
--- a/src/model/prisma.py
+++ b/src/model/prisma.py
@@ -1,48 +1,67 @@
-from src.process.generate_dataframes import bib_sample_raw_db, bib_sample
+from src.process.generate_dataframes import bib_sample, bib_sample_raw_db
+
 
 class PrismaNumbers:
-    nr_database_query_raw = len(bib_sample_raw_db.entries)
-    nr_snowballing_raw = 2240
+    raw_db = len(bib_sample_raw_db.entries)
+    raw_snowball = 2240
 
-    all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict.keys()]
-    nr_database_deduplicated = len([1 for kw in all_keywords if "sample::database" in kw])
-    nr_snowballing_deduplicated = len([1 for kw in all_keywords if "sample::snowballing" in kw])
-    nr_out_superseded = len([1 for kw in all_keywords if "out::superseded" in kw])
+    # list of all keywords (semicolon-delimited string) for each entry in sample
+    all_kw = [
+        entry["keywords"]
+        for entry in bib_sample.entries
+        if "keywords" in entry.fields_dict.keys()
+    ]
 
-    FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
-    FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
+    # calculate deduplicated and superseded amounts
+    dedup_db = len([1 for kw in all_kw if "sample::database" in kw])
+    dedup_snowball = len([1 for kw in all_kw if "sample::snowballing" in kw])
+    out_superseded = len([1 for kw in all_kw if "out::superseded" in kw])
 
+    raw_full = raw_db + raw_snowball
+    dedup_full = dedup_db + dedup_snowball + out_superseded
+
+    # additional non-captured numbers
     NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
     NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
     NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348
 
-    nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
-    nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
-    nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
-    nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
-    nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
-    nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])
+    out_duplicates = raw_full - dedup_full
+    out_title = (
+        len([1 for kw in all_kw if "out::title" in kw])
+        + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
+    )
+    out_abstract = (
+        len([1 for kw in all_kw if "out::abstract" in kw])
+        + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
+    )
+    out_fulltext = (
+        len([1 for kw in all_kw if "out::full-text" in kw])
+        + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
+    )
+    out_language = len([1 for kw in all_kw if "out::language" in kw])
+
+    final_extracted = len([1 for kw in all_kw if "done::extracted" in kw])
 
 del bib_sample, bib_sample_raw_db
 
 
 if __name__ == "__main__":
-    nr = PrismaNumbers()
+    prisma = PrismaNumbers()
     # FIXME use data/supplementary undeduplicated counts to get database starting and snowballing counts
     outp = f"""
     flowchart TD;
 
-    search_db["Records identified through database searching (n={nr.nr_database_query_raw})"] --> starting_sample;
-    search_prev["Records identified through other sources (n={nr.nr_snowballing_raw})"] --> starting_sample["Starting sample (n={nr.FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
+    search_db["Records identified through database searching (n={prisma.raw_db})"] --> starting_sample;
+    search_prev["Records identified through other sources (n={prisma.raw_snowball})"] --> starting_sample["Starting sample (n={prisma.raw_full})"];
 
-    starting_sample -- "Duplicate removal ({nr.nr_out_duplicates+nr.nr_out_superseded} removed) "--> dedup["Records after duplicates removed (n={nr.FULL_SAMPLE_DUPLICATES_REMOVED})"];
+    starting_sample -- "Duplicate removal ({prisma.out_duplicates+prisma.out_superseded} removed) "--> dedup["Records after duplicates removed (n={prisma.dedup_full})"];
 
-    dedup -- "Title screening ({nr.nr_out_title} excluded)" --> title_screened["Records after titles screened (n={nr.FULL_SAMPLE_DUPLICATES_REMOVED - nr.nr_out_title})"];
+    dedup -- "Title screening ({prisma.out_title} excluded)" --> title_screened["Records after titles screened (n={prisma.dedup_full - prisma.out_title})"];
 
-    title_screened -- "Abstract screening ({nr.nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract})"];
+    title_screened -- "Abstract screening ({prisma.out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={prisma.dedup_full-prisma.out_title-prisma.out_abstract})"];
 
-    abstract_screened -- " Language screening ({nr.nr_out_language} excluded) "--> language_screened["Records after language screened (n={nr.FULL_SAMPLE_DUPLICATES_REMOVED-nr.nr_out_title-nr.nr_out_abstract-nr.nr_out_language})"];
+    abstract_screened -- " Language screening ({prisma.out_language} excluded) "--> language_screened["Records after language screened (n={prisma.dedup_full-prisma.out_title-prisma.out_abstract-prisma.out_language})"];
 
-    language_screened -- " Full-text screening ({nr.nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr.nr_extraction_done})"];
+    language_screened -- " Full-text screening ({prisma.out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={prisma.final_extracted})"];
     """
     print(outp)
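
As a quick way to check that the rename is purely cosmetic, the sketch below repeats the same keyword-counting pattern against a hand-made keyword list instead of the real `bib_sample`. The keyword strings and resulting numbers are illustrative assumptions only, and the `NON_ZOTERO_CAPTURE_*` offsets are left out for brevity; it is not part of the patch itself.

```python
# Minimal sanity-check sketch: mirrors the renamed PrismaNumbers arithmetic
# with a stubbed, purely illustrative keyword list (semicolon-delimited
# strings, as in the Zotero export the real code reads).
all_kw = [
    "sample::database;done::extracted",
    "sample::database;out::title",
    "sample::snowballing;out::abstract",
    "sample::database;out::language",
    "out::superseded",
]

# same substring-counting pattern as src/model/prisma.py
dedup_db = len([1 for kw in all_kw if "sample::database" in kw])           # 3
dedup_snowball = len([1 for kw in all_kw if "sample::snowballing" in kw])  # 1
out_superseded = len([1 for kw in all_kw if "out::superseded" in kw])      # 1
dedup_full = dedup_db + dedup_snowball + out_superseded                    # 5

out_title = len([1 for kw in all_kw if "out::title" in kw])                # 1
out_abstract = len([1 for kw in all_kw if "out::abstract" in kw])          # 1
out_language = len([1 for kw in all_kw if "out::language" in kw])          # 1
final_extracted = len([1 for kw in all_kw if "done::extracted" in kw])     # 1

# matches the inline expression now used in both .qmd files
print(dedup_full - out_title - out_abstract - out_language)  # 2
print(final_extracted)                                       # 1
```

Running it prints 2 and 1, mirroring the `dedup_full - out_title - out_abstract - out_language` and `final_extracted` values the manuscripts report inline.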