# wow-inequalities/01-codechunks/_prisma-flowchart.py

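# Assumed inputs, defined in an earlier chunk of the Quarto document:
# `bib_sample_raw_db` (raw database query results) and `bib_sample` (the tagged
# working sample). Both are assumed to be bibliography objects exposing
# `.entries`, with each entry carrying a `fields_dict` mapping, as parsed with
# bibtexparser v2, for example:
#
#   import bibtexparser
#   bib_sample_raw_db = bibtexparser.parse_file("02-data/raw/database_query.bib")  # hypothetical path
#   bib_sample = bibtexparser.parse_file("02-data/processed/sample.bib")  # hypothetical path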
# Raw record counts before de-duplication; the snowballing count is currently
# hard-coded (see the FIXME below).
nr_database_query_raw = len(bib_sample_raw_db.entries)
nr_snowballing_raw = 2240
all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict]

def count_tag(tag):
    """Count sample entries whose keyword string contains the given tag."""
    return sum(1 for kw in all_keywords if tag in kw)

# Post-deduplication counts, recovered from the keyword tags set during screening.
nr_database_deduplicated = count_tag("sample::database")
nr_snowballing_deduplicated = count_tag("sample::snowballing")
nr_out_superseded = count_tag("out::superseded")
FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
# Hard-coded exclusion counts not captured via Zotero keyword tags, added manually below.
NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348
# Exclusions per screening stage: keyword-tag counts plus the manual counts above.
nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
nr_out_title = count_tag("out::title") + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
nr_out_abstract = count_tag("out::abstract") + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
nr_out_fulltext = count_tag("out::full-text") + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
nr_out_language = count_tag("out::language")
nr_extraction_done = count_tag("done::extracted")
t3 = "`" * 3  # closing fence for the generated mermaid block below
# FIXME use 02-data/supplementary un-deduplicated counts to get database starting and snowballing counts
# Approach for emitting a mermaid chunk from executed code, via:
# https://github.com/quarto-dev/quarto-cli/discussions/6508
print(f"""
```{{mermaid}}
%%| label: fig-prisma
%%| fig-cap: "Sample sorting process through identification and screening"
%%| fig-width: 6
flowchart TD;
search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
search_prev["Records identified through other sources (n={nr_snowballing_raw})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
starting_sample -- "Duplicate removal ({nr_out_duplicates + nr_out_superseded} removed)" --> dedup["Records after duplicates removed (n={FULL_SAMPLE_DUPLICATES_REMOVED})"];
dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title})"];
title_screened -- "Abstract screening ({nr_out_abstract} excluded)" --> abstract_screened["Records after abstracts screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title - nr_out_abstract})"];
abstract_screened -- "Language screening ({nr_out_language} excluded)" --> language_screened["Records after language screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title - nr_out_abstract - nr_out_language})"];
language_screened -- "Full-text screening ({nr_out_fulltext} excluded)" --> fulltext_screened["Full-text articles assessed for eligibility (n={nr_extraction_done})"];
{t3}
""")