feat(script): Move big code chunks out of script
parent 76578e99d3
commit ed6c8550b6
4 changed files with 88 additions and 173 deletions

01-codechunks/_prep-data.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+from pathlib import Path
+import re
+## standard imports
+from IPython.core.display import Markdown as md
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+import seaborn as sns
+from tabulate import tabulate
+import bibtexparser
+
+sns.set_style("whitegrid")
+
+DATA_DIR=Path("./02-data")
+RAW_DATA=DATA_DIR.joinpath("raw")
+WORKING_DATA=DATA_DIR.joinpath("intermediate")
+PROCESSED_DATA=DATA_DIR.joinpath("processed")
+SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
+
+from src import prep_data
+
+# raw database-search results
+bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
+# the complete library of sampled (and working) literature
+bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
+
+# load relevant studies
+from src import load_data
+
+bib_df = prep_data.observations_with_metadata_df(
+    raw_observations = load_data.from_yml(PROCESSED_DATA),
+    study_metadata = prep_data.bib_metadata_df(bib_sample),
+    country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
+)
+raw_observations = None
+zot_df = None
+df_country_groups = None
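
This extracted chunk is not imported; the article below pulls it back in with Quarto's include shortcode inside the existing setup cell. A minimal sketch of the consuming cell (chunk options mirror the article.qmd hunk further down):

```{python}
#| label: load-data
#| echo: false
#| output: false
{{< include 01-codechunks/_prep-data.py >}}
```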

01-codechunks/_prisma-flowchart.py (new file, 45 lines)
@@ -0,0 +1,45 @@
+nr_database_query_raw = len(bib_sample_raw_db.entries)
+nr_snowballing_raw = 2240
+
+all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict.keys()]
+nr_database_deduplicated = len([1 for kw in all_keywords if "sample::database" in kw])
+nr_snowballing_deduplicated = len([1 for kw in all_keywords if "sample::snowballing" in kw])
+nr_out_superseded = len([1 for kw in all_keywords if "out::superseded" in kw])
+
+FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
+FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
+
+NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
+NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
+NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348
+
+nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
+nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
+nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
+nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
+nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
+nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])
+
+t3 = "`" * 3
+# FIXME use 02-data/supplementary undeduplciated counts to get database starting and snowballing counts
+# from: https://github.com/quarto-dev/quarto-cli/discussions/6508
+print(f"""
+```{{mermaid}}
+%%| label: fig-prisma
+%%| fig-cap: "Sample sorting process through identification and screening"
+%%| fig-width: 6
+flowchart TD;
+search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
+search_prev["Records identified through other sources (n={nr_snowballing_raw})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
+
+starting_sample -- "Duplicate removal ({nr_out_duplicates+nr_out_superseded} removed) "--> dedup["Records after duplicates removed (n={FULL_SAMPLE_DUPLICATES_REMOVED})"];
+
+dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title})"];
+
+title_screened -- "Abstract screening ({nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract})"];
+
+abstract_screened -- " Language screening ({nr_out_language} excluded) "--> language_screened["Records after language screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract-nr_out_language})"];
+
+language_screened -- " Full-text screening ({nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr_extraction_done})"];
+{t3}
+""")
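
This file relies on the trick from the linked quarto-cli discussion: with `output: asis`, a Python cell can print a fenced `{mermaid}` block that Quarto then renders as a diagram. A minimal, self-contained sketch of that mechanism (node names and counts here are made up for illustration, not taken from the sample):

```{python}
#| output: asis
# Build the backtick fence programmatically so this cell's own fence is not closed early.
t3 = "`" * 3
n_screened, n_kept = 100, 40  # hypothetical counts, for illustration only
print(f"""
{t3}{{mermaid}}
flowchart TD;
screened["Records screened (n={n_screened})"] -- "{n_screened - n_kept} excluded" --> kept["Records retained (n={n_kept})"];
{t3}
""")
```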

article.qmd (90 changed lines)
@@ -38,51 +38,16 @@ crossref: # to fix the appendix crossrefs being separate from main
   latex-list-of-description: Appendix B Table
 ---
 
-{{< portrait >}}
 
 ```{python}
 #| label: load-data
 #| echo: false
 #| output: false
-from pathlib import Path
-import re
-## standard imports
-from IPython.core.display import Markdown as md
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt
-import seaborn as sns
-from tabulate import tabulate
-import bibtexparser
-
-sns.set_style("whitegrid")
-
-DATA_DIR=Path("./02-data")
-RAW_DATA=DATA_DIR.joinpath("raw")
-WORKING_DATA=DATA_DIR.joinpath("intermediate")
-PROCESSED_DATA=DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
-
-from src import prep_data
-
-# raw database-search results
-bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
-# the complete library of sampled (and working) literature
-bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
-
-# load relevant studies
-from src import load_data
-
-bib_df = prep_data.observations_with_metadata_df(
-    raw_observations = load_data.from_yml(PROCESSED_DATA),
-    study_metadata = prep_data.bib_metadata_df(bib_sample),
-    country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
-)
-raw_observations = None
-zot_df = None
-df_country_groups = None
+{{< include 01-codechunks/_prep-data.py >}}
 ```
 
+{{< portrait >}}
+
 # Introduction
 
 * Context and statement of the problem

@@ -124,8 +89,6 @@ with a focus on the narrowing criteria specified in @tbl-inclusion-criteria.
 ::: {#tbl-inclusion-criteria}
 
 ```{python}
-#| label: tbl-inclusion-criteria
-
 inclusion_criteria = pd.read_csv("02-data/supplementary/inclusion-criteria.tsv", sep="\t")
 md(tabulate(inclusion_criteria, showindex=False, headers="keys", tablefmt="grid"))
 ```

@@ -177,52 +140,7 @@ ultimately resulting in the process represented in the PRISMA chart in @fig-prisma
 #| label: calculate-scoping-flowchart
 #| echo: false
 #| output: asis
-
-nr_database_query_raw = len(bib_sample_raw_db.entries)
-nr_snowballing_raw = 2240
-
-all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict.keys()]
-nr_database_deduplicated = len([1 for kw in all_keywords if "sample::database" in kw])
-nr_snowballing_deduplicated = len([1 for kw in all_keywords if "sample::snowballing" in kw])
-nr_out_superseded = len([1 for kw in all_keywords if "out::superseded" in kw])
-
-FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
-FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
-
-NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
-NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
-NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348
-
-nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
-nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
-nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
-nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
-nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
-nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])
-
-t3 = "`" * 3
-# FIXME use 02-data/supplementary undeduplciated counts to get database starting and snowballing counts
-# from: https://github.com/quarto-dev/quarto-cli/discussions/6508
-print(f"""
-```{{mermaid}}
-%%| label: fig-prisma
-%%| fig-cap: "Sample sorting process through identification and screening"
-%%| fig-width: 6
-flowchart TD;
-search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
-search_prev["Records identified through other sources (n={nr_snowballing_raw})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
-
-starting_sample -- "Duplicate removal ({nr_out_duplicates+nr_out_superseded} removed) "--> dedup["Records after duplicates removed (n={FULL_SAMPLE_DUPLICATES_REMOVED})"];
-
-dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title})"];
-
-title_screened -- "Abstract screening ({nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract})"];
-
-abstract_screened -- " Language screening ({nr_out_language} excluded) "--> language_screened["Records after language screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract-nr_out_language})"];
-
-language_screened -- " Full-text screening ({nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr_extraction_done})"];
-{t3}
-""")
+{{< include 01-codechunks/_prisma-flowchart.py >}}
 ```
 
 All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.

@@ -21,47 +21,7 @@ crossref:
   latex-list-of-description: Appendix Table
 ---
 
-```{python}
-#| label: load-data
-#| echo: false
-from pathlib import Path
-import re
-## standard imports
-from IPython.core.display import Markdown as md
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt
-import seaborn as sns
-from tabulate import tabulate
-import bibtexparser
-
-sns.set_style("whitegrid")
-
-DATA_DIR=Path("./02-data")
-RAW_DATA=DATA_DIR.joinpath("raw")
-WORKING_DATA=DATA_DIR.joinpath("intermediate")
-PROCESSED_DATA=DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
-
-from src import prep_data
-
-# raw database-search results
-bib_sample_raw_db = prep_data.bib_library_from_dir(RAW_DATA)
-# the complete library of sampled (and working) literature
-bib_sample = prep_data.bib_library_from_dir(WORKING_DATA)
-
-# load relevant studies
-from src import load_data
-
-bib_df = prep_data.observations_with_metadata_df(
-    raw_observations = load_data.from_yml(PROCESSED_DATA),
-    study_metadata = prep_data.bib_metadata_df(bib_sample),
-    country_groups = prep_data.country_groups_df(Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
-)
-raw_observations = None
-zot_df = None
-df_country_groups = None
-```
+{{< include 01-codechunks/_prep-data.qmd >}}
 
 <!-- pagebreak to separate from TOC -->
 {{< pagebreak >}}

@@ -415,52 +375,7 @@ and the sources will be added to the sample to undergo the same screening process
 #| label: calculate-scoping-flowchart
 #| echo: false
 #| output: asis
-
-nr_database_query_raw = len(bib_sample_raw_db.entries)
-nr_snowballing_raw = 2240
-
-all_keywords = [entry["keywords"] for entry in bib_sample.entries if "keywords" in entry.fields_dict.keys()]
-nr_database_deduplicated = len([1 for kw in all_keywords if "sample::database" in kw])
-nr_snowballing_deduplicated = len([1 for kw in all_keywords if "sample::snowballing" in kw])
-nr_out_superseded = len([1 for kw in all_keywords if "out::superseded" in kw])
-
-FULL_RAW_SAMPLE_NOTHING_REMOVED = nr_database_query_raw + nr_snowballing_raw
-FULL_SAMPLE_DUPLICATES_REMOVED = nr_database_deduplicated + nr_snowballing_deduplicated + nr_out_superseded
-
-NON_ZOTERO_CAPTURE_TITLE_REMOVAL = 1150
-NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL = 727
-NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL = 348
-
-nr_out_duplicates = FULL_RAW_SAMPLE_NOTHING_REMOVED - FULL_SAMPLE_DUPLICATES_REMOVED
-nr_out_title = len([1 for kw in all_keywords if "out::title" in kw]) + NON_ZOTERO_CAPTURE_TITLE_REMOVAL
-nr_out_abstract = len([1 for kw in all_keywords if "out::abstract" in kw]) + NON_ZOTERO_CAPTURE_ABSTRACT_REMOVAL
-nr_out_fulltext = len([1 for kw in all_keywords if "out::full-text" in kw]) + NON_ZOTERO_CAPTURE_FULLTEXT_REMOVAL
-nr_out_language = len([1 for kw in all_keywords if "out::language" in kw])
-nr_extraction_done = len([1 for kw in all_keywords if "done::extracted" in kw])
-
-t3 = "`" * 3
-# FIXME use 02-data/supplementary undeduplciated counts to get database starting and snowballing counts
-# from: https://github.com/quarto-dev/quarto-cli/discussions/6508
-print(f"""
-```{{mermaid}}
-%%| label: fig-prisma
-%%| fig-cap: "Sample sorting process through identification and screening"
-%%| fig-width: 6
-flowchart TD;
-search_db["Records identified through database searching (n={nr_database_query_raw})"] --> starting_sample;
-search_prev["Records identified through other sources (n={nr_snowballing_raw})"] --> starting_sample["Starting sample (n={FULL_RAW_SAMPLE_NOTHING_REMOVED})"];
-
-starting_sample -- "Duplicate removal ({nr_out_duplicates+nr_out_superseded} removed) "--> dedup["Records after duplicates removed (n={FULL_SAMPLE_DUPLICATES_REMOVED})"];
-
-dedup -- "Title screening ({nr_out_title} excluded)" --> title_screened["Records after titles screened (n={FULL_SAMPLE_DUPLICATES_REMOVED - nr_out_title})"];
-
-title_screened -- "Abstract screening ({nr_out_abstract} excluded)"--> abstract_screened["Records after abstracts screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract})"];
-
-abstract_screened -- " Language screening ({nr_out_language} excluded) "--> language_screened["Records after language screened (n={FULL_SAMPLE_DUPLICATES_REMOVED-nr_out_title-nr_out_abstract-nr_out_language})"];
-
-language_screened -- " Full-text screening ({nr_out_fulltext} excluded) "--> full-text_screened["Full-text articles assessed for eligibility (n={nr_extraction_done})"];
-{t3}
-""")
+{{< include 01-codechunks/_prisma-flowchart.py >}}
 ```
 
 All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.