From 9fd4a3c791495f5271a975cca40418930e786625 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Mon, 15 Jul 2024 22:14:03 +0200
Subject: [PATCH] chore(repo): Move manuscripts to separate dir

The manuscripts for both the working paper and the article are now
collected in a separate manuscript/ folder.
---
 _quarto-article.yml                           |  2 +-
 _quarto-workingpaper.yml                      |  8 ++++----
 article.qmd => manuscript/article.qmd         | 12 ++++++------
 meeting_eoy.qmd => manuscript/meeting_eoy.qmd | 18 +++++++-----------
 notes.qmd => manuscript/notes.qmd             | 13 ++++---------
 .../presentation_summary.md                   |  0
 .../scoping_review.qmd                        | 17 +++++++++--------
 7 files changed, 31 insertions(+), 39 deletions(-)
 rename article.qmd => manuscript/article.qmd (99%)
 rename meeting_eoy.qmd => manuscript/meeting_eoy.qmd (96%)
 rename notes.qmd => manuscript/notes.qmd (99%)
 rename presentation_summary.md => manuscript/presentation_summary.md (100%)
 rename scoping_review.qmd => manuscript/scoping_review.qmd (99%)

diff --git a/_quarto-article.yml b/_quarto-article.yml
index 8a59168..1672c3a 100644
--- a/_quarto-article.yml
+++ b/_quarto-article.yml
@@ -1,7 +1,7 @@
 project:
   type: default
   render:
-    - article.qmd
+    - manuscript/article.qmd
 
 format:
   elsevier-html:
diff --git a/_quarto-workingpaper.yml b/_quarto-workingpaper.yml
index d5fc77e..74e5a75 100644
--- a/_quarto-workingpaper.yml
+++ b/_quarto-workingpaper.yml
@@ -1,9 +1,9 @@
 project:
   render:
-    - presentation_summary.md
-    - notes.qmd
-    - meeting_eoy.qmd
-    - scoping_review.qmd
+    - manuscript/presentation_summary.md
+    - manuscript/notes.qmd
+    - manuscript/meeting_eoy.qmd
+    - manuscript/scoping_review.qmd
 
 toc: true
 format:
diff --git a/article.qmd b/manuscript/article.qmd
similarity index 99%
rename from article.qmd
rename to manuscript/article.qmd
index 6032a1a..6f91f86 100644
--- a/article.qmd
+++ b/manuscript/article.qmd
@@ -102,7 +102,7 @@ with a focus on the narrowing criteria specified in @tbl-inclusion-criteria.
 ::: {#tbl-inclusion-criteria}
 
 ```{python}
-inclusion_criteria = pd.read_csv("data/supplementary/inclusion-criteria.tsv", sep="\t")
+inclusion_criteria = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/inclusion-criteria.tsv", sep="\t")
 
 Markdown(tabulate(inclusion_criteria, showindex=False, headers="keys", tablefmt="grid"))
 ```
@@ -152,7 +152,7 @@ ultimately resulting in the process represented in the PRISMA chart in @fig-pris
 ```{mermaid}
 %%| label: fig-prisma
 %%| fig-cap: PRISMA flowchart for scoping process
-%%| file: data/processed/prisma.mmd
+%%| file: ../data/processed/prisma.mmd
 ```
 
 All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
@@ -225,7 +225,7 @@ def strength_for(val):
 ]
 
 
-findings_institutional = pd.read_csv("data/supplementary/findings-institutional.csv")
+findings_institutional = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/findings-institutional.csv")
 
 outp = Markdown(
     tabulate(
@@ -695,7 +695,7 @@ Another reason could be the actual implementation of different policy programmes
 ::: {#appatbl-wow-terms}
 
 ```{python}
-terms_wow = pd.read_csv("data/supplementary/terms_wow.csv")
+terms_wow = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_wow.csv")
 
 Markdown(tabulate(terms_wow.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
 ```
@@ -706,7 +706,7 @@ World of work term cluster
 ::: {#appatbl-intervention-terms}
 
 ```{python}
-terms_policy = pd.read_csv("data/supplementary/terms_policy.csv")
+terms_policy = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_policy.csv")
 # different headers to include 'social norms'
 headers = ["General", "Institutional", "Structural", "Agency & social norms"]
 Markdown(tabulate(terms_policy.fillna(""), showindex=False, headers=headers, tablefmt="grid"))
@@ -719,7 +719,7 @@ Policy intervention term cluster
 ::: {#appatbl-inequality-terms}
 
 ```{python}
-terms_inequality = pd.read_csv("data/supplementary/terms_inequality.csv")
+terms_inequality = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_inequality.csv")
 
 Markdown(tabulate(terms_inequality.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
 ```
diff --git a/meeting_eoy.qmd b/manuscript/meeting_eoy.qmd
similarity index 96%
rename from meeting_eoy.qmd
rename to manuscript/meeting_eoy.qmd
index 4292023..650606e 100644
--- a/meeting_eoy.qmd
+++ b/manuscript/meeting_eoy.qmd
@@ -1,5 +1,5 @@
 ---
-bibliography: data/intermediate/zotero-library.bib
+bibliography: ../data/intermediate/zotero-library.bib
 csl: /home/marty/documents/library/utilities/styles/APA-7.csl
 papersize: A4
 linestretch: 1.5
@@ -30,26 +30,22 @@ import bibtexparser
 
 sns.set_style("whitegrid")
 
-DATA_DIR=Path("./data")
-RAW_DATA=DATA_DIR.joinpath("raw")
-WORKING_DATA=DATA_DIR.joinpath("intermediate")
-PROCESSED_DATA=DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
+from src import globals as g
 
 bib_string=""
-for partial_bib in RAW_DATA.glob("**/*.bib"):
+for partial_bib in g.RAW_DATA.glob("**/*.bib"):
     with open(partial_bib) as f:
         bib_string+="\n".join(f.readlines())
 bib_sample_raw_db = bibtexparser.parse_string(bib_string)
 
 bib_string=""
-for partial_bib in WORKING_DATA.glob("**/*.bib"):
+for partial_bib in g.WORKING_DATA.glob("**/*.bib"):
     with open(partial_bib) as f:
         bib_string+="\n".join(f.readlines())
 bib_sample = bibtexparser.parse_string(bib_string)
 
 # load relevant studies
-from src import load_data
+from src.extract import load_data
 
 # load zotero-based metadata: citations and uses
 zot_df = pd.DataFrame([
@@ -63,10 +59,10 @@ zot_df = pd.DataFrame([
 ], columns = ["doi", "cited", "usage", "keywords"]).drop_duplicates("doi").set_index("doi")
 
 # Add WB country grouping definitions (income group, world region)
-WB_COUNTRY_GROUPS_FILE = Path(f"{SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
+WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
 df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
 
-bib_df = (load_data.from_yml(f"{PROCESSED_DATA}/relevant")
+bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}/relevant")
     .assign(
         doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
         zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
diff --git a/notes.qmd b/manuscript/notes.qmd
similarity index 99%
rename from notes.qmd
rename to manuscript/notes.qmd
index 8492eee..873bee4 100644
--- a/notes.qmd
+++ b/manuscript/notes.qmd
@@ -1,5 +1,5 @@
 ---
-bibliography: data/intermediate/zotero-library.bib
+bibliography: ../data/intermediate/zotero-library.bib
 csl: /home/marty/documents/library/utilities/styles/APA-7.csl
 papersize: A4
 linestretch: 1.5
@@ -21,12 +21,7 @@ subtitle: Conceptual Definitions and Key Terms
 
 ```{python}
 #| echo: false
-from pathlib import Path
-DATA_DIR=Path("./data")
-RAW_DATA=DATA_DIR.joinpath("raw")
-WORKING_DATA=DATA_DIR.joinpath("intermediate")
-PROCESSED_DATA=DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
+from src import globals as g
 
 ## standard imports
 from IPython.core.display import Markdown as md
@@ -396,7 +391,7 @@ Policy *areas*, identified by @ILO2022b:
 #| label: tbl-inclusion-criteria
 #| tbl-cap: Study inclusion and exclusion scoping criteria {#tbl-inclusion-criteria}
 
-inclusion_criteria = pd.read_csv("data/supplementary/inclusion-criteria.tsv", sep="\t")
+inclusion_criteria = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/inclusion-criteria.tsv", sep="\t")
 md(tabulate(inclusion_criteria, showindex=False, headers="keys", tablefmt="grid"))
 ```
 
@@ -861,7 +856,7 @@ from @Pinto2021:
 ```{python}
 #| echo: false
 #| output: asis
-with open(f"{SUPPLEMENTARY_DATA}/query.txt") as f:
+with open(f"{g.SUPPLEMENTARY_DATA}/query.txt") as f:
     query = f.read()
 
 t3 = "`" * 3
diff --git a/presentation_summary.md b/manuscript/presentation_summary.md
similarity index 100%
rename from presentation_summary.md
rename to manuscript/presentation_summary.md
diff --git a/scoping_review.qmd b/manuscript/scoping_review.qmd
similarity index 99%
rename from scoping_review.qmd
rename to manuscript/scoping_review.qmd
index b26fbd6..08480b2 100644
--- a/scoping_review.qmd
+++ b/manuscript/scoping_review.qmd
@@ -26,6 +26,7 @@ crossref:
 #| echo: false
 #| output: false
 import src.globals as g
+import re
 from IPython.display import display, Markdown, HTML
 import numpy as np
 import pandas as pd
@@ -303,7 +304,7 @@ with the search query requiring a term from the general column and one other col
 ```{python}
 #| label: tbl-wow-terms
 #| tbl-cap: World of work term cluster
-terms_wow = pd.read_csv("data/supplementary/terms_wow.csv")
+terms_wow = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_wow.csv")
 
 Markdown(tabulate(terms_wow.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
 ```
@@ -319,7 +320,7 @@ For the database query, a single term from the general category is required to b
 ```{python}
 #| label: tbl-intervention-terms
 #| tbl-cap: Policy intervention term cluster
-terms_policy = pd.read_csv("data/supplementary/terms_policy.csv")
+terms_policy = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_policy.csv")
 # different headers to include 'social norms'
 headers = ["General", "Institutional", "Structural", "Agency & social norms"]
 Markdown(tabulate(terms_policy.fillna(""), showindex=False, headers=headers, tablefmt="grid"))
@@ -331,7 +332,7 @@ as seen in @tbl-inequality-terms.
 ```{python}
 #| label: tbl-inequality-terms
 #| tbl-cap: Inequality term cluster
-terms_inequality = pd.read_csv("data/supplementary/terms_inequality.csv")
+terms_inequality = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/terms_inequality.csv")
 
 Markdown(tabulate(terms_inequality.fillna(""), showindex=False, headers="keys", tablefmt="grid"))
 ```
@@ -355,7 +356,7 @@ with a focus on the narrowing criteria specified in @tbl-inclusion-criteria.
 
 ```{python}
 #| label: inclusion-criteria
-inclusion_criteria = pd.read_csv("data/supplementary/inclusion-criteria.tsv", sep="\t")
+inclusion_criteria = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/inclusion-criteria.tsv", sep="\t")
 
 Markdown(tabulate(inclusion_criteria, showindex=False, headers="keys", tablefmt="grid"))
 ```
@@ -382,7 +383,7 @@ The resulting process can be seen in @fig-prisma.
 ```{mermaid}
 %%| label: fig-prisma
 %%| fig-cap: PRISMA flowchart for scoping process
-%%| file: data/processed/prisma.mmd
+%%| file: ../data/processed/prisma.mmd
 ```
 
 All relevant data concerning both their major findings and statistical significance are then extracted from the individual studies into a collective results matrix.
@@ -559,7 +560,7 @@ study_strength_bins = {
 def strength_for(val):
     return list(study_strength_bins.keys())[list(study_strength_bins.values()).index(val)]
 
-findings_institutional = pd.read_csv("data/supplementary/findings-institutional.csv")
+findings_institutional = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/findings-institutional.csv")
 fd_df = validity.add_to_findings(findings_institutional, by_intervention, study_strength_bins)
 
 Markdown(tabulate(fd_df[["area of policy", "internal_validity", "external_validity", "findings", "channels"]].fillna(""), showindex=False, headers=["area of policy", "internal strength", "external strength", "main findings", "channels"], tablefmt="grid"))
@@ -791,7 +792,7 @@ One limitation of the study is the modelling assumption that workers will have t
 #| label: tbl-findings-structural
 from src.model import validity
 
-findings_structural = pd.read_csv("data/supplementary/findings-structural.csv")
+findings_structural = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/findings-structural.csv")
 fd_df = validity.add_to_findings(findings_structural, by_intervention, study_strength_bins)
 
 Markdown(tabulate(fd_df[["area of policy", "internal_validity", "external_validity", "findings", "channels"]].fillna(""), showindex=False, headers=["area of policy", "internal strength", "external strength", "main findings", "channels"], tablefmt="grid"))
@@ -1028,7 +1029,7 @@ Though the intervention clearly aims at strengthening some aspect of individual
 #| label: tbl-findings-agency
 from src.model import validity
 
-findings_agency = pd.read_csv("data/supplementary/findings-agency.csv")
+findings_agency = pd.read_csv(f"{g.SUPPLEMENTARY_DATA}/findings-agency.csv")
 fd_df = validity.add_to_findings(findings_agency, by_intervention, study_strength_bins)
 
 Markdown(tabulate(fd_df[["area of policy", "internal_validity", "external_validity", "findings", "channels"]].fillna(""), showindex=False, headers=["area of policy", "internal strength", "external strength", "main findings", "channels"], tablefmt="grid"))
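
A note on the path refactor accompanying the move: the per-document path constants deleted from `meeting_eoy.qmd` and `notes.qmd` are centralized in a shared `src/globals` module (imported as `g`), whose contents this patch does not show. Below is a minimal sketch of what that module plausibly contains. The constant names and data subdirectories are copied from the deleted lines; anchoring them at the repository root via `__file__` is an assumption, chosen because it keeps the constants valid regardless of which directory a render executes from now that the documents live in `manuscript/`.

```python
# src/globals.py -- hypothetical sketch; the module is referenced but
# not shown by this patch.
from pathlib import Path

# Assumption: src/ sits directly under the repository root, so the root
# can be recovered from this file's own location rather than from the
# current working directory of the rendering process.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

DATA_DIR = PROJECT_ROOT / "data"
RAW_DATA = DATA_DIR / "raw"                      # raw .bib exports
WORKING_DATA = DATA_DIR / "intermediate"         # e.g. zotero-library.bib
PROCESSED_DATA = DATA_DIR / "processed"          # e.g. prisma.mmd, relevant/
SUPPLEMENTARY_DATA = DATA_DIR / "supplementary"  # term lists, criteria, findings
```

This also explains the asymmetry visible in the diff: paths consumed by Python code switch to the `g.*` constants and need no `../` prefix, while paths that Quarto resolves relative to the document itself (the `bibliography:` front-matter entry and the mermaid `%%| file:` option) change from `data/...` to `../data/...` after the move. The `_quarto-article.yml` and `_quarto-workingpaper.yml` files are Quarto project profiles, presumably selected via `quarto render --profile`, so their render lists only needed the `manuscript/` prefix added.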