From c5df5c01a239edb6893098163201727791220145 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Tue, 16 Jul 2024 16:26:11 +0200 Subject: [PATCH] chore(repo): Move yml files to extracted data dir --- README.md | 19 +++++++++++-------- .../relevant => extracted}/Adam2018.yml | 0 .../relevant => extracted}/Adams2015.yml | 0 .../relevant => extracted}/Ahumada2023.yml | 0 .../relevant => extracted}/Al-Mamun2014.yml | 0 .../relevant => extracted}/Alexiou2023.yml | 0 .../relevant => extracted}/Alinaghi2020.yml | 0 .../relevant => extracted}/Bailey2012.yml | 0 .../relevant => extracted}/Bartha2020.yml | 0 .../relevant => extracted}/Blumenberg2014.yml | 0 .../relevant => extracted}/Broadway2020.yml | 0 .../Cardinaleschi2019.yml | 0 .../relevant => extracted}/Carstens2018.yml | 0 .../relevant => extracted}/Chao2022.yml | 0 .../relevant => extracted}/Cieplinski2021.yml | 0 .../relevant => extracted}/Clark2019.yml | 0 .../relevant => extracted}/Coutinho2006.yml | 0 .../relevant => extracted}/Davies2022.yml | 0 .../relevant => extracted}/Debowicz2014.yml | 0 .../relevant => extracted}/Delesalle2021.yml | 0 .../relevant => extracted}/Dieckhoff2015.yml | 0 .../relevant => extracted}/Dustmann2012.yml | 0 .../relevant => extracted}/Emigh2018.yml | 0 .../relevant => extracted}/Ferguson2015.yml | 0 .../relevant => extracted}/Field2019.yml | 0 .../relevant => extracted}/Gates2000.yml | 0 .../relevant => extracted}/Gilbert2001.yml | 0 .../relevant => extracted}/Go2010.yml | 0 .../relevant => extracted}/Hardoy2015.yml | 0 .../relevant => extracted}/Hojman2019.yml | 0 .../relevant => extracted}/Khan2021.yml | 0 .../relevant => extracted}/Kuriyama2021.yml | 0 .../relevant => extracted}/Li2022.yml | 0 .../Liyanaarachchi2016.yml | 0 .../relevant => extracted}/Militaru2019.yml | 0 .../Mukhopadhaya2003.yml | 0 .../relevant => extracted}/Mun2018.yml | 0 .../relevant => extracted}/Pi2016.yml | 0 .../relevant => extracted}/Poppen2017.yml | 0 .../relevant => extracted}/Rendall2013.yml | 0 
.../relevant => extracted}/Rosen2014.yml | 0 .../Shepherd-Banigan2021.yml | 0 .../relevant => extracted}/Shin2006.yml | 0 .../SilveiraNeto2011.yml | 0 .../relevant => extracted}/Sotomayor2021.yml | 0 .../relevant => extracted}/Standing2015.yml | 0 .../relevant => extracted}/Stock2021.yml | 0 .../relevant => extracted}/Suh2017.yml | 0 .../relevant => extracted}/Thoresen2021.yml | 0 .../relevant => extracted}/Wang2016.yml | 0 .../relevant => extracted}/Wang2020.yml | 0 .../relevant => extracted}/Whitworth2021.yml | 0 .../relevant => extracted}/Wong2019.yml | 0 .../relevant => extracted}/Xu2021.yml | 0 .../_AlbujaEcheverria2021.DISABLED} | 0 .../_Clibborn2022.DISABLED} | 0 .../_Dumas2018.DISABLED} | 0 .../_Eckardt2022.DISABLED} | 0 .../_Elveren2013.DISABLED} | 0 .../_Mhando2020.DISABLED} | 0 .../_Saleh2018.DISABLED} | 0 .../_deGeus2022.DISABLED} | 0 manuscript/meeting_eoy.qmd | 2 +- src/extract/load_data.py | 6 +++++- src/globals.py | 6 +++--- src/process/generate_dataframes.py | 2 +- 66 files changed, 21 insertions(+), 14 deletions(-) rename data/{processed/relevant => extracted}/Adam2018.yml (100%) rename data/{processed/relevant => extracted}/Adams2015.yml (100%) rename data/{processed/relevant => extracted}/Ahumada2023.yml (100%) rename data/{processed/relevant => extracted}/Al-Mamun2014.yml (100%) rename data/{processed/relevant => extracted}/Alexiou2023.yml (100%) rename data/{processed/relevant => extracted}/Alinaghi2020.yml (100%) rename data/{processed/relevant => extracted}/Bailey2012.yml (100%) rename data/{processed/relevant => extracted}/Bartha2020.yml (100%) rename data/{processed/relevant => extracted}/Blumenberg2014.yml (100%) rename data/{processed/relevant => extracted}/Broadway2020.yml (100%) rename data/{processed/relevant => extracted}/Cardinaleschi2019.yml (100%) rename data/{processed/relevant => extracted}/Carstens2018.yml (100%) rename data/{processed/relevant => extracted}/Chao2022.yml (100%) rename data/{processed/relevant => 
extracted}/Cieplinski2021.yml (100%) rename data/{processed/relevant => extracted}/Clark2019.yml (100%) rename data/{processed/relevant => extracted}/Coutinho2006.yml (100%) rename data/{processed/relevant => extracted}/Davies2022.yml (100%) rename data/{processed/relevant => extracted}/Debowicz2014.yml (100%) rename data/{processed/relevant => extracted}/Delesalle2021.yml (100%) rename data/{processed/relevant => extracted}/Dieckhoff2015.yml (100%) rename data/{processed/relevant => extracted}/Dustmann2012.yml (100%) rename data/{processed/relevant => extracted}/Emigh2018.yml (100%) rename data/{processed/relevant => extracted}/Ferguson2015.yml (100%) rename data/{processed/relevant => extracted}/Field2019.yml (100%) rename data/{processed/relevant => extracted}/Gates2000.yml (100%) rename data/{processed/relevant => extracted}/Gilbert2001.yml (100%) rename data/{processed/relevant => extracted}/Go2010.yml (100%) rename data/{processed/relevant => extracted}/Hardoy2015.yml (100%) rename data/{processed/relevant => extracted}/Hojman2019.yml (100%) rename data/{processed/relevant => extracted}/Khan2021.yml (100%) rename data/{processed/relevant => extracted}/Kuriyama2021.yml (100%) rename data/{processed/relevant => extracted}/Li2022.yml (100%) rename data/{processed/relevant => extracted}/Liyanaarachchi2016.yml (100%) rename data/{processed/relevant => extracted}/Militaru2019.yml (100%) rename data/{processed/relevant => extracted}/Mukhopadhaya2003.yml (100%) rename data/{processed/relevant => extracted}/Mun2018.yml (100%) rename data/{processed/relevant => extracted}/Pi2016.yml (100%) rename data/{processed/relevant => extracted}/Poppen2017.yml (100%) rename data/{processed/relevant => extracted}/Rendall2013.yml (100%) rename data/{processed/relevant => extracted}/Rosen2014.yml (100%) rename data/{processed/relevant => extracted}/Shepherd-Banigan2021.yml (100%) rename data/{processed/relevant => extracted}/Shin2006.yml (100%) rename data/{processed/relevant => 
extracted}/SilveiraNeto2011.yml (100%) rename data/{processed/relevant => extracted}/Sotomayor2021.yml (100%) rename data/{processed/relevant => extracted}/Standing2015.yml (100%) rename data/{processed/relevant => extracted}/Stock2021.yml (100%) rename data/{processed/relevant => extracted}/Suh2017.yml (100%) rename data/{processed/relevant => extracted}/Thoresen2021.yml (100%) rename data/{processed/relevant => extracted}/Wang2016.yml (100%) rename data/{processed/relevant => extracted}/Wang2020.yml (100%) rename data/{processed/relevant => extracted}/Whitworth2021.yml (100%) rename data/{processed/relevant => extracted}/Wong2019.yml (100%) rename data/{processed/relevant => extracted}/Xu2021.yml (100%) rename data/{processed/irrelevant/AlbujaEcheverria2021.DISABLED => extracted/_AlbujaEcheverria2021.DISABLED} (100%) rename data/{processed/irrelevant/Clibborn2022.DISABLED => extracted/_Clibborn2022.DISABLED} (100%) rename data/{processed/irrelevant/Dumas2018.DISABLED => extracted/_Dumas2018.DISABLED} (100%) rename data/{processed/irrelevant/Eckardt2022.yml.DISABLED => extracted/_Eckardt2022.DISABLED} (100%) rename data/{processed/irrelevant/Elveren2013.DISABLED => extracted/_Elveren2013.DISABLED} (100%) rename data/{processed/irrelevant/Mhando2020.DISABLED => extracted/_Mhando2020.DISABLED} (100%) rename data/{processed/irrelevant/Saleh2018.DISABLED => extracted/_Saleh2018.DISABLED} (100%) rename data/{processed/irrelevant/deGeus2022.DISABLED => extracted/_deGeus2022.DISABLED} (100%) diff --git a/README.md b/README.md index 24d2ff9..fbf9a15 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,21 @@ # Scoping Review: Inequalities on the Labour Market - This repository contains all data, modelling and processing source code and the complete textual content to reproduce the scoping review study. The most up-to-date version of this repository can always be found [here](https://git.martyoeh.me/professional/wow-inequalities). 
-Raw, intermediate and processed data can all be found in the `data/` directory: -Raw data include the unmodified database queries using the scoping review search terms. -Intermediate data are made up of the bibtex file produced by Zotero, after tagging and sorting in a Zotero library, ready to be re-imported into the application. -Processed data include the fully extracted studies which make up the main sample for the review. +Raw references, extracted and processed data can all be found in the `data/` directory: +Reference data include the unmodified database queries using the scoping review search terms, +and the bibtex file produced by Zotero after tagging and sorting in a Zotero library, ready to be re-imported into the application. +Extracted data include the fully extracted studies which make up the main sample for the review. +Processed data are ready to import into a dataframe or visualize in a report. -The full article text and code can be found in the `scoping_review.qmd` file. -It makes use of supplementary processing code which resides in the `src/` directory, +All full texts and visualization code reside in the `manuscript/` directory. +The full working paper text and code can be found in the `scoping_review.qmd` file. +The full article text can be found in the `article.qmd` file. + +They both make use of supplementary extraction and processing code which resides in the `src/` directory, mainly to load processed data from the `data/` directory and turn it into `.csv` data, -as well as pre-processing those for visualization and validity ranking within the study. +as well as pre-processing for visualization and validity ranking within the study. 
## Execution and Reproduction diff --git a/data/processed/relevant/Adam2018.yml b/data/extracted/Adam2018.yml similarity index 100% rename from data/processed/relevant/Adam2018.yml rename to data/extracted/Adam2018.yml diff --git a/data/processed/relevant/Adams2015.yml b/data/extracted/Adams2015.yml similarity index 100% rename from data/processed/relevant/Adams2015.yml rename to data/extracted/Adams2015.yml diff --git a/data/processed/relevant/Ahumada2023.yml b/data/extracted/Ahumada2023.yml similarity index 100% rename from data/processed/relevant/Ahumada2023.yml rename to data/extracted/Ahumada2023.yml diff --git a/data/processed/relevant/Al-Mamun2014.yml b/data/extracted/Al-Mamun2014.yml similarity index 100% rename from data/processed/relevant/Al-Mamun2014.yml rename to data/extracted/Al-Mamun2014.yml diff --git a/data/processed/relevant/Alexiou2023.yml b/data/extracted/Alexiou2023.yml similarity index 100% rename from data/processed/relevant/Alexiou2023.yml rename to data/extracted/Alexiou2023.yml diff --git a/data/processed/relevant/Alinaghi2020.yml b/data/extracted/Alinaghi2020.yml similarity index 100% rename from data/processed/relevant/Alinaghi2020.yml rename to data/extracted/Alinaghi2020.yml diff --git a/data/processed/relevant/Bailey2012.yml b/data/extracted/Bailey2012.yml similarity index 100% rename from data/processed/relevant/Bailey2012.yml rename to data/extracted/Bailey2012.yml diff --git a/data/processed/relevant/Bartha2020.yml b/data/extracted/Bartha2020.yml similarity index 100% rename from data/processed/relevant/Bartha2020.yml rename to data/extracted/Bartha2020.yml diff --git a/data/processed/relevant/Blumenberg2014.yml b/data/extracted/Blumenberg2014.yml similarity index 100% rename from data/processed/relevant/Blumenberg2014.yml rename to data/extracted/Blumenberg2014.yml diff --git a/data/processed/relevant/Broadway2020.yml b/data/extracted/Broadway2020.yml similarity index 100% rename from data/processed/relevant/Broadway2020.yml 
rename to data/extracted/Broadway2020.yml diff --git a/data/processed/relevant/Cardinaleschi2019.yml b/data/extracted/Cardinaleschi2019.yml similarity index 100% rename from data/processed/relevant/Cardinaleschi2019.yml rename to data/extracted/Cardinaleschi2019.yml diff --git a/data/processed/relevant/Carstens2018.yml b/data/extracted/Carstens2018.yml similarity index 100% rename from data/processed/relevant/Carstens2018.yml rename to data/extracted/Carstens2018.yml diff --git a/data/processed/relevant/Chao2022.yml b/data/extracted/Chao2022.yml similarity index 100% rename from data/processed/relevant/Chao2022.yml rename to data/extracted/Chao2022.yml diff --git a/data/processed/relevant/Cieplinski2021.yml b/data/extracted/Cieplinski2021.yml similarity index 100% rename from data/processed/relevant/Cieplinski2021.yml rename to data/extracted/Cieplinski2021.yml diff --git a/data/processed/relevant/Clark2019.yml b/data/extracted/Clark2019.yml similarity index 100% rename from data/processed/relevant/Clark2019.yml rename to data/extracted/Clark2019.yml diff --git a/data/processed/relevant/Coutinho2006.yml b/data/extracted/Coutinho2006.yml similarity index 100% rename from data/processed/relevant/Coutinho2006.yml rename to data/extracted/Coutinho2006.yml diff --git a/data/processed/relevant/Davies2022.yml b/data/extracted/Davies2022.yml similarity index 100% rename from data/processed/relevant/Davies2022.yml rename to data/extracted/Davies2022.yml diff --git a/data/processed/relevant/Debowicz2014.yml b/data/extracted/Debowicz2014.yml similarity index 100% rename from data/processed/relevant/Debowicz2014.yml rename to data/extracted/Debowicz2014.yml diff --git a/data/processed/relevant/Delesalle2021.yml b/data/extracted/Delesalle2021.yml similarity index 100% rename from data/processed/relevant/Delesalle2021.yml rename to data/extracted/Delesalle2021.yml diff --git a/data/processed/relevant/Dieckhoff2015.yml b/data/extracted/Dieckhoff2015.yml similarity index 100% 
rename from data/processed/relevant/Dieckhoff2015.yml rename to data/extracted/Dieckhoff2015.yml diff --git a/data/processed/relevant/Dustmann2012.yml b/data/extracted/Dustmann2012.yml similarity index 100% rename from data/processed/relevant/Dustmann2012.yml rename to data/extracted/Dustmann2012.yml diff --git a/data/processed/relevant/Emigh2018.yml b/data/extracted/Emigh2018.yml similarity index 100% rename from data/processed/relevant/Emigh2018.yml rename to data/extracted/Emigh2018.yml diff --git a/data/processed/relevant/Ferguson2015.yml b/data/extracted/Ferguson2015.yml similarity index 100% rename from data/processed/relevant/Ferguson2015.yml rename to data/extracted/Ferguson2015.yml diff --git a/data/processed/relevant/Field2019.yml b/data/extracted/Field2019.yml similarity index 100% rename from data/processed/relevant/Field2019.yml rename to data/extracted/Field2019.yml diff --git a/data/processed/relevant/Gates2000.yml b/data/extracted/Gates2000.yml similarity index 100% rename from data/processed/relevant/Gates2000.yml rename to data/extracted/Gates2000.yml diff --git a/data/processed/relevant/Gilbert2001.yml b/data/extracted/Gilbert2001.yml similarity index 100% rename from data/processed/relevant/Gilbert2001.yml rename to data/extracted/Gilbert2001.yml diff --git a/data/processed/relevant/Go2010.yml b/data/extracted/Go2010.yml similarity index 100% rename from data/processed/relevant/Go2010.yml rename to data/extracted/Go2010.yml diff --git a/data/processed/relevant/Hardoy2015.yml b/data/extracted/Hardoy2015.yml similarity index 100% rename from data/processed/relevant/Hardoy2015.yml rename to data/extracted/Hardoy2015.yml diff --git a/data/processed/relevant/Hojman2019.yml b/data/extracted/Hojman2019.yml similarity index 100% rename from data/processed/relevant/Hojman2019.yml rename to data/extracted/Hojman2019.yml diff --git a/data/processed/relevant/Khan2021.yml b/data/extracted/Khan2021.yml similarity index 100% rename from 
data/processed/relevant/Khan2021.yml rename to data/extracted/Khan2021.yml diff --git a/data/processed/relevant/Kuriyama2021.yml b/data/extracted/Kuriyama2021.yml similarity index 100% rename from data/processed/relevant/Kuriyama2021.yml rename to data/extracted/Kuriyama2021.yml diff --git a/data/processed/relevant/Li2022.yml b/data/extracted/Li2022.yml similarity index 100% rename from data/processed/relevant/Li2022.yml rename to data/extracted/Li2022.yml diff --git a/data/processed/relevant/Liyanaarachchi2016.yml b/data/extracted/Liyanaarachchi2016.yml similarity index 100% rename from data/processed/relevant/Liyanaarachchi2016.yml rename to data/extracted/Liyanaarachchi2016.yml diff --git a/data/processed/relevant/Militaru2019.yml b/data/extracted/Militaru2019.yml similarity index 100% rename from data/processed/relevant/Militaru2019.yml rename to data/extracted/Militaru2019.yml diff --git a/data/processed/relevant/Mukhopadhaya2003.yml b/data/extracted/Mukhopadhaya2003.yml similarity index 100% rename from data/processed/relevant/Mukhopadhaya2003.yml rename to data/extracted/Mukhopadhaya2003.yml diff --git a/data/processed/relevant/Mun2018.yml b/data/extracted/Mun2018.yml similarity index 100% rename from data/processed/relevant/Mun2018.yml rename to data/extracted/Mun2018.yml diff --git a/data/processed/relevant/Pi2016.yml b/data/extracted/Pi2016.yml similarity index 100% rename from data/processed/relevant/Pi2016.yml rename to data/extracted/Pi2016.yml diff --git a/data/processed/relevant/Poppen2017.yml b/data/extracted/Poppen2017.yml similarity index 100% rename from data/processed/relevant/Poppen2017.yml rename to data/extracted/Poppen2017.yml diff --git a/data/processed/relevant/Rendall2013.yml b/data/extracted/Rendall2013.yml similarity index 100% rename from data/processed/relevant/Rendall2013.yml rename to data/extracted/Rendall2013.yml diff --git a/data/processed/relevant/Rosen2014.yml b/data/extracted/Rosen2014.yml similarity index 100% rename from 
data/processed/relevant/Rosen2014.yml rename to data/extracted/Rosen2014.yml diff --git a/data/processed/relevant/Shepherd-Banigan2021.yml b/data/extracted/Shepherd-Banigan2021.yml similarity index 100% rename from data/processed/relevant/Shepherd-Banigan2021.yml rename to data/extracted/Shepherd-Banigan2021.yml diff --git a/data/processed/relevant/Shin2006.yml b/data/extracted/Shin2006.yml similarity index 100% rename from data/processed/relevant/Shin2006.yml rename to data/extracted/Shin2006.yml diff --git a/data/processed/relevant/SilveiraNeto2011.yml b/data/extracted/SilveiraNeto2011.yml similarity index 100% rename from data/processed/relevant/SilveiraNeto2011.yml rename to data/extracted/SilveiraNeto2011.yml diff --git a/data/processed/relevant/Sotomayor2021.yml b/data/extracted/Sotomayor2021.yml similarity index 100% rename from data/processed/relevant/Sotomayor2021.yml rename to data/extracted/Sotomayor2021.yml diff --git a/data/processed/relevant/Standing2015.yml b/data/extracted/Standing2015.yml similarity index 100% rename from data/processed/relevant/Standing2015.yml rename to data/extracted/Standing2015.yml diff --git a/data/processed/relevant/Stock2021.yml b/data/extracted/Stock2021.yml similarity index 100% rename from data/processed/relevant/Stock2021.yml rename to data/extracted/Stock2021.yml diff --git a/data/processed/relevant/Suh2017.yml b/data/extracted/Suh2017.yml similarity index 100% rename from data/processed/relevant/Suh2017.yml rename to data/extracted/Suh2017.yml diff --git a/data/processed/relevant/Thoresen2021.yml b/data/extracted/Thoresen2021.yml similarity index 100% rename from data/processed/relevant/Thoresen2021.yml rename to data/extracted/Thoresen2021.yml diff --git a/data/processed/relevant/Wang2016.yml b/data/extracted/Wang2016.yml similarity index 100% rename from data/processed/relevant/Wang2016.yml rename to data/extracted/Wang2016.yml diff --git a/data/processed/relevant/Wang2020.yml b/data/extracted/Wang2020.yml 
similarity index 100% rename from data/processed/relevant/Wang2020.yml rename to data/extracted/Wang2020.yml diff --git a/data/processed/relevant/Whitworth2021.yml b/data/extracted/Whitworth2021.yml similarity index 100% rename from data/processed/relevant/Whitworth2021.yml rename to data/extracted/Whitworth2021.yml diff --git a/data/processed/relevant/Wong2019.yml b/data/extracted/Wong2019.yml similarity index 100% rename from data/processed/relevant/Wong2019.yml rename to data/extracted/Wong2019.yml diff --git a/data/processed/relevant/Xu2021.yml b/data/extracted/Xu2021.yml similarity index 100% rename from data/processed/relevant/Xu2021.yml rename to data/extracted/Xu2021.yml diff --git a/data/processed/irrelevant/AlbujaEcheverria2021.DISABLED b/data/extracted/_AlbujaEcheverria2021.DISABLED similarity index 100% rename from data/processed/irrelevant/AlbujaEcheverria2021.DISABLED rename to data/extracted/_AlbujaEcheverria2021.DISABLED diff --git a/data/processed/irrelevant/Clibborn2022.DISABLED b/data/extracted/_Clibborn2022.DISABLED similarity index 100% rename from data/processed/irrelevant/Clibborn2022.DISABLED rename to data/extracted/_Clibborn2022.DISABLED diff --git a/data/processed/irrelevant/Dumas2018.DISABLED b/data/extracted/_Dumas2018.DISABLED similarity index 100% rename from data/processed/irrelevant/Dumas2018.DISABLED rename to data/extracted/_Dumas2018.DISABLED diff --git a/data/processed/irrelevant/Eckardt2022.yml.DISABLED b/data/extracted/_Eckardt2022.DISABLED similarity index 100% rename from data/processed/irrelevant/Eckardt2022.yml.DISABLED rename to data/extracted/_Eckardt2022.DISABLED diff --git a/data/processed/irrelevant/Elveren2013.DISABLED b/data/extracted/_Elveren2013.DISABLED similarity index 100% rename from data/processed/irrelevant/Elveren2013.DISABLED rename to data/extracted/_Elveren2013.DISABLED diff --git a/data/processed/irrelevant/Mhando2020.DISABLED b/data/extracted/_Mhando2020.DISABLED similarity index 100% rename from 
data/processed/irrelevant/Mhando2020.DISABLED rename to data/extracted/_Mhando2020.DISABLED diff --git a/data/processed/irrelevant/Saleh2018.DISABLED b/data/extracted/_Saleh2018.DISABLED similarity index 100% rename from data/processed/irrelevant/Saleh2018.DISABLED rename to data/extracted/_Saleh2018.DISABLED diff --git a/data/processed/irrelevant/deGeus2022.DISABLED b/data/extracted/_deGeus2022.DISABLED similarity index 100% rename from data/processed/irrelevant/deGeus2022.DISABLED rename to data/extracted/_deGeus2022.DISABLED diff --git a/manuscript/meeting_eoy.qmd b/manuscript/meeting_eoy.qmd index 6fe1a5e..09c11e7 100644 --- a/manuscript/meeting_eoy.qmd +++ b/manuscript/meeting_eoy.qmd @@ -52,7 +52,7 @@ zot_df = pd.DataFrame([ WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve() df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy") -bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}/relevant") +bib_df = (load_data.from_yml(f"{g.EXTRACTED_DATA}") .assign( doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False), zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]), diff --git a/src/extract/load_data.py b/src/extract/load_data.py index 82d791e..bf5a0f4 100644 --- a/src/extract/load_data.py +++ b/src/extract/load_data.py @@ -9,7 +9,11 @@ try: except ModuleNotFoundError: import yml as yaml # for directly running the package -DEFAULT_YAML_PATH = Path("data/processed") +try: + import src.globals as g + DEFAULT_YAML_PATH = g.EXTRACTED_DATA +except ModuleNotFoundError: + DEFAULT_YAML_PATH = Path("data") def to_tsv(studies: list[dict]) -> str: diff --git a/src/globals.py b/src/globals.py index df379a4..d56a9ea 100644 --- a/src/globals.py +++ b/src/globals.py @@ -5,7 +5,7 @@ PROJECT_DIR = Path(os.getenv("QUARTO_PROJECT_DIR", ".")) DATA_DIR = PROJECT_DIR.joinpath("data") -PROCESSED_DATA = DATA_DIR.joinpath("processed") -SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary") - 
+EXTRACTED_DATA = DATA_DIR.joinpath("extracted") REFERENCE_DATA = DATA_DIR.joinpath("references") +SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary") +PROCESSED_DATA = DATA_DIR.joinpath("processed") diff --git a/src/process/generate_dataframes.py b/src/process/generate_dataframes.py index 929bb16..d2af7b6 100644 --- a/src/process/generate_dataframes.py +++ b/src/process/generate_dataframes.py @@ -20,7 +20,7 @@ from src.extract import load_data as load # each observation in a single dataframe df = meta.observations_with_metadata_df( - raw_observations = load.from_yml(g.PROCESSED_DATA), + raw_observations = load.from_yml(g.EXTRACTED_DATA), study_metadata = meta.bib_metadata_df(bib_sample), country_groups = meta.country_groups_df(Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")), )