chore(repo): Move yml files to extracted data dir
parent 3ec7dcd1bb
commit c5df5c01a2

66 changed files with 21 additions and 14 deletions
README.md (19 lines changed)

@@ -1,18 +1,21 @@
 # Scoping Review: Inequalities on the Labour Market
 
 
 This repository contains all data, modelling and processing source code and the complete textual content to reproduce the scoping review study.
 The most up-to-date version of this repository can always be found [here](https://git.martyoeh.me/professional/wow-inequalities).
 
-Raw, intermediate and processed data can all be found in the `data/` directory:
-Raw data include the unmodified database queries using the scoping review search terms.
-Intermediate data are made up of the bibtex file produced by Zotero, after tagging and sorting in a Zotero library, ready to be re-imported into the application.
-Processed data include the fully extracted studies which make up the main sample for the review.
+Raw references, extracted and processed data can all be found in the `data/` directory:
+Reference data include the unmodified database queries using the scoping review search terms,
+and the bibtex file produced by Zotero after tagging and sorting in a Zotero library, ready to be re-imported into the application.
+Extracted data include the fully extracted studies which make up the main sample for the review.
+Processed data are ready to import into a dataframe or visualize in a report.
 
-The full article text and code can be found in the `scoping_review.qmd` file.
-It makes use of supplementary processing code which resides in the `src/` directory,
+All full texts and visualization code reside in the `manuscripts/` directory.
+The full working paper text and code can be found in the `scoping_review.qmd` file.
+The full article text can be found in the `article.qmd` file.
 
+They both make use of supplementary extraction and processing code which resides in the `src/` directory,
 mainly to load processed data from the `data/` directory and turn it into `.csv` data,
-as well as pre-processing those for visualization and validity ranking within the study.
+as well as pre-processing for visualization and validity ranking within the study.
 
 ## Execution and Reproduction
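The reorganised README describes a flow from extracted yml files to dataframe-ready processed data. A minimal sketch of that round trip, assuming `from_yml()` returns a pandas DataFrame (as the chained `.assign()` in the next hunk suggests); the `studies.csv` filename is invented for illustration:

import src.globals as g
from src.extract import load_data

# Load the fully extracted studies from data/extracted into a dataframe.
df = load_data.from_yml(g.EXTRACTED_DATA)
# Write a processed csv, ready to re-import or visualize in a report
# ("studies.csv" is a hypothetical output name, not taken from the repository).
df.to_csv(g.PROCESSED_DATA / "studies.csv", index=False)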
@@ -52,7 +52,7 @@ zot_df = pd.DataFrame([
 WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
 df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
 
-bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}/relevant")
+bib_df = (load_data.from_yml(f"{g.EXTRACTED_DATA}")
     .assign(
         doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
         zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
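To make the rewired `bib_df` chain concrete, here is a small self-contained example of the DOI extraction and the Zotero citation lookup; the URIs and citation counts are invented sample data, not taken from the repository:

import pandas as pd

# Invented sample inputs for illustration only.
bib_df = pd.DataFrame({"uri": ["https://doi.org/10.1000/xyz123", "https://dx.doi.org/10.5555/abc"]})
zot_df = pd.DataFrame({"cited": [12, 3]}, index=["10.1000/xyz123", "10.5555/abc"])

bib_df = bib_df.assign(
    # keep only the bare DOI by stripping the resolver prefix
    doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
    # look the DOI up in the Zotero-derived citation counts (indexed by DOI)
    zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
)
print(bib_df[["doi", "zot_cited"]])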
@@ -9,7 +9,11 @@ try:
 except ModuleNotFoundError:
     import yml as yaml # for directly running the package
 
-DEFAULT_YAML_PATH = Path("data/processed")
+try:
+    import src.globals as g
+    DEFAULT_YAML_PATH = g.EXTRACTED_DATA
+except ModuleNotFoundError:
+    DEFAULT_YAML_PATH = Path("data")
 
 
 def to_tsv(studies: list[dict]) -> str:
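In isolation, the new default-path selection behaves like the following sketch (surrounding module code omitted): when the package imports cleanly, the extracted-data directory from `src.globals` is used; when the module is run directly outside the package, it falls back to a plain `data` directory.

from pathlib import Path

try:
    # succeeds when running as part of the package from the project root
    import src.globals as g
    DEFAULT_YAML_PATH = g.EXTRACTED_DATA
except ModuleNotFoundError:
    # bare fallback for directly running this module on its own
    DEFAULT_YAML_PATH = Path("data")

print(f"loading study yml files from: {DEFAULT_YAML_PATH}")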
@@ -5,7 +5,7 @@ PROJECT_DIR = Path(os.getenv("QUARTO_PROJECT_DIR", "."))
 
 DATA_DIR = PROJECT_DIR.joinpath("data")
 
-PROCESSED_DATA = DATA_DIR.joinpath("processed")
-SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")
+EXTRACTED_DATA = DATA_DIR.joinpath("extracted")
 
 REFERENCE_DATA = DATA_DIR.joinpath("references")
+SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")
+PROCESSED_DATA = DATA_DIR.joinpath("processed")
@@ -20,7 +20,7 @@ from src.extract import load_data as load
 
 # each observation in a single dataframe
 df = meta.observations_with_metadata_df(
-    raw_observations = load.from_yml(g.PROCESSED_DATA),
+    raw_observations = load.from_yml(g.EXTRACTED_DATA),
     study_metadata = meta.bib_metadata_df(bib_sample),
     country_groups = meta.country_groups_df(Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
 )
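Downstream, the assembled `df` is what the report visualizes. A hedged example of the kind of summary it enables; the `country_group` column name is hypothetical and not taken from this diff:

# Count observations per World Bank country group ("country_group" is a
# hypothetical column name; the real schema is defined by the meta helpers).
obs_per_group = (
    df.groupby("country_group")
      .size()
      .sort_values(ascending=False)
)
print(obs_per_group)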