chore(repo): Move yml files to extracted data dir
This commit is contained in:
parent
3ec7dcd1bb
commit
c5df5c01a2
66 changed files with 21 additions and 14 deletions
19
README.md
19
README.md
|
@ -1,18 +1,21 @@
|
|||
# Scoping Review: Inequalities on the Labour Market
|
||||
|
||||
|
||||
This repository contains all data, modelling and processing source code and the complete textual content to reproduce the scoping review study.
|
||||
The most up-to-date version of this repository can always be found [here](https://git.martyoeh.me/professional/wow-inequalities).
|
||||
|
||||
Raw, intermediate and processed data can all be found in the `data/` directory:
|
||||
Raw data include the unmodified database queries using the scoping review search terms.
|
||||
Intermediate data are made up of the bibtex file produced by Zotero, after tagging and sorting in a Zotero library, ready to be re-imported into the application.
|
||||
Processed data include the fully extracted studies which make up the main sample for the review.
|
||||
Raw references, extracted and processed data can all be found in the `data/` directory:
|
||||
Reference data include the unmodified database queries using the scoping review search terms,
|
||||
and the bibtex file produced by Zotero after tagging and sorting in a Zotero library, ready to be re-imported into the application.
|
||||
Extracted data include the fully extracted studies which make up the main sample for the review.
|
||||
Processed data are ready to import into a dataframe or visualize in a report.
|
||||
|
||||
The full article text and code can be found in the `scoping_review.qmd` file.
|
||||
It makes use of supplementary processing code which resides in the `src/` directory,
|
||||
All full texts and visualization code reside in the `manuscripts/` directory.
|
||||
The full working paper text and code can be found in the `scoping_review.qmd` file.
|
||||
The full article text can be found in the `article.qmd` file.
|
||||
|
||||
They both make use of supplementary extraction and processing code which resides in the `src/` directory,
|
||||
mainly to load extracted data from the `data/` directory and turn it into `.csv` data,
|
||||
as well as pre-processing those for visualization and validity ranking within the study.
|
||||
as well as to pre-process it for visualization and validity ranking within the study.
|
||||
|
||||
## Execution and Reproduction
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ zot_df = pd.DataFrame([
|
|||
WB_COUNTRY_GROUPS_FILE = Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx").resolve()
|
||||
df_country_groups = pd.read_excel(WB_COUNTRY_GROUPS_FILE).set_index("Economy")
|
||||
|
||||
bib_df = (load_data.from_yml(f"{g.PROCESSED_DATA}/relevant")
|
||||
bib_df = (load_data.from_yml(f"{g.EXTRACTED_DATA}")
|
||||
.assign(
|
||||
doi=lambda _df: _df["uri"].str.extract(r"https?://(?:dx\.)?doi\.org/(.*)", expand=False),
|
||||
zot_cited=lambda _df: _df["doi"].map(zot_df["cited"]),
|
||||
|
|
|
@ -9,7 +9,11 @@ try:
|
|||
except ModuleNotFoundError:
|
||||
import yml as yaml # for directly running the package
|
||||
|
||||
DEFAULT_YAML_PATH = Path("data/processed")
|
||||
try:
|
||||
import src.globals as g
|
||||
DEFAULT_YAML_PATH = g.EXTRACTED_DATA
|
||||
except ModuleNotFoundError:
|
||||
DEFAULT_YAML_PATH = Path("data")
|
||||
|
||||
|
||||
def to_tsv(studies: list[dict]) -> str:
|
||||
|
|
|
@ -5,7 +5,7 @@ PROJECT_DIR = Path(os.getenv("QUARTO_PROJECT_DIR", "."))
|
|||
|
||||
DATA_DIR = PROJECT_DIR.joinpath("data")
|
||||
|
||||
PROCESSED_DATA = DATA_DIR.joinpath("processed")
|
||||
SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")
|
||||
|
||||
EXTRACTED_DATA = DATA_DIR.joinpath("extracted")
|
||||
REFERENCE_DATA = DATA_DIR.joinpath("references")
|
||||
SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")
|
||||
PROCESSED_DATA = DATA_DIR.joinpath("processed")
|
||||
|
|
|
@ -20,7 +20,7 @@ from src.extract import load_data as load
|
|||
|
||||
# each observation in a single dataframe
|
||||
df = meta.observations_with_metadata_df(
|
||||
raw_observations = load.from_yml(g.PROCESSED_DATA),
|
||||
raw_observations = load.from_yml(g.EXTRACTED_DATA),
|
||||
study_metadata = meta.bib_metadata_df(bib_sample),
|
||||
country_groups = meta.country_groups_df(Path(f"{g.SUPPLEMENTARY_DATA}/wb-country-groupings.xlsx")),
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue