chore(repo): Move references to reference data dir

Begin restructuring data dir by separating out references into their own
data sub-dir containing only references and bibtex files.
This commit is contained in:
Marty Oehme 2024-07-16 15:59:41 +02:00
parent 2a1fc9611f
commit 4f9acd0816
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
177 changed files with 77679 additions and 40 deletions

View file

@ -1,11 +1,12 @@
from pathlib import Path
import os
from pathlib import Path
PROJECT_DIR=Path(os.getenv("QUARTO_PROJECT_DIR", "."))
PROJECT_DIR = Path(os.getenv("QUARTO_PROJECT_DIR", "."))
DATA_DIR=PROJECT_DIR.joinpath("data")
DATA_DIR = PROJECT_DIR.joinpath("data")
RAW_DATA=DATA_DIR.joinpath("raw")
WORKING_DATA=DATA_DIR.joinpath("intermediate")
PROCESSED_DATA=DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA=DATA_DIR.joinpath("supplementary")
WORKING_DATA = DATA_DIR.joinpath("intermediate")
PROCESSED_DATA = DATA_DIR.joinpath("processed")
SUPPLEMENTARY_DATA = DATA_DIR.joinpath("supplementary")
REFERENCE_DATA = DATA_DIR.joinpath("references")

View file

@ -1,8 +1,4 @@
from src.process import add_metadata as meta
from src import globals as g
bib_sample_raw_db = meta.bib_library_from_dir(g.RAW_DATA)
bib_sample = meta.bib_library_from_dir(g.WORKING_DATA)
from src.process.generate_dataframes import bib_sample_raw_db, bib_sample
class PrismaNumbers:
nr_database_query_raw = len(bib_sample_raw_db.entries)

View file

@ -31,6 +31,19 @@ def bib_library_from_dir(
return bibtexparser.parse_string(bib_string)
def bib_library_from_file(
    file: Path, disable_warnings: bool = True
) -> bibtexparser.Library:
    """Parse a single bibtex file into a bibtexparser library.

    Args:
        file: Location of the bibtex file to read.
        disable_warnings: When True (the default), parsing runs inside the
            ``all_logging_disabled`` context manager (presumably silencing
            parser log output -- confirm against its definition); when
            False a no-op ``nullcontext`` is used instead.

    Returns:
        The library produced by ``bibtexparser.parse_string`` for the
        file's contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(file, encoding="utf-8") as f:
        bib_string = f.read()

    cm = all_logging_disabled if disable_warnings else nullcontext
    with cm():
        return bibtexparser.parse_string(bib_string)
def bib_metadata_df(sample: bibtexparser.Library) -> pd.DataFrame:
"""Returns dataframe with relevant metadata extracted from a bibtex library.

View file

@ -11,9 +11,9 @@ import src.globals as g
from src.process import add_metadata as meta
# raw database-search results
bib_sample_raw_db = meta.bib_library_from_dir(g.RAW_DATA)
bib_sample_raw_db = meta.bib_library_from_dir(g.REFERENCE_DATA)
# the complete library of sampled (and working) literature
bib_sample = meta.bib_library_from_dir(g.WORKING_DATA)
bib_sample = meta.bib_library_from_file(g.REFERENCE_DATA.joinpath("zotero-library.bib"))
# load relevant studies
from src.extract import load_data as load