diff --git a/scoping_review.qmd b/scoping_review.qmd
index 8d0a2e7..48c21ee 100644
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@@ -28,6 +28,8 @@ zotero:
 ```{python}
 #| echo: false
 from pathlib import Path
+
+from bibtexparser.model import Field
 DATA_DIR=Path("./02-data")
 RAW_SAMPLE=DATA_DIR.joinpath("raw")
 WORKING_SAMPLE=DATA_DIR.joinpath("intermediate")
@@ -616,10 +618,21 @@ Keeping in mind that these results are not yet screened for their full relevance
 #| label: fig-publications-per-year
 #| fig-cap: Publications per year
 reformatted = []
-for e in sample_raw.entries:
-    reformatted.append([e["Year"], e["Author"], e["Title"], e["Type"], e["Times-Cited"], e["Usage-Count-Since-2013"]])
+for e in sample_relevant:
+    ed = e.fields_dict
+    # Every lookup falls back to an empty Field so that a missing key yields
+    # None (dropped below) instead of raising AttributeError on `.value`.
+    reformatted.append([ed.get("year", Field(key="year", value=None)).value,
+                        ed.get("author", Field(key="author", value=None)).value,
+                        ed.get("title", Field(key="title", value=None)).value,
+                        ed.get("type", Field(key="type", value=None)).value,
+                        ed.get("times-cited", Field(key="times-cited", value=None)).value,
+                        ed.get("usage-count-since-2013", Field(key="usage-count-since-2013", value=None)).value,
+                        ])
+# FIXME do not just drop missing values
 bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage"])
-bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="%Y")
+bib_df = bib_df.dropna(how="any")
+bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="mixed")
 bib_df["Year"] = bib_df["Date"].dt.year
 
 # only keep newer entries