fix(script): Correctly import relevant bibtex entries

This commit is contained in:
Marty Oehme 2023-12-06 16:42:32 +01:00
parent 1b660f9621
commit 7a3ed53ea1
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@ -28,6 +28,8 @@ zotero:
```{python}
#| echo: false
from pathlib import Path
from bibtexparser.model import Field
# Root of the data tree, relative to the directory the document is rendered from.
DATA_DIR = Path("./02-data")
# Sub-directories of DATA_DIR for the raw and the intermediate sample.
RAW_SAMPLE = DATA_DIR / "raw"
WORKING_SAMPLE = DATA_DIR / "intermediate"
@ -616,10 +618,19 @@ Keeping in mind that these results are not yet screened for their full relevance
#| label: fig-publications-per-year
#| fig-cap: Publications per year
# Accumulates one list per bibliography entry, in the order
# year, author, title, type, times cited, usage count.
reformatted = []
# NOTE(review): every field is read by direct subscription, so an entry
# lacking any of these keys presumably raises here - confirm against the
# type of `sample_raw.entries`.
for e in sample_raw.entries:
reformatted.append([e["Year"], e["Author"], e["Title"], e["Type"], e["Times-Cited"], e["Usage-Count-Since-2013"]])
# Fields to extract from every relevant entry, paired with the value used when
# the entry does not carry that field. Order matches the row layout expected
# by the code that consumes `reformatted`.
# NOTE(review): "0000" is kept as the original placeholder for a missing year,
# but it is presumably not a parseable year for later date conversion -
# consider None so such rows are handled like other missing values.
wanted_fields = (
    ("year", "0000"),
    ("author", None),
    ("title", None),
    ("type", None),
    ("times-cited", None),
    ("usage-count-since-2013", None),
)

# Append one row per relevant entry.
for e in sample_relevant:
    ed = e.fields_dict
    # Every lookup falls back to a placeholder Field so `.value` is always
    # read from a Field object. Previously the "year" fallback was a plain
    # str and "author"/"title" had no fallback at all, so a missing field
    # raised AttributeError on `.value`.
    reformatted.append([
        ed.get(key, Field(key=key, value=default)).value
        for key, default in wanted_fields
    ])
# FIXME do not just drop missing values
# Turn the collected rows into a DataFrame with one named column per field.
bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage"])
# NOTE(review): "Date" is assigned twice in this view; the later
# format="mixed" assignment overwrites this one. This strict "%Y" parse looks
# like a leftover of the previous version - confirm and remove if so.
bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="%Y")
# Discard every row that has a missing value in any column.
bib_df = bib_df.dropna(how="any")
# Parse the year strings into datetimes, letting pandas infer the format per value.
bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="mixed")
# Replace the original year values with the year component of the parsed date.
bib_df["Year"] = bib_df["Date"].dt.year
# only keep newer entries