---
bibliography: ../02-data/intermediate/zotero-library.bib
csl: /home/marty/documents/library/utilities/styles/APA-7.csl
papersize: A4
linestretch: 1.5
fontfamily: lmodern
fontsize: "12"
geometry:
- left=2.2cm
- right=3.5cm
- top=2.5cm
- bottom=2.5cm
toc: false
link-citations: true
link-bibliography: true
number-sections: false
lang: en
title: Scoping review on 'what works'
subtitle: Addressing inequalities in the World of Work
---

```{python}
#| echo: false
from pathlib import Path
data_dir = Path("../02-data")

## standard imports
from IPython.core.display import Markdown as md
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from tabulate import tabulate
```

```{python}
sns.set_style("whitegrid")
```

```{python}
#| echo: false
# load and parse the overall bibtex sample
import bibtexparser

bib_string = ""
print(f"path: {data_dir.joinpath('raw/01_wos-sample_2023-11-02').absolute()}")
# concatenate all partial .bib exports into a single string before parsing
for partial_bib in data_dir.joinpath("raw/01_wos-sample_2023-11-02").glob("*.bib"):
    with open(partial_bib) as f:
        bib_string += "\n".join(f.readlines())
sample = bibtexparser.parse_string(bib_string)
```

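Since the parser keeps blocks it cannot handle rather than failing outright, a quick look at the parse result can flag silently skipped records. The following check is an addition and assumes the bibtexparser v2 `Library` API used above, where both `entries` and `failed_blocks` are available:

```{python}
# sanity check on the parse: how many entries were read, and how many blocks failed
print(f"parsed entries: {len(sample.entries)}")
print(f"failed blocks: {len(sample.failed_blocks)}")
```
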
## Description of results

```{python}
#| echo: false

sample_size = len(sample.entries)
md(f"""
The exploratory execution of the search queries results in an initial sample of {sample_size} studies after the identification process.
""")
```

Publication years and bibliographic details of the sampled studies:

```{python}
# extract the relevant fields from each entry into a dataframe
reformatted = []
for e in sample.entries:
    reformatted.append([e["Year"], e["Author"], e["Title"], e["Type"], e["Times-Cited"], e["Usage-Count-Since-2013"]])
bib_df = pd.DataFrame(reformatted, columns=["Year", "Author", "Title", "Type", "Cited", "Usage"])
bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="%Y")
bib_df["Year"] = bib_df["Date"].dt.year
bib_df
```

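The strict `format="%Y"` conversion above raises on the first record with a malformed or missing year. Should that happen with a future export, a coercing variant is one way out; the chunk below is only a sketch and is not executed as part of the pipeline (`errors="coerce"` is standard pandas, dropping the affected rows is an assumption about how such records should be treated):

```{python}
#| eval: false
# coerce unparseable years to NaT instead of raising, then drop those records
bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="%Y", errors="coerce")
bib_df = bib_df.dropna(subset=["Date"])
bib_df["Year"] = bib_df["Date"].dt.year
```
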
```{python}
# restrict the sample to studies published in 2000 or later
bib_df = bib_df[bib_df["Year"] >= 2000]
```

Publications per year:

```{python}
ax = sns.countplot(bib_df[bib_df["Year"] >= 2000], x="Year")
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
```

Classifying entries as white or gray literature based on their publication type:

```{python}
bib_df["Type"].value_counts()
# treat anything whose type mentions 'article' as white literature, the rest as gray
bib_df["Literature"] = np.where(bib_df["Type"].str.contains("article", case=False, regex=False), "white", "gray")
bib_df["Literature"] = bib_df["Literature"].astype("category")
```

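As a small added check (not part of the original chunk), the distribution of the new column shows how the sample splits between the two categories:

```{python}
# distribution of the white/gray literature classification created above
bib_df["Literature"].value_counts()
```
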
Publications per year, split by literature type:

```{python}
ax = sns.countplot(bib_df[bib_df["Year"] >= 2000], x="Year", hue="Literature")
ax.tick_params(axis='x', rotation=45)
# ax.set_xlabel("")
plt.tight_layout()
plt.show()
```

Average number of citations per publication, by year:

```{python}
# mean citation count per publication year
bib_df["Cited"] = bib_df["Cited"].astype("int")
grpd = bib_df.groupby(["Year"], as_index=False)["Cited"].mean()
ax = sns.barplot(grpd, x="Year", y="Cited")
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
```

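The Web of Science records also carry a usage count, which is loaded into the `Usage` column above but not analysed further. Purely as an optional sketch (not executed here, and assuming the usage counts are numeric), the same per-year aggregation could be applied to that column:

```{python}
#| eval: false
# mean usage count per publication year, mirroring the citation plot above
bib_df["Usage"] = pd.to_numeric(bib_df["Usage"], errors="coerce")
grpd_usage = bib_df.groupby(["Year"], as_index=False)["Usage"].mean()
ax = sns.barplot(grpd_usage, x="Year", y="Usage")
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
```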