Marty Oehme
4f9acd0816
Begin restructuring data dir by separating out references into their own data sub-dir containing only references and bibtex files.
119 lines
2.7 KiB
Text
119 lines
2.7 KiB
Text
---
|
|
bibliography: ../data/intermediate/zotero-library.bib
|
|
csl: /home/marty/documents/library/utilities/styles/APA-7.csl
|
|
papersize: A4
|
|
linestretch: 1.5
|
|
fontfamily: lmodern
|
|
fontsize: "12"
|
|
geometry:
|
|
- left=2.2cm
|
|
- right=3.5cm
|
|
- top=2.5cm
|
|
- bottom=2.5cm
|
|
toc: false
|
|
link-citations: true
|
|
link-bibliography: true
|
|
number-sections: false
|
|
lang: en
|
|
title: Scoping review on 'what works'
|
|
subtitle: Addressing inequalities in the World of Work
|
|
---
|
|
|
|
```{python}
|
|
#| echo: false
|
|
from pathlib import Path
|
|
import src.globals as g
|
|
# Data directory as defined by the project's `src.globals` module.
data_dir = g.DATA_DIR
|
|
|
|
## standard imports
|
|
from IPython.core.display import Markdown as md
|
|
import numpy as np
|
|
import pandas as pd
|
|
from matplotlib import pyplot as plt
|
|
import seaborn as sns
|
|
from tabulate import tabulate
|
|
```
|
|
|
|
```{python}
|
|
# Switch seaborn to its "whitegrid" style for the plots drawn afterwards.
sns.set_style("whitegrid")
|
|
```
|
|
|
|
```{python}
|
|
#| echo: false
|
|
# Load and parse the overall bibtex sample.
import bibtexparser

sample_dir = g.REFERENCE_DATA.joinpath("01_wos-sample_2023-11-02")
print(f"path: {sample_dir.absolute()}")

# Read every partial export verbatim and join the files with a single newline.
# Joining the result of `readlines()` with "\n" would double each line break,
# because every line already carries its own terminator; that inserts blank
# lines into multi-line field values. Sorting the glob result keeps the entry
# order reproducible, since `glob` gives no ordering guarantee.
bib_string = "\n".join(
    partial_bib.read_text(encoding="utf-8")
    for partial_bib in sorted(sample_dir.glob("*.bib"))
)
sample = bibtexparser.parse_string(bib_string)
|
|
```
|
|
|
|
## Description of results
|
|
|
|
```{python}
|
|
#| echo: false
|
|
|
|
# Number of entries parsed from the bibtex sample.
sample_size = len(sample.entries)

# Build the sentence first, then hand it to the Markdown renderer as the
# final expression of the cell so that it is displayed.
identification_summary = f"""
The exploratory execution of queries results in an initial sample of {sample_size} studies after the identification process.
"""
md(identification_summary)
|
|
```
|
|
|
|
Publication years and key metadata of the sampled studies:
|
|
|
|
```{python}
|
|
# Fields read from every parsed entry, in the order of the table columns below.
source_fields = (
    "Year",
    "Author",
    "Title",
    "Type",
    "Times-Cited",
    "Usage-Count-Since-2013",
)

# One row per entry, one cell per field.
reformatted = [[e[field] for field in source_fields] for e in sample.entries]
bib_df = pd.DataFrame(
    reformatted,
    columns=["Year", "Author", "Title", "Type", "Cited", "Usage"],
)

# Parse the year text into a timestamp, then store the numeric year again.
bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="%Y")
bib_df["Year"] = bib_df["Date"].dt.year
bib_df
|
|
```
|
|
|
|
```{python}
|
|
# RESTRICT FOR NEWER STUDIES
# Keep only studies published in 2000 or later. Take an explicit copy: columns
# are added to and overwritten on `bib_df` further down, which on a filtered
# selection would trigger pandas' SettingWithCopyWarning.
bib_df = bib_df[bib_df["Year"] >= 2000].copy()
|
|
```
|
|
|
|
Publications per year:
|
|
|
|
```{python}
|
|
# Count of publications per year, for studies published in 2000 or later.
ax = sns.countplot(data=bib_df[bib_df["Year"] >= 2000], x="Year")
# Rotate the year labels on the x axis by 45 degrees.
ax.tick_params(axis="x", rotation=45)
plt.tight_layout()
plt.show()
|
|
```
|
|
|
|
By type:
|
|
|
|
```{python}
|
|
# Classify each study as "white" literature when its type contains "article"
# (case-insensitive, plain substring match) and as "gray" literature otherwise.
is_article = bib_df["Type"].str.contains("article", case=False, regex=False)
bib_df["Literature"] = np.where(is_article, "white", "gray")
bib_df["Literature"] = bib_df["Literature"].astype("category")

# Show the number of studies per publication type. Only the last expression of
# a cell is rendered, so the counts have to come last to be displayed at all.
bib_df["Type"].value_counts()
|
|
```
|
|
|
|
Publications per year, by literature type (white vs. gray):
|
|
|
|
```{python}
|
|
# Count of publications per year (2000 or later), split by literature type.
ax = sns.countplot(
    data=bib_df[bib_df["Year"] >= 2000],
    x="Year",
    hue="Literature",
)
# Rotate the year labels on the x axis by 45 degrees.
ax.tick_params(axis="x", rotation=45)
plt.tight_layout()
plt.show()
|
|
```
|
|
|
|
Average number of citations per publication year:
|
|
|
|
```{python}
|
|
# Citation counts are converted to integers before averaging.
bib_df["Cited"] = bib_df["Cited"].astype("int")

# Mean citation count for each publication year, with "Year" kept as a column.
grpd = bib_df.groupby("Year", as_index=False)["Cited"].mean()

ax = sns.barplot(data=grpd, x="Year", y="Cited")
# Rotate the year labels on the x axis by 45 degrees.
ax.tick_params(axis="x", rotation=45)
plt.tight_layout()
plt.show()
|
|
```
|