chore(script): Lowercase all df columns

In preparation for the processed sample, all columns were renamed to their lowercase versions.
2023-12-07 20:40:54 +01:00 · 2023-12-07 20:40:54 +01:00 · 76ff71765c
commit 76ff71765c
parent d88c733b6d
1 changed files with 19 additions and 19 deletions
--- a/scoping_review.qmd
+++ b/scoping_review.qmd
@ -477,13 +477,13 @@ for e in sample_relevant:
                        ])

 # FIXME do not just drop missing values
-bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage", "Keywords"])
+bib_df = pd.DataFrame(reformatted, columns = ["year", "author", "title", "type", "cited", "usage", "keywords"])
 bib_df = bib_df.dropna(how="any")
-bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="mixed")
-bib_df["Year"] = bib_df["Date"].dt.year
+bib_df["date"] = pd.to_datetime(bib_df["year"], format="mixed")
+bib_df["year"] = bib_df["date"].dt.year

 # only keep newer entries
-bib_df = bib_df[bib_df["Year"] >= 2000]
+bib_df = bib_df[bib_df["year"] >= 2000]

 # Add WB country grouping definitions (income group, world region)
 # TODO Re-enable for processed study pool
@ -498,12 +498,12 @@ bib_df = bib_df[bib_df["Year"] >= 2000]
 #| fig-cap: Publications per year

 # create dummy category for white or gray lit type (based on 'article' appearing in type)
-bib_df["Type"].value_counts()
-bib_df["Literature"] = np.where(bib_df["Type"].str.contains("article", case=False, regex=False), "white", "gray")
-bib_df["Literature"] = bib_df["Literature"].astype("category")
+bib_df["type"].value_counts()
+bib_df["literature"] = np.where(bib_df["type"].str.contains("article", case=False, regex=False), "white", "gray")
+bib_df["literature"] = bib_df["literature"].astype("category")

 # plot by year, distinguished by literature type
-ax = sns.countplot(bib_df, x="Year", hue="Literature")
+ax = sns.countplot(bib_df, x="year", hue="literature")
 ax.tick_params(axis='x', rotation=45)
 # ax.set_xlabel("")
 plt.tight_layout()
@ -525,9 +525,9 @@ First, in general, citation counts are slightly decreasing - as should generally
 ```{python}
 #| label: fig-citations-per-year-avg
 #| fig-cap: Average citations per year
-bib_df["Cited"] = bib_df["Cited"].astype("int")
-grpd = bib_df.groupby(["Year"], as_index=False)["Cited"].mean()
-ax = sns.barplot(grpd, x="Year", y="Cited")
+bib_df["cited"] = bib_df["cited"].astype("int")
+grpd = bib_df.groupby(["year"], as_index=False)["cited"].mean()
+ax = sns.barplot(grpd, x="year", y="cited")
 ax.tick_params(axis='x', rotation=45)
 plt.tight_layout()
 plt.show()
@ -555,17 +555,17 @@ Should they point towards gaps (or over-optimization) of sepcific areas of inter
 #| column: page

 interv_type_df = (
-    bib_df["Keywords"]
+    bib_df["keywords"]
    .str.replace(r"\_", " ")
    .str.extractall(r"type::([\w ]+)")
    .reset_index(drop=True)
-    .rename(columns = {0:"Intervention type"})
+    .rename(columns = {0:"intervention type"})
 )

-sort_order = interv_type_df["Intervention type"].value_counts(ascending=False).index
+sort_order = interv_type_df["intervention type"].value_counts(ascending=False).index
 fig = plt.figure()
 fig.set_size_inches(12, 4)
-ax = sns.countplot(interv_type_df, x="Intervention type", order=sort_order)
+ax = sns.countplot(interv_type_df, x="intervention type", order=sort_order)
 plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
 plt.show()
@ -579,17 +579,17 @@ plt.show()
 #| column: page

 inequ_type_df = (
-    bib_df["Keywords"]
+    bib_df["keywords"]
    .str.replace(r"\_", " ")
    .str.extractall(r"inequality::([\w ]+)")
    .reset_index(drop=True)
-    .rename(columns = {0:"Inequality type"})
+    .rename(columns = {0:"inequality type"})
 )

-sort_order = inequ_type_df["Inequality type"].value_counts(ascending=False).index
+sort_order = inequ_type_df["inequality type"].value_counts(ascending=False).index
 fig = plt.figure()
 fig.set_size_inches(12, 4)
-ax = sns.countplot(inequ_type_df, x="Inequality type", order=sort_order)
+ax = sns.countplot(inequ_type_df, x="inequality type", order=sort_order)
 plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
 plt.show()