From 76ff71765cae8906e0f2d022af64d7d4d58cfc92 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Thu, 7 Dec 2023 20:40:54 +0100 Subject: [PATCH] chore(script): Lowercase all df columns In preparation for the processed sample, rename all DataFrame columns to their lowercase versions. --- scoping_review.qmd | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/scoping_review.qmd b/scoping_review.qmd index 9ce1858..b491ba2 100644 --- a/scoping_review.qmd +++ b/scoping_review.qmd @@ -477,13 +477,13 @@ for e in sample_relevant: ]) # FIXME do not just drop missing values -bib_df = pd.DataFrame(reformatted, columns = ["Year", "Author", "Title", "Type", "Cited", "Usage", "Keywords"]) +bib_df = pd.DataFrame(reformatted, columns = ["year", "author", "title", "type", "cited", "usage", "keywords"]) bib_df = bib_df.dropna(how="any") -bib_df["Date"] = pd.to_datetime(bib_df["Year"], format="mixed") -bib_df["Year"] = bib_df["Date"].dt.year +bib_df["date"] = pd.to_datetime(bib_df["year"], format="mixed") +bib_df["year"] = bib_df["date"].dt.year # only keep newer entries -bib_df = bib_df[bib_df["Year"] >= 2000] +bib_df = bib_df[bib_df["year"] >= 2000] # Add WB country grouping definitions (income group, world region) # TODO Re-enable for processed study pool @@ -498,12 +498,12 @@ bib_df = bib_df[bib_df["Year"] >= 2000] #| fig-cap: Publications per year # create dummy category for white or gray lit type (based on 'article' appearing in type) -bib_df["Type"].value_counts() -bib_df["Literature"] = np.where(bib_df["Type"].str.contains("article", case=False, regex=False), "white", "gray") -bib_df["Literature"] = bib_df["Literature"].astype("category") +bib_df["type"].value_counts() +bib_df["literature"] = np.where(bib_df["type"].str.contains("article", case=False, regex=False), "white", "gray") +bib_df["literature"] = bib_df["literature"].astype("category") # plot by year, distinguished by literature type -ax = sns.countplot(bib_df, x="Year", 
hue="Literature") +ax = sns.countplot(bib_df, x="year", hue="literature") ax.tick_params(axis='x', rotation=45) # ax.set_xlabel("") plt.tight_layout() @@ -525,9 +525,9 @@ First, in general, citation counts are slightly decreasing - as should generally ```{python} #| label: fig-citations-per-year-avg #| fig-cap: Average citations per year -bib_df["Cited"] = bib_df["Cited"].astype("int") -grpd = bib_df.groupby(["Year"], as_index=False)["Cited"].mean() -ax = sns.barplot(grpd, x="Year", y="Cited") +bib_df["cited"] = bib_df["cited"].astype("int") +grpd = bib_df.groupby(["year"], as_index=False)["cited"].mean() +ax = sns.barplot(grpd, x="year", y="cited") ax.tick_params(axis='x', rotation=45) plt.tight_layout() plt.show() @@ -555,17 +555,17 @@ Should they point towards gaps (or over-optimization) of sepcific areas of inter #| column: page interv_type_df = ( - bib_df["Keywords"] + bib_df["keywords"] .str.replace(r"\_", " ") .str.extractall(r"type::([\w ]+)") .reset_index(drop=True) - .rename(columns = {0:"Intervention type"}) + .rename(columns = {0:"intervention type"}) ) -sort_order = interv_type_df["Intervention type"].value_counts(ascending=False).index +sort_order = interv_type_df["intervention type"].value_counts(ascending=False).index fig = plt.figure() fig.set_size_inches(12, 4) -ax = sns.countplot(interv_type_df, x="Intervention type", order=sort_order) +ax = sns.countplot(interv_type_df, x="intervention type", order=sort_order) plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") plt.show() @@ -579,17 +579,17 @@ plt.show() #| column: page inequ_type_df = ( - bib_df["Keywords"] + bib_df["keywords"] .str.replace(r"\_", " ") .str.extractall(r"inequality::([\w ]+)") .reset_index(drop=True) - .rename(columns = {0:"Inequality type"}) + .rename(columns = {0:"inequality type"}) ) -sort_order = inequ_type_df["Inequality type"].value_counts(ascending=False).index +sort_order = inequ_type_df["inequality type"].value_counts(ascending=False).index 
fig = plt.figure() fig.set_size_inches(12, 4) -ax = sns.countplot(inequ_type_df, x="Inequality type", order=sort_order) +ax = sns.countplot(inequ_type_df, x="inequality type", order=sort_order) plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") plt.show()