feat(code): Add examples of list handling notebook
Extracts interventions/inequalities and explodes them for value counts.
This commit is contained in:
parent
85497854c1
commit
b5e467e016
1 changed files with 99 additions and 0 deletions
|
@ -21,3 +21,102 @@ Get inequalities:
|
||||||
```{python}
|
```{python}
|
||||||
# Count how often each ';'-separated entry of the inequality column occurs:
# split every cell, give each part its own row, trim whitespace, then tally.
(
    df["inequality"]
    .str.split(";")
    .explode()
    .str.strip()
    .value_counts()
)
|
# Count how often each ';'-separated entry of the inequality column occurs:
# split every cell, give each part its own row, trim whitespace, then tally.
(
    df["inequality"]
    .str.split(";")
    .explode()
    .str.strip()
    .value_counts()
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
# Take the first entry of every column per (author, year, title) group and
# join the raw intervention column onto the result.
# NOTE(review): the grouped frame is indexed by the three grouping keys while
# df['intervention'] keeps df's own index, and the grouped frame presumably
# already has an `intervention` column - confirm that this join aligns (and
# runs) as intended.
df.groupby(["author", "year", "title"]).first().join(df['intervention'])
|
||||||
|
```
|
||||||
|
|
||||||
|
Count unique values using a method chain:
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
# Count in how many studies each inequality entry occurs (once per study).
(
    df.groupby(["author", "year", "title"])
    # Collect the entries of all rows that belong to the same study.
    .agg({"inequality": lambda _col: "; ".join(_col)})
    # No `drop_duplicates()` at this point: the grouping keys live in the
    # index, so it would merge distinct studies whose joined strings happen
    # to be identical and thereby undercount.
    ["inequality"]
    # Split the joined string back into whitespace-trimmed entries; the set
    # removes repeats within a study and `sorted` keeps the order stable.
    .apply(lambda _cell: sorted({x.strip() for x in _cell.split(";")}))
    # `explode` only expands list-likes and leaves plain strings untouched,
    # so it has to run on the split lists rather than on the joined string.
    .explode()
    .value_counts()
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Merge the dataset so that each row represents a *STUDY* rather than an *OBSERVATION*.
|
||||||
|
Any additional columns that are needed can be aggregated in the same way, by adding them to the `agg` call below.
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
def _join_group(_col):
    """Concatenate all values of one group's column, separated by '; '."""
    return "; ".join(_col)


def _unique_entries(_cell):
    """Return the set of whitespace-trimmed parts of a ';'-separated string."""
    return set(part.strip() for part in _cell.split(";"))


# One row per (author, year, title): each listed column holds the '; '-joined
# values of all rows in that group.
_study_rows = (
    df.groupby(["author", "year", "title"])
    .agg(
        {
            "intervention": _join_group,
            "inequality": _join_group,
            "date": _join_group,
            "findings": _join_group,
            # "region": _join_group,  # only accessible when merging with WB data
            # "income_group": _join_group,
        }
    )
    .reset_index()
    .drop_duplicates()
)

# Turn the joined strings into sets so that entries repeated across the rows
# of a study collapse into one.
by_study = _study_rows.assign(
    intervention=_study_rows["intervention"].apply(_unique_entries),
    inequality=_study_rows["inequality"].apply(_unique_entries),
)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
def _unique_entries(_cell):
    """Return the set of whitespace-trimmed parts of a ';'-separated string."""
    return set(part.strip() for part in _cell.split(";"))


# One row per (author, year, title), keeping the first entry of every other
# column within the group.
_first_rows = (
    df.groupby(["author", "year", "title"])
    .first()
    .reset_index()
    .drop_duplicates()
)

# Turn the ';'-separated strings into sets of distinct entries.
# NOTE(review): because of `.first()` these sets are built from a single value
# per study, not from all of its rows - confirm that this is intended.
by_study = _first_rows.assign(
    intervention=_first_rows["intervention"].apply(_unique_entries),
    inequality=_first_rows["inequality"].apply(_unique_entries),
)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{python}
|
||||||
|
import re

# Matches one parenthesised remark, e.g. "(pilot)". The negated character
# class stops at the first ")", so text between two separate remarks is kept;
# a greedy ".*" would delete everything from the first "(" to the last ")"
# of the joined string. Assumes remarks are not nested - TODO confirm.
_PAREN_REMARK = re.compile(r"\([^)]*\)")

# One row per (study, intervention): join the interventions of all rows of a
# study, drop parenthesised remarks, split on ";" and de-duplicate per study.
by_intervention = (
    df.groupby(["author", "year", "title"])
    .agg({"intervention": lambda _col: "; ".join(_col)})
    .reset_index()
    .drop_duplicates()
    .assign(
        intervention=lambda _df: _df["intervention"].apply(
            # sorted() makes the row order after explode reproducible, which a
            # bare set would not guarantee between sessions.
            lambda _cell: sorted(
                {x.strip() for x in _PAREN_REMARK.sub("", _cell).split(";")}
            )
        ),
    )
    .explode("intervention")
)
# Most frequent intervention first; reused as the bar order of the plot.
sort_order = by_intervention["intervention"].value_counts().index

fig = plt.figure()
fig.set_size_inches(6, 3)
ax = sns.countplot(by_intervention, x="intervention", order=sort_order)
# Slant the tick labels so that long intervention names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
plt.show()
# Release the exploded frame once the figure has been drawn.
by_intervention = None
|
||||||
|
```
|
||||||
|
|
Loading…
Reference in a new issue