afd-development-contexts/data-prep.qmd

243 lines
5.8 KiB
Text
Raw Normal View History

2022-08-20 14:44:25 +00:00
```{python}
#| echo: false
import matplotlib.pyplot as plt
def prepare_plot_colors():
    """Return the "Tableau 20" palette scaled for matplotlib.

    Returns:
        A new list of twenty ``(r, g, b)`` tuples whose channels are floats
        in the [0, 1] range (the 8-bit values divided by 255).
    """
    # "Tableau 20" colors as 8-bit RGB triples.
    tableau20 = (
        (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
        (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
        (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
        (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
        (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
    )
    # Scale RGB values to the [0, 1] range for matplotlib.
    return [(r / 255., g / 255., b / 255.) for r, g, b in tableau20]


# Module-level palette shared by the plotting code of this document.
colors = prepare_plot_colors()
```
```{python}
#| echo: false
import openpyxl
import pandas as pd
# Load the country-selected WIID extract, using its `id` column as the index.
# With parse_dates=True pandas will try to parse that index as dates.
df = pd.read_csv(
    'data/cleaned/UNU-WIDER-WIID/WIID-30JUN2022_cty-select.csv',
    index_col="id",
    parse_dates=True,
)
```
```{python}
#| echo: false
# Keep only observations from years after 1990.
df = df.loc[df['year'] > 1990]
# One sub-frame per country, selected by the value of the `c3` column.
ben, dji, uga, vnm = (
    df.loc[df['c3'] == country_code]
    for country_code in ("BEN", "DJI", "UGA", "VNM")
)
```
```{python}
# Set up the data extraction and figure drawing functions
import plotly.express as px
import plotly.io as pio
def gini_plot(country_df):
    """Draw and display a line chart of the Gini coefficient over time.

    Plots the ``gini`` column of ``country_df`` against its ``year`` column
    with point markers on a fixed 0-100 y-axis, then shows the figure.
    Returns nothing.
    """
    # `svg_render` is a flag defined outside this block. When it is truthy,
    # plotly's default renderer is switched to "png" (a global setting that
    # stays in effect for later figures as well).
    # NOTE(review): the flag is named "svg" but selects "png" - confirm intent.
    if svg_render:
        pio.renderers.default = "png"

    axis_labels = {"year": "Year", "gini": "Gini coefficient"}
    figure = px.line(
        country_df,
        x="year",
        y="gini",
        markers=True,
        labels=axis_labels,
        template="seaborn",
        range_y=[0, 100],
    )
    figure.update_traces(marker_size=10)
    figure.show()
def plot_consumption_gini_percapita(country_df):
    """Plot the Gini series for per-capita consumption observations.

    Keeps the rows of ``country_df`` whose ``resource`` value contains
    "Consumption" and whose ``scale`` value contains "Per capita", then
    passes them to ``gini_plot``. Returns nothing.
    """
    # na=False: rows with a missing value count as "no match". Without it
    # `str.contains` returns NaN for them and the boolean mask is rejected.
    # regex=False: the search terms are plain substrings, not patterns.
    is_consumption = country_df['resource'].str.contains(
        "Consumption", regex=False, na=False
    )
    is_percapita = country_df['scale'].str.contains(
        "Per capita", regex=False, na=False
    )
    gini_plot(country_df[is_consumption & is_percapita])
def plot_consumption_gini_percapita_ruralurban(country_df):
    """Plot the Gini series for a narrowed set of consumption observations.

    Keeps the rows of ``country_df`` that satisfy all of the substring
    filters below (consumption resource, per-capita scale, World Bank
    source, area coverage containing "All") and passes them to
    ``gini_plot``. Returns nothing.

    NOTE(review): the name says "ruralurban" but the area filter keeps rows
    whose ``areacovr`` contains "All" - confirm this is the intended subset.
    """
    substring_filters = (
        ('resource', "Consumption"),
        ('scale', "Per capita"),
        ('source', "World Bank"),
        ('areacovr', "All"),
    )
    selected = country_df
    for column, needle in substring_filters:
        # na=False: rows with a missing value count as "no match". Without
        # it `str.contains` returns NaN for them and the mask is rejected.
        # regex=False: the search terms are plain substrings, not patterns.
        matches = selected[column].str.contains(needle, regex=False, na=False)
        selected = selected[matches]
    gini_plot(selected)
```
```{python}
## Set up functions to grab development aid by type of donating body
## ODA donor type map, see DAC code sheet xlsx
# Maps a numeric donor code to its donor type label
# ('dac', 'multilat', 'nondac' or 'private'). The codes are grouped by label
# here; the resulting dict has one entry per code, in the order listed.
donortypes = {
    donor_code: donortype
    for donortype, donor_codes in (
        ('dac', (
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
            18, 20, 21, 22, 40, 50, 61, 68, 69, 75, 76,
            301, 302, 701, 742, 801, 820, 918,
        )),
        ('multilat', (
            104, 807, 811, 812,
            901, 902, 903, 905, 906, 907, 909,
            913, 914, 915,
            921, 923, 926, 928,
            932, 940, 944, 948,
            951, 952, 953, 954, 956, 958, 959,
            960, 963, 964, 966, 967,
            971, 974, 976, 978, 979,
            980, 981, 982, 983, 988,
            990, 992, 997,
            1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020,
            1023, 1024, 1025,
            1037, 1038,
            1311, 1312, 1313,
        )),
        ('nondac', (
            30, 45, 55, 62, 70, 72, 77, 82, 83, 84, 87,
            130, 133, 358,
            543, 546, 552, 561, 566, 576,
            611, 613,
            732, 764, 765,
        )),
        # Contiguous block of codes 1601 through 1639.
        ('private', range(1601, 1640)),
    )
    for donor_code in donor_codes
}
def totals_by_donortype(oda_frame, *, recipient=236, donortype_map=None):
    """Select one recipient's ODA rows and label each with its donor type.

    Args:
        oda_frame: DataFrame with the columns ``RECIPIENT``, ``SECTOR``,
            ``FLOW``, ``CHANNEL``, ``AMOUNTTYPE``, ``FLOWTYPE``, ``AIDTYPE``
            and ``DONOR``.
        recipient: Value of ``RECIPIENT`` to keep. Defaults to 236, the
            value this function was originally hard-coded to.
        donortype_map: Mapping from donor code to donor type label. Defaults
            to the module-level ``donortypes`` dict.

    Returns:
        A copy of the matching rows with an added ``Donortype`` column.
        Donor codes missing from the mapping get NaN there. ``oda_frame``
        itself is not modified.
    """
    if donortype_map is None:
        donortype_map = donortypes

    # The mask must be built from `oda_frame` itself. Building it from an
    # unrelated global frame filters on the wrong data or fails outright.
    selection = (
        (oda_frame['RECIPIENT'] == recipient)
        & (oda_frame['SECTOR'] == 1000)
        & (oda_frame['FLOW'] == 100)
        & (oda_frame['CHANNEL'] == 100)
        & (oda_frame['AMOUNTTYPE'] == 'D')
        & (oda_frame['FLOWTYPE'] == 112)
        # AIDTYPE contains mixed int and string representations; only the
        # string "100" is matched here.
        # NOTE(review): confirm integer 100 rows are meant to be excluded.
        & (oda_frame['AIDTYPE'] == "100")
    )
    # Copy so that adding the column does not write into `oda_frame`.
    donortotals = oda_frame.loc[selection].copy()
    donortotals["Donortype"] = donortotals["DONOR"].map(donortype_map)
    return donortotals
```