afd/data-prep.qmd

```{python}
#| echo: false
import matplotlib.pyplot as plt
def prepare_plot_colors():
    """Return the "Tableau 20" palette as matplotlib-ready RGB tuples.

    Returns:
        A new list of 20 ``(r, g, b)`` tuples whose components are floats
        scaled from the 0-255 range into [0, 1].
    """
    # "Tableau 20" colors as 8-bit RGB triples.
    tableau20 = (
        (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
        (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
        (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
        (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
        (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
    )

    # Scale RGB values to the [0, 1] range for matplotlib.
    return [(r / 255., g / 255., b / 255.) for r, g, b in tableau20]

# Build the normalized Tableau 20 palette once for this document.
colors=prepare_plot_colors()
```

```{python}
#| echo: false
import openpyxl
import pandas as pd

# Load the country-selected WIID extract, using the `id` column as the index.
# NOTE(review): parse_dates=True asks pandas to try parsing the index (`id`)
# as dates — confirm that is intended.
df = pd.read_csv('data/cleaned/UNU-WIDER-WIID/WIID-30JUN2022_cty-select.csv', index_col="id", parse_dates=True)
```

```{python}
#| echo: false
# Keep only observations after 1990, then split the remaining rows into one
# frame per country using the three-letter code stored in column `c3`
# (presumably ISO 3166-1 alpha-3 — confirm against the WIID codebook).
df = df.loc[df['year'] > 1990]
ben, dji, uga, vnm = (
    df.loc[df['c3'] == country_code]
    for country_code in ("BEN", "DJI", "UGA", "VNM")
)
```

```{python}
from IPython.display import Markdown
from tabulate import tabulate
```

```{python}
# Set up the data extraction and figure drawing functions
import plotly.express as px
import plotly.io as pio

def gini_plot(country_df):
    """Draw and display a line chart of the Gini coefficient over time.

    Args:
        country_df: frame with a ``year`` column (x axis) and a ``gini``
            column (y axis).

    The chart uses the "seaborn" template, a fixed y range of 0-100 and
    markers of size 10. Nothing is returned; the figure is shown directly.
    """
    # NOTE(review): `svg_render` is read from the enclosing (global) scope and,
    # despite its name, selects the "png" renderer — confirm this is intended.
    if svg_render:
        pio.renderers.default = "png"

    axis_labels = {"year": "Year", "gini": "Gini coefficient"}
    figure = px.line(
        country_df,
        x="year",
        y="gini",
        markers=True,
        labels=axis_labels,
        template="seaborn",
        range_y=[0,100],
    )
    figure.update_traces(marker_size=10)
    figure.show()

def plot_consumption_gini_percapita(country_df):
    """Plot the Gini series for per-capita consumption rows of one country.

    Args:
        country_df: frame with string columns ``resource`` and ``scale`` plus
            the columns that ``gini_plot`` draws.

    Rows are kept when ``resource`` contains "Consumption" and ``scale``
    contains "Per capita"; the result is handed to ``gini_plot``.
    """
    selection = country_df
    # Apply the substring filters one after another, each on the rows that
    # survived the previous filter.
    for column, needle in (("resource", "Consumption"), ("scale", "Per capita")):
        selection = selection[selection[column].str.contains(needle)]
    gini_plot(selection)

def plot_consumption_gini_percapita_ruralurban(country_df):
    """Plot the Gini series for World Bank per-capita consumption rows.

    Args:
        country_df: frame with string columns ``resource``, ``scale``,
            ``source`` and ``areacovr`` plus the columns ``gini_plot`` draws.

    Rows are kept when ``resource`` contains "Consumption", ``scale`` contains
    "Per capita", ``source`` contains "World Bank" and ``areacovr`` contains
    "All"; the result is handed to ``gini_plot``.
    """
    # NOTE(review): the name says rural/urban, but the last filter keeps rows
    # whose `areacovr` contains "All" — confirm which coverage is wanted.
    substring_filters = (
        ("resource", "Consumption"),
        ("scale", "Per capita"),
        ("source", "World Bank"),
        ("areacovr", "All"),
    )
    selection = country_df
    # Filters are applied sequentially, each on the already-reduced frame.
    for column, needle in substring_filters:
        selection = selection[selection[column].str.contains(needle)]
    gini_plot(selection)
```

```{python}
## Set up functions to grab development aids by type of donating body
## ODA donor type map, see DAC code sheet xlsx
# Maps a numeric donor code to one of four donor-type labels: 'dac', 'mlt',
# 'nondac' or 'private'. The codes are grouped per label below and expanded
# into a flat {code: label} dict; insertion order follows the grouping.
# ('mlt' presumably stands for multilateral donors — confirm in the DAC code
# sheet.)
donortypes = {
    donor_code: donor_label
    for donor_label, donor_codes in (
        ('dac', (
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20, 21, 22,
            40, 50, 61, 68, 69, 75, 76, 301, 302, 701, 742, 801, 820, 918,
        )),
        ('mlt', (
            104, 807, 811, 812,
            901, 902, 903, 905, 906, 907, 909, 913, 914, 915,
            921, 923, 926, 928, 932, 940, 944, 948,
            951, 952, 953, 954, 956, 958, 959, 960, 963, 964, 966, 967,
            971, 974, 976, 978, 979, 980, 981, 982, 983, 988, 990, 992, 997,
            1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020,
            1023, 1024, 1025, 1037, 1038, 1311, 1312, 1313,
        )),
        ('nondac', (
            30, 45, 55, 62, 70, 72, 77, 82, 83, 84, 87, 130, 133, 358,
            543, 546, 552, 561, 566, 576, 611, 613, 732, 764, 765,
        )),
        # Private donor codes form the contiguous run 1601..1639.
        ('private', range(1601, 1640)),
    )
    for donor_code in donor_codes
}

def totals_by_donortype(oda_frame, recipient=236):
    """Select one recipient's ODA rows and label each with its donor type.

    Args:
        oda_frame: ODA frame with the columns ``RECIPIENT``, ``SECTOR``,
            ``FLOW``, ``CHANNEL``, ``AMOUNTTYPE``, ``FLOWTYPE``, ``AIDTYPE``
            and ``DONOR``.
        recipient: value matched against the ``RECIPIENT`` column. Defaults
            to 236, the value this function previously hard-coded
            (presumably the DAC recipient code for Benin — confirm against
            the DAC code sheet).

    Returns:
        A copy of the matching rows with an added ``Donortype`` column,
        looked up from ``donortypes`` by ``DONOR``. Donor codes missing from
        that mapping yield NaN.
    """
    # Build the row mask from `oda_frame` itself. The earlier version indexed
    # the global WIID frame `df` here, which does not carry these columns.
    selection = (
        (oda_frame['RECIPIENT'] == recipient) &
        (oda_frame['SECTOR'] == 1000) &
        (oda_frame['FLOW'] == 100) &
        (oda_frame['CHANNEL'] == 100) &
        (oda_frame['AMOUNTTYPE'] == 'D') &
        (oda_frame['FLOWTYPE'] == 112) &
        # AIDTYPE contains mixed int and string representations.
        # NOTE(review): only the string "100" matches here; rows holding the
        # integer 100 are excluded — confirm that is intended.
        (oda_frame['AIDTYPE'] == "100")
    )
    # Copy so that adding the column does not write into a view of the input.
    donortotals = oda_frame.loc[selection].copy()
    donortotals["Donortype"] = donortotals["DONOR"].map(donortypes)
    return donortotals
```