Prepare data ingestion pipeline

Add pandas and openpyxl to ingest Excel data and process it quickly in
pandas.
This commit is contained in:
Marty Oehme 2022-08-19 13:58:57 +02:00
parent d021bd670f
commit 4eb91f37ce
Signed by: Marty
GPG key ID: B7538B8F50A1C800
3 changed files with 54 additions and 25 deletions

View file

@ -1,9 +1,10 @@
---
project: project:
type: default
title: "Drivers of Inequality" title: "Drivers of Inequality"
output-dir: outputs output-dir: outputs
render: render:
- drivers-of-inequality*.qmd - drivers-of-inequality*.qmd
- index.qmd
format: format:
html: html:
@ -33,4 +34,3 @@ date: \today
bibliography: bibliography:
- inputs/libraries/afd-dev-research.bib - inputs/libraries/afd-dev-research.bib
csl: /home/marty/documents/library/academia/styles/APA-7.csl csl: /home/marty/documents/library/academia/styles/APA-7.csl

29
poetry.lock generated
View file

@ -163,6 +163,14 @@ category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
[[package]]
name = "et-xmlfile"
version = "1.1.0"
description = "An implementation of lxml.xmlfile for the standard library"
category = "main"
optional = false
python-versions = ">=3.6"
[[package]] [[package]]
name = "executing" name = "executing"
version = "0.10.0" version = "0.10.0"
@ -603,6 +611,17 @@ category = "main"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
[[package]]
name = "openpyxl"
version = "3.0.10"
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
et-xmlfile = "*"
[[package]] [[package]]
name = "packaging" name = "packaging"
version = "21.3" version = "21.3"
@ -1092,7 +1111,7 @@ python-versions = ">=3.7"
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "e18ea462c49d06ea2b5a4c987b25549a7b2cce10611372cf6a12511a5437a817" content-hash = "976940804a89144dddddbd92fef1b4e7c93c4329d8c6e8d7255b8b4956dc8815"
[metadata.files] [metadata.files]
appnope = [ appnope = [
@ -1252,6 +1271,10 @@ entrypoints = [
{file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"}, {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"},
{file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"}, {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"},
] ]
et-xmlfile = [
{file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
{file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
]
executing = [ executing = [
{file = "executing-0.10.0-py2.py3-none-any.whl", hash = "sha256:9c745f80cda11eb22b62cbecf21156491a794eb56ab06f9d286a44e62822b24e"}, {file = "executing-0.10.0-py2.py3-none-any.whl", hash = "sha256:9c745f80cda11eb22b62cbecf21156491a794eb56ab06f9d286a44e62822b24e"},
{file = "executing-0.10.0.tar.gz", hash = "sha256:d1cd87c2e371e9966261410c5b3769d6df2f9e4a79a83eebd2662dd3388f9833"}, {file = "executing-0.10.0.tar.gz", hash = "sha256:d1cd87c2e371e9966261410c5b3769d6df2f9e4a79a83eebd2662dd3388f9833"},
@ -1571,6 +1594,10 @@ numpy = [
{file = "numpy-1.23.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:be6b350dfbc7f708d9d853663772a9310783ea58f6035eec649fb9c4371b5389"}, {file = "numpy-1.23.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:be6b350dfbc7f708d9d853663772a9310783ea58f6035eec649fb9c4371b5389"},
{file = "numpy-1.23.2.tar.gz", hash = "sha256:b78d00e48261fbbd04aa0d7427cf78d18401ee0abd89c7559bbf422e5b1c7d01"}, {file = "numpy-1.23.2.tar.gz", hash = "sha256:b78d00e48261fbbd04aa0d7427cf78d18401ee0abd89c7559bbf422e5b1c7d01"},
] ]
openpyxl = [
{file = "openpyxl-3.0.10-py2.py3-none-any.whl", hash = "sha256:0ab6d25d01799f97a9464630abacbb34aafecdcaa0ef3cba6d6b3499867d0355"},
{file = "openpyxl-3.0.10.tar.gz", hash = "sha256:e47805627aebcf860edb4edf7987b1309c1b3632f3750538ed962bbcc3bd7449"},
]
packaging = [ packaging = [
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},

View file

@ -1,5 +1,5 @@
[tool.poetry] [tool.poetry]
name = "quarto-test" name = "afd-analysis"
version = "0.1.0" version = "0.1.0"
description = "" description = ""
authors = ["Marty Oehme <marty.oehme@gmail.com>"] authors = ["Marty Oehme <marty.oehme@gmail.com>"]
@ -9,6 +9,8 @@ python = "^3.10"
matplotlib = "^3.5.3" matplotlib = "^3.5.3"
plotly-express = "^0.4.1" plotly-express = "^0.4.1"
jupyter = "^1.0.0" jupyter = "^1.0.0"
pandas = "^1.4.3"
openpyxl = "^3.0.10"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]