From 4eb91f37ce25e444e4a6daa1ec6712b9e8d89715 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Fri, 19 Aug 2022 13:58:57 +0200 Subject: [PATCH] Prepare data ingestion pipeline Add pandas and openpyxl to ingest excel data and process it quickly in pandas. --- _quarto.yml | 46 +++++++++++++++++++++++----------------------- poetry.lock | 29 ++++++++++++++++++++++++++++- pyproject.toml | 4 +++- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 7fde91b..cf9b29e 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,36 +1,36 @@ ---- project: + type: default title: "Drivers of Inequality" output-dir: outputs render: - drivers-of-inequality*.qmd + - index.qmd format: - html: - toc: true - code-fold: true - code-tools: true - title-block-banner: true - docx: - reference-doc: tools/justified.docx - pdf: - papersize: A4 - geometry: - - left=2cm - - right=2.5cm - - top=2.5cm - - bottom=2.5cm - indent: true - linestretch: 1.25 - fontfamily: lmodern - fontsize: "12" - # toc: true + html: + toc: true + code-fold: true + code-tools: true + title-block-banner: true + docx: + reference-doc: tools/justified.docx + pdf: + papersize: A4 + geometry: + - left=2cm + - right=2.5cm + - top=2.5cm + - bottom=2.5cm + indent: true + linestretch: 1.25 + fontfamily: lmodern + fontsize: "12" + # toc: true jupyter: python3 title: "Drivers of Inequality" author: - - Marty Oehme + - Marty Oehme date: \today bibliography: - - inputs/libraries/afd-dev-research.bib + - inputs/libraries/afd-dev-research.bib csl: /home/marty/documents/library/academia/styles/APA-7.csl - diff --git a/poetry.lock b/poetry.lock index 99dfa55..5946c52 100644 --- a/poetry.lock +++ b/poetry.lock @@ -163,6 +163,14 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "executing" version = "0.10.0" @@ -603,6 +611,17 @@ category = "main" optional = false python-versions = ">=3.8" +[[package]] +name = "openpyxl" +version = "3.0.10" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "21.3" @@ -1092,7 +1111,7 @@ python-versions = ">=3.7" [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "e18ea462c49d06ea2b5a4c987b25549a7b2cce10611372cf6a12511a5437a817" +content-hash = "976940804a89144dddddbd92fef1b4e7c93c4329d8c6e8d7255b8b4956dc8815" [metadata.files] appnope = [ @@ -1252,6 +1271,10 @@ entrypoints = [ {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"}, {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"}, ] +et-xmlfile = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] executing = [ {file = "executing-0.10.0-py2.py3-none-any.whl", hash = "sha256:9c745f80cda11eb22b62cbecf21156491a794eb56ab06f9d286a44e62822b24e"}, {file = "executing-0.10.0.tar.gz", hash = "sha256:d1cd87c2e371e9966261410c5b3769d6df2f9e4a79a83eebd2662dd3388f9833"}, @@ -1571,6 +1594,10 @@ numpy = [ {file = "numpy-1.23.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:be6b350dfbc7f708d9d853663772a9310783ea58f6035eec649fb9c4371b5389"}, {file = "numpy-1.23.2.tar.gz", hash = "sha256:b78d00e48261fbbd04aa0d7427cf78d18401ee0abd89c7559bbf422e5b1c7d01"}, ] +openpyxl = [ + {file = "openpyxl-3.0.10-py2.py3-none-any.whl", hash = "sha256:0ab6d25d01799f97a9464630abacbb34aafecdcaa0ef3cba6d6b3499867d0355"}, + {file = "openpyxl-3.0.10.tar.gz", hash = "sha256:e47805627aebcf860edb4edf7987b1309c1b3632f3750538ed962bbcc3bd7449"}, +] packaging = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, diff --git a/pyproject.toml b/pyproject.toml index 7e68304..488e713 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "quarto-test" +name = "afd-analysis" version = "0.1.0" description = "" authors = ["Marty Oehme "] @@ -9,6 +9,8 @@ python = "^3.10" matplotlib = "^3.5.3" plotly-express = "^0.4.1" jupyter = "^1.0.0" +pandas = "^1.4.3" +openpyxl = "^3.0.10" [tool.poetry.dev-dependencies]