Validate CSV output schemas

Also moved code dir to src.
There are reasons to do standard things in standard ways. While it is
possible to get the `code/` directory to work, and recognize it as a
package path, this requires wrangling the pyproject.toml file.
Additionally, any import from the `code.something` path automatically
shadows the Python stdlib `code` module. While avoiding this may not be
strictly necessary, it is still good practice not to shadow standard
library modules.
This commit is contained in:
Marty Oehme 2025-09-30 22:14:30 +02:00
parent de96b67fac
commit 2faeda87c3
Signed by: Marty
GPG key ID: 4E535BC19C61886E
14 changed files with 111 additions and 7 deletions

View file

@ -0,0 +1,24 @@
import dataframely as dy
import polars as pl
from tests.test_validate_date_col import DateSchema
class FilesSchema(DateSchema):
    """Schema describing the columns of the files CSV output.

    Inherits every column declared on ``DateSchema`` and adds three more,
    none of which may contain null values.
    """

    # String column holding the file name; nulls are rejected.
    filename: dy.String = dy.String(nullable=False)
    # Float column holding the modification time; nulls are rejected.
    mtime: dy.Float = dy.Float(nullable=False)
    # Integer column holding the file size; nulls are rejected.
    filesize: dy.Integer = dy.Integer(nullable=False)
def test_files_schema():
    """Validate ``output/files.csv`` against ``FilesSchema``.

    The CSV is scanned lazily with an explicit dtype for every column,
    collected with the streaming engine, and passed to
    ``FilesSchema.validate``. The test passes when neither parsing nor
    validation raises.
    """
    # Parse with the widest matching dtypes so that reading the CSV never
    # distorts or rejects legitimate data before the schema gets to check it:
    # - Float32 has only a 24-bit significand, which is too coarse for large
    #   values (presumably mtime is a Unix timestamp -- confirm against the
    #   code that writes the CSV).
    # - UInt32 cannot represent file sizes of 4 GiB or more.
    csv_dtypes = {
        "date": pl.Date,
        "filename": pl.String,
        "mtime": pl.Float64,
        "filesize": pl.UInt64,
    }
    files = pl.scan_csv("output/files.csv", schema=csv_dtypes).collect(
        engine="streaming"
    )
    _ = FilesSchema.validate(files)