diff --git a/README.md b/README.md
index e7c6e20..2ef42d2 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Any action can easily be started using [`just`](https://github.com/casey/just) w
 ## Dataset structure
 
 - All inputs (i.e. building blocks from other sources) are located in `input/`.
-- All custom code is located in `code/`.
+- All custom code is located in `src/`.
-- All final output data is located in `output/`
+- All final output data is located in `output/`.
 
 ## Output data structure
@@ -51,7 +51,7 @@ Contained in `packages.csv`, 4 columns:
 
 Represents information about the unique system installations represented in the raw dataset.
 
-Contained in `packages.csv`, 2 columns:
+Contained in `unique_installs.csv`, 2 columns:
 
 - `date`: the date a specific file is relevant for
-- `unique`: the amount of unique installations counted on the observation date
+- `unique`: the number of unique installations counted on the observation date
diff --git a/justfile b/justfile
index ba1d05a..dca1e7f 100644
--- a/justfile
+++ b/justfile
@@ -3,16 +3,16 @@ default: versioned
 all: files kernels unique packages
 
 files:
-    python code/files.py input output
+    python src/files.py input output
 
 kernels:
-    python code/kernels.py input output
+    python src/kernels.py input output
 
 unique:
-    python code/unique.py input output
+    python src/unique.py input output
 
 packages:
-    python code/packages.py input output
+    python src/packages.py input output
 
 versioned:
     datalad run -m "Create updated output data" -i input/ -o output/ just all
diff --git a/code/.gitattributes b/src/.gitattributes
similarity index 100%
rename from code/.gitattributes
rename to src/.gitattributes
diff --git a/code/README.md b/src/README.md
similarity index 85%
rename from code/README.md
rename to src/README.md
index b1f9126..24aa885 100644
--- a/code/README.md
+++ b/src/README.md
@@ -1,6 +1,6 @@
 # Popcorn dataset code
 
-Each script can be run stand-alone like `python code/files.py <input-dir> <output-dir>`,
+Each script can be run stand-alone like `python src/files.py <input-dir> <output-dir>`,
 exchanging the script file for the one intended.
 
 It is suggested, however, to run the scripts using the `just` command runner from the
diff --git a/code/files.py b/src/files.py
similarity index 100%
rename from code/files.py
rename to src/files.py
diff --git a/code/kernels.py b/src/kernels.py
similarity index 100%
rename from code/kernels.py
rename to src/kernels.py
diff --git a/code/packages.py b/src/packages.py
similarity index 100%
rename from code/packages.py
rename to src/packages.py
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tests/test_validate_date_col.py b/src/tests/test_validate_date_col.py
new file mode 100644
index 0000000..e36e445
--- /dev/null
+++ b/src/tests/test_validate_date_col.py
@@ -0,0 +1,18 @@
+"""Shared base schema for the `date` column of the output CSV files."""
+
+import dataframely as dy
+import polars as pl
+
+
+class DateSchema(dy.Schema):
+    """Schema with a mandatory `date` column, meant to be subclassed."""
+
+    date: dy.Date = dy.Date(nullable=False)
+
+    @dy.rule()
+    def minimum_starting_date() -> pl.Expr:
+        """Accept only dates strictly later than 2018-05-08."""
+        # NOTE(review): the comparison is strict, so 2018-05-08 itself is
+        # rejected -- confirm this matches the first date in the dataset.
+        earliest_excluded = pl.date(2018, 5, 8)
+        return pl.col("date") > earliest_excluded
diff --git a/src/tests/test_validate_files.py b/src/tests/test_validate_files.py
new file mode 100644
index 0000000..c232ba3
--- /dev/null
+++ b/src/tests/test_validate_files.py
@@ -0,0 +1,28 @@
+"""Validate the generated `output/files.csv` against its expected schema."""
+
+import dataframely as dy
+import polars as pl
+
+from tests.test_validate_date_col import DateSchema
+
+
+class FilesSchema(DateSchema):
+    """`DateSchema` extended with the non-nullable columns of `files.csv`."""
+
+    filename: dy.String = dy.String(nullable=False)
+    mtime: dy.Float = dy.Float(nullable=False)
+    filesize: dy.Integer = dy.Integer(nullable=False)
+
+
+def test_files_schema():
+    """Load `output/files.csv` with explicit dtypes and validate it."""
+    # NOTE(review): if `mtime` holds epoch seconds, Float32 cannot represent
+    # it exactly -- confirm that the reduced precision is acceptable here.
+    csv_dtypes = {
+        "date": pl.Date,
+        "filename": pl.String,
+        "mtime": pl.Float32,
+        "filesize": pl.UInt32,
+    }
+    files = pl.scan_csv("output/files.csv", schema=csv_dtypes)
+    FilesSchema.validate(files.collect(engine="streaming"))
diff --git a/src/tests/test_validate_kernels.py b/src/tests/test_validate_kernels.py
new file mode 100644
index 0000000..bdbcfa1
--- /dev/null
+++ b/src/tests/test_validate_kernels.py
@@ -0,0 +1,24 @@
+"""Validate the generated `output/kernels.csv` against its expected schema."""
+
+import dataframely as dy
+import polars as pl
+
+from tests.test_validate_date_col import DateSchema
+
+
+class KernelsSchema(DateSchema):
+    """`DateSchema` extended with the non-nullable columns of `kernels.csv`."""
+
+    kernel: dy.String = dy.String(nullable=False)
+    downloads: dy.Integer = dy.Integer(nullable=False)
+
+
+def test_kernels_schema():
+    """Load `output/kernels.csv` with explicit dtypes and validate it."""
+    csv_dtypes = {
+        "date": pl.Date,
+        "kernel": pl.String,
+        "downloads": pl.UInt32,
+    }
+    kernels = pl.scan_csv("output/kernels.csv", schema=csv_dtypes)
+    KernelsSchema.validate(kernels.collect(engine="streaming"))
diff --git a/src/tests/test_validate_packages.py b/src/tests/test_validate_packages.py
new file mode 100644
index 0000000..d986cd3
--- /dev/null
+++ b/src/tests/test_validate_packages.py
@@ -0,0 +1,26 @@
+"""Validate the generated `output/packages.csv` against its expected schema."""
+
+import dataframely as dy
+import polars as pl
+
+from tests.test_validate_date_col import DateSchema
+
+
+class PackagesSchema(DateSchema):
+    """`DateSchema` extended with the non-nullable columns of `packages.csv`."""
+
+    package: dy.String = dy.String(nullable=False)
+    version: dy.String = dy.String(nullable=False)
+    count: dy.Integer = dy.Integer(nullable=False)
+
+
+def test_packages_schema():
+    """Load `output/packages.csv` with explicit dtypes and validate it."""
+    csv_dtypes = {
+        "date": pl.Date,
+        "package": pl.String,
+        "version": pl.String,
+        "count": pl.UInt16,
+    }
+    packages = pl.scan_csv("output/packages.csv", schema=csv_dtypes)
+    PackagesSchema.validate(packages.collect(engine="streaming"))
diff --git a/src/tests/test_validate_unique_installs.py b/src/tests/test_validate_unique_installs.py
new file mode 100644
index 0000000..109ccb1
--- /dev/null
+++ b/src/tests/test_validate_unique_installs.py
@@ -0,0 +1,27 @@
+"""Validate the generated `output/unique_installs.csv` against its schema."""
+
+import dataframely as dy
+import polars as pl
+
+from tests.test_validate_date_col import DateSchema
+
+
+class UniquesSchema(DateSchema):
+    """`DateSchema` extended with a non-nullable, positive `unique` column."""
+
+    unique: dy.Integer = dy.Integer(nullable=False)
+
+    @dy.rule()
+    def cannot_be_zero() -> pl.Expr:
+        """Require every `unique` value to be greater than zero."""
+        unique_count = pl.col("unique")
+        return unique_count > 0
+
+
+def test_uniques_schema():
+    """Load `output/unique_installs.csv` with explicit dtypes and validate it."""
+    csv_dtypes = {
+        "date": pl.Date,
+        "unique": pl.UInt16,
+    }
+    installs = pl.scan_csv("output/unique_installs.csv", schema=csv_dtypes)
+    UniquesSchema.validate(installs.collect(engine="streaming"))
diff --git a/code/unique.py b/src/unique.py
similarity index 100%
rename from code/unique.py
rename to src/unique.py