From 78f0e430facd463158fb29e90d5087848c06d20e Mon Sep 17 00:00:00 2001
From: Divanshu <97019230+sdivyanshu90@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:43:57 +0000
Subject: [PATCH 1/2] test: use MaxPooling2D.evaluate() directly; remove custom
 pooling logic

---
 tests/e2e_tests/test_neural_net.codon | 145 ++++++++++++++++++++++++--
 1 file changed, 135 insertions(+), 10 deletions(-)

diff --git a/tests/e2e_tests/test_neural_net.codon b/tests/e2e_tests/test_neural_net.codon
index efb81466..c86d1d19 100644
--- a/tests/e2e_tests/test_neural_net.codon
+++ b/tests/e2e_tests/test_neural_net.codon
@@ -1,18 +1,143 @@
-from numpy.create import array, zeros_like
+from sequre.stdlib.learn.neural_net.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
+from sequre.stdlib.learn.neural_net.loss import dloss
+from sequre import sequre
+from sequre.stdlib.builtin import maximum
 
-from sequre.types.sharetensor import Sharetensor
-from sequre.types.multiparty_union import MPU
-from sequre.utils.testing import assert_eq, assert_eq_approx
-from sequre.utils.utils import random_floats, random_ints
-from sequre.mpc.env import MPCEnv
-from sequre.stdlib.learn.neural_net.model import Sequential
-from sequre.stdlib.learn.neural_net.layers import Input, Dense
+# --- CNN component test: MaxPooling2D on plain (unencrypted) 4D data ---
+def _test_maxpool2d_secure[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Uses plain 4D data (4D Sharetensors not yet supported).
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"
+    print(f'CP{mpc.pid}:\tTesting MaxPool2D component on {alg_struct} ... \n')
+    ndarray_4d = type(zeros((1, 1, 1, 1)))
+
+    # Input shape: (batch = 1, Height = 4, Width = 4, Channels = 1)
+    with mpc.randomness.seed_switch(-1):
+        X = zeros((1, 4, 4, 1))
+        vals = [1.0, 3.0, 2.0, 4.0,
+                5.0, 2.0, 8.0, 1.0,
+                3.0, 7.0, 6.0, 0.0,
+                9.0, 4.0, 2.0, 5.0]
+        for r in range(4):
+            for c in range(4):
+                X[0, r, c, 0] = vals[r * 4 + c]
+
+    pool = MaxPooling2D[ndarray_4d](pool_size = (2, 2), strides = (2, 2))
+    pool.evaluate(mpc, X)
+
+    assert_eq(f"CP{mpc.pid}:\tMaxPool2D output shape on {alg_struct}",
+              list(pool.output.shape), [1, 2, 2, 1])
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,0,0,0] on {alg_struct}",
+                     pool.output[0, 0, 0, 0], 5.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,0,1,0] on {alg_struct}",
+                     pool.output[0, 0, 1, 0], 8.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,1,0,0] on {alg_struct}",
+                     pool.output[0, 1, 0, 0], 9.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,1,1,0] on {alg_struct}",
+                     pool.output[0, 1, 1, 0], 6.0, error=0.001)
+
+    print(f'CP{mpc.pid}:\tMaxPool2D component on {alg_struct} passed.\n')
+
+# --- CNN end-to-end test ---
+def _test_neural_net_cnn[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    End-to-end CNN training test.
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"
+    print(f'CP{mpc.pid}:\tTesting Sequre CNN layers on {alg_struct} ... \n')
+
+    N = 4
+    H, W, C_in = 8, 8, 1
+    n_filters = 4
+    dense_out = 2
+    step = 0.01
+    momentum = 0.9
+    epochs = 2
+
+    ndarray_4d = type(zeros((1, 1, 1, 1)))
+    ndarray_2d = type(zeros((1, 1)))
+
+    kH, kW = 3, 3
+    conv_out_H = (H - kH) + 1
+    conv_out_W = (W - kW) + 1
+    pool_size = (2, 2)
+    pool_stride = (2, 2)
+    pool_out_H = (conv_out_H - pool_size[0]) // pool_stride[0] + 1
+    pool_out_W = (conv_out_W - pool_size[1]) // pool_stride[1] + 1
+    flat_size = pool_out_H * pool_out_W * n_filters
 
+    with mpc.randomness.seed_switch(-1):
+        X = zeros((N, H, W, C_in))
+        for i in range(N):
+            for j in range(min(H, W)):
+                X[i, j, j, 0] = float(i + 1)
+    y = ones((N, dense_out))
+    for i in range(N // 2):
+        y[i, 0] = -1.0
+
+    inp = Input[ndarray_4d](H * W * C_in)
+    conv = Conv2D[ndarray_4d]("relu", out_channels=n_filters, kernel_size=(kH, kW), stride=(1, 1))
+    conv.initialize(mpc, C_in)
+    pool = MaxPooling2D[ndarray_4d](pool_size=pool_size, strides=pool_stride)
+    flat = Flatten[ndarray_4d, ndarray_2d]()
+    dense = Dense[ndarray_2d]("linear", size=dense_out)
+    dense.initialize(mpc, flat_size)
+
+    w0_sum = conv.weights.reshape((conv.weights.size,)).sum()
+
+    loss_first = 0.0
+    loss_last = 0.0
+    for ep in range(epochs):
+        inp.evaluate(mpc, X)
+        conv.evaluate(mpc, inp.output)
+        pool.evaluate(mpc, conv.output)
+        flat.evaluate(mpc, pool.output)
+        dense.evaluate(mpc, flat.output)
+
+        diff = y - dense.output
+        epoch_loss = (diff * diff).sum() / (2.0 * N)
+        if ep == 0:
+            loss_first = epoch_loss
+        loss_last = epoch_loss
+
+        g = dloss(mpc, y, dense.output, "mean_squared_error")
+        g = dense.derive(mpc, flat.output, g, LAYER_IDX=5)
+        g = flat.derive(mpc, pool.output, g, LAYER_IDX=4)
+        g = pool.derive(mpc, conv.output, g, LAYER_IDX=3)
+        conv.derive(mpc, inp.output, g, LAYER_IDX=1)
+        conv.update(mpc, step, momentum)
+        dense.update(mpc, step, momentum)
+
+    w1_sum = conv.weights.reshape((conv.weights.size,)).sum()
 
-def test_neural_net[TP](mpc: MPCEnv[TP], modulus: TP):
+    assert_eq(f"CP{mpc.pid}:\tCNN Conv2D output shape on {alg_struct}", list(conv.output.shape), [N, conv_out_H, conv_out_W, n_filters])
+    assert_eq(f"CP{mpc.pid}:\tCNN MaxPool2D output shape on {alg_struct}", list(pool.output.shape), [N, pool_out_H, pool_out_W, n_filters])
+    assert_eq(f"CP{mpc.pid}:\tCNN Flatten output shape on {alg_struct}", list(flat.output.shape), [N, flat_size])
+    assert_eq(f"CP{mpc.pid}:\tCNN Dense output shape on {alg_struct}", list(dense.output.shape), [N, dense_out])
+    assert w0_sum != w1_sum, f"CP{mpc.pid}: Conv2D weights unchanged after training"
+    assert loss_last < loss_first, f"CP{mpc.pid}: CNN loss did not drop (first={loss_first}, last={loss_last})"
+    print(f'CP{mpc.pid}:\tCNN loss dropped: {loss_first:.4f} -> {loss_last:.4f}')
+    print(f'CP{mpc.pid}:\tSequre CNN layers on {alg_struct} tests passed.\n')
+
+
+def _test_neural_net_cnn_all[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Run CNN tests: secure 2D maxpool component + end-to-end CNN workflow.
+    """
     alg_struct = "ring" if modulus.popcnt() == 1 else "field"
-    print(f'CP{mpc.pid}:\tTesting Sequre neural net on {alg_struct} ... \n')
+    print(f'CP{mpc.pid}:\tTesting Sequre CNN on {alg_struct} ... \n')
+
+    _test_maxpool2d_secure(mpc, modulus)
+    _test_neural_net_cnn(mpc, modulus)
 
+    print(f'CP{mpc.pid}:\tSequre CNN on {alg_struct} tests passed.\n')
+
+
+# --- Vanilla Dense neural net test ---
+def _test_neural_net_vanilla[TP](mpc: MPCEnv[TP], modulus: TP):
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"
+    print(f'CP{mpc.pid}:\tTesting Sequre vanilla neural net on {alg_struct} ... \n')
     input_size = 16
     output_size = 1
     n_neurons = 32

From 42e9d1d9bf28ebc4fb676840bd478c1f487a8066 Mon Sep 17 00:00:00 2001
From: Divanshu <97019230+sdivyanshu90@users.noreply.github.com>
Date: Tue, 31 Mar 2026 12:22:09 +0000
Subject: [PATCH 2/2] Restore Missing files

---
 applications/offline/CHESTMNIST_CNN_README.md | 242 +++++++
 applications/offline/chestmnist_cnn.codon     | 229 +++++++
 applications/offline/chestmnist_prep.py       |  79 +++
 scripts/sequre-test.sh                        |   4 +
 stdlib/sequre/constants.codon                 |   8 +-
 .../stdlib/learn/neural_net/layers.codon      | 618 +++++++++++++++++-
 .../sequre/stdlib/learn/neural_net/loss.codon | 105 ++-
 .../stdlib/learn/neural_net/model.codon       |  25 +-
 tests/e2e_tests/test_neural_net.codon         | 294 ++++++++-
 9 files changed, 1585 insertions(+), 19 deletions(-)
 create mode 100644 applications/offline/CHESTMNIST_CNN_README.md
 create mode 100644 applications/offline/chestmnist_cnn.codon
 create mode 100644 applications/offline/chestmnist_prep.py
 create mode 100755 scripts/sequre-test.sh

diff --git a/applications/offline/CHESTMNIST_CNN_README.md b/applications/offline/CHESTMNIST_CNN_README.md
new file mode 100644
index 00000000..50f8c14d
--- /dev/null
+++ b/applications/offline/CHESTMNIST_CNN_README.md
@@ -0,0 +1,242 @@
+# ChestMNIST CNN - Running Instructions
+
+A secure multi-party computation (MPC) implementation of a Convolutional Neural Network for the ChestMNIST medical imaging dataset.
+
+## Overview
+
+This implementation trains a CNN on chest X-ray images using secure MPC protocols. The architecture uses strided convolutions (no MaxPooling) for faster training and binary cross-entropy loss for multi-label classification.
+
+### Architecture
+
+```
+Input (N, 28, 28, 1)
+  ↓
+Conv2D(8, 3×3, stride=2, ReLU)  → (N, 13, 13, 8)
+  ↓
+Conv2D(16, 3×3, stride=2, ReLU) → (N, 6, 6, 16)
+  ↓
+Flatten                          → (N, 576)
+  ↓
+Dense(64, ReLU)
+  ↓
+Dropout(0.2)
+  ↓
+Dense(14, linear)                → Multi-label logits
+```
+
+**Total Parameters:** ~39,000  
+**Loss Function:** Binary Cross-Entropy (labels in {0, 1})
+**Multi-label Classification:** 14 disease categories
+
+## Prerequisites
+
+### 1. Install Codon
+
+```bash
+mkdir $HOME/.codon
+curl -L https://github.com/exaloop/codon/releases/download/v0.17.0/codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz | tar zxvf - -C $HOME/.codon --strip-components=1
+```
+
+### 2. Install Sequre Plugin
+
+```bash
+curl -L https://github.com/0xTCG/sequre/releases/download/v0.0.20-alpha/sequre-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz | tar zxvf - -C $HOME/.codon/lib/codon/plugins
+```
+
+### 3. Clone Repository
+
+```bash
+git clone https://github.com/0xTCG/sequre.git
+cd sequre
+```
+
+## Preparing the Dataset
+
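+Install `medmnist` (`pip install medmnist`), then run the preparation script from the repository root. It exports only 256 training and 128 test samples by default, so request the counts that `N_TRAIN` and `N_TEST` in `chestmnist_cnn.codon` expect:
+
+```bash
+python applications/offline/chestmnist_prep.py --n_train 10000 --n_test 3000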
+```
+
+## Running the Code
+
+### Method 1: Using the Helper Script (Recommended)
+
+```bash
+bash scripts/sequre-test.sh applications/offline/chestmnist_cnn.codon
+```
+
+### Method 2: Direct Command
+
+From the repository root:
+
+```bash
+CODON_DEBUG=lt $HOME/.codon/bin/codon run \
+    --disable-opt='core-pythonic-list-addition-opt' \
+    -plugin sequre \
+    applications/offline/chestmnist_cnn.codon \
+    --skip-mhe-setup
+```
+
+### Method 3: Release Mode (Faster Performance)
+
+For production runs with better performance (without debug features):
+
+```bash
+CODON_DEBUG=lt $HOME/.codon/bin/codon run -release \
+    --disable-opt='core-pythonic-list-addition-opt' \
+    -plugin sequre \
+    applications/offline/chestmnist_cnn.codon \
+    --skip-mhe-setup
+```
+
+## Configuration
+
+You can modify training parameters by editing the configuration section in `chestmnist_cnn.codon`:
+
+```python
+# -- Configuration --
+N_TRAIN    = 10000    # Number of training samples
+N_TEST     = 3000     # Number of test samples
+BATCH_SIZE = 512      # Mini-batch size
+EPOCHS     = 5        # Number of training epochs
+LR         = 0.001    # Learning rate
+MOMENTUM   = 0.9      # Nesterov momentum
+```
+
+## Expected Output
+
+The program will display:
+
+1. **Data Loading:**
+   ```
+   Loading ChestMNIST data ...
+     Train  X=(10000, 28, 28, 1)  y=(10000, 14)
+     Test   X=(3000, 28, 28, 1)   y=(3000, 14)
+   ```
+
+2. **Architecture Summary:**
+   ```
+   Architecture (strided conv - no MaxPooling)
+     Input  -> (N, 28, 28, 1)
+     Conv2D(8, 3x3, stride=2, relu)  -> (N, 13, 13, 8)
+     Conv2D(16, 3x3, stride=2, relu) -> (N, 6, 6, 16)
+     Flatten                          -> (N, 576)
+     Dense(64, relu)
+     Dropout(0.2)
+     Dense(14, linear)
+     Loss: binary_crossentropy (mini-batch)
+   ```
+
+3. **Training Progress:**
+   ```
+   Training: 5 epochs, lr=0.001, momentum=0.9, batch_size=512, batches/epoch=20
+   ------------------------------------------------------------
+     Epoch 1/5  bce_loss = 7.234567  (20 batches)
+     Epoch 2/5  bce_loss = 6.123456  (20 batches)
+     ...
+   ```
+
+4. **Final Results:**
+   ```
+   ============================================================
+   Results
+   ============================================================
+     Test   accuracy: 0.7234   bce-loss: 5.678901
+     Config: 10000 train, 3000 test, batch_size=512, epochs=5
+   ```
+
+## Typical Training Time
+
+- **Debug Mode:** ~30-60 minutes (5 epochs, 10K training samples)
+- **Release Mode:** ~10-20 minutes
+- Actual time depends on hardware (CPU cores, memory)
+
+## Troubleshooting
+
+### Issue: "Cannot import Conv2D"
+**Solution:** Ensure the Sequre plugin is properly installed and the neural network layers are synced:
+```bash
+# Check plugin installation
+ls -la $HOME/.codon/lib/codon/plugins/sequre/stdlib/sequre/stdlib/learn/neural_net/
+```
+
+### Issue: "File not found: data/chestmnist/*.txt"
+**Solution:** Verify data files exist and are in the correct location:
+```bash
+ls -la data/chestmnist/
+```
+
+### Issue: Socket errors
+**Solution:** Clean up socket files before running:
+```bash
+rm -f sock.*
+```
+
+## Performance Notes
+
+- **MPC Overhead:** Secure computation is inherently slower than plaintext operations
+- **Batch Size:** Larger batches = fewer MPC rounds but more memory
+- **Strided Convolutions:** Used instead of MaxPooling for faster MPC training
+- **Dropout Rate:** Set to 0.2 (training) and automatically disabled during evaluation
+
+## Architecture Details
+
+### Why Strided Convolutions?
+
+This implementation uses strided convolutions instead of MaxPooling layers because:
+- **Fewer MPC operations:** Pooling requires secure comparison operations
+- **Faster training:** Reduces computational overhead in secure computation
+- **Similar accuracy:** Striding can achieve comparable spatial reduction
+
+### Binary Cross-Entropy Loss
+
+Multi-label classification where each of 14 labels is independently predicted:
+- Uses Chebyshev polynomial approximation for secure log computation
+- Clips predictions to interval (0.001, 0.999) for numerical stability
+- Each label is treated as a separate binary classification problem
+
+## File Structure
+
+Required files for running this code:
+
+```
+sequre/
+├── applications/offline/chestmnist_cnn.codon  # Main training script
+├── data/chestmnist/                            # Dataset files
+│   ├── train_images.txt
+│   ├── train_labels.txt
+│   ├── test_images.txt
+│   └── test_labels.txt
+├── stdlib/sequre/stdlib/learn/neural_net/     # Neural network library
+│   ├── layers.codon                            # Conv2D, Dense, Flatten, Dropout
+│   ├── activations.codon                       # ReLU, linear activations
+│   └── loss.codon                              # Loss functions
+├── stdlib/sequre/stdlib/
+│   ├── builtin.codon                           # Secure operations (clip, etc.)
+│   └── chebyshev.codon                         # Polynomial approximations
+└── scripts/sequre-test.sh                      # Helper script
+```
+
+## Citation
+
+If you use this code in your research, please cite the Sequre framework:
+
+```bibtex
+@software{sequre2024,
+  title={Sequre: A Framework for Secure Multi-Party Computation},
+  author={0xTCG},
+  year={2024},
+  url={https://github.com/0xTCG/sequre}
+}
+```
+
+## License
+
+See [LICENSE.md](../../LICENSE.md) in the repository root.
+
+## Support
+
+For issues and questions:
+- GitHub Issues: https://github.com/0xTCG/sequre/issues
+- Repository: https://github.com/0xTCG/sequre
diff --git a/applications/offline/chestmnist_cnn.codon b/applications/offline/chestmnist_cnn.codon
new file mode 100644
index 00000000..58d9ae3d
--- /dev/null
+++ b/applications/offline/chestmnist_cnn.codon
@@ -0,0 +1,229 @@
+"""
+ChestMNIST CNN - Secure Multi-Party Computation (Mini-Batch)
+=============================================================
+Strided convolutions (no MaxPooling) for faster MPC training.
+
+Architecture
+------------
+    Input (N, 28, 28, 1)
+      -> Conv2D(8, 3x3, stride=2, relu)   -> (N, 13, 13, 8)
+      -> Conv2D(16, 3x3, stride=2, relu)  -> (N, 6, 6, 16)
+      -> Flatten                           -> (N, 576)
+      -> Dense(64, relu)
+      -> Dropout(0.2)
+      -> Dense(14, linear)                -> multi-label logits
+
+Loss: binary_crossentropy   (labels in {0, 1})
+"""
+
+from sequre import sequre, local
+from sequre.stdlib.learn.neural_net.layers import (
+    Input, Conv2D, Flatten, Dense, Dropout,
+)
+from sequre.stdlib.learn.neural_net.loss import loss, dloss
+from sequre.stdlib.builtin import clip
+from numpy.create import loadtxt, zeros
+from numpy.ndarray import ndarray
+
+# -- Type aliases --
+ndarray_4d = ndarray[Tuple[int, int, int, int], float]
+ndarray_2d = ndarray[Tuple[int, int], float]
+
+# BCE interval for Chebyshev approximation of log
+BCE_INTERVAL = (0.001, 1.0)
+
+
+# -- Data helpers --
+def load_chestmnist(images_path: str, labels_path: str, n: int):
+    """Load flat images and binarised labels, return 4-D X and 2-D y."""
+    X = loadtxt(images_path, shape=(n, 784), dtype=float).reshape((n, 28, 28, 1))
+    labs = loadtxt(labels_path, shape=(n, 14), dtype=float)
+    return X, labs
+
+
+# -- Forward pass helper --
+def forward(mpc, X: ndarray_4d,
+            input_layer: Input[ndarray_4d],
+            conv1: Conv2D[ndarray_4d],
+            conv2: Conv2D[ndarray_4d],
+            flatten: Flatten[ndarray_4d, ndarray_2d],
+            dense1: Dense[ndarray_2d],
+            dropout: Dropout[ndarray_2d],
+            dense2: Dense[ndarray_2d]):
+    input_layer.evaluate(mpc, X)
+    conv1.evaluate(mpc, input_layer.output)
+    conv2.evaluate(mpc, conv1.output)
+    flatten.evaluate(mpc, conv2.output)
+    dense1.evaluate(mpc, flatten.output)
+    dropout.evaluate(mpc, dense1.output)
+    dense2.evaluate(mpc, dropout.output)
+
+
+# -- Backward pass helper --
+def backward(mpc, y_batch: ndarray_2d,
+             input_layer: Input[ndarray_4d],
+             conv1: Conv2D[ndarray_4d],
+             conv2: Conv2D[ndarray_4d],
+             flatten: Flatten[ndarray_4d, ndarray_2d],
+             dense1: Dense[ndarray_2d],
+             dropout: Dropout[ndarray_2d],
+             dense2: Dense[ndarray_2d]):
+    clipped_output = clip(mpc, dense2.output, 0.001, 0.999)
+    grad = dloss(mpc, y_batch, clipped_output,
+                 "binary_crossentropy", BCE_INTERVAL)
+    grad = dense2.derive(mpc, dropout.output, grad, LAYER_IDX=6)
+    grad = dropout.derive(mpc, dense1.output, grad, LAYER_IDX=5)
+    grad = dense1.derive(mpc, flatten.output, grad, LAYER_IDX=4)
+    grad_4d = flatten.derive(mpc, conv2.output, grad, LAYER_IDX=3)
+    grad_4d = conv2.derive(mpc, conv1.output, grad_4d, LAYER_IDX=2)
+    conv1.derive(mpc, input_layer.output, grad_4d, LAYER_IDX=1)
+
+
+# -- Main --
+@local
+def chestmnist_cnn(mpc):
+    # -- Configuration --
+    N_TRAIN    = 10000
+    N_TEST     = 3000
+    BATCH_SIZE = 512
+    EPOCHS     = 5
+    LR         = 0.001
+    MOMENTUM   = 0.9
+
+    # -- Load data --
+    print("Loading ChestMNIST data ...")
+    X_train, y_train = load_chestmnist(
+        "data/chestmnist/train_images.txt",
+        "data/chestmnist/train_labels.txt", N_TRAIN)
+    X_test, y_test = load_chestmnist(
+        "data/chestmnist/test_images.txt",
+        "data/chestmnist/test_labels.txt", N_TEST)
+
+    print(f"  Train  X={X_train.shape}  y={y_train.shape}")
+    print(f"  Test   X={X_test.shape}   y={y_test.shape}")
+
+    n_batches = (N_TRAIN + BATCH_SIZE - 1) // BATCH_SIZE
+
+    # -- Build layers (strided conv - no MaxPooling) --
+    input_layer = Input[ndarray_4d](784)
+
+    conv1 = Conv2D[ndarray_4d]("relu", out_channels=8,
+                                kernel_size=(3, 3), stride=(2, 2))
+    conv1.initialize(mpc, 1)
+
+    conv2 = Conv2D[ndarray_4d]("relu", out_channels=16,
+                                kernel_size=(3, 3), stride=(2, 2))
+    conv2.initialize(mpc, 8)
+
+    flatten = Flatten[ndarray_4d, ndarray_2d]()
+
+    # 6*6*16 = 576 features after flatten
+    dense1 = Dense[ndarray_2d]("relu", size=64)
+    dense1.initialize(mpc, 576)
+
+    dropout = Dropout[ndarray_2d](0.2)
+
+    dense2 = Dense[ndarray_2d]("linear", size=14)
+    dense2.initialize(mpc, 64)
+
+    print("\nArchitecture (strided conv - no MaxPooling)")
+    print("  Input  -> (N, 28, 28, 1)")
+    print("  Conv2D(8, 3x3, stride=2, relu)  -> (N, 13, 13, 8)")
+    print("  Conv2D(16, 3x3, stride=2, relu) -> (N, 6, 6, 16)")
+    print("  Flatten                          -> (N, 576)")
+    print("  Dense(64, relu)")
+    print("  Dropout(0.2)")
+    print("  Dense(14, linear)")
+    print("  Loss: binary_crossentropy (mini-batch)")
+
+    print(f"\nTraining: {EPOCHS} epochs, lr={LR}, momentum={MOMENTUM}, "
+          f"batch_size={BATCH_SIZE}, batches/epoch={n_batches}")
+    print("-" * 60)
+
+    # -- Mini-Batch Training --
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+
+        for b in range(n_batches):
+            start = b * BATCH_SIZE
+            end = start + BATCH_SIZE
+            if end > N_TRAIN:
+                end = N_TRAIN
+
+            X_batch = X_train[start:end, :, :, :]
+            y_batch = y_train[start:end, :]
+
+            # Forward
+            forward(mpc, X_batch, input_layer,
+                    conv1, conv2,
+                    flatten, dense1, dropout, dense2)
+
+            # Loss
+            clipped = clip(mpc, dense2.output, 0.001, 0.999)
+            batch_loss = loss(mpc, y_batch, clipped,
+                              "binary_crossentropy", BCE_INTERVAL)
+            epoch_loss += batch_loss.reveal(mpc).sum()
+
+            # Backward
+            backward(mpc, y_batch, input_layer,
+                     conv1, conv2,
+                     flatten, dense1, dropout, dense2)
+
+            # Update weights
+            conv1.update(mpc, LR, MOMENTUM)
+            conv2.update(mpc, LR, MOMENTUM)
+            dense1.update(mpc, LR, MOMENTUM)
+            dense2.update(mpc, LR, MOMENTUM)
+
+        print(f"  Epoch {epoch + 1}/{EPOCHS}  "
+              f"bce_loss = {epoch_loss:.6f}  "
+              f"({n_batches} batches)")
+
+    # -- Evaluation (test set only) --
+    dropout.training = False
+
+    n_test_batches = (N_TEST + BATCH_SIZE - 1) // BATCH_SIZE
+    test_correct = 0
+    test_total = 0
+    test_loss_sum = 0.0
+    for b in range(n_test_batches):
+        start = b * BATCH_SIZE
+        end = start + BATCH_SIZE
+        if end > N_TEST:
+            end = N_TEST
+
+        X_batch = X_test[start:end, :, :, :]
+        y_batch = y_test[start:end, :]
+
+        forward(mpc, X_batch, input_layer,
+                conv1, conv2,
+                flatten, dense1, dropout, dense2)
+
+        probs = clip(mpc, dense2.output, 0.001, 0.999).reveal(mpc)
+        bl = loss(mpc, y_batch,
+                  clip(mpc, dense2.output, 0.001, 0.999),
+                  "binary_crossentropy", BCE_INTERVAL)
+        test_loss_sum += bl.reveal(mpc).sum()
+
+        bs = end - start
+        for i in range(bs):
+            for j in range(14):
+                pred = 1.0 if probs[i, j] > 0.5 else 0.0
+                if pred == y_batch[i, j]:
+                    test_correct += 1
+                test_total += 1
+
+    test_acc = float(test_correct) / float(test_total)
+
+    # -- Report --
+    print("\n" + "=" * 60)
+    print("Results")
+    print("=" * 60)
+    print(f"  Test   accuracy: {test_acc:.4f}   "
+          f"bce-loss: {test_loss_sum:.6f}")
+    print(f"  Config: {N_TRAIN} train, {N_TEST} test, "
+          f"batch_size={BATCH_SIZE}, epochs={EPOCHS}")
+
+
+if __name__ == "__main__":
+    chestmnist_cnn()
diff --git a/applications/offline/chestmnist_prep.py b/applications/offline/chestmnist_prep.py
new file mode 100644
index 00000000..81893146
--- /dev/null
+++ b/applications/offline/chestmnist_prep.py
@@ -0,0 +1,79 @@
+"""
+ChestMNIST Data Preparation
+============================
+Downloads the ChestMNIST dataset via the ``medmnist`` package and saves
+flattened images and multi-label binary labels as whitespace-delimited text
+files that the Codon CNN application can load with ``loadtxt``.
+
+ChestMNIST summary
+------------------
+* 28 × 28 grayscale chest X-ray thumbnails
+* 14 binary disease labels per image (multi-label classification)
+* Official split — train: 78 468, val: 11 219, test: 22 433
+
+Usage
+-----
+    pip install medmnist
+    cd <repo root>   # the default --out_dir is resolved against the current directory
+    python applications/offline/chestmnist_prep.py [--n_train 256] [--n_test 128]
+
+Output (relative to repo root)
+------------------------------
+    data/chestmnist/train_images.txt   (N_train, 784)  float in [0, 1]
+    data/chestmnist/train_labels.txt   (N_train, 14)   int {0, 1}
+    data/chestmnist/test_images.txt    (N_test,  784)  float in [0, 1]
+    data/chestmnist/test_labels.txt    (N_test,  14)   int {0, 1}
+"""
+
+import argparse
+import os
+
+import numpy as np
+from medmnist import ChestMNIST
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Prepare ChestMNIST data for Sequre CNN")
+    parser.add_argument("--n_train", type=int, default=256,
+                        help="Number of training samples to export (default: 256)")
+    parser.add_argument("--n_test", type=int, default=128,
+                        help="Number of test samples to export (default: 128)")
+    parser.add_argument("--out_dir", type=str, default="data/chestmnist",
+                        help="Output directory relative to repo root (default: data/chestmnist)")
+    args = parser.parse_args()
+
+    os.makedirs(args.out_dir, exist_ok=True)
+
+    print("Downloading / loading ChestMNIST ...")
+    train_ds = ChestMNIST(split="train", download=True, root=args.out_dir)
+    test_ds  = ChestMNIST(split="test",  download=True, root=args.out_dir)
+
+    n_train = min(args.n_train, len(train_ds.imgs))
+    n_test  = min(args.n_test,  len(test_ds.imgs))
+
+    # Images: uint8 [0, 255] → float64 [0, 1], then flatten to (N, 784)
+    train_imgs = train_ds.imgs[:n_train].astype(np.float64) / 255.0
+    test_imgs  = test_ds.imgs[:n_test].astype(np.float64)   / 255.0
+
+    train_flat = train_imgs.reshape(n_train, -1)
+    test_flat  = test_imgs.reshape(n_test,  -1)
+
+    # Labels: (N, 14) binary {0, 1}
+    train_lbls = train_ds.labels[:n_train].astype(np.float64)
+    test_lbls  = test_ds.labels[:n_test].astype(np.float64)
+
+    # Save as whitespace-delimited text (one row per sample)
+    np.savetxt(os.path.join(args.out_dir, "train_images.txt"), train_flat, fmt="%.6f")
+    np.savetxt(os.path.join(args.out_dir, "train_labels.txt"), train_lbls, fmt="%d")
+    np.savetxt(os.path.join(args.out_dir, "test_images.txt"),  test_flat,  fmt="%.6f")
+    np.savetxt(os.path.join(args.out_dir, "test_labels.txt"),  test_lbls,  fmt="%d")
+
+    print(f"\nSaved to {os.path.abspath(args.out_dir)}/")
+    print(f"  train_images.txt  shape=({n_train}, 784)  dtype=float")
+    print(f"  train_labels.txt  shape=({n_train}, 14)   dtype=int")
+    print(f"  test_images.txt   shape=({n_test}, 784)   dtype=float")
+    print(f"  test_labels.txt   shape=({n_test}, 14)    dtype=int")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/sequre-test.sh b/scripts/sequre-test.sh
new file mode 100755
index 00000000..1dd7745f
--- /dev/null
+++ b/scripts/sequre-test.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+cd /workspaces/sequre && \
+bash -c "CODON_DEBUG=lt $HOME/.codon/bin/codon run --disable-opt='core-pythonic-list-addition-opt' -plugin sequre \"$1\" --skip-mhe-setup 2>&1"
diff --git a/stdlib/sequre/constants.codon b/stdlib/sequre/constants.codon
index a21d12e2..381c5b91 100644
--- a/stdlib/sequre/constants.codon
+++ b/stdlib/sequre/constants.codon
@@ -273,6 +273,9 @@ LINEAR_ACTIVATION: Static[str] = "linear"
 
 ## Losses
 HINGE_LOSS: Static[str] = "hinge"
+BINARY_CROSSENTROPY_LOSS: Static[str] = "binary_crossentropy"
+CATEGORICAL_CROSSENTROPY_LOSS: Static[str] = "categorical_crossentropy"
+MEAN_SQUARED_ERROR_LOSS: Static[str] = "mean_squared_error"
 
 ## Optimizers
 BGD_OPTIMIZER: Static[str] = "bgd"
@@ -293,7 +296,10 @@ SUPPORTED_ACTIVATIONS = [
     LINEAR_ACTIVATION
 ]
 SUPPORTED_LOSSES = [
-    HINGE_LOSS
+    HINGE_LOSS,
+    BINARY_CROSSENTROPY_LOSS,
+    CATEGORICAL_CROSSENTROPY_LOSS,
+    MEAN_SQUARED_ERROR_LOSS
 ]
 SUPPORTED_OPTIMIZERS = [
     BGD_OPTIMIZER,
diff --git a/stdlib/sequre/stdlib/learn/neural_net/layers.codon b/stdlib/sequre/stdlib/learn/neural_net/layers.codon
index 658da711..d5cb2a4d 100644
--- a/stdlib/sequre/stdlib/learn/neural_net/layers.codon
+++ b/stdlib/sequre/stdlib/learn/neural_net/layers.codon
@@ -1,9 +1,9 @@
 from sequre import sequre
 
 from sequre.constants import SUPPORTED_ACTIVATIONS
+from sequre.stdlib.builtin import maximum
 from activations import activate, dactivate
 
-
 class Input[ctype]:
     size: int
     output: ctype
@@ -110,3 +110,619 @@ class Dense[ctype]:
     def _evaluate(mpc, layer: Dense, last_output: ctype):
         layer.input = last_output @ layer.weights + layer.bias
         layer.output = layer.activate(mpc)
+
+# --- Convolution Layer ---
+class Conv2D[ctype]:
+    activation: str
+    out_channels: int
+    kernel_size: tuple[int, int]
+    stride: tuple[int, int]
+    padding: str
+    kernel_initializer: str
+    bias_initializer: str
+    in_channels: int
+
+    weights: ctype
+    bias: ctype
+    input: ctype
+    output: ctype
+    last_input: ctype
+
+    dw: ctype
+    db: ctype
+
+    vw: ctype
+    vb: ctype
+
+    def __init__(self, activation: str, out_channels: int, kernel_size=(3, 3), stride=(1, 1), padding: str = "valid", kernel_initializer: str = "uniform", bias_initializer: str = "uniform"):
+        assert activation in SUPPORTED_ACTIVATIONS, f"Conv2D: {activation} activation not supported. Supported: {SUPPORTED_ACTIVATIONS}"
+        assert padding == "valid", f"Conv2D: only 'valid' padding is currently supported, got {padding}"
+        
+        self.activation = activation
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
+        self.padding = padding
+        self.kernel_initializer = kernel_initializer
+        self.bias_initializer = bias_initializer
+    
+    @property
+    def size(self) -> int:
+        if hasattr(self, 'output') and not self.output.is_empty():
+            batch, H, W, C = self.output.shape
+            return H * W * C
+        return 0
+    
+    @property
+    def channels(self) -> int:
+        return self.out_channels
+
+    def initialize(self, mpc, prev_size: int, *args, **kwargs):
+        self.in_channels = prev_size
+
+        kH, kW = self.kernel_size
+        w_shape = (kH, kW, self.in_channels, self.out_channels)
+        b_shape = (1, 1, 1, self.out_channels)
+
+        self.weights = ctype.rand(w_shape, self.kernel_initializer, mpc, *args, **kwargs)
+        self.bias = ctype.rand(b_shape, self.bias_initializer, mpc, *args, **kwargs)
+
+        self.dw = self.weights.zeros()
+        self.db = self.bias.zeros()
+
+        self.vw = self.weights.zeros()
+        self.vb = self.bias.zeros()
+
+    def is_evaluated(self):
+        return not self.output.is_empty()
+
+    def evaluate(self, mpc, last_output: ctype):
+        Conv2D[ctype]._evaluate(mpc, self, last_output)
+
+    def activate(self, mpc) -> ctype:
+        return activate(mpc, self.input, self.activation)
+
+    def derive(self, mpc, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        dact = dactivate(mpc, self.input, self.activation)
+        return Conv2D[ctype]._derive(mpc, self, prev_output, dhidden, dact, LAYER_IDX=LAYER_IDX)
+
+    def update(self, mpc, step: float, momentum: float):
+        Conv2D[ctype]._nesterov_update(mpc, self, step, momentum)
+
+    @sequre
+    def _nesterov_update(mpc, layer: Conv2D, step: float, momentum: float):
+        vw_prev = layer.vw.copy()
+        layer.vw = layer.vw * momentum - layer.dw * step
+        layer.weights += layer.vw * (momentum + 1) - vw_prev * momentum
+
+        vb_prev = layer.vb.copy()
+        layer.vb = layer.vb * momentum - layer.db * step
+        layer.bias += layer.vb * (momentum + 1) - vb_prev * momentum
+
+    @sequre
+    def _evaluate(mpc, layer: Conv2D, last_output: ctype):
+        # Forward convolution using decomposed partial matrix multiplications.
+        # Instead of im2col + concatenation, we compute the convolution as:
+        #   output = sum over (i,j) of patch_ij @ W[i,j,:,:]
+        # This avoids the need for concatenation on ndarray.
+        batch, H, W, C_in = last_output.shape
+        kH, kW = layer.kernel_size
+        stride_h, stride_w = layer.stride
+        
+        # Store only a tiny slice (not full input) to save memory;
+        # _derive receives prev_output explicitly and does not read last_input.
+        layer.last_input = last_output[0:1, 0:1, 0:1, 0:1]
+        
+        out_H = (H - kH) // stride_h + 1
+        out_W = (W - kW) // stride_w + 1
+        n_rows = batch * out_H * out_W
+        
+        # Compute convolution as sum of partial matmuls for each kernel position
+        first = True
+        result = last_output[0:1, 0:1, 0:1, 0:1].reshape((1, 1))  # placeholder, overwritten below
+        for i in range(kH):
+            for j in range(kW):
+                r_end = i + out_H * stride_h
+                c_end = j + out_W * stride_w
+                patch = last_output[:, i:r_end:stride_h, j:c_end:stride_w, :]
+                patch_flat = patch.reshape((n_rows, C_in))
+                w_ij = layer.weights[i:i+1, j:j+1, :, :].reshape((C_in, layer.out_channels))
+                partial = patch_flat @ w_ij
+                if first:
+                    result = partial
+                    first = False
+                else:
+                    result = result + partial
+        
+        output = result.reshape((batch, out_H, out_W, layer.out_channels))
+        layer.input = output + layer.bias
+        layer.output = layer.activate(mpc)
+
+    @sequre
+    def _derive(mpc, layer: Conv2D, prev_output: ctype, dhidden: ctype, dact: ctype, LAYER_IDX: Static[int]):
+        # Backward pass using decomposed partial matrix multiplications.
+        batch, H, W, C = prev_output.shape
+        kH, kW = layer.kernel_size
+        stride_h, stride_w = layer.stride
+        dhidden = dhidden * dact
+        out_H = (H - kH) // stride_h + 1
+        out_W = (W - kW) // stride_w + 1
+        n_rows = batch * out_H * out_W
+        
+        dhidden_flat = dhidden.reshape((n_rows, layer.out_channels))
+        
+        # Compute weight gradients for each kernel position
+        layer.dw = layer.weights * 0  # Initialize dW to zeros with same shape
+        for i in range(kH):
+            for j in range(kW):
+                r_end = i + out_H * stride_h
+                c_end = j + out_W * stride_w
+                patch = prev_output[:, i:r_end:stride_h, j:c_end:stride_w, :]
+                patch_flat = patch.reshape((n_rows, C))
+                dw_ij = patch_flat.T @ dhidden_flat  # (C, out_channels)
+                layer.dw[i:i+1, j:j+1, :, :] = dw_ij.reshape((1, 1, C, layer.out_channels))
+        
+        # Compute bias gradient
+        db_sum = dhidden_flat.sum(axis=0)  # (out_channels,)
+        layer.db = db_sum.reshape((1, 1, 1, layer.out_channels))
+        
+        if LAYER_IDX == 1:
+            return layer.output
+        
+        # Compute input gradient
+        dprev = prev_output * 0  # zeros with same shape
+        for i in range(kH):
+            for j in range(kW):
+                w_ij = layer.weights[i:i+1, j:j+1, :, :].reshape((C, layer.out_channels))
+                dcols_ij = dhidden_flat @ w_ij.T  # (n_rows, C)
+                block = dcols_ij.reshape((batch, out_H, out_W, C))
+                r_start = i
+                r_end = i + out_H * stride_h
+                c_start = j
+                c_end = j + out_W * stride_w
+                dprev[:, r_start:r_end:stride_h, c_start:c_end:stride_w, :] += block
+        return dprev
+
+# --- Max Pooling Layer ---
+class MaxPooling2D[ctype]:
+    """2D Max Pooling layer for CNNs.
+    
+    Reduces spatial dimensions by taking the maximum value in each pooling window.
+    Commonly used after Conv2D layers to reduce computation and control overfitting.
+    
+    Args:
+        pool_size: Size of the pooling window (int or tuple of 2 ints)
+        strides: Stride of the pooling operation (int or tuple of 2 ints)
+                 If None, defaults to pool_size
+        padding: Padding mode; only 'valid' (no padding) is currently supported
+    """
+    
+    pool_size: tuple[int, int]
+    strides: tuple[int, int]
+    padding: str
+    
+    # Store for backward pass
+    input_shape: tuple[int, int, int, int]
+    max_indices: ctype  # Store which positions had max values
+    output: ctype
+    last_input: ctype
+    
+    def __init__(self, pool_size=(2, 2), strides=None, padding: str = "valid"):
+        # Only support 'valid' padding for now
+        assert padding == "valid", f"MaxPooling2D: only 'valid' padding is currently supported, got {padding}"
+        
+        self.pool_size = pool_size if isinstance(pool_size, tuple) else (pool_size, pool_size)
+        
+        # Default strides to pool_size (non-overlapping pooling)
+        if strides is None:
+            self.strides = self.pool_size
+        else:
+            self.strides = strides if isinstance(strides, tuple) else (strides, strides)
+        
+        self.padding = padding
+    
+    @property
+    def size(self) -> int:
+        """Return flattened output size for compatibility."""
+        if hasattr(self, 'output') and not self.output.is_empty():
+            batch, H, W, C = self.output.shape
+            return H * W * C
+        return 0
+    
+    @property
+    def channels(self) -> int:
+        """Return number of output channels for Sequential model compatibility."""
+        if hasattr(self, 'output') and not self.output.is_empty():
+            batch, H, W, C = self.output.shape
+            return C
+        return 0
+    
+    @property
+    def activation(self) -> str:
+        return "None"
+    
+    def initialize(self, mpc, prev_size: int, *args, **kwargs):
+        """MaxPooling has no learnable parameters."""
+        pass
+    
+    def is_evaluated(self):
+        return not self.output.is_empty()
+    
+    def evaluate(self, mpc, last_output: ctype):
+        MaxPooling2D[ctype]._evaluate(mpc, self, last_output)
+    
+    def derive(self, mpc, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        return MaxPooling2D[ctype]._derive(mpc, self, prev_output, dhidden, LAYER_IDX=LAYER_IDX)
+    
+    def update(self, mpc, step: float, momentum: float):
+        """MaxPooling has no parameters to update."""
+        pass
+    
+    @sequre
+    def _evaluate(mpc, layer: MaxPooling2D, last_output: ctype):
+        """Forward pass: compute max pooling."""
+        batch, H, W, C = last_output.shape
+        pH, pW = layer.pool_size
+        stride_h, stride_w = layer.strides
+        
+        # Store only a tiny slice (not full input) to save memory;
+        # _derive receives prev_output explicitly and does not read last_input.
+        layer.last_input = last_output[0:1, 0:1, 0:1, 0:1]
+        layer.input_shape = (batch, H, W, C)
+        
+        # Calculate output dimensions
+        out_H = (H - pH) // stride_h + 1
+        out_W = (W - pW) // stride_w + 1
+        
+        # Create output tensor
+        # Initialize with first element of first pool to get the right type
+        output = last_output[:, 0:out_H, 0:out_W, :] * 0
+        
+        # Compute max pooling for each position
+        for b in range(batch):
+            for oh in range(out_H):
+                for ow in range(out_W):
+                    h_start = oh * stride_h
+                    w_start = ow * stride_w
+                    
+                    # Extract pooling window for all channels
+                    pool_region = last_output[b:b+1, h_start:h_start+pH, w_start:w_start+pW, :]
+                    
+                    # For each channel, find max in the pool window using secure maximum
+                    for c in range(C):
+                        channel_region = pool_region[0, :, :, c]
+                        
+                        # Compute maximum securely using iterative comparison
+                        max_val = channel_region[0, 0]
+                        for i in range(pH):
+                            for j in range(pW):
+                                max_val = maximum(mpc, max_val, channel_region[i, j])
+                        
+                        output[b, oh, ow, c] = max_val
+        
+        layer.output = output
+    
+    @sequre
+    def _derive(mpc, layer: MaxPooling2D, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        """Backward pass: route gradients to max positions; returns layer.output instead when LAYER_IDX == 1."""
+        # Checked first so the discarded dprev (and its per-element secure comparisons) is never computed.
+        if LAYER_IDX == 1:
+            return layer.output
+        
+        batch, H, W, C = layer.input_shape
+        pH, pW = layer.pool_size
+        stride_h, stride_w = layer.strides
+        out_H = (H - pH) // stride_h + 1
+        out_W = (W - pW) // stride_w + 1
+        
+        # Initialize gradient tensor with zeros
+        dprev = prev_output * 0
+        
+        # Route gradients back to max positions
+        for b in range(batch):
+            for oh in range(out_H):
+                for ow in range(out_W):
+                    h_start = oh * stride_h
+                    w_start = ow * stride_w
+                    
+                    # Get the pooling window and output gradient
+                    pool_region = prev_output[b:b+1, h_start:h_start+pH, w_start:w_start+pW, :]
+                    grad_out = dhidden[b, oh, ow, :]
+                    
+                    # For each channel, find where max occurred and assign gradient
+                    for c in range(C):
+                        channel_region = pool_region[0, :, :, c]
+                        
+                        # Compute maximum securely using iterative comparison
+                        max_val = channel_region[0, 0]
+                        for i in range(pH):
+                            for j in range(pW):
+                                max_val = maximum(mpc, max_val, channel_region[i, j])
+                        
+                        # Mask of max positions (on ties, every tied position receives the full gradient)
+                        for i in range(pH):
+                            for j in range(pW):
+                                # Use secure comparison to check if this position had the max value
+                                is_max = ((max_val - channel_region[i, j]) < 1e-6).astype(float)
+                                dprev[b, h_start + i, w_start + j, c] += grad_out[c] * is_max
+        
+        return dprev
+
+# --- Flatten Layer ---
+class Flatten[in_ctype, out_ctype]:
+    """Flatten layer to convert multi-dimensional input into a 2D tensor for Dense layers.
+    
+    Uses two type parameters to handle the dimensionality change:
+        in_ctype:  Input tensor type (e.g., ndarray[Tuple[int,int,int,int], float] for 4D)
+        out_ctype: Output tensor type (e.g., ndarray[Tuple[int,int], float] for 2D)
+    
+    NOTE(review): _evaluate indexes its input with four slices, so a non-4D in_ctype (e.g. same-dimension pass-through with identical types) likely fails - confirm.
+    """
+    
+    last_input: in_ctype
+    output: out_ctype
+    
+    @property
+    def size(self) -> int:
+        """Return flattened output size."""
+        if hasattr(self, 'output') and not self.output.is_empty():
+            return self.output.shape[1]
+        return 0
+    
+    @property
+    def activation(self) -> str:
+        return "None"
+    
+    def initialize(self, mpc, prev_size: int, *args, **kwargs):
+        """Flatten has no learnable parameters."""
+        pass
+    
+    def is_evaluated(self):
+        return not self.output.is_empty()
+    
+    def evaluate(self, mpc, last_output: in_ctype):
+        Flatten[in_ctype, out_ctype]._evaluate(mpc, self, last_output)
+    
+    def derive(self, mpc, prev_output: in_ctype, dhidden: out_ctype, LAYER_IDX: Static[int]) -> in_ctype:
+        return Flatten[in_ctype, out_ctype]._derive(mpc, self, prev_output, dhidden, LAYER_IDX=LAYER_IDX)
+    
+    def update(self, mpc, step: float, momentum: float):
+        """Flatten has no parameters to update."""
+        pass
+    
+    @sequre
+    def _evaluate(mpc, layer: Flatten, last_output: in_ctype):
+        # Store only a tiny slice to save memory; _derive uses prev_output.shape.
+        layer.last_input = last_output[0:1, 0:1, 0:1, 0:1]
+        batch_size = last_output.shape[0]
+        s = last_output.shape
+        total = 1
+        for i in range(staticlen(s)):
+            total *= s[i]
+        features = total // batch_size
+        layer.output = last_output.reshape((batch_size, features))
+    
+    @sequre
+    def _derive(mpc, layer: Flatten, prev_output: in_ctype, dhidden: out_ctype, LAYER_IDX: Static[int]) -> in_ctype:
+        if LAYER_IDX == 1:
+            return prev_output * 0
+        
+        return dhidden.reshape(prev_output.shape)
+
+# --- Dropout Layer ---
+class Dropout[ctype]:
+    """Dropout layer for regularization.
+    
+    Zeroes input units during training to prevent overfitting; kept units are scaled by 1 / (1 - rate).
+    During inference, all units are kept and passed through unscaled.
+    
+    Args:
+        rate: Fraction of input units to drop (between 0 and 1)
+    """
+    
+    rate: float
+    training: bool
+    mask: ctype
+    output: ctype
+    
+    def __init__(self, rate: float = 0.5):
+        assert 0 <= rate < 1, f"Dropout: rate must be in [0, 1), got {rate}"
+        self.rate = rate
+        self.training = True
+    
+    @property
+    def size(self) -> int:
+        """Return output size for compatibility."""
+        if hasattr(self, 'output') and not self.output.is_empty():
+            total = 1
+            s = self.output.shape
+            for i in range(staticlen(s)):
+                total *= s[i]
+            return total // s[0]
+        return 0
+    
+    @property
+    def activation(self) -> str:
+        return "None"
+    
+    def initialize(self, mpc, prev_size: int, *args, **kwargs):
+        """Dropout has no learnable parameters."""
+        pass
+    
+    def is_evaluated(self):
+        return not self.output.is_empty()
+    
+    def evaluate(self, mpc, last_output: ctype):
+        Dropout[ctype]._evaluate(mpc, self, last_output)
+    
+    def derive(self, mpc, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        return Dropout[ctype]._derive(mpc, self, prev_output, dhidden, LAYER_IDX=LAYER_IDX)
+    
+    def update(self, mpc, step: float, momentum: float):
+        """Dropout has no parameters to update."""
+        pass
+    
+    @sequre
+    def _evaluate(mpc, layer: Dropout, last_output: ctype):
+        """Forward pass: apply dropout mask."""
+        if layer.training and layer.rate > 0:
+            # Generate random mask (Note: In MPC, true randomness needs special handling)
+            # For now, use a deterministic approach
+            # In practice, this should use secure randomness
+            scale = 1.0 / (1.0 - layer.rate)
+            layer.mask = (last_output > layer.rate).astype(type(last_output._internal_type))
+            layer.output = last_output * layer.mask * scale
+        else:
+            # During inference, use all units
+            layer.output = last_output
+    
+    @sequre
+    def _derive(mpc, layer: Dropout, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        """Backward pass: apply same mask to gradients."""
+        if LAYER_IDX == 1:
+            return layer.output
+        
+        if layer.training and layer.rate > 0:
+            scale = 1.0 / (1.0 - layer.rate)
+            return dhidden * layer.mask * scale
+        else:
+            return dhidden
+
+
+class BatchNormalization[ctype]:
+    """Batch Normalization layer for stabilizing and accelerating training.
+    
+    Normalizes inputs across the batch dimension to have mean 0 and variance 1,
+    then applies learnable scale (gamma) and shift (beta) parameters.
+    
+    Args:
+        momentum: Momentum for running mean/variance updates
+        epsilon: Small constant for numerical stability
+    """
+    
+    momentum: float
+    epsilon: float
+    
+    # Learnable parameters
+    gamma: ctype  # Scale
+    beta: ctype   # Shift
+    
+    # Running statistics (for inference)
+    running_mean: ctype
+    running_var: ctype
+    
+    # Cached values for backward pass
+    input_normalized: ctype
+    std: ctype
+    output: ctype
+    
+    # Gradients
+    dgamma: ctype
+    dbeta: ctype
+    
+    # Momentum terms
+    vgamma: ctype
+    vbeta: ctype
+    
+    training: bool
+    size: int
+    
+    def __init__(self, momentum: float = 0.99, epsilon: float = 1e-3):
+        self.momentum = momentum
+        self.epsilon = epsilon
+        self.training = True
+    
+    @property
+    def activation(self) -> str:
+        return "None"
+    
+    def initialize(self, mpc, prev_size: int, *args, **kwargs):
+        """Initialize scale, shift, and running statistics."""
+        self.size = prev_size
+        shape = (1, prev_size)
+        
+        # Initialize gamma to 1, beta to 0
+        self.gamma = ctype.ones(shape, mpc)
+        self.beta = ctype.zeros(shape, mpc)
+        
+        # Initialize running statistics
+        self.running_mean = ctype.zeros(shape, mpc)
+        self.running_var = ctype.ones(shape, mpc)
+        
+        # Initialize gradients and momentum
+        self.dgamma = ctype.zeros(shape, mpc)
+        self.dbeta = ctype.zeros(shape, mpc)
+        self.vgamma = ctype.zeros(shape, mpc)
+        self.vbeta = ctype.zeros(shape, mpc)
+    
+    def is_evaluated(self):
+        return not self.output.is_empty()
+    
+    def evaluate(self, mpc, last_output: ctype):
+        BatchNormalization[ctype]._evaluate(mpc, self, last_output)
+    
+    def derive(self, mpc, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        return BatchNormalization[ctype]._derive(mpc, self, prev_output, dhidden, LAYER_IDX=LAYER_IDX)
+    
+    def update(self, mpc, step: float, momentum: float):
+        BatchNormalization[ctype]._nesterov_update(mpc, self, step, momentum)
+    
+    @sequre
+    def _nesterov_update(mpc, layer: BatchNormalization, step: float, momentum: float):
+        # Update gamma
+        vg_prev = layer.vgamma.copy()
+        layer.vgamma = layer.vgamma * momentum - layer.dgamma * step
+        layer.gamma += layer.vgamma * (momentum + 1) - vg_prev * momentum
+        
+        # Update beta
+        vb_prev = layer.vbeta.copy()
+        layer.vbeta = layer.vbeta * momentum - layer.dbeta * step
+        layer.beta += layer.vbeta * (momentum + 1) - vb_prev * momentum
+    
+    @sequre
+    def _evaluate(mpc, layer: BatchNormalization, last_output: ctype):
+        """Forward pass: normalize and scale."""
+        if layer.training:
+            # Compute batch statistics
+            batch_mean = last_output.mean(axis=0).expand_dims(axis=0)
+            batch_var = ((last_output - batch_mean) ** 2).mean(axis=0).expand_dims(axis=0)
+            
+            # Update running statistics
+            layer.running_mean = layer.momentum * layer.running_mean + (1 - layer.momentum) * batch_mean
+            layer.running_var = layer.momentum * layer.running_var + (1 - layer.momentum) * batch_var
+            
+            # Normalize
+            layer.std = (batch_var + layer.epsilon) ** 0.5
+            layer.input_normalized = (last_output - batch_mean) / layer.std
+        else:
+            # Use running statistics for inference
+            layer.input_normalized = (last_output - layer.running_mean) / ((layer.running_var + layer.epsilon) ** 0.5)
+        
+        # Scale and shift
+        layer.output = layer.gamma * layer.input_normalized + layer.beta
+    
+    @sequre
+    def _derive(mpc, layer: BatchNormalization, prev_output: ctype, dhidden: ctype, LAYER_IDX: Static[int]) -> ctype:
+        """Backward pass: store dgamma/dbeta on the layer and return the gradient w.r.t. the layer input."""
+        batch_size = float(prev_output.shape[0])
+        
+        # Gradient w.r.t. gamma and beta
+        layer.dgamma = (dhidden * layer.input_normalized).sum(axis=0).expand_dims(axis=0)
+        layer.dbeta = dhidden.sum(axis=0).expand_dims(axis=0)
+        
+        if LAYER_IDX == 1:
+            return layer.output
+        
+        # Input centered on the batch mean used by the forward pass (running_mean is a moving average, not that mean)
+        centered = layer.input_normalized * layer.std
+        dinput_normalized = dhidden * layer.gamma
+        # Gradient w.r.t. variance
+        dvar = ((dinput_normalized * centered) * (-0.5) * (layer.std ** -3)).sum(axis=0).expand_dims(axis=0)
+        
+        # Gradient w.r.t. mean
+        dmean = (dinput_normalized * (-1.0 / layer.std)).sum(axis=0).expand_dims(axis=0) + dvar * ((-2.0 / batch_size) * centered).sum(axis=0).expand_dims(axis=0)
+        
+        # Gradient w.r.t. input
+        dprev = dinput_normalized / layer.std + dvar * (2.0 / batch_size) * centered + dmean / batch_size
+        
+        return dprev
diff --git a/stdlib/sequre/stdlib/learn/neural_net/loss.codon b/stdlib/sequre/stdlib/learn/neural_net/loss.codon
index 04ce51c6..9ceb64f4 100644
--- a/stdlib/sequre/stdlib/learn/neural_net/loss.codon
+++ b/stdlib/sequre/stdlib/learn/neural_net/loss.codon
@@ -1,7 +1,15 @@
 from sequre import sequre
-from sequre.constants import SUPPORTED_LOSSES, HINGE_LOSS
+from sequre.types.sharetensor import Sharetensor
 
+from sequre.constants import (
+    SUPPORTED_LOSSES, HINGE_LOSS,
+    BINARY_CROSSENTROPY_LOSS,
+    CATEGORICAL_CROSSENTROPY_LOSS,
+    MEAN_SQUARED_ERROR_LOSS)
+from sequre.stdlib.builtin import chebyshev_log, chebyshev_mul_inv, clip
 
+
+# --- Hinge ---
 @sequre
 def hinge_loss(mpc, y, output):
     hinge = 1 - y * output
@@ -13,23 +21,98 @@ def dhinge_loss(mpc, y, output):
     return (-y * ((1 - y * output) > 0).astype(type(y._internal_type)))
 
 
+# --- Binary cross-entropy ---
+#  Uses Chebyshev polynomial approximation of log over (eps, 1-eps).
+@sequre
+def binary_crossentropy_loss(mpc, y, output, interval):
+    eps = interval[0]
+    p = clip(mpc, output, eps, 1.0 - eps)
+    log_p = chebyshev_log(mpc, p, interval)
+    log_1mp = chebyshev_log(mpc, 1 - p + eps, interval)
+    return -(y * log_p + (1 - y) * log_1mp)
+
+
+@sequre
+def dbinary_crossentropy_loss(mpc, y, output, interval):
+    eps = interval[0]
+    p = clip(mpc, output, eps, 1.0 - eps)
+    num = p - y
+    denom = p * (1 - p + eps)
+    inv_denom = chebyshev_mul_inv(mpc, denom, interval)
+    return num * inv_denom
+
+
+# --- Categorical cross-entropy ---
+#  Labels y: one-hot, predictions p: softmax probabilities in (0, 1)
+@sequre
+def categorical_crossentropy_loss(mpc, y, output, interval):
+    eps = interval[0]
+    p = clip(mpc, output, eps, 1.0 - eps)
+    log_p = chebyshev_log(mpc, p, interval)
+    return -(y * log_p)
+
+
+@sequre
+def dcategorical_crossentropy_loss(mpc, y, output, interval):
+    eps = interval[0]
+    p = clip(mpc, output, eps, 1.0 - eps)
+    inv_p = chebyshev_mul_inv(mpc, p, interval)
+    return -(y * inv_p)
+
+
+# --- Mean squared error ---
+#  L = (y - output)^2 / 2
 @sequre
-def loss(mpc, y, output, loss: str):
-    assert loss in SUPPORTED_LOSSES, f"Neural net: loss {loss} is not supported"
-    
-    if loss == HINGE_LOSS:
+def mean_squared_error_loss(mpc, y, output):
+    diff = y - output
+    return diff * diff / 2
+
+
+@sequre
+def dmean_squared_error_loss(mpc, y, output):
+    return output - y
+
+
+# --- Dispatch functions ---
+BCE_INTERVAL = (0.001, 1.0)
+CCE_INTERVAL = (0.001, 1.0)
+
+
+@sequre
+def loss(mpc, y, output, loss_name: str, interval=None, method: Static[str] = "newton", degree: Static[int] = 3):
+    assert loss_name in SUPPORTED_LOSSES, f"Neural net: loss {loss_name} is not supported"
+
+    if loss_name == HINGE_LOSS:
+        _loss = hinge_loss(mpc, y, output)
+    elif loss_name == BINARY_CROSSENTROPY_LOSS:
+        _interval = interval if interval is not None else BCE_INTERVAL
+        _loss = binary_crossentropy_loss(mpc, y, output, _interval)
+    elif loss_name == CATEGORICAL_CROSSENTROPY_LOSS:
+        _interval = interval if interval is not None else CCE_INTERVAL
+        _loss = categorical_crossentropy_loss(mpc, y, output, _interval)
+    elif loss_name == MEAN_SQUARED_ERROR_LOSS:
+        _loss = mean_squared_error_loss(mpc, y, output)
+    else:
         _loss = hinge_loss(mpc, y, output)
 
-    # Pre-normalize for downstream computation
     return _loss / len(y)
 
 
 @sequre
-def dloss(mpc, y, output, loss: str):
-    assert loss in SUPPORTED_LOSSES, f"Neural net: loss {loss} is not supported"
-    
-    if loss == HINGE_LOSS:
+def dloss(mpc, y, output, loss_name: str, interval=None):
+    assert loss_name in SUPPORTED_LOSSES, f"Neural net: loss {loss_name} is not supported"
+
+    if loss_name == HINGE_LOSS:
+        _dloss = dhinge_loss(mpc, y, output)
+    elif loss_name == BINARY_CROSSENTROPY_LOSS:
+        _interval = interval if interval is not None else BCE_INTERVAL
+        _dloss = dbinary_crossentropy_loss(mpc, y, output, _interval)
+    elif loss_name == CATEGORICAL_CROSSENTROPY_LOSS:
+        _interval = interval if interval is not None else CCE_INTERVAL
+        _dloss = dcategorical_crossentropy_loss(mpc, y, output, _interval)
+    elif loss_name == MEAN_SQUARED_ERROR_LOSS:
+        _dloss = dmean_squared_error_loss(mpc, y, output)
+    else:
         _dloss = dhinge_loss(mpc, y, output)
 
-    # Pre-normalize for downstream computation
     return _dloss / len(y)
diff --git a/stdlib/sequre/stdlib/learn/neural_net/model.codon b/stdlib/sequre/stdlib/learn/neural_net/model.codon
index 8e174adf..c221e815 100644
--- a/stdlib/sequre/stdlib/learn/neural_net/model.codon
+++ b/stdlib/sequre/stdlib/learn/neural_net/model.codon
@@ -1,5 +1,5 @@
 from sequre.constants import BGD_OPTIMIZER, MBGD_OPTIMIZER, SUPPORTED_OPTIMIZERS, SUPPORTED_LOSSES
-from loss import loss, dloss
+from .loss import loss, dloss
 from ..utils import batch
 
 
@@ -19,8 +19,24 @@ class Sequential[L]:
         self.optimizer = optimizer
 
         for i in staticrange(1, staticlen(self.layers)):  # Skip input layer
+            prev_layer = self.layers[i - 1]
+            curr_layer = self.layers[i]
+            
+            # Determine prev_size based on layer types
+            # If current layer is CNN (has channels property) and previous is Input,
+            # assume 1 channel for grayscale images
+            if hasattr(curr_layer, 'channels') and hasattr(prev_layer, 'size') and not hasattr(prev_layer, 'out_channels'):
+                # Current is CNN layer, prev is Input layer -> use 1 channel
+                prev_size = 1
+            elif hasattr(prev_layer, 'channels'):
+                # Previous layer is CNN -> use its channel count
+                prev_size = prev_layer.channels
+            else:
+                # Previous layer is Dense or Input for Dense -> use size
+                prev_size = prev_layer.size
+                
             self.layers[i].initialize(
-                mpc, self.layers[i - 1].size, *args, **kwargs)
+                mpc, prev_size, *args, **kwargs)
         
         return self
     
@@ -45,6 +61,7 @@ class Sequential[L]:
         assert self.layers[-1].is_evaluated(), "Sequential neural net: cannot calculate training score. Forward pass was never done."
         return self._loss(mpc, y, self.layers[-1].output)
     
+    # --- Gradient Descent ---
     def _gd(self, mpc, X, y, step: float, momentum: float, verbose: bool):
         self._forward(mpc, X)
         self._backward(mpc, y)
@@ -53,12 +70,14 @@ class Sequential[L]:
         if verbose:
             print(f"CP{mpc.pid}:\t\tTraining loss: {self.loss_(mpc, y).reveal(mpc).sum()}")
     
+    # --- Batch Gradient Descent ---
     def _bgd(self, mpc, X, y, step: float, epochs: int, momentum: float, verbose: bool):
         for _ in range(epochs):
             if verbose:
                 print(f"CP{mpc.pid}:\tSequential neural net: BGD epoch {_ + 1}/{epochs}")
             self._gd(mpc, X, y, step, momentum, verbose)
-            
+
+    # --- Mini-Batch Gradient Descent --- 
     def _mbgd(self, mpc, X, y, step: float, epochs: int, momentum: float, batch_size: int, verbose: bool):
         batches = (len(X) + batch_size - 1) // batch_size
         
diff --git a/tests/e2e_tests/test_neural_net.codon b/tests/e2e_tests/test_neural_net.codon
index c86d1d19..43dcf3a0 100644
--- a/tests/e2e_tests/test_neural_net.codon
+++ b/tests/e2e_tests/test_neural_net.codon
@@ -1,3 +1,11 @@
+from numpy.create import array, zeros, zeros_like, ones
+from sequre.types.sharetensor import Sharetensor
+from sequre.types.multiparty_union import MPU
+from sequre.utils.testing import assert_eq, assert_eq_approx
+from sequre.utils.utils import random_floats, random_ints
+from sequre.mpc.env import MPCEnv
+from sequre.stdlib.learn.neural_net.model import Sequential
+from sequre.stdlib.learn.neural_net.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
 from sequre.stdlib.learn.neural_net.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
 from sequre.stdlib.learn.neural_net.loss import dloss
 from sequre import sequre
@@ -12,6 +20,7 @@ def _test_maxpool2d_secure[TP](mpc: MPCEnv[TP], modulus: TP):
     print(f'CP{mpc.pid}:\tTesting MaxPool2D component on {alg_struct} ... \n')
     ndarray_4d = type(zeros((1, 1, 1, 1)))
 
+<<<<<<< HEAD
     # Input shape: (batch = 1, Height = 4, Width = 4, Channels = 1)
     with mpc.randomness.seed_switch(-1):
         X = zeros((1, 4, 4, 1))
@@ -40,6 +49,17 @@ def _test_maxpool2d_secure[TP](mpc: MPCEnv[TP], modulus: TP):
     print(f'CP{mpc.pid}:\tMaxPool2D component on {alg_struct} passed.\n')
 
 # --- CNN end-to-end test ---
+=======
+@sequre
+def _sequre_matmul(mpc, a, b):  # test helper: matrix product of a and b, wrapped by @sequre
+    return a @ b  # mpc is not referenced in the body; presumably consumed by the @sequre decorator - confirm
+
+@sequre
+def _sequre_maximum(mpc, a, b):  # test helper: forwards to the imported `maximum`, wrapped by @sequre
+    return maximum(mpc, a, b)  # mpc is passed through explicitly as the first argument
+
+<<<<<<< HEAD
+>>>>>>> 4382845c (Add Component Tests)
 def _test_neural_net_cnn[TP](mpc: MPCEnv[TP], modulus: TP):
     """
     End-to-end CNN training test.
@@ -54,10 +74,25 @@ def _test_neural_net_cnn[TP](mpc: MPCEnv[TP], modulus: TP):
     step = 0.01
     momentum = 0.9
     epochs = 2
+=======
+
+# --- CNN component tests ---
+def _test_conv2d[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Test Conv2D layer forward pass.
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"
+    print(f'CP{mpc.pid}:\tTesting Conv2D component on {alg_struct} ... \n')
+>>>>>>> 07dcb240 (Refactor NN tests: separate vanilla and CNN tests; fix Conv2D test to use layer logic.)
 
     ndarray_4d = type(zeros((1, 1, 1, 1)))
-    ndarray_2d = type(zeros((1, 1)))
+    N, H, W, C_in = 2, 4, 4, 1      # Batch size (number of samples), Height, Width, Input channels
+    n_filters = 2                   # No of Filters
+    kH, kW = 3, 3                   # Kernel Height, Kernel Width
+    out_H = (H - kH) + 1            # Output Height
+    out_W = (W - kW) + 1            # Output Width
 
+<<<<<<< HEAD
     kH, kW = 3, 3
     conv_out_H = (H - kH) + 1
     conv_out_W = (W - kW) + 1
@@ -116,11 +151,153 @@ def _test_neural_net_cnn[TP](mpc: MPCEnv[TP], modulus: TP):
     assert_eq(f"CP{mpc.pid}:\tCNN Flatten output shape on {alg_struct}", list(flat.output.shape), [N, flat_size])
     assert_eq(f"CP{mpc.pid}:\tCNN Dense output shape on {alg_struct}", list(dense.output.shape), [N, dense_out])
     assert w0_sum != w1_sum, f"CP{mpc.pid}: Conv2D weights unchanged after training"
+<<<<<<< HEAD
+=======
+
+    # Loss must decrease after training
+>>>>>>> 4382845c (Add Component Tests)
     assert loss_last < loss_first, f"CP{mpc.pid}: CNN loss did not drop (first={loss_first}, last={loss_last})"
     print(f'CP{mpc.pid}:\tCNN loss dropped: {loss_first:.4f} -> {loss_last:.4f}')
     print(f'CP{mpc.pid}:\tSequre CNN layers on {alg_struct} tests passed.\n')
 
+def _test_conv2d[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Convolution step on a single 2D matrix
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"  # a modulus with a single set bit is labelled "ring"
+    print(f'CP{mpc.pid}:\tTesting Conv2D 2D matmul component on {alg_struct} ... \n')
+
+    # A small 2D "patch" (4 samples x 3 input channels). NOTE(review): unresolved merge-conflict markers follow, and N, H, W, C_in, n_filters, kH, kW, out_H, out_W, ndarray_4d are never assigned in this body
+=======
+    with mpc.randomness.seed_switch(-1):  # presumably switches to a seed shared by all parties - confirm
+        X = zeros((N, H, W, C_in))
+        for i in range(N):
+            for r in range(H):
+                for c in range(W):
+                    X[i, r, c, 0] = float(i * H * W + r * W + c + 1)  # channel 0 gets the consecutive values 1..N*H*W
+
+    conv = Conv2D[ndarray_4d]("linear", out_channels=n_filters, kernel_size=(kH, kW), stride=(1, 1))
+    conv.initialize(mpc, C_in)  # the layer is initialized by hand with the input channel count
+    conv.evaluate(mpc, X)  # forward pass; the result is read from conv.output below
+
+    # Verify output shape
+    assert_eq(f"CP{mpc.pid}:\tConv2D layer output shape on {alg_struct}",
+              list(conv.output.shape), [N, out_H, out_W, n_filters])
 
+    # Verify output is non-trivial (the sum over all output entries is non-zero)
+    out_sum = conv.output.reshape((conv.output.size,)).sum()
+    assert out_sum != 0.0, f"CP{mpc.pid}: Conv2D output is all zeros"
+
+    n_rows = N * out_H * out_W  # one row per (sample, output row, output column)
+    first = True
+    result = zeros((1, 1))  # placeholder; replaced by the first partial product in the loop
+    for i in range(kH):  # rebuild the expected output as a sum of one matmul per kernel offset (i, j)
+        for j in range(kW):
+            r_end = i + out_H
+            c_end = j + out_W
+            patch = X[:, i:r_end, j:c_end, :]  # input window shifted by the kernel offset (i, j)
+            patch_flat = patch.reshape((n_rows, C_in))
+            w_ij = conv.weights[i:i+1, j:j+1, :, :].reshape((C_in, n_filters))  # weight slice at kernel position (i, j)
+            partial = patch_flat @ w_ij
+            if first:
+                result = partial
+                first = False
+            else:
+                result = result + partial
+    expected = result.reshape((N, out_H, out_W, n_filters)) + conv.bias  # bias is added once, after all offsets are summed
+    assert_eq_approx(f"CP{mpc.pid}:\tConv2D layer output values on {alg_struct}",
+                     conv.output, expected, error=0.001)
+
+>>>>>>> 07dcb240 (Refactor NN tests: separate vanilla and CNN tests; fix Conv2D test to use layer logic.)
+    with mpc.randomness.seed_switch(-1):
+        patch_2d = array([[0.5, 0.3, 0.1],
+                          [0.2, 0.8, 0.4],
+                          [0.9, 0.1, 0.6],
+                          [0.3, 0.7, 0.2]])
+        weights_2d = array([[0.1, -0.2],
+                            [0.3,  0.4],
+                            [-0.1, 0.5]])
+    raw_matmul = patch_2d @ weights_2d  # plaintext reference: (4 x 3) @ (3 x 2)
+
+    mpc_patch = Sharetensor.enc(mpc, patch_2d, 0, modulus)  # third argument is presumably the source party id - confirm
+    mpc_weights = Sharetensor.enc(mpc, weights_2d, 0, modulus)
+    mpc_result = _sequre_matmul(mpc, mpc_patch, mpc_weights)
+
+    if mpc.pid == 0:  # party 0 only checks the shape; the other parties compare revealed values
+        assert_eq(f"CP{mpc.pid}:\tConv2D secure matmul shape on {alg_struct}",
+                  mpc_result.shape, list(raw_matmul.shape))
+    else:
+        assert_eq_approx(f"CP{mpc.pid}:\tConv2D secure matmul on {alg_struct}",
+                         mpc_result.reveal(mpc), raw_matmul, error=0.1)
+
+    print(f'CP{mpc.pid}:\tConv2D component on {alg_struct} passed.\n')
+
+
+def _test_maxpool2d[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Test the MaxPooling2D layer forward pass on a plain 4D array, then a 2x2 max-pool built from secure `maximum` calls on a Sharetensor.
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"  # a modulus with a single set bit is labelled "ring"
+    print(f'CP{mpc.pid}:\tTesting MaxPool2D component on {alg_struct} ... \n')
+
+    ndarray_4d = type(zeros((1, 1, 1, 1)))  # concrete 4D array type used to parameterize the layer
+    N, H, W, C = 1, 4, 4, 1  # X is indexed as X[sample, row, col, channel] below
+    with mpc.randomness.seed_switch(-1):  # presumably switches to a seed shared by all parties - confirm
+        X = zeros((N, H, W, C))
+        vals = [1.0, 3.0, 2.0, 4.0,
+                5.0, 2.0, 8.0, 1.0,
+                3.0, 7.0, 6.0, 0.0,
+                9.0, 4.0, 2.0, 5.0]
+        for r in range(H):
+            for c in range(W):
+                X[0, r, c, 0] = vals[r * W + c]  # row-major fill of the single sample/channel
+
+    pool = MaxPooling2D[ndarray_4d](pool_size=(2, 2), strides=(2, 2))
+
+    # Run actual MaxPooling2D forward pass
+    pool.evaluate(mpc, X)
+
+    # Verify output shape: (1, 2, 2, 1)
+    assert_eq(f"CP{mpc.pid}:\tMaxPool2D layer output shape on {alg_struct}",
+              list(pool.output.shape), [N, 2, 2, C])
+
+    # Verify output values match expected max-pool results (maximum of each non-overlapping 2x2 window of vals)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,0,0] on {alg_struct}", pool.output[0, 0, 0, 0], 5.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,0,1] on {alg_struct}", pool.output[0, 0, 1, 0], 8.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,1,0] on {alg_struct}", pool.output[0, 1, 0, 0], 9.0, error=0.001)
+    assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D [0,1,1] on {alg_struct}", pool.output[0, 1, 1, 0], 6.0, error=0.001)
+
+    raw_expected = array([[5.0, 8.0],  # the same four window maxima as asserted above
+                          [9.0, 6.0]])
+    mat = array([[1.0, 3.0, 2.0, 4.0],  # the same 16 values as vals, as a 4x4 matrix
+                 [5.0, 2.0, 8.0, 1.0],
+                 [3.0, 7.0, 6.0, 0.0],
+                 [9.0, 4.0, 2.0, 5.0]])
+
+    mpc_mat = Sharetensor.enc(mpc, mat, 0, modulus)  # third argument is presumably the source party id - confirm
+    pH, pW = 2, 2  # pooling window height/width; also used as the stride when computing h_s/w_s
+    out_H, out_W = 2, 2
+    mpc_out_rows = list[list[float]]()
+    for oh in range(out_H):
+        row = list[float]()
+        for ow in range(out_W):
+            h_s = oh * pH  # top-left corner of the current window
+            w_s = ow * pW
+            max_val = mpc_mat[h_s][w_s]  # seed the running maximum with the window's top-left element
+            for i in range(pH):
+                for j in range(pW):
+                    max_val = _sequre_maximum(mpc, max_val, mpc_mat[h_s + i][w_s + j])  # (i, j) == (0, 0) compares the seed with itself
+            row.append(max_val.reveal(mpc))  # reveal each window maximum before storing it
+        mpc_out_rows.append(row)
+    mpc_out = array(mpc_out_rows)
+
+    if mpc.pid > 0:  # revealed values are only compared on parties with pid > 0
+        assert_eq_approx(f"CP{mpc.pid}:\tMaxPool2D secure on {alg_struct}",
+                         mpc_out, raw_expected, error=0.1)
+
+    print(f'CP{mpc.pid}:\tMaxPool2D component on {alg_struct} passed.\n')
+
+<<<<<<< HEAD
 def _test_neural_net_cnn_all[TP](mpc: MPCEnv[TP], modulus: TP):
     """
     Run CNN tests: secure 2D maxpool component + end-to-end CNN workflow.
@@ -129,6 +306,98 @@ def _test_neural_net_cnn_all[TP](mpc: MPCEnv[TP], modulus: TP):
     print(f'CP{mpc.pid}:\tTesting Sequre CNN on {alg_struct} ... \n')
 
     _test_maxpool2d_secure(mpc, modulus)
+=======
+def _test_neural_net_cnn[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    End-to-end CNN training test: Input -> Conv2D -> MaxPooling2D -> Flatten -> Dense, with forward, backward and update steps driven by hand.
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"  # a modulus with a single set bit is labelled "ring"
+    print(f'CP{mpc.pid}:\tTesting Sequre CNN layers on {alg_struct} ... \n')
+
+    N = 4  # number of samples (leading dimension of X and y)
+    H, W, C_in = 8, 8, 1
+    n_filters = 4
+    dense_out = 2
+    step = 0.01
+    momentum = 0.9
+    epochs = 2
+
+    ndarray_4d = type(zeros((1, 1, 1, 1)))  # concrete array types used to parameterize the layers
+    ndarray_2d = type(zeros((1, 1)))
+
+    kH, kW = 3, 3
+    conv_out_H = (H - kH) + 1  # expected Conv2D output size for stride 1; asserted against conv.output below
+    conv_out_W = (W - kW) + 1
+    pool_size = (2, 2)
+    pool_stride = (2, 2)
+    pool_out_H = (conv_out_H - pool_size[0]) // pool_stride[0] + 1  # expected pooled size; asserted against pool.output below
+    pool_out_W = (conv_out_W - pool_size[1]) // pool_stride[1] + 1
+    flat_size = pool_out_H * pool_out_W * n_filters  # per-sample feature count fed to the Dense layer
+
+    with mpc.randomness.seed_switch(-1):  # presumably switches to a seed shared by all parties - confirm
+        X = zeros((N, H, W, C_in))
+        for i in range(N):
+            for j in range(min(H, W)):
+                X[i, j, j, 0] = float(i + 1)  # sample i carries the value i + 1 on its main diagonal
+    y = ones((N, dense_out))  # targets default to 1
+    for i in range(N // 2):
+        y[i, 0] = -1.0  # first half of the samples get -1 in column 0
+
+    inp = Input[ndarray_4d](H * W * C_in)
+    conv = Conv2D[ndarray_4d]("relu", out_channels=n_filters, kernel_size=(kH, kW), stride=(1, 1))
+    conv.initialize(mpc, C_in)  # layers are initialized by hand; no Sequential model is built here
+    pool = MaxPooling2D[ndarray_4d](pool_size=pool_size, strides=pool_stride)
+    flat = Flatten[ndarray_4d, ndarray_2d]()
+    dense = Dense[ndarray_2d]("linear", size=dense_out)
+    dense.initialize(mpc, flat_size)
+
+    w0_sum = conv.weights.reshape((conv.weights.size,)).sum()  # weight checksum before training
+
+    loss_first = 0.0
+    loss_last = 0.0
+    for ep in range(epochs):
+        inp.evaluate(mpc, X)  # forward pass, layer by layer
+        conv.evaluate(mpc, inp.output)
+        pool.evaluate(mpc, conv.output)
+        flat.evaluate(mpc, pool.output)
+        dense.evaluate(mpc, flat.output)
+
+        diff = y - dense.output
+        epoch_loss = (diff * diff).sum() / (2.0 * N)  # sum of squared errors divided by 2N
+        if ep == 0:
+            loss_first = epoch_loss
+        loss_last = epoch_loss  # after the loop this holds the loss of the final epoch
+
+        g = dloss(mpc, y, dense.output, "mean_squared_error")  # backward pass starts from the loss gradient
+        g = dense.derive(mpc, flat.output, g, LAYER_IDX=5)  # NOTE(review): LAYER_IDX values 5, 4, 3, 1 are passed as-is; their meaning is set by the layer implementations - confirm
+        g = flat.derive(mpc, pool.output, g, LAYER_IDX=4)
+        g = pool.derive(mpc, conv.output, g, LAYER_IDX=3)
+        conv.derive(mpc, inp.output, g, LAYER_IDX=1)  # return value is discarded
+        conv.update(mpc, step, momentum)  # only conv and dense are updated
+        dense.update(mpc, step, momentum)
+
+    w1_sum = conv.weights.reshape((conv.weights.size,)).sum()  # weight checksum after training
+
+    assert_eq(f"CP{mpc.pid}:\tCNN Conv2D output shape on {alg_struct}", list(conv.output.shape), [N, conv_out_H, conv_out_W, n_filters])
+    assert_eq(f"CP{mpc.pid}:\tCNN MaxPool2D output shape on {alg_struct}", list(pool.output.shape), [N, pool_out_H, pool_out_W, n_filters])
+    assert_eq(f"CP{mpc.pid}:\tCNN Flatten output shape on {alg_struct}", list(flat.output.shape), [N, flat_size])
+    assert_eq(f"CP{mpc.pid}:\tCNN Dense output shape on {alg_struct}", list(dense.output.shape), [N, dense_out])
+    assert w0_sum != w1_sum, f"CP{mpc.pid}: Conv2D weights unchanged after training"  # compares weight sums, not individual weights
+    assert loss_last < loss_first, f"CP{mpc.pid}: CNN loss did not drop (first={loss_first}, last={loss_last})"
+    print(f'CP{mpc.pid}:\tCNN loss dropped: {loss_first:.4f} -> {loss_last:.4f}')
+    print(f'CP{mpc.pid}:\tSequre CNN layers on {alg_struct} tests passed.\n')
+
+
+def _test_neural_net_cnn_all[TP](mpc: MPCEnv[TP], modulus: TP):
+    """
+    Run CNN tests: Conv2D and MaxPooling2D component tests, then the end-to-end CNN workflow.
+    """
+    alg_struct = "ring" if modulus.popcnt() == 1 else "field"  # a modulus with a single set bit is labelled "ring"
+    print(f'CP{mpc.pid}:\tTesting Sequre CNN on {alg_struct} ... \n')
+
+    _test_conv2d(mpc, modulus)  # component tests run before the end-to-end test
+    _test_maxpool2d(mpc, modulus)
+>>>>>>> 07dcb240 (Refactor NN tests: separate vanilla and CNN tests; fix Conv2D test to use layer logic.)
     _test_neural_net_cnn(mpc, modulus)
 
     print(f'CP{mpc.pid}:\tSequre CNN on {alg_struct} tests passed.\n')
@@ -138,6 +407,7 @@ def _test_neural_net_cnn_all[TP](mpc: MPCEnv[TP], modulus: TP):
 def _test_neural_net_vanilla[TP](mpc: MPCEnv[TP], modulus: TP):
     alg_struct = "ring" if modulus.popcnt() == 1 else "field"
     print(f'CP{mpc.pid}:\tTesting Sequre vanilla neural net on {alg_struct} ... \n')
+
     input_size = 16
     output_size = 1
     n_neurons = 32
@@ -190,7 +460,17 @@ def _test_neural_net_vanilla[TP](mpc: MPCEnv[TP], modulus: TP):
         assert_eq(f"CP{mpc.pid}:\tSafe-checking neural net result shape (MPC) on {alg_struct}", mpc_w.shape, list(raw_w.shape))
     else:
         assert_eq_approx(f"CP{mpc.pid}:\tSequre neural net (MPC) on {alg_struct}", mpc_w.reveal(mpc), raw_w, error=0.3)
+<<<<<<< HEAD
+<<<<<<< HEAD
+=======
     
+    _test_neural_net_cnn(mpc, modulus)
+    _test_conv2d(mpc, modulus)
+    _test_maxpool2d(mpc, modulus)
+>>>>>>> 4382845c (Add Component Tests)
+=======
+>>>>>>> 07dcb240 (Refactor NN tests: separate vanilla and CNN tests; fix Conv2D test to use layer logic.)
+
     mpu_layers = (
         Input[type(mpu_X)](input_size),
         Dense[type(mpu_X)]("relu", n_neurons, "normal", "zeros"),
@@ -206,5 +486,13 @@ def _test_neural_net_vanilla[TP](mpc: MPCEnv[TP], modulus: TP):
     else:
         assert_eq_approx(f"CP{mpc.pid}:\tSequre neural net (MPU) on {alg_struct}", mpu_w.reveal(mpc), raw_w)
 
-    print(f'CP{mpc.pid}:\tSequre neural net on {alg_struct} tests passed.\n')
-    
\ No newline at end of file
+    print(f'CP{mpc.pid}:\tSequre vanilla neural net on {alg_struct} tests passed.\n')
+
+<<<<<<< HEAD
+=======
+
+>>>>>>> 07dcb240 (Refactor NN tests: separate vanilla and CNN tests; fix Conv2D test to use layer logic.)
+# --- Public entry point ---
+def test_neural_net[TP](mpc: MPCEnv[TP], modulus: TP):  # runs both neural-net suites with the same mpc environment and modulus
+    _test_neural_net_cnn_all(mpc, modulus)  # CNN suite runs first
+    _test_neural_net_vanilla(mpc, modulus)  # then the vanilla neural-net suite