Commit f36e9db
Add automated notebook testing with Papermill (#602)
* Add automated notebook testing with Papermill

  Introduce CI workflow to validate that docs notebooks execute without errors.

  - Add `.github/workflows/test_notebook.yml` running notebooks in 3 parallel splits (PyMC, sklearn, other) on Python 3.12
  - Add `scripts/run_notebooks/runner.py` for Papermill-based execution with nbclient widget output guards and optional `--parallel` flag
  - Add `scripts/run_notebooks/injected.py` to mock `pm.sample` with prior predictive draws for fast CI execution
  - Add `scripts/run_notebooks/skip_notebooks.yml` for notebooks incompatible with the CI environment (JAX-dependent IV notebooks)
  - Add papermill to test dependencies in pyproject.toml
  - Fix sampling bug in iv_pymc.ipynb uncertainty plot
  - Remove watermark cell from inv_prop_latent.ipynb

  Co-authored-by: Cursor <cursoragent@cursor.com>

* Fix zizmor security alerts in test_notebook.yml

  - Add workflow-level `permissions: {}` and job-level `contents: read`
  - Pin actions/checkout and actions/setup-python to SHA digests
  - Set `persist-credentials: false` on checkout

  Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 03caa7d commit f36e9db

8 files changed

Lines changed: 412 additions & 50 deletions

.github/workflows/test_notebook.yml

Lines changed: 57 additions & 0 deletions

```yaml
name: Test Notebooks

on:
  pull_request:
    branches: [main]
    paths:
      - "pyproject.toml"
      - "causalpy/**"
      - ".github/workflows/test_notebook.yml"
      - "scripts/run_notebooks/**"
      - "docs/source/notebooks/**"
  push:
    branches: [main]
    paths:
      - "pyproject.toml"
      - "causalpy/**"
      - ".github/workflows/test_notebook.yml"
      - "scripts/run_notebooks/**"
      - "docs/source/notebooks/**"

permissions: {}

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  notebooks:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    timeout-minutes: 60
    strategy:
      matrix:
        split:
          - "--pattern *_pymc*.ipynb"
          - "--pattern *_skl*.ipynb"
          - "--exclude-pattern _pymc --exclude-pattern _skl"
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.12"

      - name: Install system dependencies
        run: sudo apt-get update && sudo apt-get install -y graphviz

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e ".[test,docs]"

      - name: Run notebooks
        run: python scripts/run_notebooks/runner.py ${{ matrix.split }}
```
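The three matrix splits partition the notebook set by filename. As a sketch only (the filenames and matching helper here are illustrative, assuming the runner uses glob-style patterns), the partition behaves like:

```python
from fnmatch import fnmatch

# Hypothetical notebook filenames; the real set lives in docs/source/notebooks/
notebooks = ["iv_pymc.ipynb", "did_skl.ipynb", "inv_prop_latent.ipynb"]

# Split 1 and 2 select by glob pattern; split 3 excludes both substrings
pymc_split = [n for n in notebooks if fnmatch(n, "*_pymc*.ipynb")]
skl_split = [n for n in notebooks if fnmatch(n, "*_skl*.ipynb")]
other_split = [n for n in notebooks if "_pymc" not in n and "_skl" not in n]
```

Every notebook lands in exactly one split, so the three CI jobs together cover the full set.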

docs/source/notebooks/inv_prop_latent.ipynb

Lines changed: 1 addition & 44 deletions
Lines changed: 1 addition & 44 deletions

```diff
@@ -4751,49 +4751,6 @@
     ":filter: docname in docnames\n",
     ":::"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Watermark"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Last updated: Tue Jul 29 2025\n",
-      "\n",
-      "Python implementation: CPython\n",
-      "Python version : 3.13.5\n",
-      "IPython version : 9.4.0\n",
-      "\n",
-      "pytensor: 2.31.7\n",
-      "xarray : 2025.7.0\n",
-      "\n",
-      "matplotlib: 3.10.3\n",
-      "arviz : 0.21.0\n",
-      "pandas : 2.3.1\n",
-      "causalpy : 0.4.2\n",
-      "patsy : 1.0.1\n",
-      "pymc : 5.23.0\n",
-      "numpy : 2.3.1\n",
-      "\n",
-      "Watermark: 2.5.0\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext watermark\n",
-    "%watermark -n -u -v -iv -w -p pytensor,xarray"
-   ]
   }
  ],
  "metadata": {
@@ -4812,7 +4769,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.13.5"
+   "version": "3.14.2"
  }
 },
 "nbformat": 4,
```

docs/source/notebooks/iv_pymc.ipynb

Lines changed: 6 additions & 5 deletions
Lines changed: 6 additions & 5 deletions

```diff
@@ -12,7 +12,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 1,
+  "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -21,11 +21,11 @@
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib.lines import Line2D\n",
+   "from sklearn.linear_model import LinearRegression as sk_lin_reg\n",
    "\n",
    "import causalpy as cp\n",
    "from causalpy import InstrumentalVariable\n",
-   "from causalpy.pymc_models import InstrumentalVariableRegression\n",
-   "from causalpy.skl_models import LinearRegression as sk_lin_reg"
+   "from causalpy.pymc_models import InstrumentalVariableRegression"
   ]
  },
  {
@@ -861,7 +861,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 12,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@@ -918,7 +918,8 @@
    " Line2D([0], [0], color=\"black\", lw=4),\n",
    "]\n",
    "\n",
-   "uncertainty.sample(500).T.plot(legend=False, color=\"orange\", alpha=0.4, ax=axs[1])\n",
+   "n_samples = min(500, len(uncertainty))\n",
+   "uncertainty.sample(n_samples).T.plot(legend=False, color=\"orange\", alpha=0.4, ax=axs[1])\n",
    "axs[1].plot(x, ols, color=\"black\", label=\"OLS fit\")\n",
    "axs[1].set_title(\"OLS versus Instrumental Regression Fits\", fontsize=20)\n",
    "axs[1].legend(custom_lines, [\"IV fits\", \"OlS fit\"])\n",
```

pyproject.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -93,7 +93,7 @@ docs = [
     "sphinx-togglebutton",
 ]
 lint = ["interrogate", "pre-commit", "ruff", "mypy"]
-test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert"]
+test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert", "papermill"]

 [project.urls]
 Homepage = "https://github.com/pymc-labs/CausalPy"
```

scripts/run_notebooks/README.md

Lines changed: 78 additions & 0 deletions
````markdown
# Notebook Runner

This script runs Jupyter notebooks from `docs/source/notebooks/` to validate they execute without errors.

## How It Works

1. **Mocks `pm.sample()`** — Replaces MCMC sampling with prior predictive (1 chain × 100 draws) for speed
2. **Uses Papermill** — Executes notebooks programmatically
3. **Clears saved outputs** — Avoids widget state issues during execution
4. **Guards widget updates** — Patches nbclient to ignore display_id assertion errors
5. **Discards outputs** — Only checks for errors, doesn't save results

## Dependencies

The notebook runner mirrors the CI setup and expects a full docs/test environment.

1. **Install Python dependencies**

   ```bash
   pip install -e ".[test,docs]"
   ```

   This brings in Papermill, Jupyter, nbclient, and notebook-related dependencies.

2. **Install Graphviz (system dependency)**

   - macOS:
     ```bash
     brew install graphviz
     ```
   - Ubuntu/Debian:
     ```bash
     sudo apt-get update && sudo apt-get install -y graphviz
     ```

3. **Optional: parallel execution**

   ```bash
   pip install joblib
   ```

## Notes

- The runner executes using the `python3` Jupyter kernel. Ensure your environment
  provides that kernel (e.g., from `ipykernel` installed via the docs extras).
- The CI workflow uses Python 3.12 and installs the same extras.

## Usage

```bash
# Run all notebooks
python scripts/run_notebooks/runner.py

# Run only PyMC notebooks
python scripts/run_notebooks/runner.py --pattern "*_pymc*.ipynb"

# Run only sklearn notebooks
python scripts/run_notebooks/runner.py --pattern "*_skl*.ipynb"

# Exclude PyMC and sklearn notebooks (run others)
python scripts/run_notebooks/runner.py --exclude-pattern _pymc --exclude-pattern _skl

# Run notebooks in parallel (requires joblib)
python scripts/run_notebooks/runner.py --parallel
```

## CI Integration

The GitHub Actions workflow (`.github/workflows/test_notebook.yml`) runs this script in parallel:
- Job 1: PyMC notebooks
- Job 2: Sklearn notebooks
- Job 3: Other notebooks

## Files

- `runner.py` — Main script
- `injected.py` — Code injected into notebooks to mock `pm.sample()`
- `skip_notebooks.yml` — List of notebooks to skip (incompatible with mock sampling)
````
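The runner has to get `injected.py` into each notebook before execution. One plausible mechanism, sketched here without claiming it is what `runner.py` actually does: since `.ipynb` files are plain JSON, a mock cell can be prepended with the standard library alone (the notebook dict below is a minimal hypothetical stand-in).

```python
import json

# Minimal stand-in for a notebook loaded from disk
# (real files come from docs/source/notebooks/)
nb = {
    "cells": [
        {"cell_type": "code", "execution_count": None,
         "metadata": {}, "outputs": [], "source": "x = 1"},
    ],
    "metadata": {},
    "nbformat": 4,
    "nbformat_minor": 5,
}

# Prepend the mock-sampling code as a new first cell, so it runs
# before any notebook cell calls pm.sample
mock_cell = {
    "cell_type": "code",
    "execution_count": None,
    "metadata": {},
    "outputs": [],
    "source": "# contents of scripts/run_notebooks/injected.py go here",
}
nb["cells"].insert(0, mock_cell)

# The modified notebook serializes back to valid JSON for Papermill to execute
serialized = json.dumps(nb)
```

Because the injected cell executes first, every later `pm.sample` call in the notebook hits the mock instead of real MCMC.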

scripts/run_notebooks/injected.py

Lines changed: 58 additions & 0 deletions
```python
"""Injected code to mock pm.sample for faster notebook execution."""

import numpy as np
import pymc as pm
import xarray as xr

# Minimum draws needed to satisfy notebook code that iterates over posterior samples
MIN_DRAWS = 100


def mock_sample(*args, **kwargs):
    """Mock pm.sample using prior predictive sampling for speed."""
    random_seed = kwargs.get("random_seed")
    model = kwargs.get("model")

    # If no model is provided via kwargs, try to infer it from positional args
    if model is None and args:
        first_arg = args[0]
        if isinstance(first_arg, pm.Model):
            model = first_arg

    requested_draws = kwargs.get("draws")
    if requested_draws is None and len(args) > 1 and isinstance(args[1], int):
        requested_draws = args[1]

    # Ensure enough draws for notebook code while keeping execution fast.
    n_draws = max(MIN_DRAWS, requested_draws or MIN_DRAWS)

    idata = pm.sample_prior_predictive(
        model=model,
        random_seed=random_seed,
        draws=n_draws,
    )
    idata.add_groups(posterior=idata.prior)

    # Create mock sample stats with diverging data
    if "sample_stats" not in idata:
        n_chains = 1
        sample_stats = xr.Dataset(
            {
                "diverging": xr.DataArray(
                    np.zeros((n_chains, n_draws), dtype=int),
                    dims=("chain", "draw"),
                )
            }
        )
        idata.add_groups(sample_stats=sample_stats)

    del idata.prior
    if "prior_predictive" in idata:
        del idata.prior_predictive

    return idata


pm.sample = mock_sample
pm.HalfFlat = pm.HalfNormal
pm.Flat = pm.Normal
```
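The draw-count fallback in `mock_sample` can be exercised in isolation. The `resolve_draws` helper below is a name introduced here for illustration, extracting just that logic: an explicit `draws` keyword wins, then an integer second positional argument, and the result is never below `MIN_DRAWS`.

```python
MIN_DRAWS = 100  # mirrors the constant in injected.py


def resolve_draws(*args, **kwargs):
    # draws may arrive as a keyword or as the second positional argument
    requested = kwargs.get("draws")
    if requested is None and len(args) > 1 and isinstance(args[1], int):
        requested = args[1]
    # never drop below MIN_DRAWS, so notebooks that index many samples still work
    return max(MIN_DRAWS, requested or MIN_DRAWS)
```

So a notebook asking for 500 draws still gets 500, while one asking for 10 (or none at all) gets the 100-draw floor.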
