Merged
21 changes: 14 additions & 7 deletions tests/e2e/config.js
--- a/tests/e2e/config.js
+++ b/tests/e2e/config.js
@@ -18,14 +18,21 @@ module.exports = {
     health: '/health',
   },
   prover: {
-    base: process.env.E2E_PROVER_URL || 'https://mnemom--mnemom-prover-prover-service.modal.run',
+    // Probe the CPU-only health sentinel (Modal function:
+    // `health_sentinel`), NOT the H100 `prover_service`. The sentinel
+    // is the readiness boundary: it answers from a CPU container
+    // (scaledown_window=300s) by querying Postgres for proofs stuck
+    // in pending/proving > 10 min. Cheap, no GPU cold-start to pay.
+    //
+    // Probing prover_service directly forced an H100 cold-start
+    // (60-180s for SP1 init) on every quiet probe, which busted this
+    // gate's 60s timeout and blocked every other repo's production
+    // deploy. See mnemom-prover PR #40 (May 6, 2026 incident).
+    base: process.env.E2E_PROVER_URL || 'https://mnemom--mnemom-prover-health-sentinel.modal.run',
     health: '/health',
-    // Modal scales the prover to zero when idle; the first request after
-    // a quiet period triggers a ~30s cold boot that hits exactly the
-    // default timeout. Bump to 60s so a cold prover doesn't fail e2e
-    // and block production approval. Trade-off accepted: a real prover
-    // outage now takes 60s to surface instead of 30s.
-    timeout: 60000,
+    // CPU cold-start is sub-5s; 10s is generous while still surfacing
+    // a real prover outage well before the 60s deploy-gate budget.
+    timeout: 10000,
   },
   website: {
     base: process.env.E2E_WEBSITE_URL || 'https://mnemom-staging.netlify.app',
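The comments in the diff describe the sentinel's readiness rule: a proof sitting in `pending`/`proving` for more than 10 minutes means the prover is unhealthy. A minimal sketch of that check, assuming a hypothetical row shape `{ status, updatedAt }` (the real sentinel is a Modal function querying Postgres, which is not part of this diff):

```javascript
// Sketch only: the actual sentinel runs server-side against Postgres.
// The row shape { status, updatedAt (ms epoch) } is an assumption.
const STUCK_MS = 10 * 60 * 1000; // 10 min, per the comment in config.js

function stuckProofs(rows, now = Date.now()) {
  // A proof is "stuck" if it has sat in pending/proving past the window.
  return rows.filter(
    (r) =>
      (r.status === 'pending' || r.status === 'proving') &&
      now - r.updatedAt > STUCK_MS
  );
}

// The /health handler would report unhealthy when any proof is stuck.
function healthStatus(rows, now = Date.now()) {
  return stuckProofs(rows, now).length === 0 ? 'ok' : 'unhealthy';
}
```

Because this logic only reads a table, it can run in a cheap CPU container and answer immediately, which is what makes it a safe readiness boundary for the deploy gate.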
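The 10s budget only works if the probe actually enforces it. A hedged sketch of how an e2e gate might consume this config (assumes Node 18+ global `fetch` and `AbortSignal.timeout`; the function name is illustrative, not from the repo):

```javascript
// Illustrative probe; the actual e2e harness is not shown in this diff.
async function probe({ base, health, timeout }) {
  try {
    // AbortSignal.timeout caps the whole request at the configured budget,
    // so a down service surfaces within `timeout` ms instead of hanging.
    const res = await fetch(base + health, {
      signal: AbortSignal.timeout(timeout),
    });
    return res.ok;
  } catch {
    return false; // abort (timeout) or network error counts as unhealthy
  }
}
```

With the sentinel's sub-5s CPU cold-start, a 10000 ms budget leaves comfortable headroom while still failing well inside the 60s deploy-gate window.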