chihacknight · sartaj · Feb 21, 2026 · Feb 21, 2026 · May 13, 2026
diff --git a/.github/workflows/synthetic-test.yml b/.github/workflows/synthetic-test.yml
@@ -118,6 +118,18 @@ jobs:
           echo "=== Build output ==="
           ls -la /tmp/govbot-test/docs/
 
+      - name: Run demo tapes in prod mode
+        if: github.event_name != 'pull_request'  # prod recordings are skipped on PRs
+        run: |
+          cd actions/govbot
+          export TEST_MODE=prod
+          for tape in tapes/*.tape; do
+            [ -f "$tape" ] || continue  # an unmatched glob stays literal; skip it
+            echo "::group::Recording $tape (prod mode)"
+            vhs "$tape" || echo "::warning::Tape failed in prod mode: $tape"  # best-effort: annotate, never fail the job
+            echo "::endgroup::"
+          done
+
       - name: Upload GIF to synthetic-test release
         if: always()
         env:
@@ -175,7 +187,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: synthetic-test-recording
-          path: /tmp/govbot-synthetic-test.gif
+          path: |
+            /tmp/govbot-synthetic-test.gif
+            actions/govbot/tapes/*.gif
           retention-days: 7
 
       - name: Upload diagnostics

diff --git a/.github/workflows/terminal-screenshots.yml b/.github/workflows/terminal-screenshots.yml
@@ -3,15 +3,16 @@ name: Terminal Screenshots
 on:
   pull_request:
     paths:
-      - "actions/govbot/src/**"
       - "actions/govbot/tapes/*.tape"
+      - "actions/govbot/tapes/**"  # '*' never crosses '/': this also covers expected/ and nightly/synthetic-test.tape
+      - "actions/govbot/src/**"
       - "actions/govbot/Cargo.toml"
       - "actions/govbot/Cargo.lock"
       - ".github/workflows/terminal-screenshots.yml"
 
 jobs:
   terminal-screenshots:
-    name: Generate Terminal GIFs
+    name: Generate Terminal GIFs (Mock Mode)
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -59,19 +60,41 @@ jobs:
           rm vhs.deb
           vhs --version
 
-      - name: Record terminal GIFs
+      - name: Record demo tapes (mock mode with snapshot assertions)
         run: |
           cd actions/govbot
+          export TEST_MODE=mock
+          failures=0
           for tape in tapes/*.tape; do
             [ -f "$tape" ] || continue
             echo "::group::Recording $tape"
-            vhs "$tape"
-            echo "::endgroup::"
+            rc=0
+            vhs "$tape" || rc=$?
+            echo "::endgroup::"  # close the group before any error annotation
+            if [ "$rc" -ne 0 ]; then
+              echo "::error::Tape failed: $tape"
+              failures=1
+            fi
           done
+          if [ "$failures" -ne 0 ]; then  # fail only after every tape has been recorded
+            echo "::error::One or more tapes failed snapshot assertions"
+            exit 1
+          fi
+
+      - name: Record synthetic test (mock mode)
+        run: |
+          cd actions/govbot
+          # Remove any previous /tmp/govbot-test directory before recording.
+          rm -rf /tmp/govbot-test
+          export TEST_MODE=mock GOVBOT_SRC="$PWD"
+          vhs tapes/nightly/synthetic-test.tape
 
       - name: Upload GIFs as artifacts
+        if: always()  # run even when an earlier step failed, so recordings are still uploaded
         uses: actions/upload-artifact@v4
         with:
           name: terminal-screenshots
-          path: actions/govbot/tapes/*.gif
+          path: |
+            actions/govbot/tapes/*.gif
+            /tmp/govbot-synthetic-test.gif
           retention-days: 30
diff --git a/.github/workflows/validate-snapshots.yml b/.github/workflows/validate-snapshots.yml
@@ -144,12 +144,88 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-cargo-
 
-
       - name: Run tests
         run: |
           cd actions/govbot
           cargo test
 
+  govbot-mock-schema-validation:
+    name: Validate Govbot Mock Data  # checks mocks/.govbot/repos JSON against actions/format/schemas
+    runs-on: ubuntu-latest
+    needs: detect-changes
+    if: needs.detect-changes.outputs.govbot == 'true'
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install jsonschema
+        run: pip install jsonschema
+
+      - name: Validate govbot mock data against schemas
+        run: |
+          python3 << 'PYEOF'
+          import json
+          import sys
+          from pathlib import Path
+          from jsonschema import validate, ValidationError
+
+          schemas_dir = Path("actions/format/schemas")
+          mocks_dir = Path("actions/govbot/mocks/.govbot/repos")
+
+          def load_json(path):
+              return json.loads(path.read_text(encoding="utf-8"))  # read_text() closes the file
+
+          def record_failure(path, exc):
+              reason = getattr(exc, "message", None) or str(exc)  # JSONDecodeError has no .message
+              errors.append(f"{path}: {reason}")
+              print(f"  {path}: {reason}")
+
+          metadata_schema = load_json(schemas_dir / "metadata.schema.json")
+          action_log_schema = load_json(schemas_dir / "action_log.schema.json")
+          vote_event_log_schema = load_json(schemas_dir / "vote_event_log.schema.json")
+
+          errors = []
+          validated = 0
+
+          # Validate all metadata.json files in mock repos
+          for metadata_file in mocks_dir.rglob("metadata.json"):
+              try:
+                  validate(instance=load_json(metadata_file), schema=metadata_schema)
+                  print(f"  {metadata_file}")
+                  validated += 1
+              except (ValidationError, json.JSONDecodeError) as e:  # malformed JSON is reported, not fatal
+                  record_failure(metadata_file, e)
+
+          # Validate all log files in mock repos (the log type is inferred from its keys)
+          for log_file in mocks_dir.rglob("logs/*.json"):
+              try:
+                  data = load_json(log_file)
+                  if "action" in data and "bill_id" in data:
+                      validate(instance=data, schema=action_log_schema)
+                  elif "votes" in data and "counts" in data:
+                      validate(instance=data, schema=vote_event_log_schema)
+                  else:
+                      print(f"  {log_file}: unknown log type, skipping")
+                      continue
+                  print(f"  {log_file}")
+                  validated += 1
+              except (ValidationError, json.JSONDecodeError) as e:  # malformed JSON is reported, not fatal
+                  record_failure(log_file, e)
+
+          print(f"\nValidated {validated} file(s)")
+          if errors:
+              print(f"{len(errors)} error(s):")
+              for err in errors:
+                  print(f"  - {err}")
+              sys.exit(1)
+          print("All mock data validates against schemas")
+          PYEOF
+
   format-snapshots:
     runs-on: ubuntu-latest
     needs: detect-changes

diff --git a/TESTING.md b/TESTING.md
@@ -0,0 +1,199 @@
+# Testing Strategy
+
+This document describes how testing works across the govbot monorepo.
+
+## Data Lineage & Mock Dependencies
+
+```
+UPSTREAM PIPELINE (produces git repos)
+┌─────────┐    ┌──────────┐    ┌───────────┐    ┌──────────────────┐
+│  scrape  │ →  │  format   │ →  │  extract  │ →  │  git repos       │
+│          │    │           │    │           │    │  (wy-legislation) │
+└─────────┘    └──────────┘    └───────────┘    └──────────────────┘
+     │               │                                    │
+  scrape/            format/snapshots/                    │
+  prod-mocks         (tested via render_snapshot.sh       │
+                      + schema validation)                │
+                                                          │
+═══════════════ GIT REPO STRUCTURE IS THE CONTRACT ═══════════════
+                                                          │
+                        govbot/mocks/.govbot/repos/       │
+                        (captured from real repos          │
+                         via `just mocks`)                 │
+                              │                            │
+                    ┌───────────────────────────────────┐ │
+                    │ govbot clone → logs → tag → build │
+                    └───────────────────────────────────┘
+
+Mock data sources:
+  • scrape/prod-mocks-*     → captured from real scrape runs → fed to format tests
+  • govbot/mocks/.govbot/   → captured from real published repos → fed to govbot tests
+  • format/snapshots/wy/    → generated by running format on scrape mocks
+```
+
+## Testing Tiers
+
+```
+Tier 3: Nightly Visual E2E      VHS tapes in prod mode (live data, smoke assertions)
+   │                             GIF uploaded to releases for visual review
+   │
+Tier 2: PR Visual Integration   VHS tapes in mock mode (deterministic, snapshot assertions)
+   │                             GIF uploaded as PR artifact
+   │
+Tier 1: Schema Contracts         JSON Schema validation of mock data + format output
+   │                             Catches interface breakage between pipeline stages
+   │
+Tier 0: Unit Tests               Wizard round-trip tests, pure logic (keep in Rust/insta)
+                                  No I/O, no VHS needed
+```
+
+## Dual-Mode VHS Testing
+
+Every VHS tape can run in either of two modes, selected by the `TEST_MODE` environment variable:
+
+| Mode | When | Data Source | Assertion Level |
+|------|------|-------------|-----------------|
+| **Mock** (`TEST_MODE=mock`) | PR (fast) | Pre-populated from `mocks/` | Snapshot diff (byte-exact match against committed expected files) |
+| **Prod** (`TEST_MODE=prod`) | Nightly | Live cloned repos | Smoke test (exit code 0, non-empty output) |
+
+### Environment Variables
+
+- `TEST_MODE` — `mock` or `prod` (default: `prod`)
+- `GOVBOT_DIR` — Path to `.govbot` directory containing repos (default: `mocks/.govbot` for demo tapes)
+
+### How Assertions Work
+
+Each tape defines a short helper function `sk()` (snapshot check) and uses `Wait+Screen`
+to assert VHS sees `SNAP_OK` on screen:
+
+```tape
+# Define assertion helper at tape start
+Type "sk() { if [ ${TEST_MODE:-prod} = mock ]; then diff $1 $2 && echo SNAP_OK || echo SNAP_FAIL; else [ -s $1 ] && echo SNAP_OK || echo SNAP_FAIL; fi; }"
+Enter
+
+# Run command, capture output to temp file, display it
+Type "govbot logs --govbot-dir mocks/.govbot > /tmp/lb.txt 2>&1 && cat /tmp/lb.txt"
+Enter
+Sleep 3s
+
+# Clear the screen first: the sk() definition typed above contains the text SNAP_OK, which could otherwise match the Wait below
+Type "clear"
+Enter
+
+# Assert: mock mode diffs against expected file, prod mode checks non-empty
+Type "sk /tmp/lb.txt tapes/expected/logs-basic.txt"
+Enter
+Wait+Screen@5s /SNAP_OK/
+```
+
+## Per-Action Test Inventory
+
+### govbot (Rust CLI)
+
+| Test | Type | Location | Mode |
+|------|------|----------|------|
+| Wizard round-trip | Rust unit test (insta) | `tests/wizard_tests.rs` | `cargo test` |
+| `govbot --help` | VHS tape | `tapes/govbot-help.tape` | mock/prod |
+| `govbot clone --list` | VHS tape | `tapes/govbot-clone-list.tape` | mock/prod |
+| `govbot logs` | VHS tape | `tapes/logs-basic.tape` | mock/prod |
+| Full pipeline E2E | VHS tape | `tapes/nightly/synthetic-test.tape` | mock/prod |
+
+### format (Python)
+
+| Test | Type | Location |
+|------|------|----------|
+| Schema validation | JSON Schema | `validate-snapshots.yml` → format-snapshots job |
+| Snapshot comparison | `render-snapshots.sh` | `validate-snapshots.yml` → format-snapshots job |
+
+### pipeline-manager / report-publisher
+
+| Test | Type | Location |
+|------|------|----------|
+| Snapshot comparison | `render-snapshots.sh` | `validate-snapshots.yml` |
+
+## Running Tests Locally
+
+### Rust unit tests (Tier 0)
+
+```bash
+cd actions/govbot
+just test              # Run all tests
+just test-single wizard_tests  # Run specific test
+just review            # Review snapshot changes (insta)
+```
+
+### VHS demo tapes in mock mode (Tier 2)
+
+```bash
+cd actions/govbot
+
+# Build binary first
+just build-release
+
+# Record all demo tapes
+TEST_MODE=mock just record
+
+# Record a specific tape
+TEST_MODE=mock just record govbot-help
+```
+
+### VHS synthetic test in mock mode (Tier 2)
+
+```bash
+cd actions/govbot
+export PATH="$PWD/target/release:$PATH"
+TEST_MODE=mock GOVBOT_SRC="$PWD" vhs tapes/nightly/synthetic-test.tape
+```
+
+### VHS tapes in prod mode (Tier 3)
+
+```bash
+cd actions/govbot
+export PATH="$PWD/target/release:$PATH"
+# Requires network access — clones real repos
+vhs tapes/nightly/synthetic-test.tape
+```
+
+## Updating Mocks
+
+When upstream data format changes, refresh mock data:
+
+```bash
+cd actions/govbot
+just mocks          # Default: refreshes wy and gu
+just mocks il ny    # Refresh specific states
+```
+
+This clones real repos, prunes to 5 bills / 3 logs per session, and removes `.git` directories.
+
+## Updating Expected Output
+
+When govbot output changes (new fields, formatting changes):
+
+```bash
+cd actions/govbot
+
+# Regenerate expected output files
+govbot --help > tapes/expected/govbot-help.txt 2>&1
+govbot clone --list > tapes/expected/govbot-clone-list.txt 2>&1
+govbot logs --govbot-dir mocks/.govbot > tapes/expected/logs-basic.txt 2>&1
+
+# Verify tapes still pass
+TEST_MODE=mock just record
+```
+
+## CI Workflows
+
+| Workflow | Trigger | What it does |
+|----------|---------|--------------|
+| `validate-snapshots.yml` | Push to main, PR | Rust unit tests + schema validation |
+| `terminal-screenshots.yml` | PR (tape/src/Cargo changes) | VHS demo tapes in mock mode with snapshot assertions |
+| `synthetic-test.yml` | Nightly + PR (tape/src changes) | Full E2E pipeline in prod and/or mock mode |
+
+## Decision Tree: When to Add What Kind of Test
+
+1. **Pure logic with no I/O?** → Rust unit test with insta (`tests/wizard_tests.rs`)
+2. **CLI command with deterministic output?** → VHS demo tape with expected output file
+3. **Full pipeline flow?** → Add to synthetic-test.tape
+4. **Data format contract?** → JSON Schema in `actions/format/schemas/`
+5. **New mock data needed?** → `just mocks <locale>`, then validate schemas pass
diff --git a/actions/govbot/examples/govbot-clone-list.sh b/actions/govbot/examples/govbot-clone-list.sh
diff --git a/actions/govbot/examples/govbot-help.sh b/actions/govbot/examples/govbot-help.sh
diff --git a/actions/govbot/examples/logs-basic.sh b/actions/govbot/examples/logs-basic.sh