khamidov17 · khamidov17 · May 1, 2026 · May 1, 2026
diff --git a/.cursor/rules b/.cursor/rules
@@ -24,14 +24,13 @@ cat ~/.vpstack/projects/$SLUG/domain_config.yaml
 
 ## Running B1 McAdams baseline
 
-The B1 script lives in skills/vp-baseline-compare/SKILL.md Step 4 as an inline template.
-Extract it, write to /tmp/vp_b1_run.py, then:
+Run via the `vpstack-b1` binary:
 
 ```bash
 # ⚠ If audio is not 16kHz, resample first:
 # sox input.wav -r 16000 output.wav
 
-python3 /tmp/vp_b1_run.py --data_path /path/to/audio --seed 42
+vpstack-b1 --data_path /path/to/audio --seed 42
 # pip install soundfile scipy numpy  (if needed)
 ```
 
@@ -50,10 +49,10 @@ Get slug: run `~/.claude/skills/vpstack/bin/vpstack-slug` in terminal.
 
 ## What's implemented vs pending
 
-- B1 McAdams anonymization: WORKING (see Step 4 in vp-baseline-compare/SKILL.md)
+- B1 McAdams anonymization: WORKING (`vpstack-b1`)
 - B2 neural pipeline: PENDING v0.3
-- ASV attacker: requires official VP2026 challenge attacker script (not bundled)
-- Full EER/WER eval: PENDING v0.3
+- ASV attacker: WORKING (`vpstack-score`)
+- Full EER/WER eval: WORKING (`vpstack-eval`)
 
 ## Component quick reference
 

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -25,12 +25,9 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install pytest pyyaml numpy scipy soundfile
-          # Install both Python sub-packages in editable mode
-          pip install -e ./mcp-server || true
-          pip install -e ./speechbrain_voice_anon || true
 
-      - name: Run deterministic tests (skip GPU/data gates)
-        run: pytest -m "not gpu" --tb=short -v
+      - name: Run deterministic tests
+        run: pytest --tb=short -v
 
       - name: Lint with ruff
         run: |
@@ -68,19 +65,13 @@ jobs:
       - uses: actions/setup-python@v5
         with: { python-version: "3.11" }
 
-      - name: Validate VERSION file matches pyproject.toml versions
+      - name: Validate VERSION file matches package.json
         run: |
           VERSION=$(cat VERSION | tr -d '[:space:]')
-          MCP_VERSION=$(grep -m1 'version = ' mcp-server/pyproject.toml | cut -d'"' -f2)
-          RECIPE_VERSION=$(grep -m1 'version = ' speechbrain_voice_anon/pyproject.toml | cut -d'"' -f2)
           NPM_VERSION=$(node -p "require('./package.json').version")
           echo "VERSION:        $VERSION"
-          echo "mcp-server:     $MCP_VERSION"
-          echo "recipe:         $RECIPE_VERSION"
           echo "package.json:   $NPM_VERSION"
-          # Atomic version policy: all four must match (modulo .dev → -dev style differences)
+          # Atomic version policy: both must match (modulo .dev → -dev style differences)
           v_normalized() { echo "$1" | sed 's/-dev/.dev0/' ; }
-          [ "$(v_normalized $VERSION)" = "$(v_normalized $MCP_VERSION)" ] || (echo "FAIL: VERSION/mcp mismatch"; exit 1)
-          [ "$(v_normalized $VERSION)" = "$(v_normalized $RECIPE_VERSION)" ] || (echo "FAIL: VERSION/recipe mismatch"; exit 1)
           [ "$(v_normalized $VERSION)" = "$(v_normalized $NPM_VERSION)" ] || (echo "FAIL: VERSION/npm mismatch"; exit 1)
           echo "All versions match."
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -1,11 +1,9 @@
 name: Release
 
 # Tag-driven atomic release: bumping VERSION and pushing a v*.*.* tag
-# publishes all three packages in lockstep.
+# publishes the npm package.
 #
 # Required secrets:
-#   PYPI_TOKEN_MCP        — PyPI API token scoped to vpstack-mcp
-#   PYPI_TOKEN_RECIPE     — PyPI API token scoped to speechbrain-voice-anon
 #   NPM_TOKEN             — npm automation token for vpstack package
 #
 # Tag format: v0.1.0, v0.1.1, v0.2.0-rc1, etc.
@@ -37,39 +35,8 @@ jobs:
       - name: Verify all package versions match
         run: |
           VERSION=$(cat VERSION | tr -d '[:space:]')
-          MCP_VERSION=$(grep -m1 'version = ' mcp-server/pyproject.toml | cut -d'"' -f2)
-          RECIPE_VERSION=$(grep -m1 'version = ' speechbrain_voice_anon/pyproject.toml | cut -d'"' -f2)
           NPM_VERSION=$(node -p "require('./package.json').version")
-          [ "$VERSION" = "$MCP_VERSION" ] && [ "$VERSION" = "$RECIPE_VERSION" ] && [ "$VERSION" = "$NPM_VERSION" ] \
-            || (echo "FAIL: versions out of sync. Bump all four together."; exit 1)
-
-  publish-mcp:
-    needs: verify-versions-match
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with: { python-version: "3.11" }
-      - run: pip install build twine
-      - run: cd mcp-server && python -m build
-      - run: twine upload mcp-server/dist/*
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_MCP }}
-
-  publish-recipe:
-    needs: verify-versions-match
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with: { python-version: "3.11" }
-      - run: pip install build twine
-      - run: cd speechbrain_voice_anon && python -m build
-      - run: twine upload speechbrain_voice_anon/dist/*
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_RECIPE }}
+          [ "$VERSION" = "$NPM_VERSION" ] || (echo "FAIL: versions out of sync. Bump both together."; exit 1)
 
   publish-npm:
     needs: verify-versions-match
@@ -85,7 +52,7 @@ jobs:
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
 
   github-release:
-    needs: [publish-mcp, publish-recipe, publish-npm]
+    needs: [publish-npm]
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -98,9 +65,7 @@ jobs:
           body: |
             vpstack ${{ github.ref_name }}
 
-            Three packages published atomically:
+            Package published:
             - npm: `vpstack@${{ github.ref_name }}`
-            - PyPI: `vpstack-mcp==${{ github.ref_name }}`
-            - PyPI: `speechbrain-voice-anon==${{ github.ref_name }}`
 
             See [CHANGELOG.md](CHANGELOG.md) for details.
diff --git a/AGENTS.md b/AGENTS.md
@@ -28,11 +28,10 @@ copy `docs/domain.md` and `docs/claude-md-template.md` into that project for con
 - B1 (McAdams): signal-processing, CPU, fast. Weak anonymization. The floor to beat.
 - B2 (HuBERT + ECAPA-TDNN + HiFi-GAN): neural, GPU. Strong. The real target. NOT yet implemented in vpstack.
 
-**B1 McAdams script:** The skill `vp-baseline-compare` contains a self-contained Python
-script that Claude writes to `/tmp/vp_b1_run.py`. You can extract and run it directly:
+**B1 McAdams:** Run via the `vpstack-b1` binary:
 ```bash
-# Extract and run B1 on your data
-python3 /tmp/vp_b1_run.py --data_path /path/to/your/audio --seed 42
+# Run B1 on your data
+vpstack-b1 --data_path /path/to/your/audio --seed 42
 # Requires: pip install soundfile scipy numpy
 ```
 
@@ -93,9 +92,8 @@ Follow `skills/vp-hypothesis/SKILL.md`. Writes to:
 ### Step 2: Run B1 anonymization (baseline anchor)
 
 ```bash
-# The McAdams B1 script — extract from skills/vp-baseline-compare/SKILL.md Step 4
-# Write it to /tmp/vp_b1_run.py then:
-python3 /tmp/vp_b1_run.py --data_path /path/to/data --seed 42
+# Run B1 binary
+vpstack-b1 --data_path /path/to/data --seed 42
 # Requires: pip install soundfile scipy numpy
 # ⚠ If your audio is not 16kHz: sox input.wav -r 16000 output.wav
 ```

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -13,7 +13,7 @@ vpstack is **voice-privacy research infrastructure for AI coding agents**. It en
 **Read first if you're editing anything substantial:**
 - [DESIGN.md](DESIGN.md) — full architecture with 7 locked premises (P1–P7) and explicit non-goals
 - [LICENSING.md](LICENSING.md) — license posture (Apache 2.0, runtime model downloads only, **NO vendoring of VP2024 GPLv3 code**)
-- [TEST-PLAN.md](TEST-PLAN.md) — 7 critical CI gates, including B1/B2 reproducibility ±0.5% EER
+- [TEST-PLAN.md](TEST-PLAN.md) — 7 critical CI gates
 
 ---
 
@@ -26,7 +26,7 @@ vpstack is **voice-privacy research infrastructure for AI coding agents**. It en
 ## Non-negotiable rules
 
 1. **Never `import` or vendor anything from `Voice-Privacy-Challenge-2024`** (GPLv3). Re-implement from the published Eval Plan PDF instead.
-2. **Never write Python code in this repo.** Skills are markdown. `bin/` is bash. No MCP server. No Python packages. Zero code in skills.
+2. **Never write Python code in this repo.** Skills are markdown. `bin/` is bash. No MCP server. No Python packages. Zero code in skills. (Exceptions: bash scripts in `bin/` may use inline Python for computation, and `tests/` contains pytest smoke tests.)
 3. **Never bundle pretrained model weights.** Users download at runtime via HuggingFace Hub or SpeechBrain.
 4. **Never bundle VP2026 trial lists / VoxCeleb audio / IEMOCAP.**
 5. **Telemetry payload is a strict allowlist.** Only keys in `bin/vpstack-telemetry-log` are permitted. Never add keys without updating the allowlist.
@@ -45,7 +45,7 @@ vpstack is **voice-privacy research infrastructure for AI coding agents**. It en
 | User config | `~/.vpstack/config.json` | Managed by `bin/vpstack-config`. |
 | Per-project state | `~/.vpstack/projects/{slug}/` | `domain_config.yaml`, `hypotheses/`, `experiments/`, `research-plans/`, `deferred-gates.jsonl` |
 | Per-project markers | `<repo>/.vpstack/` | `enabled`, `disabled`, `ask-later` — tiny activation markers |
-| Per-project markers | `<repo>/.vpstack/` | Just `enabled` / `disabled` / `ask-later` files. Tiny. |
+| Automated tests | `tests/` | Python/pytest smoke tests for binaries. |
 
 ---
 
@@ -86,15 +86,6 @@ The `Recommendation:` line is mandatory. Users need to know the right answer, no
 4. Update README's "Skills reference" section with the new skill + an example.
 5. Update CHANGELOG.
 
-### Implement a recipe (B2, attacker, etc.)
-
-Each `recipes/VP2026/{name}/run.py` has a docstring with the full implementation specification. Read it. The contract is:
-- CLI args: `--data_path` (or `--anonymized_path` etc.), `--seed`, `--output_format json|human`
-- On success: print a single JSON line on stdout with the documented schema
-- On failure: print to stderr, exit non-zero
-- Stream progress to stderr every 30 seconds for runs >15min (per MCP long-running-tool contract)
-- Honor `torch.use_deterministic_algorithms(True)` if hparams request it
-- Lazy-fetch model weights via `huggingface_hub.snapshot_download()` — never bundle
 
 ### Run tests
 
@@ -106,7 +97,7 @@ pytest tests/activation tests/telemetry  # quick subset (~10s)
 
 ### Update a release version
 
-Bump `VERSION` AND `package.json::version` AND `mcp-server/pyproject.toml::version` AND `speechbrain_voice_anon/pyproject.toml::version` together. The CI workflow (`.github/workflows/ci.yml::package-lint`) verifies they all match.
+Bump `VERSION` AND `package.json::version` together. The CI workflow (`.github/workflows/ci.yml::package-lint`) verifies that the two match.
 
 ---
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,37 @@
+# Contributing to vpstack
+
+Thank you for your interest in contributing to vpstack! This project provides voice-privacy research infrastructure for AI coding agents.
+
+## Architectural Principles
+
+Before you contribute, please read [DESIGN.md](DESIGN.md) and [CLAUDE.md](CLAUDE.md). The most important principles are:
+
+1.  **No Standalone Python in the Main Source**: This project uses a "Markdown Skills + Bash Binaries" model for maximum portability across AI coding agents.
+2.  **Embedded Python for Computation**: For complex algorithms (like McAdams B1), we embed Python snippets inside bash scripts in `bin/` using heredocs.
+3.  **Portability**: Skills (in `skills/`) should be pure Markdown that tells the agent what bash commands to run.
+4.  **No GPLv3 Code**: Never import or vendor code from the official VP2024 baseline. Re-implement from the published Eval Plan PDF.
+
+## How to Contribute
+
+### Adding or Updating a Binary
+- Binaries go in `bin/`.
+- They should be bash scripts that orchestrate logic or run embedded Python.
+- Always include a `--help` flag.
+- Add a corresponding smoke test in `tests/test_binaries.py`.
+
+### Adding or Updating a Skill
+- Skills go in `skills/vp-{name}/SKILL.md`.
+- Copy the preamble from an existing skill to ensure proper activation and telemetry.
+- Skills must be self-contained.
+
+### Running Tests
+We use `pytest` for automated testing.
+```bash
+pip install -r requirements-test.txt  # or: pip install pytest pyyaml numpy scipy soundfile
+pytest -v tests/
+```
+
+## Release Process
+1. Bump the version in `VERSION`.
+2. Sync the version in `package.json`.
+3. Push a tag `v*.*.*`. The GitHub Action will handle the npm publication.