diff --git a/src/forge/proxy/handler.py b/src/forge/proxy/handler.py
index 6393f59..1a6bcba 100644
--- a/src/forge/proxy/handler.py
+++ b/src/forge/proxy/handler.py
@@ -46,7 +46,7 @@
 )
 
 # Body fields forge owns and reasons about — never go into passthrough.
-_FORGE_OWNED = frozenset({"messages", "tools", "stream", "system"})
+_FORGE_OWNED = frozenset({"messages", "tools", "stream", "stream_options", "system"})
 
 
 def _extract_sampling(body: dict[str, Any]) -> dict[str, Any] | None:
diff --git a/tests/unit/test_proxy_handler.py b/tests/unit/test_proxy_handler.py
index 1db3a90..1a4203a 100644
--- a/tests/unit/test_proxy_handler.py
+++ b/tests/unit/test_proxy_handler.py
@@ -341,6 +341,27 @@ async def test_passthrough_carries_unknown_body_fields(self):
         }
 
+    @pytest.mark.asyncio
+    async def test_stream_options_excluded_from_passthrough(self):
+        """stream_options must not leak into passthrough.
+
+        Forge controls streaming independently — when it makes non-streaming
+        calls to the backend, a leaked stream_options causes validation
+        errors on strict backends (e.g. vLLM rejects stream_options when
+        stream is not True).
+        """
+        client = _mock_client(TextResponse(content="ok"))
+        body = _body(messages=[{"role": "user", "content": "hi"}])
+        body["stream"] = True
+        body["stream_options"] = {"include_usage": True}
+        body["max_tokens"] = 256
+
+        await handle_chat_completions(body, client, _context_manager(), max_retries=1)
+
+        passthrough = client.send.call_args.kwargs["passthrough"]
+        assert "stream_options" not in passthrough
+        assert passthrough == {"model": "test", "max_tokens": 256}
+
 
 # ── Anthropic protocol routing ───────────────────────────────