# Patch summary (preamble text; everything before the first "diff --git" header is commentary).
#
# Purpose: keep the request-body field "stream_options" out of the passthrough
# dict that the proxy handler forwards to the backend client.
#
# src/forge/proxy/handler.py
#   - Adds "stream_options" to the _FORGE_OWNED frozenset, next to "messages",
#     "tools", "stream" and "system". Per the existing comment on that constant,
#     fields in this set never go into passthrough.
#
# tests/unit/test_proxy_handler.py
#   - Adds test_stream_options_excluded_from_passthrough: sends a body with
#     stream=True, stream_options={"include_usage": True} and max_tokens=256
#     through handle_chat_completions, then asserts the passthrough kwarg given
#     to client.send is exactly {"model": "test", "max_tokens": 256}.
#   - Rationale, as stated in the new test's docstring: a leaked stream_options
#     causes validation errors on strict backends when the backend call is made
#     without stream=True (vLLM is the example given).
#
# NOTE(review): the "model": "test" entry presumably comes from the _body()
#   test helper's defaults, which are not visible in this patch -- confirm.
# NOTE(review): stream_options is now dropped unconditionally. If forge ever
#   streams from the backend itself, a client's include_usage request would not
#   be forwarded unless it is re-added elsewhere -- TODO confirm this is intended.
# NOTE(review): the patch text below appears to have had its line breaks
#   collapsed onto one line; the original line structure must be restored
#   before it can be applied with git apply / patch.
diff --git a/src/forge/proxy/handler.py b/src/forge/proxy/handler.py index 6393f59..1a6bcba 100644 --- a/src/forge/proxy/handler.py +++ b/src/forge/proxy/handler.py @@ -46,7 +46,7 @@ ) # Body fields forge owns and reasons about — never go into passthrough. -_FORGE_OWNED = frozenset({"messages", "tools", "stream", "system"}) +_FORGE_OWNED = frozenset({"messages", "tools", "stream", "stream_options", "system"}) def _extract_sampling(body: dict[str, Any]) -> dict[str, Any] | None: diff --git a/tests/unit/test_proxy_handler.py b/tests/unit/test_proxy_handler.py index 1db3a90..1a4203a 100644 --- a/tests/unit/test_proxy_handler.py +++ b/tests/unit/test_proxy_handler.py @@ -341,6 +341,27 @@ async def test_passthrough_carries_unknown_body_fields(self): } + @pytest.mark.asyncio + async def test_stream_options_excluded_from_passthrough(self): + """stream_options must not leak into passthrough. + + Forge controls streaming independently — when it makes non-streaming + calls to the backend, a leaked stream_options causes validation + errors on strict backends (e.g. vLLM rejects stream_options when + stream is not True). + """ + client = _mock_client(TextResponse(content="ok")) + body = _body(messages=[{"role": "user", "content": "hi"}]) + body["stream"] = True + body["stream_options"] = {"include_usage": True} + body["max_tokens"] = 256 + + await handle_chat_completions(body, client, _context_manager(), max_retries=1) + + passthrough = client.send.call_args.kwargs["passthrough"] + assert "stream_options" not in passthrough + assert passthrough == {"model": "test", "max_tokens": 256} + # ── Anthropic protocol routing ───────────────────────────────