From 6c1dde498f97929484099f78607a3b55a43f3f23 Mon Sep 17 00:00:00 2001 From: Patrick Buckley Date: Sun, 4 Jan 2026 01:26:39 -0800 Subject: [PATCH] [Bugfix] Sanitize malformed tool call recipients in Harmony parser Some GPT-OSS base models occasionally generate malformed Harmony format sequences like `to=functions.bash<|channel|>commentary` instead of the correct `to=functions.bash <|constrain|>json`. This causes the function name to be parsed incorrectly as `bash<|channel|>commentary` instead of `bash`. This fix sanitizes the recipient string by stripping `<|channel|>` and everything after it before extracting the function name. The fix is applied in three locations to cover all API endpoints: - `harmony_utils.py`: /v1/responses (non-streaming) - `openai_tool_parser.py`: /v1/chat/completions (non-streaming) - `serving_chat_stream_harmony.py`: /v1/chat/completions (streaming) The /v1/responses streaming endpoint already worked correctly because it captures the recipient before malformed tokens can corrupt it. - Before: ~35% failure rate - After: 0% failure rate --- .../openai/parser/test_harmony_utils.py | 23 +++++++++++++++++++ .../openai/parser/harmony_utils.py | 5 ++++ .../openai/serving_chat_stream_harmony.py | 9 +++++++- vllm/tool_parsers/openai_tool_parser.py | 8 ++++++- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py index 1d34fc51ad56..aaea88f1525b 100644 --- a/tests/entrypoints/openai/parser/test_harmony_utils.py +++ b/tests/entrypoints/openai/parser/test_harmony_utils.py @@ -888,6 +888,29 @@ def test_commentary_with_function_recipient_creates_function_call(self): assert output_items[0].call_id.startswith("call_") assert output_items[0].id.startswith("fc_") + def test_malformed_recipient_with_channel_token_is_sanitized(self): + """Test that malformed recipients containing <|channel|> are sanitized. + + The model sometimes outputs malformed sequences like + 'functions.bash<|channel|>commentary' instead of 'functions.bash'. + This test verifies the sanitization handles this case. + """ + message = Message.from_role_and_content( + Role.ASSISTANT, '{"command": "date"}' + ) + message = message.with_channel("commentary") + # Simulate malformed recipient from model output + message = message.with_recipient("functions.bash<|channel|>commentary") + + output_items = parse_output_message(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseFunctionToolCall) + assert output_items[0].type == "function_call" + # The function name should be sanitized to just "bash" + assert output_items[0].name == "bash" + assert output_items[0].arguments == '{"command": "date"}' + def test_commentary_with_python_recipient_creates_reasoning(self): """Test that commentary with recipient='python' creates reasoning items.""" message = Message.from_role_and_content( diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index 376d97a03964..9b9f5af3bd3d 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -535,6 +535,11 @@ def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutput def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]: """Parse function calls into function tool call items.""" + # Sanitize recipient: the model sometimes outputs malformed sequences + # like "to=functions.bash<|channel|>commentary" instead of the correct + # "to=functions.bash <|constrain|>json". Strip the malformed part. + if "<|channel|>" in recipient: + recipient = recipient.split("<|channel|>")[0].strip() function_name = recipient.split(".")[-1] output_items = [] for content in message.content: diff --git a/vllm/entrypoints/openai/serving_chat_stream_harmony.py b/vllm/entrypoints/openai/serving_chat_stream_harmony.py index 1b5ae620651c..431beba1f190 100644 --- a/vllm/entrypoints/openai/serving_chat_stream_harmony.py +++ b/vllm/entrypoints/openai/serving_chat_stream_harmony.py @@ -48,6 +48,13 @@ def extract_harmony_streaming_delta( and cur_recipient and cur_recipient.startswith("functions.") ): + # Sanitize recipient: the model sometimes outputs malformed sequences + # like "functions.bash<|channel|>commentary" instead of "functions.bash". + # Strip the malformed part. + sanitized_recipient = cur_recipient + if "<|channel|>" in sanitized_recipient: + sanitized_recipient = sanitized_recipient.split("<|channel|>")[0].strip() + # Count completed tool calls to determine index base_index = 0 for msg in harmony_parser.messages: @@ -59,7 +66,7 @@ def extract_harmony_streaming_delta( base_index += 1 if prev_recipient != cur_recipient: - tool_name = cur_recipient.split("functions.", 1)[1] + tool_name = sanitized_recipient.split("functions.", 1)[1] delta_message = DeltaMessage( tool_calls=[ DeltaToolCall( diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index da1a9c773f78..ca6e59d32ca2 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -65,11 +65,17 @@ def extract_tool_calls( tool_args = msg_text else: tool_args = msg_text + # Sanitize recipient: the model sometimes outputs malformed + # sequences like "functions.bash<|channel|>commentary" + # instead of "functions.bash". Strip the malformed part. + recipient = msg.recipient + if "<|channel|>" in recipient: + recipient = recipient.split("<|channel|>")[0].strip() tool_calls.append( ToolCall( type="function", function=FunctionCall( - name=msg.recipient.split("functions.")[1], + name=recipient.split("functions.")[1], arguments=tool_args, ), )