From e4bb67129743334c13eac4aa6457e1624a2f78ed Mon Sep 17 00:00:00 2001
From: r266-tech <r2668940489@gmail.com>
Date: Wed, 17 Jun 2026 09:16:28 +0800
Subject: [PATCH] fix(reflect): don't surface raw harmony tool-call scaffolding
 as the answer (#2222)

When a provider (e.g. gpt-oss / harmony-format models via litellm Vertex MaaS)
fails to parse a tool-call turn into structured tool_calls, the raw harmony
channel scaffolding (or a done() payload emitted as bare JSON with the tool's
internal citation id fields) lands in message.content with tool_calls empty.
The reflect agent then accepted that content verbatim as the final answer,
surfacing corrupt, unsynthesized text (with empty citations) to end users.

Detect that leaked scaffolding in the no-tool_calls branch and route it to the
existing forced final-synthesis path instead of returning it. Detection is
narrowly gated to avoid rejecting valid answers.
---
 .../hindsight_api/engine/reflect/agent.py     |  80 ++++++++++++-
 .../tests/test_reflect_agent.py               | 107 ++++++++++++++++++
 2 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py
index 04ea2427b..c8bf007cc 100644
--- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py
+++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py
@@ -109,6 +109,72 @@ def _clean_answer_text(text: str) -> str:
     return cleaned if cleaned else text
 
 
+# Raw, unparsed tool-call scaffolding that some providers leak as answer text
+# when they fail to turn a model's tool-call turn into structured tool_calls
+# (notably gpt-oss / harmony-format models served via litellm Vertex MaaS).
+# See issue #2222.
+#
+# Harmony frame tokens delimit a real harmony turn. A frame token must be present
+# before anything is flagged, so prose that merely quotes the routing namespace
+# (``to=functions.x``) or a lone control token while explaining the format is not
+# mistaken for a leak.
+_HARMONY_FRAME_TOKEN = re.compile(r"<\|(?:start|channel|message)\|>")
+# Evidence of an actual tool call inside that frame: the ``<|call|>`` commit token
+# that terminates a call, or the tool-routing namespace (e.g. ``to=functions.recall``).
+_HARMONY_CALL_TOKEN = re.compile(r"<\|call\|>")
+_HARMONY_TOOL_ROUTING = re.compile(r"(?:^|[\s>])to=(?:functions|tool)\b", re.IGNORECASE)
+# The done() tool's internal citation fields. They never appear in a legitimate
+# free-text answer, so a JSON object carrying any of them is an unambiguous
+# leaked done() payload (vs. a plain ``{"answer": ...}`` blob with no id fields,
+# which is intentionally left alone since it is indistinguishable from a valid
+# JSON answer).
+_DONE_TOOL_ID_FIELDS = ("memory_ids", "mental_model_ids", "observation_ids")
+
+
+def _looks_like_unparsed_tool_call(text: str) -> bool:
+    """Return True if ``text`` is raw, unparsed tool-call scaffolding.
+
+    Some providers intermittently fail to parse a model's tool-call turn into
+    structured ``tool_calls`` (notably gpt-oss / harmony-format models served
+    through litellm Vertex MaaS). The raw harmony tool-call structure
+    (``<|channel|>commentary to=functions.recall<|message|>{...}<|call|>``), or a
+    done() payload emitted as bare JSON that carries the tool's internal citation
+    id fields, then land in ``message.content`` with no tool executed. Surfacing
+    that verbatim returns corrupt, unsynthesized output to the user (issue
+    #2222), so it must be treated as a failed parse and routed to final synthesis
+    instead of accepted as an answer.
+
+    Detection is intentionally narrow to avoid rejecting valid answers. Harmony
+    scaffolding is only flagged when a harmony frame token is present together
+    with call evidence (a ``<|call|>`` commit token or the ``to=functions/tool``
+    routing namespace); an answer that merely quotes one of those tokens while
+    explaining the format has no such pairing and is left untouched. A bare
+    ``{"answer": ...}`` object with no id fields is likewise left alone, being
+    indistinguishable from a valid JSON answer. (An answer that embeds a full
+    harmony tool-call example verbatim would be re-synthesized rather than echoed
+    — an accepted, non-destructive trade-off given how rare that is for a
+    memory-synthesis answer and that the fallback reuses the same evidence.)
+    """
+    if not text:
+        return False
+    stripped = text.strip()
+    if _HARMONY_FRAME_TOKEN.search(stripped) and (
+        _HARMONY_CALL_TOKEN.search(stripped) or _HARMONY_TOOL_ROUTING.search(stripped)
+    ):
+        return True
+    # done() emitted as bare JSON text instead of a structured tool call. Key on
+    # the internal id fields (which never appear in a real answer); the ``answer``
+    # field is optional so a leaked payload that omits it is still caught.
+    if stripped.startswith("{") and stripped.endswith("}"):
+        try:
+            obj = json.loads(stripped)
+        except (ValueError, TypeError):
+            return False
+        if isinstance(obj, dict) and any(f in obj for f in _DONE_TOOL_ID_FIELDS):
+            return True
+    return False
+
+
 def _clean_done_answer(text: str) -> str:
     """Clean up the answer field from a done() tool call.
 
@@ -750,7 +816,19 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
                 bool(available_memory_ids) or bool(available_mental_model_ids) or bool(available_observation_ids)
             )
             directive_leak_risk = directives and not has_gathered_evidence
-            if result.content and not directive_leak_risk:
+            # Some providers (e.g. gpt-oss / harmony models via litellm Vertex
+            # MaaS) intermittently fail to parse a tool-call turn into structured
+            # ``tool_calls`` and leave raw harmony scaffolding (or a done() payload
+            # carrying internal citation ids) in ``content`` with ``tool_calls``
+            # empty. Accepting that as the answer surfaces corrupt, unsynthesized
+            # text to the user (issue #2222), so route it to final synthesis.
+            unparsed_tool_call = bool(result.content) and _looks_like_unparsed_tool_call(result.content)
+            if unparsed_tool_call:
+                logger.warning(
+                    f"[REFLECT {reflect_id}] Discarding unparsed tool-call scaffolding in answer "
+                    f"text on iteration {iteration + 1}; forcing final synthesis."
+                )
+            if result.content and not directive_leak_risk and not unparsed_tool_call:
                 answer = _clean_answer_text(result.content.strip())
 
                 # The call_with_tools call above is intentionally uncapped so the
diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py
index 5eb86ddb6..5a355f5a7 100644
--- a/hindsight-api-slim/tests/test_reflect_agent.py
+++ b/hindsight-api-slim/tests/test_reflect_agent.py
@@ -20,6 +20,7 @@
     _count_messages_tokens,
     _is_context_overflow_error,
     _is_done_tool,
+    _looks_like_unparsed_tool_call,
     _normalize_tool_name,
     run_reflect_agent,
 )
@@ -142,6 +143,53 @@ def test_clean_answer_multiline_with_markdown(self):
         assert "mental_model_ids" not in cleaned
 
 
+class TestUnparsedToolCallDetection:
+    """Detect raw harmony / tool-call scaffolding leaking as answer text (issue #2222)."""
+
+    @pytest.mark.parametrize(
+        "text",
+        [
+            # Full leaked harmony tool call: frame tokens + routing + commit.
+            "<|start|>assistant<|channel|>commentary to=functions.recall<|message|>{\"query\": \"x\"}<|call|>",
+            # Frame token + <|call|> commit (no routing namespace visible).
+            "<|channel|>commentary<|message|>{\"name\": \"recall\"}<|call|>",
+            # Frame token + routing, truncated before the commit token.
+            "<|channel|>commentary to=functions.recall<|message|>{\"query\": \"x\"}",
+            # done() emitted as bare JSON carrying the tool's internal id fields.
+            '{"answer": "Confirmed.", "memory_ids": ["mem-1"]}',
+            '  {"answer": "no data", "observation_ids": [], "mental_model_ids": []}  ',
+            # Leaked done payload that omits "answer" but keeps the id fields.
+            '{"memory_ids": ["mem-1"], "mental_model_ids": []}',
+        ],
+    )
+    def test_detects_scaffolding(self, text):
+        assert _looks_like_unparsed_tool_call(text) is True
+
+    @pytest.mark.parametrize(
+        "text",
+        [
+            "",
+            None,
+            "The team meets every Tuesday to review the roadmap.",
+            "Here is the answer: the launch is on Friday.",
+            "I found that the user prefers concise answers and dislikes long emails.",
+            "The function to=string mapping was discussed, but no tool was called.",
+            # Legitimate answers that merely *explain* harmony syntax must not be
+            # flagged: a frame token alone, or the routing namespace alone, is not
+            # a leaked call.
+            "The `<|channel|>` token marks a section boundary in the harmony format.",
+            "Use `<|start|>` and `<|message|>` to frame a turn; they are control tokens.",
+            "Use `to=functions.recall` for the recall tool and `to=tool.foo` for foo.",
+            # A bare answer object with no internal id fields is indistinguishable
+            # from a valid JSON answer, so it is deliberately NOT flagged.
+            '{"answer": "I\'m sorry, but I don\'t have that information."}',
+            '{"result": 42, "status": "ok"}',
+        ],
+    )
+    def test_passes_clean_prose(self, text):
+        assert _looks_like_unparsed_tool_call(text) is False
+
+
 class TestToolNameNormalization:
     """Test tool name normalization for various LLM output formats."""
 
@@ -750,6 +798,65 @@ async def test_short_circuit_answer_under_cap_is_not_rewritten(self, mock_llm, m
         assert result.text == short_answer
         mock_llm.call.assert_not_called()
 
+    @pytest.mark.asyncio
+    async def test_unparsed_harmony_scaffolding_is_not_returned_as_answer(self, mock_llm, mock_functions):
+        """If a provider leaves raw harmony tool-call scaffolding in ``content`` with empty
+        ``tool_calls`` (litellm failing to parse a gpt-oss tool turn, issue #2222), the agent
+        must NOT surface that scaffolding; it routes to final synthesis instead.
+        """
+        leaked = (
+            "<|start|>assistant<|channel|>commentary "
+            'to=functions.recall<|message|>{"query": "recent activity"}<|call|>'
+        )
+        # Every agent turn leaks scaffolding instead of a parsed tool call.
+        mock_llm.call_with_tools.return_value = LLMToolCallResult(
+            tool_calls=[],
+            content=leaked,
+            finish_reason="stop",
+            input_tokens=10,
+            output_tokens=40,
+        )
+
+        result = await run_reflect_agent(
+            llm_config=mock_llm,
+            bank_id="test-bank",
+            query="summarize recent activity",
+            bank_profile={"name": "Test", "mission": "Testing"},
+            **mock_functions,
+        )
+
+        # The corrupt scaffolding must never reach the user-visible answer.
+        assert "<|" not in result.text
+        assert "to=functions" not in result.text
+        # Instead the agent fell through to the forced final-synthesis path.
+        assert result.text == "Fallback answer from final iteration"
+        mock_llm.call.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_leaked_done_payload_with_ids_is_not_returned_as_answer(self, mock_llm, mock_functions):
+        """A done() payload emitted as bare JSON carrying the tool's internal id fields
+        (no real tool call) must be rejected and routed to final synthesis, not surfaced.
+        """
+        mock_llm.call_with_tools.return_value = LLMToolCallResult(
+            tool_calls=[],
+            content='{"answer": "I\'m sorry, but I don\'t have that information.", "memory_ids": []}',
+            finish_reason="stop",
+            input_tokens=10,
+            output_tokens=15,
+        )
+
+        result = await run_reflect_agent(
+            llm_config=mock_llm,
+            bank_id="test-bank",
+            query="what is the status?",
+            bank_profile={"name": "Test", "mission": "Testing"},
+            **mock_functions,
+        )
+
+        assert "memory_ids" not in result.text
+        assert result.text == "Fallback answer from final iteration"
+        mock_llm.call.assert_called()
+
     @pytest.mark.asyncio
     async def test_max_iterations_reached(self, mock_llm, mock_functions):
         """Test that agent stops after max iterations even with errors."""