From 6808f926ad34d36f0c90aea5d25ff6430b60599a Mon Sep 17 00:00:00 2001
From: Codex <codex@primeintellect.ai>
Date: Fri, 19 Jun 2026 16:30:00 +0000
Subject: [PATCH] Add truncated reasoning response error

---
 tests/test_error_chain.py            | 15 ++++++++++++++-
 tests/test_renderer_client.py        | 15 ++++++++++++++-
 verifiers/clients/renderer_client.py | 10 +++++++++-
 verifiers/errors.py                  |  6 ++++++
 verifiers/utils/error_utils.py       |  1 +
 5 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/tests/test_error_chain.py b/tests/test_error_chain.py
index 1b88ea5cfe..4a095c1fbb 100644
--- a/tests/test_error_chain.py
+++ b/tests/test_error_chain.py
@@ -1,7 +1,12 @@
 """Tests for verifiers.utils.error_utils.ErrorChain."""
 
 import verifiers as vf
-from verifiers.utils.error_utils import ErrorChain, get_vf_error_chain
+from verifiers.utils.error_utils import (
+    ErrorChain,
+    error_data,
+    error_from_data,
+    get_vf_error_chain,
+)
 
 
 class TestErrorChain:
@@ -147,3 +152,11 @@ def test_hashable_for_counter(self):
         # error1 and error2 have same type, should be counted together
         assert counter[ErrorChain(ValueError("any"))] == 2
         assert counter[ErrorChain(TypeError("any"))] == 1
+
+    def test_truncated_reasoning_error_round_trips(self):
+        error = vf.TruncatedReasoningError("truncated reasoning")
+        # Round-trip through error_data/error_from_data; the subclass must survive.
+        rebuilt = error_from_data(error_data(error))
+        # The rebuilt error must also still match the parent type it derives from.
+        assert isinstance(rebuilt, vf.TruncatedReasoningError)
+        assert isinstance(rebuilt, vf.EmptyModelResponseError)
diff --git a/tests/test_renderer_client.py b/tests/test_renderer_client.py
index 39efe2cd95..5f9b7c955f 100644
--- a/tests/test_renderer_client.py
+++ b/tests/test_renderer_client.py
@@ -16,7 +16,7 @@
     _step_token_ids,
     _to_renderer_message,
 )
-from verifiers.errors import EmptyModelResponseError
+from verifiers.errors import EmptyModelResponseError, TruncatedReasoningError
 from verifiers.types import (
     AssistantMessage,
     SystemMessage,
@@ -363,6 +363,19 @@ async def test_renderer_client_rejects_reasoning_only_native_response():
         await client.raise_from_native_response({"reasoning_content": "hidden chain"})
 
 
+@pytest.mark.asyncio
+async def test_renderer_client_rejects_truncated_reasoning_native_response():
+    client = object.__new__(RendererClient)
+    # object.__new__ builds a bare instance without running RendererClient.__init__.
+    with pytest.raises(
+        TruncatedReasoningError, match="length limit after reasoning"
+    ) as exc_info:
+        await client.raise_from_native_response(
+            {"reasoning_content": "hidden chain", "finish_reason": "length"}
+        )
+    assert isinstance(exc_info.value, EmptyModelResponseError)
+
+
 @pytest.mark.asyncio
 async def test_from_native_response_uses_request_id_and_token_lengths():
     """vLLM's /inference/v1/generate returns ``request_id`` (not ``id``) and
diff --git a/verifiers/clients/renderer_client.py b/verifiers/clients/renderer_client.py
index 64ca4ec89d..c32c3f6bb6 100644
--- a/verifiers/clients/renderer_client.py
+++ b/verifiers/clients/renderer_client.py
@@ -39,7 +39,11 @@
 from verifiers.clients.openai_chat_completions_client import (
     handle_openai_overlong_prompt,
 )
-from verifiers.errors import EmptyModelResponseError, OverlongPromptError
+from verifiers.errors import (
+    EmptyModelResponseError,
+    OverlongPromptError,
+    TruncatedReasoningError,
+)
 from verifiers.types import (
     AssistantMessage,
     ClientConfig,
@@ -643,6 +647,10 @@ async def raise_from_native_response(self, response: dict[str, Any]) -> None:
         has_reasoning = bool(response.get("reasoning_content"))
         if not (has_content or has_tool_calls):
             if has_reasoning:
+                if response.get("finish_reason") == "length":
+                    raise TruncatedReasoningError(
+                        "Model hit length limit after reasoning but before content or tool calls"
+                    )
                 raise EmptyModelResponseError(
                     "Model returned reasoning but no content and did not call any tools"
                 )
diff --git a/verifiers/errors.py b/verifiers/errors.py
index e725580e48..f55fae4f0b 100644
--- a/verifiers/errors.py
+++ b/verifiers/errors.py
@@ -20,6 +20,12 @@ class EmptyModelResponseError(InvalidModelResponseError):
     pass
 
 
+class TruncatedReasoningError(EmptyModelResponseError):
+    """Model hit the length limit after reasoning, before any content or tool calls."""
+
+    pass
+
+
 class OverlongPromptError(Error):
     """Used to catch overlong prompt errors (e.g. prompt + requested number of tokens exceeds model context length)"""
 
diff --git a/verifiers/utils/error_utils.py b/verifiers/utils/error_utils.py
index 37ac1a4058..ed93a13730 100644
--- a/verifiers/utils/error_utils.py
+++ b/verifiers/utils/error_utils.py
@@ -115,6 +115,7 @@ def vf_error_types() -> tuple[type[vf.Error], ...]:
         vf.SandboxError,
         vf.TunnelError,
         vf.InfraError,
+        vf.TruncatedReasoningError,
         vf.EmptyModelResponseError,
         vf.InvalidModelResponseError,
         vf.ModelError,