Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion tests/test_error_chain.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""Tests for verifiers.utils.error_utils.ErrorChain."""

import verifiers as vf
from verifiers.utils.error_utils import ErrorChain, get_vf_error_chain
from verifiers.utils.error_utils import (
ErrorChain,
error_data,
error_from_data,
get_vf_error_chain,
)


class TestErrorChain:
Expand Down Expand Up @@ -147,3 +152,11 @@ def test_hashable_for_counter(self):
# error1 and error2 have same type, should be counted together
assert counter[ErrorChain(ValueError("any"))] == 2
assert counter[ErrorChain(TypeError("any"))] == 1

def test_truncated_reasoning_error_round_trips(self):
    """Pass a TruncatedReasoningError through error_data / error_from_data.

    The rebuilt object must be an instance of both
    vf.TruncatedReasoningError and vf.EmptyModelResponseError.
    """
    original = vf.TruncatedReasoningError("truncated reasoning")
    payload = error_data(original)

    restored = error_from_data(payload)

    for expected_type in (
        vf.TruncatedReasoningError,
        vf.EmptyModelResponseError,
    ):
        assert isinstance(restored, expected_type)
15 changes: 14 additions & 1 deletion tests/test_renderer_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
_step_token_ids,
_to_renderer_message,
)
from verifiers.errors import EmptyModelResponseError
from verifiers.errors import EmptyModelResponseError, TruncatedReasoningError
from verifiers.types import (
AssistantMessage,
SystemMessage,
Expand Down Expand Up @@ -363,6 +363,19 @@ async def test_renderer_client_rejects_reasoning_only_native_response():
await client.raise_from_native_response({"reasoning_content": "hidden chain"})


@pytest.mark.asyncio
async def test_renderer_client_rejects_truncated_reasoning_native_response():
    """A reasoning-only response with finish_reason "length" must raise.

    Expects TruncatedReasoningError with a message matching
    "length limit after reasoning", and checks the raised error is also an
    EmptyModelResponseError.
    """
    # Build the client without running __init__.
    renderer = object.__new__(RendererClient)
    native_response = {
        "reasoning_content": "hidden chain",
        "finish_reason": "length",
    }
    expected_message = "length limit after reasoning"

    with pytest.raises(TruncatedReasoningError, match=expected_message) as caught:
        await renderer.raise_from_native_response(native_response)

    # Checked after the context manager exits so the assertion actually runs.
    assert isinstance(caught.value, EmptyModelResponseError)


@pytest.mark.asyncio
async def test_from_native_response_uses_request_id_and_token_lengths():
"""vLLM's /inference/v1/generate returns ``request_id`` (not ``id``) and
Expand Down
10 changes: 9 additions & 1 deletion verifiers/clients/renderer_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@
from verifiers.clients.openai_chat_completions_client import (
handle_openai_overlong_prompt,
)
from verifiers.errors import EmptyModelResponseError, OverlongPromptError
from verifiers.errors import (
EmptyModelResponseError,
OverlongPromptError,
TruncatedReasoningError,
)
from verifiers.types import (
AssistantMessage,
ClientConfig,
Expand Down Expand Up @@ -643,6 +647,10 @@ async def raise_from_native_response(self, response: dict[str, Any]) -> None:
has_reasoning = bool(response.get("reasoning_content"))
if not (has_content or has_tool_calls):
if has_reasoning:
if response.get("finish_reason") == "length":
raise TruncatedReasoningError(
"Model hit length limit after reasoning but before content or tool calls"
)
raise EmptyModelResponseError(
"Model returned reasoning but no content and did not call any tools"
)
Expand Down
6 changes: 6 additions & 0 deletions verifiers/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ class EmptyModelResponseError(InvalidModelResponseError):
pass


class TruncatedReasoningError(EmptyModelResponseError):
    """Model hit a length limit before returning content or tool calls.

    Subclasses EmptyModelResponseError, so handlers that catch the parent
    type also catch this error.
    """


class OverlongPromptError(Error):
"""Used to catch overlong prompt errors (e.g. prompt + requested number of tokens exceeds model context length)"""

Expand Down
1 change: 1 addition & 0 deletions verifiers/utils/error_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def vf_error_types() -> tuple[type[vf.Error], ...]:
vf.SandboxError,
vf.TunnelError,
vf.InfraError,
vf.TruncatedReasoningError,
vf.EmptyModelResponseError,
vf.InvalidModelResponseError,
vf.ModelError,
Expand Down
Loading