diff --git a/src/agent/BotNexus.Agent.Core/Loop/AgentLoopRunner.cs b/src/agent/BotNexus.Agent.Core/Loop/AgentLoopRunner.cs index 0886a64b..46e3a6b8 100644 --- a/src/agent/BotNexus.Agent.Core/Loop/AgentLoopRunner.cs +++ b/src/agent/BotNexus.Agent.Core/Loop/AgentLoopRunner.cs @@ -206,7 +206,14 @@ private static async Task RunLoopAsync( return; } - hasMoreToolCalls = assistantMessage.ToolCalls is { Count: > 0 }; + // #1666: dispatch tool calls only when the provider terminated the turn with + // a ToolUse stop reason. A truncated turn (Length/content filter/stream EOF) + // can still surface a parsed -- but half-formed -- tool call; executing it + // would run with incomplete/garbled arguments. Every provider promotes a + // legitimate, complete tool call to ToolUse at the parser/provider layer, so + // this guard blocks only the truncated case and never a real tool turn. + hasMoreToolCalls = assistantMessage.FinishReason == StopReason.ToolUse + && assistantMessage.ToolCalls is { Count: > 0 }; var executionContext = new AgentContext( currentContext.SystemPrompt, messages, diff --git a/src/agent/BotNexus.Agent.Core/Loop/StreamAccumulator.cs b/src/agent/BotNexus.Agent.Core/Loop/StreamAccumulator.cs index 26bccbe9..17686758 100644 --- a/src/agent/BotNexus.Agent.Core/Loop/StreamAccumulator.cs +++ b/src/agent/BotNexus.Agent.Core/Loop/StreamAccumulator.cs @@ -211,7 +211,7 @@ await emit(new MessageUpdateEvent( break; case DoneEvent done: - final = MessageConverter.ToAgentMessage(done.Message); + final = StripNonExecutableToolCalls(MessageConverter.ToAgentMessage(done.Message)); ReplacePartialWithFinal(contextMessages, final, partialAddedToContext); if (!_startEmitted) { @@ -242,7 +242,7 @@ await emit(new MessageUpdateEvent( } var result = await stream.GetResultAsync().ConfigureAwait(false); - final = MessageConverter.ToAgentMessage(result); + final = StripNonExecutableToolCalls(MessageConverter.ToAgentMessage(result)); ReplacePartialWithFinal(contextMessages, final, partialAddedToContext); if (!_startEmitted) @@ -254,6 +254,42 @@ await emit(new MessageUpdateEvent( return final; } + /// + /// Removes non-executable tool-call content from a finalized assistant message (#1666). + /// A turn that did not terminate with -- truncation + /// (), a content filter, or a stream that ended without a + /// terminal event -- can still carry a parsed but half-formed tool call. Returning the + /// message with that tool call intact would let a later code path re-dispatch it with + /// incomplete arguments, so the tool call is stripped from both + /// and the ContentBlocks mirror. + /// The visible text, finish reason, and all other content blocks are retained. A genuine + /// turn is returned unchanged. + /// + private static AssistantAgentMessage StripNonExecutableToolCalls(AssistantAgentMessage message) + { + if (message.FinishReason == StopReason.ToolUse) + { + return message; + } + + var hasToolCallContent = message.ToolCalls is { Count: > 0 } + || (message.ContentBlocks?.OfType().Any() ?? false); + if (!hasToolCallContent) + { + return message; + } + + var strippedBlocks = message.ContentBlocks + ?.Where(block => block is not ToolCallContent) + .ToList(); + + return message with + { + ToolCalls = null, + ContentBlocks = strippedBlocks, + }; + } + private static void UpdateContextPartial( IList? contextMessages, AssistantAgentMessage current, diff --git a/tests/agent/BotNexus.Agent.Core.Tests/Loop/AgentLoopRunnerTruncatedToolCallTests.cs b/tests/agent/BotNexus.Agent.Core.Tests/Loop/AgentLoopRunnerTruncatedToolCallTests.cs new file mode 100644 index 00000000..9ca7575c --- /dev/null +++ b/tests/agent/BotNexus.Agent.Core.Tests/Loop/AgentLoopRunnerTruncatedToolCallTests.cs @@ -0,0 +1,196 @@ +using System.Text.Json; +using BotNexus.Agent.Core.Configuration; +using BotNexus.Agent.Core.Loop; +using BotNexus.Agent.Core.Tests.TestUtils; +using BotNexus.Agent.Core.Tools; +using BotNexus.Agent.Core.Types; +using BotNexus.Agent.Providers.Core.Models; +using BotNexus.Agent.Providers.Core.Streaming; + +namespace BotNexus.Agent.Core.Tests.Loop; + +using AgentUserMessage = BotNexus.Agent.Core.Types.UserMessage; + +/// +/// Regression tests for #1666: a truncated assistant turn (provider stop reason +/// , a content filter, or stream EOF) that nonetheless +/// surfaces a parsed tool call must NOT be dispatched. Tool dispatch is gated on a +/// terminal, so a half-formed tool call cut off at the +/// token limit is never executed with incomplete/garbled arguments. +/// +/// +/// BotNexus analogue of the OpenClaw fix that ignores truncated tool calls. Every BotNexus +/// provider promotes a legitimate, complete tool call to +/// at the parser/provider layer (Anthropic/Copilot native tool_use, OpenAI tool_calls, +/// Responses promotes a complete call from Stop), so the loop-level guard +/// (FinishReason == ToolUse) blocks only the truncated case and never a real tool +/// turn. +/// +[Collection(ApiProviderRegistryCollection.Name)] +public class AgentLoopRunnerTruncatedToolCallTests +{ + /// + /// A recording tool that counts how many times is invoked, + /// so a test can assert a truncated turn's tool call is never dispatched. + /// + private sealed class RecordingTool : IAgentTool + { + private static readonly JsonElement Schema = JsonDocument.Parse( + """{ "type": "object", "properties": { "command": { "type": "string" } } }""").RootElement.Clone(); + + private int _executeCount; + + public int ExecuteCount => Volatile.Read(ref _executeCount); + + public string Name => "shell"; + + public string Label => "Shell"; + + public Tool Definition => new("shell", "Run a shell command", Schema); + + public Task> PrepareArgumentsAsync( + IReadOnlyDictionary arguments, + CancellationToken cancellationToken = default) + => Task.FromResult(arguments); + + public Task ExecuteAsync( + string toolCallId, + IReadOnlyDictionary arguments, + CancellationToken cancellationToken = default, + AgentToolUpdateCallback? onUpdate = null) + { + Interlocked.Increment(ref _executeCount); + return Task.FromResult(new AgentToolResult([new AgentToolContent(AgentToolContentType.Text, "ok")])); + } + } + + /// + /// Registers a scripted provider whose responses are returned in sequence on each + /// successive LLM call (call 1 = first entry, call 2 = second, ...). The last entry is + /// reused for any further calls. + /// + private static IDisposable RegisterScriptedProvider(string apiId, params LlmStream[] responses) + { + var index = -1; + return TestHelpers.RegisterProvider( + new TestApiProvider(apiId, simpleStreamFactory: (_, _, _) => + { + var next = Interlocked.Increment(ref index); + var slot = Math.Min(next, responses.Length - 1); + return responses[slot]; + })); + } + + /// + /// Builds an assistant stream whose message carries narration text followed by a tool + /// call, terminating with the supplied stop reason. Mirrors a model turn that was cut + /// off mid-tool-call: the content blocks contain a tool call but the terminal is + /// (not ). + /// + private static LlmStream CreateTextPlusToolCallResponse( + StopReason reason, + string text, + (string id, string name, Dictionary args) toolCall) + { + var stream = new LlmStream(); + var toolCallContent = new ToolCallContent(toolCall.id, toolCall.name, toolCall.args); + var content = new List { new TextContent(text), toolCallContent }; + var message = new AssistantMessage( + Content: content, + Api: "test-api", + Provider: "test-provider", + ModelId: "test-model", + Usage: new Usage { Input = 10, Output = 5, TotalTokens = 15 }, + StopReason: reason, + ErrorMessage: null, + ResponseId: "response-1", + Timestamp: DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()); + + stream.Push(new StartEvent(message)); + stream.Push(new TextStartEvent(0, message)); + stream.Push(new TextDeltaEvent(0, text, message)); + stream.Push(new TextEndEvent(0, text, message)); + stream.Push(new ToolCallStartEvent(1, message)); + stream.Push(new ToolCallDeltaEvent(1, "{\"command\":\"gh issue cr", message)); + stream.Push(new ToolCallEndEvent(1, toolCallContent, message)); + stream.Push(new DoneEvent(reason, message)); + stream.End(message); + return stream; + } + + private static AgentContext ContextWithRecordingTool(RecordingTool tool) + => new(null, [], [tool]); + + /// + /// The core #1666 reproduction: a turn whose terminal is + /// surfaces a (partial) tool call. The loop must NOT dispatch it -- the recording tool's + /// execute is never called -- and the persisted assistant message must not carry the + /// tool call (its visible text and finish reason are retained). + /// + [Fact] + public async Task TruncatedToolCallTurn_IsNotDispatchedAndIsStrippedFromPersistedMessage() + { + const string api = "truncated-toolcall-length"; + var tool = new RecordingTool(); + using var _ = RegisterScriptedProvider( + api, + // Turn 1: truncated at the token limit while emitting a tool call. + CreateTextPlusToolCallResponse( + StopReason.Length, + "Filing the issue now", + ("call-1", "shell", new Dictionary { ["command"] = "gh issue cr" })), + // Turn 2: a benign completion so the loop terminates instead of looping on the + // reused truncated stream when the guard is (incorrectly) absent. + TestStreamFactory.CreateTextResponse("Done.", StopReason.Stop)); + + var result = await AgentLoopRunner.RunAsync( + [new AgentUserMessage("file the issue")], + ContextWithRecordingTool(tool), + TestHelpers.CreateTestConfig(model: TestHelpers.CreateTestModel(api)), + _ => Task.CompletedTask, + CancellationToken.None); + + // The truncated tool call must never have executed. + tool.ExecuteCount.ShouldBe(0); + + // The first assistant message (the truncated turn) keeps its text and Length finish + // reason but must not carry the tool call that a later code path could re-dispatch. + var truncatedAssistant = result + .OfType() + .First(m => m.FinishReason == StopReason.Length); + truncatedAssistant.Content.ShouldContain("Filing the issue now"); + truncatedAssistant.ToolCalls.ShouldBeNull(); + (truncatedAssistant.ContentBlocks ?? []).OfType().ShouldBeEmpty(); + + // No tool result message was ever produced for the truncated call. + result.OfType().ShouldBeEmpty(); + } + + /// + /// A normal turn with a complete tool call still + /// dispatches -- the guard must not regress legitimate tool execution. + /// + [Fact] + public async Task CompleteToolUseTurn_IsDispatched() + { + const string api = "truncated-toolcall-tooluse"; + var tool = new RecordingTool(); + using var _ = RegisterScriptedProvider( + api, + // Turn 1: a legitimate, complete tool call (ToolUse terminal) -> must dispatch. + TestStreamFactory.CreateToolCallResponse( + ("call-1", "shell", new Dictionary { ["command"] = "gh issue list" })), + // Turn 2: benign completion so the run settles after the tool result feeds back. + TestStreamFactory.CreateTextResponse("All set.", StopReason.Stop)); + + var result = await AgentLoopRunner.RunAsync( + [new AgentUserMessage("list the issues")], + ContextWithRecordingTool(tool), + TestHelpers.CreateTestConfig(model: TestHelpers.CreateTestModel(api)), + _ => Task.CompletedTask, + CancellationToken.None); + + tool.ExecuteCount.ShouldBe(1); + result.OfType().ShouldHaveSingleItem(); + } +} diff --git a/tests/agent/BotNexus.Agent.Core.Tests/Loop/StreamAccumulatorTruncatedToolCallTests.cs b/tests/agent/BotNexus.Agent.Core.Tests/Loop/StreamAccumulatorTruncatedToolCallTests.cs new file mode 100644 index 00000000..340075d4 --- /dev/null +++ b/tests/agent/BotNexus.Agent.Core.Tests/Loop/StreamAccumulatorTruncatedToolCallTests.cs @@ -0,0 +1,116 @@ +using BotNexus.Agent.Core.Loop; +using BotNexus.Agent.Core.Types; +using BotNexus.Agent.Providers.Core.Models; +using BotNexus.Agent.Providers.Core.Streaming; + +namespace BotNexus.Agent.Core.Tests.Loop; + +/// +/// Unit tests for #1666 at the accumulator boundary: when a streamed turn terminates with a +/// non- reason (truncation, content filter), the accumulated +/// final message must not carry tool-call content that a later code path could re-dispatch. +/// The visible text and the finish reason are retained -- only the tool-call content blocks +/// (both and any tool-call block in +/// ) are dropped. +/// +public class StreamAccumulatorTruncatedToolCallTests +{ + private static AssistantMessage BuildMessage(StopReason reason) + { + var content = new List + { + new TextContent("partial narration"), + new ToolCallContent("call-1", "shell", new Dictionary { ["command"] = "gh issue cr" }) + }; + + return new AssistantMessage( + Content: content, + Api: "test-api", + Provider: "test-provider", + ModelId: "test-model", + Usage: Usage.Empty(), + StopReason: reason, + ErrorMessage: null, + ResponseId: "resp_1", + Timestamp: DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()); + } + + private static LlmStream BuildStream(StopReason reason) + { + var message = BuildMessage(reason); + var stream = new LlmStream(); + stream.Push(new StartEvent(message)); + stream.Push(new TextStartEvent(0, message)); + stream.Push(new TextDeltaEvent(0, "partial narration", message)); + stream.Push(new TextEndEvent(0, "partial narration", message)); + stream.Push(new ToolCallStartEvent(1, message)); + stream.Push(new ToolCallDeltaEvent(1, "{\"command\":\"gh issue cr", message)); + stream.Push(new ToolCallEndEvent(1, (ToolCallContent)message.Content[1], message)); + stream.Push(new DoneEvent(reason, message)); + stream.End(message); + return stream; + } + + /// + /// A truncated () terminal that surfaced a tool call must + /// have the tool call stripped from the accumulated message while text and finish reason + /// are preserved. + /// + [Fact] + public async Task AccumulateAsync_LengthTerminalWithToolCall_StripsToolCallButKeepsTextAndReason() + { + var contextMessages = new List { new BotNexus.Agent.Core.Types.UserMessage("prompt") }; + + var result = await StreamAccumulator.AccumulateAsync( + BuildStream(StopReason.Length), + _ => Task.CompletedTask, + CancellationToken.None, + contextMessages); + + result.FinishReason.ShouldBe(StopReason.Length); + result.Content.ShouldContain("partial narration"); + result.ToolCalls.ShouldBeNull(); + (result.ContentBlocks ?? []).OfType().ShouldBeEmpty(); + (result.ContentBlocks ?? []).OfType().ShouldNotBeEmpty(); + + // The message recorded into the timeline is the stripped final, not the raw partial. + var recorded = contextMessages[^1].ShouldBeOfType(); + recorded.ToolCalls.ShouldBeNull(); + (recorded.ContentBlocks ?? []).OfType().ShouldBeEmpty(); + } + + /// + /// A content-filter terminal () is treated the same way + /// -- any surfaced tool call is non-executable and stripped. + /// + [Fact] + public async Task AccumulateAsync_SensitiveTerminalWithToolCall_StripsToolCall() + { + var result = await StreamAccumulator.AccumulateAsync( + BuildStream(StopReason.Sensitive), + _ => Task.CompletedTask, + CancellationToken.None); + + result.FinishReason.ShouldBe(StopReason.Sensitive); + result.ToolCalls.ShouldBeNull(); + (result.ContentBlocks ?? []).OfType().ShouldBeEmpty(); + } + + /// + /// A legitimate terminal keeps its tool call intact -- + /// the strip must not regress real tool turns. + /// + [Fact] + public async Task AccumulateAsync_ToolUseTerminal_RetainsToolCall() + { + var result = await StreamAccumulator.AccumulateAsync( + BuildStream(StopReason.ToolUse), + _ => Task.CompletedTask, + CancellationToken.None); + + result.FinishReason.ShouldBe(StopReason.ToolUse); + result.ToolCalls.ShouldNotBeNull(); + result.ToolCalls!.ShouldHaveSingleItem(); + (result.ContentBlocks ?? []).OfType().ShouldHaveSingleItem(); + } +}