diff --git a/.github/pydantic-ai-version.txt b/.github/pydantic-ai-version.txt index f634271..475dce1 100644 --- a/.github/pydantic-ai-version.txt +++ b/.github/pydantic-ai-version.txt @@ -1 +1 @@ -1.87.0 +1.106.0 diff --git a/packages/sdk/docs/reference/features.mdx b/packages/sdk/docs/reference/features.mdx index c329f47..47daac2 100644 --- a/packages/sdk/docs/reference/features.mdx +++ b/packages/sdk/docs/reference/features.mdx @@ -290,7 +290,7 @@ Vibes is designed to stay current with Pydantic AI - an AI agent automatically | Audio / video input | Audio and video as tool parameters | ✅ | [Multi-Modal](/advanced/multimodal) | `BinaryContent` with audio/video MIME types; `isAudioContent()` type guard | | Document input | PDFs and documents as tool parameters | ✅ | [Multi-Modal](/advanced/multimodal) | `BinaryContent` with `application/pdf` etc.; `isDocumentContent()` guard | | `UploadedFile` | File reference for provider file uploads | ✅ | [Multi-Modal](/advanced/multimodal) | `UploadedFile` type + `uploadedFileSchema` + `uploadedFileToToolResult()` | -| `BinaryImage` output | Agent returns a generated image | ✅ | [Multi-Modal](/advanced/multimodal) | `outputSchema: BINARY_IMAGE_OUTPUT` - agent returns `BinaryContent` when a tool produces an `image/*` result | +| `BinaryImage` output | Agent returns a generated image | ✅ | [Multi-Modal](/advanced/multimodal) | `outputSchema: BINARY_IMAGE_OUTPUT` - agent returns `BinaryContent` when a tool produces an `image/*` result (supports base64 and non-base64 data URIs) | --- @@ -304,4 +304,4 @@ Vibes is designed to stay current with Pydantic AI - an AI agent automatically | `ReinjectSystemPrompt` | Capability that prepends agent's system prompt when missing from history (v1.86.0) | ❌ | - | Not applicable — vibes never stores system prompts in message history; they are always passed as the `system` field to every `generateText`/`streamText` call, so re-injection is implicit | | `UIAdapter.manage_system_prompt` | `'server'` 
(default) strips frontend system prompts and reinjects agent's own (v1.86.0) | ❌ | - | Not applicable — vibes AG-UI adapter does not store or forward system prompts via message history | | `HandleDeferredToolCalls` | Auto-resolve deferred tool calls inline (v1.87.0) | ✅ | - | `deferredToolHandler` on `AgentOptions`/`RunOptions`; return `DeferredToolResults` to auto-approve or `null` to fall back to `ApprovalRequiredError` | -| `ProcessEventStream` | Observe or transform the agent event stream (v1.87.0) | ✅ | - | `eventStreamHandler` on `AgentOptions`/`RunOptions`; observer form (async fn → void) or processor form (async generator → iterable); applies to `runStreamEvents()` | +| `ProcessEventStream` | Observe or transform the agent event stream (v1.87.0) | ✅ | - | `eventStreamHandler` on `AgentOptions`/`RunOptions`; observer form (async fn → void) or processor form (async generator → iterable); applies to `runStreamEvents()` and preserves downstream completion even when observer handlers do not consume events | diff --git a/packages/sdk/lib/multimodal/binary_content.ts b/packages/sdk/lib/multimodal/binary_content.ts index 7299195..01883e1 100644 --- a/packages/sdk/lib/multimodal/binary_content.ts +++ b/packages/sdk/lib/multimodal/binary_content.ts @@ -239,11 +239,23 @@ export function extractBinaryImageFromToolOutput( } const commaIdx = image.indexOf(","); if (commaIdx === -1) return null; - const base64 = image.slice(commaIdx + 1); - const binaryString = atob(base64); - const data = new Uint8Array(binaryString.length); - for (let i = 0; i < binaryString.length; i++) { - data[i] = binaryString.charCodeAt(i); + const metadata = image.slice(0, commaIdx).toLowerCase(); + const encodedData = image.slice(commaIdx + 1); + + try { + if (metadata.includes(";base64")) { + const binaryString = atob(encodedData); + const data = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + data[i] = binaryString.charCodeAt(i); + } + return { type: 
"binary", mimeType, data }; + } + + const decoded = decodeURIComponent(encodedData); + const data = new TextEncoder().encode(decoded); + return { type: "binary", mimeType, data }; + } catch { + return null; } - return { type: "binary", mimeType, data }; } diff --git a/packages/sdk/tests/binary_image_output_test.ts b/packages/sdk/tests/binary_image_output_test.ts index 82c183c..61fb5d4 100644 --- a/packages/sdk/tests/binary_image_output_test.ts +++ b/packages/sdk/tests/binary_image_output_test.ts @@ -73,6 +73,31 @@ Deno.test("extractBinaryImageFromToolOutput - returns null when image field miss ); }); +Deno.test("extractBinaryImageFromToolOutput - supports valid non-base64 data URI", () => { + const extracted = extractBinaryImageFromToolOutput({ + type: "image", + image: "data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%3E%3C/svg%3E", + mimeType: "image/svg+xml", + }); + assertEquals(extracted !== null, true); + assertEquals(extracted?.mimeType, "image/svg+xml"); + assertEquals( + extracted?.data, + new TextEncoder().encode('<svg xmlns="http://www.w3.org/2000/svg"></svg>'), + ); +}); + +Deno.test("extractBinaryImageFromToolOutput - returns null for invalid base64 data URI", () => { + assertEquals( + extractBinaryImageFromToolOutput({ + type: "image", + image: "data:image/png;base64,***not-base64***", + mimeType: "image/png", + }), + null, + ); +}); + // --------------------------------------------------------------------------- // Integration tests: Agent with BINARY_IMAGE_OUTPUT sentinel // --------------------------------------------------------------------------- diff --git a/packages/sdk/tests/event_stream_test.ts b/packages/sdk/tests/event_stream_test.ts index 9415e9a..aec8423 100644 --- a/packages/sdk/tests/event_stream_test.ts +++ b/packages/sdk/tests/event_stream_test.ts @@ -394,6 +394,28 @@ Deno.test("eventStreamHandler - observer form: side-channel receives all events" assertEquals(downstreamKinds.includes("final-result"), true); }); +Deno.test("eventStreamHandler - observer 
form: downstream completes even if handler does not consume", async () => { + const model = new MockLanguageModelV3({ + doStream: () => Promise.resolve(textStream("hello")), + }); + + let handlerCalls = 0; + + const agent = new Agent({ + model, + eventStreamHandler: () => { + handlerCalls += 1; + // Intentionally do not iterate the stream. + }, + }); + + const downstreamEvents = await collectEvents(agent.runStreamEvents("hi")); + const finalResult = downstreamEvents.find((e) => e.kind === "final-result"); + + assertExists(finalResult); + assertEquals(handlerCalls, 2); // Probe + real observer invocation +}); + Deno.test("eventStreamHandler - processor form: can filter events", async () => { const model = new MockLanguageModelV3({ doStream: () => Promise.resolve(textStream("hello world")),