Add Unicode surrogate sanitization for all providers

Fixes an issue where unpaired Unicode surrogates in tool results cause JSON serialization errors in API providers, particularly Anthropic.

- Add sanitizeSurrogates() utility function to remove unpaired surrogates
- Apply sanitization in all provider convertMessages() functions:
  - User message text content (string and text blocks)
  - Assistant message text and thinking blocks
  - Tool result output
  - System prompts
- Valid emoji (properly paired surrogates) are preserved
- Add comprehensive test suite covering all 8 providers

Previously only Google and Groq handled unpaired surrogates correctly. Now all providers (Anthropic, OpenAI Completions/Responses, Google, xAI, Groq, Cerebras, zAI) sanitize text before API submission.
2026-04-20 17:02:11 +00:00 · 2025-10-13 14:26:54 +02:00 · 2025-10-13 14:26:54 +02:00 · 4e7a340460
commit 4e7a340460
parent 949cd4efd8
6 changed files with 420 additions and 24 deletions
--- a/packages/ai/src/providers/google.ts
+++ b/packages/ai/src/providers/google.ts
@@ -22,6 +22,7 @@ import type {
 	ToolCall,
 } from "../types.js";
 import { AssistantMessageEventStream } from "../utils/event-stream.js";
+import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
 import { validateToolArguments } from "../utils/validation.js";
 import { transformMessages } from "./transorm-messages.js";

@@ -278,7 +279,7 @@ function buildParams(

 	const config: GenerateContentConfig = {
 		...(Object.keys(generationConfig).length > 0 && generationConfig),
-		...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
+		...(context.systemPrompt && { systemInstruction: sanitizeSurrogates(context.systemPrompt) }),
 		...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools) }),
 	};

@@ -323,12 +324,12 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
 			if (typeof msg.content === "string") {
 				contents.push({
 					role: "user",
-					parts: [{ text: msg.content }],
+					parts: [{ text: sanitizeSurrogates(msg.content) }],
 				});
 			} else {
 				const parts: Part[] = msg.content.map((item) => {
 					if (item.type === "text") {
-						return { text: item.text };
+						return { text: sanitizeSurrogates(item.text) };
 					} else {
 						return {
 							inlineData: {
@@ -350,12 +351,12 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)

 			for (const block of msg.content) {
 				if (block.type === "text") {
-					parts.push({ text: block.text });
+					parts.push({ text: sanitizeSurrogates(block.text) });
 				} else if (block.type === "thinking") {
 					const thinkingPart: Part = {
 						thought: true,
 						thoughtSignature: block.thinkingSignature,
-						text: block.thinking,
+						text: sanitizeSurrogates(block.thinking),
 					};
 					parts.push(thinkingPart);
 				} else if (block.type === "toolCall") {
@@ -383,7 +384,7 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
 							id: msg.toolCallId,
 							name: msg.toolName,
 							response: {
-								result: msg.output,
+								result: sanitizeSurrogates(msg.output),
 								isError: msg.isError,
 							},
 						},