mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-17 08:00:59 +00:00
Add Unicode surrogate sanitization for all providers
Fixes an issue where unpaired Unicode surrogates in tool results cause JSON serialization errors in API providers, particularly Anthropic.

- Add sanitizeSurrogates() utility function to remove unpaired surrogates
- Apply sanitization in all provider convertMessages() functions:
  - User message text content (string and text blocks)
  - Assistant message text and thinking blocks
  - Tool result output
  - System prompts
- Valid emoji (properly paired surrogates) are preserved
- Add comprehensive test suite covering all 8 providers

Previously only Google and Groq handled unpaired surrogates correctly. Now all providers (Anthropic, OpenAI Completions/Responses, Google, xAI, Groq, Cerebras, zAI) sanitize text before API submission.
This commit is contained in:
parent
949cd4efd8
commit
4e7a340460
6 changed files with 420 additions and 24 deletions
|
|
@ -22,6 +22,7 @@ import type {
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { validateToolArguments } from "../utils/validation.js";
|
import { validateToolArguments } from "../utils/validation.js";
|
||||||
import { transformMessages } from "./transorm-messages.js";
|
import { transformMessages } from "./transorm-messages.js";
|
||||||
|
|
||||||
|
|
@ -284,7 +285,7 @@ function buildParams(
|
||||||
if (context.systemPrompt) {
|
if (context.systemPrompt) {
|
||||||
params.system.push({
|
params.system.push({
|
||||||
type: "text",
|
type: "text",
|
||||||
text: context.systemPrompt,
|
text: sanitizeSurrogates(context.systemPrompt),
|
||||||
cache_control: {
|
cache_control: {
|
||||||
type: "ephemeral",
|
type: "ephemeral",
|
||||||
},
|
},
|
||||||
|
|
@ -295,7 +296,7 @@ function buildParams(
|
||||||
params.system = [
|
params.system = [
|
||||||
{
|
{
|
||||||
type: "text",
|
type: "text",
|
||||||
text: context.systemPrompt,
|
text: sanitizeSurrogates(context.systemPrompt),
|
||||||
cache_control: {
|
cache_control: {
|
||||||
type: "ephemeral",
|
type: "ephemeral",
|
||||||
},
|
},
|
||||||
|
|
@ -349,7 +350,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
||||||
if (msg.content.trim().length > 0) {
|
if (msg.content.trim().length > 0) {
|
||||||
params.push({
|
params.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: msg.content,
|
content: sanitizeSurrogates(msg.content),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -357,7 +358,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
||||||
if (item.type === "text") {
|
if (item.type === "text") {
|
||||||
return {
|
return {
|
||||||
type: "text",
|
type: "text",
|
||||||
text: item.text,
|
text: sanitizeSurrogates(item.text),
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
|
|
@ -391,13 +392,13 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
||||||
if (block.text.trim().length === 0) continue;
|
if (block.text.trim().length === 0) continue;
|
||||||
blocks.push({
|
blocks.push({
|
||||||
type: "text",
|
type: "text",
|
||||||
text: block.text,
|
text: sanitizeSurrogates(block.text),
|
||||||
});
|
});
|
||||||
} else if (block.type === "thinking") {
|
} else if (block.type === "thinking") {
|
||||||
if (block.thinking.trim().length === 0) continue;
|
if (block.thinking.trim().length === 0) continue;
|
||||||
blocks.push({
|
blocks.push({
|
||||||
type: "thinking",
|
type: "thinking",
|
||||||
thinking: block.thinking,
|
thinking: sanitizeSurrogates(block.thinking),
|
||||||
signature: block.thinkingSignature || "",
|
signature: block.thinkingSignature || "",
|
||||||
});
|
});
|
||||||
} else if (block.type === "toolCall") {
|
} else if (block.type === "toolCall") {
|
||||||
|
|
@ -422,7 +423,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
||||||
toolResults.push({
|
toolResults.push({
|
||||||
type: "tool_result",
|
type: "tool_result",
|
||||||
tool_use_id: sanitizeToolCallId(msg.toolCallId),
|
tool_use_id: sanitizeToolCallId(msg.toolCallId),
|
||||||
content: msg.output,
|
content: sanitizeSurrogates(msg.output),
|
||||||
is_error: msg.isError,
|
is_error: msg.isError,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -433,7 +434,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
||||||
toolResults.push({
|
toolResults.push({
|
||||||
type: "tool_result",
|
type: "tool_result",
|
||||||
tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
|
tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
|
||||||
content: nextMsg.output,
|
content: sanitizeSurrogates(nextMsg.output),
|
||||||
is_error: nextMsg.isError,
|
is_error: nextMsg.isError,
|
||||||
});
|
});
|
||||||
j++;
|
j++;
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ import type {
|
||||||
ToolCall,
|
ToolCall,
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { validateToolArguments } from "../utils/validation.js";
|
import { validateToolArguments } from "../utils/validation.js";
|
||||||
import { transformMessages } from "./transorm-messages.js";
|
import { transformMessages } from "./transorm-messages.js";
|
||||||
|
|
||||||
|
|
@ -278,7 +279,7 @@ function buildParams(
|
||||||
|
|
||||||
const config: GenerateContentConfig = {
|
const config: GenerateContentConfig = {
|
||||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||||
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
|
...(context.systemPrompt && { systemInstruction: sanitizeSurrogates(context.systemPrompt) }),
|
||||||
...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -323,12 +324,12 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
|
||||||
if (typeof msg.content === "string") {
|
if (typeof msg.content === "string") {
|
||||||
contents.push({
|
contents.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
parts: [{ text: msg.content }],
|
parts: [{ text: sanitizeSurrogates(msg.content) }],
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const parts: Part[] = msg.content.map((item) => {
|
const parts: Part[] = msg.content.map((item) => {
|
||||||
if (item.type === "text") {
|
if (item.type === "text") {
|
||||||
return { text: item.text };
|
return { text: sanitizeSurrogates(item.text) };
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
inlineData: {
|
inlineData: {
|
||||||
|
|
@ -350,12 +351,12 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
|
||||||
|
|
||||||
for (const block of msg.content) {
|
for (const block of msg.content) {
|
||||||
if (block.type === "text") {
|
if (block.type === "text") {
|
||||||
parts.push({ text: block.text });
|
parts.push({ text: sanitizeSurrogates(block.text) });
|
||||||
} else if (block.type === "thinking") {
|
} else if (block.type === "thinking") {
|
||||||
const thinkingPart: Part = {
|
const thinkingPart: Part = {
|
||||||
thought: true,
|
thought: true,
|
||||||
thoughtSignature: block.thinkingSignature,
|
thoughtSignature: block.thinkingSignature,
|
||||||
text: block.thinking,
|
text: sanitizeSurrogates(block.thinking),
|
||||||
};
|
};
|
||||||
parts.push(thinkingPart);
|
parts.push(thinkingPart);
|
||||||
} else if (block.type === "toolCall") {
|
} else if (block.type === "toolCall") {
|
||||||
|
|
@ -383,7 +384,7 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
|
||||||
id: msg.toolCallId,
|
id: msg.toolCallId,
|
||||||
name: msg.toolName,
|
name: msg.toolName,
|
||||||
response: {
|
response: {
|
||||||
result: msg.output,
|
result: sanitizeSurrogates(msg.output),
|
||||||
isError: msg.isError,
|
isError: msg.isError,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ import type {
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { validateToolArguments } from "../utils/validation.js";
|
import { validateToolArguments } from "../utils/validation.js";
|
||||||
import { transformMessages } from "./transorm-messages.js";
|
import { transformMessages } from "./transorm-messages.js";
|
||||||
|
|
||||||
|
|
@ -310,7 +311,7 @@ function convertMessages(model: Model<"openai-completions">, context: Context):
|
||||||
const useDeveloperRole =
|
const useDeveloperRole =
|
||||||
model.reasoning && !model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai");
|
model.reasoning && !model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai");
|
||||||
const role = useDeveloperRole ? "developer" : "system";
|
const role = useDeveloperRole ? "developer" : "system";
|
||||||
params.push({ role: role, content: context.systemPrompt });
|
params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const msg of transformedMessages) {
|
for (const msg of transformedMessages) {
|
||||||
|
|
@ -318,14 +319,14 @@ function convertMessages(model: Model<"openai-completions">, context: Context):
|
||||||
if (typeof msg.content === "string") {
|
if (typeof msg.content === "string") {
|
||||||
params.push({
|
params.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: msg.content,
|
content: sanitizeSurrogates(msg.content),
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
|
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
|
||||||
if (item.type === "text") {
|
if (item.type === "text") {
|
||||||
return {
|
return {
|
||||||
type: "text",
|
type: "text",
|
||||||
text: item.text,
|
text: sanitizeSurrogates(item.text),
|
||||||
} satisfies ChatCompletionContentPartText;
|
} satisfies ChatCompletionContentPartText;
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
|
|
@ -354,7 +355,7 @@ function convertMessages(model: Model<"openai-completions">, context: Context):
|
||||||
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
|
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
|
||||||
if (textBlocks.length > 0) {
|
if (textBlocks.length > 0) {
|
||||||
assistantMsg.content = textBlocks.map((b) => {
|
assistantMsg.content = textBlocks.map((b) => {
|
||||||
return { type: "text", text: b.text };
|
return { type: "text", text: sanitizeSurrogates(b.text) };
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -386,7 +387,7 @@ function convertMessages(model: Model<"openai-completions">, context: Context):
|
||||||
} else if (msg.role === "toolResult") {
|
} else if (msg.role === "toolResult") {
|
||||||
params.push({
|
params.push({
|
||||||
role: "tool",
|
role: "tool",
|
||||||
content: msg.output,
|
content: sanitizeSurrogates(msg.output),
|
||||||
tool_call_id: msg.toolCallId,
|
tool_call_id: msg.toolCallId,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ import type {
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { validateToolArguments } from "../utils/validation.js";
|
import { validateToolArguments } from "../utils/validation.js";
|
||||||
import { transformMessages } from "./transorm-messages.js";
|
import { transformMessages } from "./transorm-messages.js";
|
||||||
|
|
||||||
|
|
@ -364,7 +365,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
|
||||||
const role = model.reasoning ? "developer" : "system";
|
const role = model.reasoning ? "developer" : "system";
|
||||||
messages.push({
|
messages.push({
|
||||||
role,
|
role,
|
||||||
content: context.systemPrompt,
|
content: sanitizeSurrogates(context.systemPrompt),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -373,14 +374,14 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
|
||||||
if (typeof msg.content === "string") {
|
if (typeof msg.content === "string") {
|
||||||
messages.push({
|
messages.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: [{ type: "input_text", text: msg.content }],
|
content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
|
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
|
||||||
if (item.type === "text") {
|
if (item.type === "text") {
|
||||||
return {
|
return {
|
||||||
type: "input_text",
|
type: "input_text",
|
||||||
text: item.text,
|
text: sanitizeSurrogates(item.text),
|
||||||
} satisfies ResponseInputText;
|
} satisfies ResponseInputText;
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
|
|
@ -414,7 +415,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
|
||||||
output.push({
|
output.push({
|
||||||
type: "message",
|
type: "message",
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
|
content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
|
||||||
status: "completed",
|
status: "completed",
|
||||||
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
|
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
|
||||||
} satisfies ResponseOutputMessage);
|
} satisfies ResponseOutputMessage);
|
||||||
|
|
@ -436,7 +437,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
|
||||||
messages.push({
|
messages.push({
|
||||||
type: "function_call_output",
|
type: "function_call_output",
|
||||||
call_id: msg.toolCallId.split("|")[0],
|
call_id: msg.toolCallId.split("|")[0],
|
||||||
output: msg.output,
|
output: sanitizeSurrogates(msg.output),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
25
packages/ai/src/utils/sanitize-unicode.ts
Normal file
25
packages/ai/src/utils/sanitize-unicode.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
/**
 * Removes unpaired Unicode surrogate code units from a string.
 *
 * Unpaired surrogates (high surrogates 0xD800-0xDBFF without a following low surrogate
 * 0xDC00-0xDFFF, or low surrogates without a preceding high surrogate) cause JSON
 * serialization errors in many API providers.
 *
 * Valid emoji and other characters outside the Basic Multilingual Plane use properly paired
 * surrogates and are NOT affected by this function.
 *
 * @param text - The text to sanitize
 * @returns The sanitized text with unpaired surrogates removed
 *
 * @example
 * // Valid emoji (properly paired surrogates) are preserved
 * sanitizeSurrogates("Hello 🙈 World") // => "Hello 🙈 World"
 *
 * // Unpaired high surrogate is removed; the spaces on both sides of it are kept
 * const unpaired = String.fromCharCode(0xD83D); // high surrogate without low
 * sanitizeSurrogates(`Text ${unpaired} here`) // => "Text  here"
 */
export function sanitizeSurrogates(text: string): string {
	// The first alternative consumes a well-formed high+low pair as a unit, so any surrogate
	// code unit that is still matched by the second alternative is necessarily unpaired.
	// This form needs no lookbehind assertion, which is a parse-time SyntaxError on
	// JavaScript engines that do not support it.
	return text.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, (match) =>
		match.length === 2 ? match : "",
	);
}
|
||||||
367
packages/ai/test/unicode-surrogate.test.ts
Normal file
367
packages/ai/test/unicode-surrogate.test.ts
Normal file
|
|
@ -0,0 +1,367 @@
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import { getModel } from "../src/models.js";
|
||||||
|
import { complete } from "../src/stream.js";
|
||||||
|
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for Unicode surrogate pair handling in tool results.
|
||||||
|
*
|
||||||
|
* Issue: When tool results contain emoji or other characters outside the Basic Multilingual Plane,
|
||||||
|
* they may be incorrectly serialized as unpaired surrogates, causing "no low surrogate in string"
|
||||||
|
* errors when sent to the API provider.
|
||||||
|
*
|
||||||
|
* Example error from Anthropic:
|
||||||
|
* "The request body is not valid JSON: no low surrogate in string: line 1 column 197667"
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Replays a conversation containing a finished `test_tool` call whose result mixes emoji with
 * other non-ASCII text, then asserts that the follow-up completion is not an error and
 * returns at least one content block.
 *
 * @param llm - Model the conversation is sent to
 * @param options - Options forwarded unchanged to `complete`
 */
async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	// Tool output with various problematic Unicode characters.
	const toolResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: "test_1",
		toolName: "test_tool",
		output: `Test with emoji 🙈 and other characters:
- Monkey emoji: 🙈
- Thumbs up: 👍
- Heart: ❤️
- Thinking face: 🤔
- Rocket: 🚀
- Mixed text: Mario Zechner wann? Wo? Bin grad äußersr eventuninformiert 🙈
- Japanese: こんにちは
- Chinese: 你好
- Mathematical symbols: ∑∫∂√
- Special quotes: "curly" 'quotes'`,
		isError: false,
	};

	// user request -> assistant tool call -> tool result -> follow-up user question
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{ role: "user", content: "Use the test tool" },
			{
				role: "assistant",
				content: [{ type: "toolCall", id: "test_1", name: "test_tool", arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: {
					input: 0,
					output: 0,
					cacheRead: 0,
					cacheWrite: 0,
					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
				},
				stopReason: "toolUse",
			},
			toolResult,
			{ role: "user", content: "Summarize the tool result briefly." },
		],
		tools: [{ name: "test_tool", description: "A test tool", parameters: {} as any }],
	};

	// Must complete without a surrogate pair error.
	const reply = await complete(llm, context, options);

	expect(reply.stopReason).not.toBe("error");
	expect(reply.errorMessage).toBeFalsy();
	expect(reply.content.length).toBeGreaterThan(0);
}
|
||||||
|
|
||||||
|
/**
 * Replays a conversation containing a finished `linkedin_skill` call whose result is a
 * captured LinkedIn comment payload ending in an emoji, then asserts that the follow-up
 * completion is not an error and contains at least one text block.
 *
 * @param llm - Model the conversation is sent to
 * @param options - Options forwarded unchanged to `complete`
 */
async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	// Real-world tool result from LinkedIn with emoji.
	const toolResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: "linkedin_1",
		toolName: "linkedin_skill",
		output: `Post: Hab einen "Generative KI für Nicht-Techniker" Workshop gebaut.
Unanswered Comments: 2

=> {
"comments": [
{
"author": "Matthias Neumayer's graphic link",
"text": "Leider nehmen das viel zu wenige Leute ernst"
},
{
"author": "Matthias Neumayer's graphic link",
"text": "Mario Zechner wann? Wo? Bin grad äußersr eventuninformiert 🙈"
}
]
}`,
		isError: false,
	};

	// user request -> assistant tool call -> tool result -> follow-up user question
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{ role: "user", content: "Use the linkedin tool to get comments" },
			{
				role: "assistant",
				content: [{ type: "toolCall", id: "linkedin_1", name: "linkedin_skill", arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: {
					input: 0,
					output: 0,
					cacheRead: 0,
					cacheWrite: 0,
					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
				},
				stopReason: "toolUse",
			},
			toolResult,
			{ role: "user", content: "How many comments are there?" },
		],
		tools: [{ name: "linkedin_skill", description: "Get LinkedIn comments", parameters: {} as any }],
	};

	// Must complete without a surrogate pair error.
	const reply = await complete(llm, context, options);

	expect(reply.stopReason).not.toBe("error");
	expect(reply.errorMessage).toBeFalsy();
	expect(reply.content.some((b) => b.type === "text")).toBe(true);
}
|
||||||
|
|
||||||
|
/**
 * Replays a conversation containing a finished `test_tool` call whose result holds a lone
 * high surrogate (0xD83D), then asserts that the follow-up completion is not an error and
 * returns at least one content block.
 *
 * @param llm - Model the conversation is sent to
 * @param options - Options forwarded unchanged to `complete`
 */
async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	// An intentionally unpaired high surrogate (no low surrogate follows it); this simulates
	// what might happen if text processing corrupts emoji.
	const loneHighSurrogate = String.fromCharCode(0xd83d);

	const toolResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: "test_2",
		toolName: "test_tool",
		output: `Text with unpaired surrogate: ${loneHighSurrogate} <- should be sanitized`,
		isError: false,
	};

	// user request -> assistant tool call -> tool result -> follow-up user question
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{ role: "user", content: "Use the test tool" },
			{
				role: "assistant",
				content: [{ type: "toolCall", id: "test_2", name: "test_tool", arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: {
					input: 0,
					output: 0,
					cacheRead: 0,
					cacheWrite: 0,
					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
				},
				stopReason: "toolUse",
			},
			toolResult,
			{ role: "user", content: "What did the tool return?" },
		],
		tools: [{ name: "test_tool", description: "A test tool", parameters: {} as any }],
	};

	// Must complete without a surrogate pair error: the unpaired surrogate should be
	// sanitized before the request is sent to the API.
	const reply = await complete(llm, context, options);

	expect(reply.stopReason).not.toBe("error");
	expect(reply.errorMessage).toBeFalsy();
	expect(reply.content.length).toBeGreaterThan(0);
}
|
||||||
|
|
||||||
|
/**
 * Registers the three Unicode scenarios for one provider as a nested suite.
 *
 * @param title - Suite title shown by the test runner
 * @param apiKey - Value of the provider's credential environment variable; the suite is
 *   skipped when it is missing or empty
 * @param loadModel - Called inside the suite callback to obtain the model under test
 */
function registerUnicodeSuite<TApi extends Api>(
	title: string,
	apiKey: string | undefined,
	loadModel: () => Model<TApi>,
): void {
	describe.skipIf(!apiKey)(title, () => {
		const llm = loadModel();

		it("should handle emoji in tool results", async () => {
			await testEmojiInToolResults(llm);
		});

		it("should handle real-world LinkedIn comment data with emoji", async () => {
			await testRealWorldLinkedInData(llm);
		});

		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
			await testUnpairedHighSurrogate(llm);
		});
	});
}

// One suite per provider, each gated on that provider's credential environment variable.
describe("AI Providers Unicode Surrogate Pair Tests", () => {
	registerUnicodeSuite("Google Provider Unicode Handling", process.env.GEMINI_API_KEY, () =>
		getModel("google", "gemini-2.5-flash"),
	);

	registerUnicodeSuite("OpenAI Completions Provider Unicode Handling", process.env.OPENAI_API_KEY, () =>
		getModel("openai", "gpt-4o-mini"),
	);

	registerUnicodeSuite("OpenAI Responses Provider Unicode Handling", process.env.OPENAI_API_KEY, () =>
		getModel("openai", "gpt-5-mini"),
	);

	registerUnicodeSuite("Anthropic Provider Unicode Handling", process.env.ANTHROPIC_OAUTH_TOKEN, () =>
		getModel("anthropic", "claude-3-5-haiku-20241022"),
	);

	registerUnicodeSuite("xAI Provider Unicode Handling", process.env.XAI_API_KEY, () => getModel("xai", "grok-3"));

	registerUnicodeSuite("Groq Provider Unicode Handling", process.env.GROQ_API_KEY, () =>
		getModel("groq", "openai/gpt-oss-20b"),
	);

	registerUnicodeSuite("Cerebras Provider Unicode Handling", process.env.CEREBRAS_API_KEY, () =>
		getModel("cerebras", "gpt-oss-120b"),
	);

	registerUnicodeSuite("zAI Provider Unicode Handling", process.env.ZAI_API_KEY, () =>
		getModel("zai", "glm-4.5-air"),
	);
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue