co-mono/packages/ai/test/unicode-surrogate.test.ts
Mario Zechner 99b4b1aca0 Add Mistral as AI provider
- Add Mistral to KnownProvider type and model generation
- Implement Mistral-specific compat handling in openai-completions:
  - requiresToolResultName: tool results need name field
  - requiresAssistantAfterToolResult: synthetic assistant message between tool/user
  - requiresThinkingAsText: thinking blocks as <thinking> text
  - requiresMistralToolIds: tool IDs must be exactly 9 alphanumeric chars
- Add MISTRAL_API_KEY environment variable support
- Add Mistral tests across all test files
- Update documentation (README, CHANGELOG) for both ai and coding-agent packages
- Remove client IDs from gemini.md, reference upstream source instead

Closes #165
2025-12-10 20:36:19 +01:00

408 lines
11 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
/**
* Test for Unicode surrogate pair handling in tool results.
*
* Issue: When tool results contain emoji or other characters outside the Basic Multilingual Plane,
* they may be incorrectly serialized as unpaired surrogates, causing "no low surrogate in string"
* errors when sent to the API provider.
*
* Example error from Anthropic:
* "The request body is not valid JSON: no low surrogate in string: line 1 column 197667"
*/
/**
 * Replays a completed `test_tool` call whose result text mixes emoji, Japanese, Chinese,
 * mathematical symbols and quotes, then asks the model to summarize it.
 *
 * The check passes when `complete` returns a response whose stop reason is not "error",
 * that carries no error message, and that has at least one content block.
 *
 * @param llm - Model the conversation is sent to.
 * @param options - Passed through unchanged to `complete`; defaults to `{}`.
 */
async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	const callId = "test_1";
	const toolName = "test_tool";

	// The assistant turn is hand-written, so every usage counter is zero.
	const zeroUsage = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 0,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};

	// Conversation so far: the user asks for the tool, the assistant issues the tool call.
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{
				role: "user",
				content: "Use the test tool",
				timestamp: Date.now(),
			},
			{
				role: "assistant",
				content: [{ type: "toolCall", id: callId, name: toolName, arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: zeroUsage,
				stopReason: "toolUse",
				timestamp: Date.now(),
			},
		],
		tools: [{ name: toolName, description: "A test tool", parameters: {} as any }],
	};

	// Tool output containing a variety of characters, several of them outside the BMP.
	const emojiResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: callId,
		toolName,
		content: [
			{
				type: "text",
				text: `Test with emoji 🙈 and other characters:
- Monkey emoji: 🙈
- Thumbs up: 👍
- Heart: ❤️
- Thinking face: 🤔
- Rocket: 🚀
- Mixed text: Mario Zechner wann? Wo? Bin grad äußersr eventuninformiert 🙈
- Japanese: こんにちは
- Chinese: 你好
- Mathematical symbols: ∑∫∂√
- Special quotes: "curly" 'quotes'`,
			},
		],
		isError: false,
		timestamp: Date.now(),
	};

	// Append the tool result followed by the user's follow-up question.
	context.messages.push(emojiResult, {
		role: "user",
		content: "Summarize the tool result briefly.",
		timestamp: Date.now(),
	});

	// The request must go through without a surrogate pair error.
	const { stopReason, errorMessage, content } = await complete(llm, context, options);
	expect(stopReason).not.toBe("error");
	expect(errorMessage).toBeFalsy();
	expect(content.length).toBeGreaterThan(0);
}
/**
 * Replays a completed `linkedin_skill` call whose result is a LinkedIn comment dump
 * containing German text and an emoji, then asks the model how many comments there are.
 *
 * The check passes when `complete` returns a response whose stop reason is not "error",
 * that carries no error message, and that includes at least one text block.
 *
 * @param llm - Model the conversation is sent to.
 * @param options - Passed through unchanged to `complete`; defaults to `{}`.
 */
async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	const callId = "linkedin_1";
	const toolName = "linkedin_skill";

	// The assistant turn is hand-written, so every usage counter is zero.
	const zeroUsage = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 0,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};

	// Conversation so far: the user asks for comments, the assistant issues the tool call.
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{
				role: "user",
				content: "Use the linkedin tool to get comments",
				timestamp: Date.now(),
			},
			{
				role: "assistant",
				content: [{ type: "toolCall", id: callId, name: toolName, arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: zeroUsage,
				stopReason: "toolUse",
				timestamp: Date.now(),
			},
		],
		tools: [{ name: toolName, description: "Get LinkedIn comments", parameters: {} as any }],
	};

	// Real-world tool output from LinkedIn; the last comment ends with an emoji.
	const linkedInResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: callId,
		toolName,
		content: [
			{
				type: "text",
				text: `Post: Hab einen "Generative KI für Nicht-Techniker" Workshop gebaut.
Unanswered Comments: 2
=> {
"comments": [
{
"author": "Matthias Neumayer's graphic link",
"text": "Leider nehmen das viel zu wenige Leute ernst"
},
{
"author": "Matthias Neumayer's graphic link",
"text": "Mario Zechner wann? Wo? Bin grad äußersr eventuninformiert 🙈"
}
]
}`,
			},
		],
		isError: false,
		timestamp: Date.now(),
	};

	// Append the tool result followed by the user's follow-up question.
	context.messages.push(linkedInResult, {
		role: "user",
		content: "How many comments are there?",
		timestamp: Date.now(),
	});

	// The request must go through without a surrogate pair error.
	const { stopReason, errorMessage, content } = await complete(llm, context, options);
	expect(stopReason).not.toBe("error");
	expect(errorMessage).toBeFalsy();
	const hasTextBlock = content.some((block) => block.type === "text");
	expect(hasTextBlock).toBe(true);
}
/**
 * Replays a completed `test_tool` call whose result text contains a lone high surrogate
 * (code unit 0xD83D with no low surrogate after it), then asks the model what the tool
 * returned.
 *
 * The check passes when `complete` returns a response whose stop reason is not "error",
 * that carries no error message, and that has at least one content block. The test expects
 * the unpaired surrogate to be sanitized before the request reaches the provider.
 *
 * @param llm - Model the conversation is sent to.
 * @param options - Passed through unchanged to `complete`; defaults to `{}`.
 */
async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	const callId = "test_2";
	const toolName = "test_tool";

	// The assistant turn is hand-written, so every usage counter is zero.
	const zeroUsage = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 0,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};

	// Conversation so far: the user asks for the tool, the assistant issues the tool call.
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{
				role: "user",
				content: "Use the test tool",
				timestamp: Date.now(),
			},
			{
				role: "assistant",
				content: [{ type: "toolCall", id: callId, name: toolName, arguments: {} }],
				api: llm.api,
				provider: llm.provider,
				model: llm.id,
				usage: zeroUsage,
				stopReason: "toolUse",
				timestamp: Date.now(),
			},
		],
		tools: [{ name: toolName, description: "A test tool", parameters: {} as any }],
	};

	// Deliberately malformed: a high surrogate with no low surrogate following it.
	// This simulates text processing that has cut an emoji in half.
	const loneHighSurrogate = String.fromCharCode(0xd83d);
	const brokenResult: ToolResultMessage = {
		role: "toolResult",
		toolCallId: callId,
		toolName,
		content: [
			{
				type: "text",
				text: `Text with unpaired surrogate: ${loneHighSurrogate} <- should be sanitized`,
			},
		],
		isError: false,
		timestamp: Date.now(),
	};

	// Append the tool result followed by the user's follow-up question.
	context.messages.push(brokenResult, {
		role: "user",
		content: "What did the tool return?",
		timestamp: Date.now(),
	});

	// The request must go through without a surrogate pair error.
	const { stopReason, errorMessage, content } = await complete(llm, context, options);
	expect(stopReason).not.toBe("error");
	expect(errorMessage).toBeFalsy();
	expect(content.length).toBeGreaterThan(0);
}
// Runs the three Unicode tool-result checks against one model per provider.
// Each provider suite is skipped when its credential environment variable is not set.
describe("AI Providers Unicode Surrogate Pair Tests", () => {
	// Registers the three shared test cases for `llm` inside the suite currently being defined.
	const registerUnicodeCases = <TApi extends Api>(llm: Model<TApi>): void => {
		it("should handle emoji in tool results", () => testEmojiInToolResults(llm));
		it("should handle real-world LinkedIn comment data with emoji", () => testRealWorldLinkedInData(llm));
		it("should handle unpaired high surrogate (0xD83D) in tool results", () => testUnpairedHighSurrogate(llm));
	};

	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("google", "gemini-2.5-flash"));
	});

	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("openai", "gpt-4o-mini"));
	});

	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("openai", "gpt-5-mini"));
	});

	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("anthropic", "claude-3-5-haiku-20241022"));
	});

	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("xai", "grok-3"));
	});

	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("groq", "openai/gpt-oss-20b"));
	});

	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("cerebras", "gpt-oss-120b"));
	});

	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("zai", "glm-4.5-air"));
	});

	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Unicode Handling", () => {
		registerUnicodeCases(getModel("mistral", "devstral-medium-latest"));
	});
});