diff --git a/packages/ai/src/providers/amazon-bedrock.ts b/packages/ai/src/providers/amazon-bedrock.ts index 19dfd094..d0a3031f 100644 --- a/packages/ai/src/providers/amazon-bedrock.ts +++ b/packages/ai/src/providers/amazon-bedrock.ts @@ -88,14 +88,16 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = ( profile: options.profile, }); - const command = new ConverseStreamCommand({ + const commandInput = { modelId: model.id, messages: convertMessages(context, model), system: buildSystemPrompt(context.systemPrompt, model), inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature }, toolConfig: convertToolConfig(context.tools, options.toolChoice), additionalModelRequestFields: buildAdditionalModelRequestFields(model, options), - }); + }; + options?.onPayload?.(commandInput); + const command = new ConverseStreamCommand(commandInput); const response = await client.send(command, { abortSignal: options.signal }); @@ -317,14 +319,14 @@ function buildSystemPrompt( return blocks; } -function sanitizeToolCallId(id: string): string { +function normalizeToolCallId(id: string): string { const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_"); return sanitized.length > 64 ? 
sanitized.slice(0, 64) : sanitized; } function convertMessages(context: Context, model: Model<"bedrock-converse-stream">): Message[] { const result: Message[] = []; - const transformedMessages = transformMessages(context.messages, model); + const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); for (let i = 0; i < transformedMessages.length; i++) { const m = transformedMessages[i]; @@ -364,7 +366,7 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream break; case "toolCall": contentBlocks.push({ - toolUse: { toolUseId: sanitizeToolCallId(c.id), name: c.name, input: c.arguments }, + toolUse: { toolUseId: c.id, name: c.name, input: c.arguments }, }); break; case "thinking": @@ -409,7 +411,7 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream // Add current tool result with all content blocks combined toolResults.push({ toolResult: { - toolUseId: sanitizeToolCallId(m.toolCallId), + toolUseId: m.toolCallId, content: m.content.map((c) => c.type === "image" ? { image: createImageBlock(c.mimeType, c.data) } @@ -425,7 +427,7 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream const nextMsg = transformedMessages[j] as ToolResultMessage; toolResults.push({ toolResult: { - toolUseId: sanitizeToolCallId(nextMsg.toolCallId), + toolUseId: nextMsg.toolCallId, content: nextMsg.content.map((c) => c.type === "image" ? { image: createImageBlock(c.mimeType, c.data) } diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index f39c6e6e..3ec88b11 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -156,6 +156,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = ( const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? ""; const { client, isOAuthToken } = createClient(model, apiKey, options?.interleavedThinking ?? 
true); const params = buildParams(model, context, isOAuthToken, options); + options?.onPayload?.(params); const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal }); stream.push({ type: "start", partial: output }); @@ -445,10 +446,9 @@ function buildParams( return params; } -// Sanitize tool call IDs to match Anthropic's required pattern: ^[a-zA-Z0-9_-]+$ -function sanitizeToolCallId(id: string): string { - // Replace any character that isn't alphanumeric, underscore, or hyphen with underscore - return id.replace(/[^a-zA-Z0-9_-]/g, "_"); +// Normalize tool call IDs to match Anthropic's required pattern and length +function normalizeToolCallId(id: string): string { + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); } function convertMessages( @@ -459,7 +459,7 @@ function convertMessages( const params: MessageParam[] = []; // Transform messages for cross-provider compatibility - const transformedMessages = transformMessages(messages, model); + const transformedMessages = transformMessages(messages, model, normalizeToolCallId); for (let i = 0; i < transformedMessages.length; i++) { const msg = transformedMessages[i]; @@ -533,7 +533,7 @@ function convertMessages( } else if (block.type === "toolCall") { blocks.push({ type: "tool_use", - id: sanitizeToolCallId(block.id), + id: block.id, name: isOAuthToken ? 
toClaudeCodeName(block.name) : block.name, input: block.arguments, }); @@ -551,7 +551,7 @@ function convertMessages( // Add the current tool result toolResults.push({ type: "tool_result", - tool_use_id: sanitizeToolCallId(msg.toolCallId), + tool_use_id: msg.toolCallId, content: convertContentBlocks(msg.content), is_error: msg.isError, }); @@ -562,7 +562,7 @@ function convertMessages( const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult toolResults.push({ type: "tool_result", - tool_use_id: sanitizeToolCallId(nextMsg.toolCallId), + tool_use_id: nextMsg.toolCallId, content: convertContentBlocks(nextMsg.content), is_error: nextMsg.isError, }); diff --git a/packages/ai/src/providers/google-gemini-cli.ts b/packages/ai/src/providers/google-gemini-cli.ts index 9aab1857..0957f904 100644 --- a/packages/ai/src/providers/google-gemini-cli.ts +++ b/packages/ai/src/providers/google-gemini-cli.ts @@ -4,7 +4,6 @@ * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models. */ -import { createHash } from "node:crypto"; import type { Content, ThinkingConfig } from "@google/genai"; import { calculateCost } from "../models.js"; import type { @@ -426,6 +425,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = ( const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT]; const requestBody = buildRequest(model, context, projectId, options, isAntigravity); + options?.onPayload?.(requestBody); const headers = isAntigravity ? 
ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS; const requestHeaders = { @@ -829,33 +829,6 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = ( return stream; }; -function deriveSessionId(context: Context): string | undefined { - for (const message of context.messages) { - if (message.role !== "user") { - continue; - } - - let text = ""; - if (typeof message.content === "string") { - text = message.content; - } else if (Array.isArray(message.content)) { - text = message.content - .filter((item): item is TextContent => item.type === "text") - .map((item) => item.text) - .join("\n"); - } - - if (!text || text.trim().length === 0) { - return undefined; - } - - const hash = createHash("sha256").update(text).digest("hex"); - return hash.slice(0, 32); - } - - return undefined; -} - export function buildRequest( model: Model<"google-gemini-cli">, context: Context, @@ -891,10 +864,7 @@ export function buildRequest( contents, }; - const sessionId = deriveSessionId(context); - if (sessionId) { - request.sessionId = sessionId; - } + request.sessionId = options.sessionId; // System instruction must be object with parts, not plain string if (context.systemPrompt) { diff --git a/packages/ai/src/providers/google-shared.ts b/packages/ai/src/providers/google-shared.ts index 583c0d97..a34e2bd4 100644 --- a/packages/ai/src/providers/google-shared.ts +++ b/packages/ai/src/providers/google-shared.ts @@ -59,10 +59,10 @@ function resolveThoughtSignature(isSameProviderAndModel: boolean, signature: str } /** - * Claude models via Google APIs require explicit tool call IDs in function calls/responses. + * Models via Google APIs that require explicit tool call IDs in function calls/responses. 
*/ export function requiresToolCallId(modelId: string): boolean { - return modelId.startsWith("claude-"); + return modelId.startsWith("claude-") || modelId.startsWith("gpt-oss-"); } /** @@ -70,7 +70,12 @@ export function requiresToolCallId(modelId: string): boolean { */ export function convertMessages(model: Model, context: Context): Content[] { const contents: Content[] = []; - const transformedMessages = transformMessages(context.messages, model); + const normalizeToolCallId = (id: string): string => { + if (!requiresToolCallId(model.id)) return id; + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); + }; + + const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); for (const msg of transformedMessages) { if (msg.role === "user") { diff --git a/packages/ai/src/providers/google-vertex.ts b/packages/ai/src/providers/google-vertex.ts index 384e4ca3..6346db47 100644 --- a/packages/ai/src/providers/google-vertex.ts +++ b/packages/ai/src/providers/google-vertex.ts @@ -84,6 +84,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = ( const location = resolveLocation(options); const client = createClient(model, project, location); const params = buildParams(model, context, options); + options?.onPayload?.(params); const googleStream = await client.models.generateContentStream(params); stream.push({ type: "start", partial: output }); diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index 128dde7c..2ad8c00e 100644 --- a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -71,6 +71,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = ( const apiKey = options?.apiKey || getEnvApiKey(model.provider) || ""; const client = createClient(model, apiKey); const params = buildParams(model, context, options); + options?.onPayload?.(params); const googleStream = await client.models.generateContentStream(params); stream.push({ type: "start", 
partial: output }); diff --git a/packages/ai/src/providers/openai-codex-responses.ts b/packages/ai/src/providers/openai-codex-responses.ts index b2cc0de7..5c3f55f3 100644 --- a/packages/ai/src/providers/openai-codex-responses.ts +++ b/packages/ai/src/providers/openai-codex-responses.ts @@ -122,6 +122,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses" const accountId = extractAccountId(apiKey); const body = buildRequestBody(model, context, options); + options?.onPayload?.(body); const headers = buildHeaders(model.headers, accountId, apiKey, options?.sessionId); const bodyJson = JSON.stringify(body); @@ -267,7 +268,23 @@ function clampReasoningEffort(modelId: string, effort: string): string { function convertMessages(model: Model<"openai-codex-responses">, context: Context): unknown[] { const messages: unknown[] = []; - const transformed = transformMessages(context.messages, model); + const normalizeToolCallId = (id: string): string => { + const allowedProviders = new Set(["openai", "openai-codex", "opencode"]); + if (!allowedProviders.has(model.provider)) return id; + if (!id.includes("|")) return id; + const [callId, itemId] = id.split("|"); + const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_"); + let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_"); + // OpenAI Codex Responses API requires item id to start with "fc" + if (!sanitizedItemId.startsWith("fc")) { + sanitizedItemId = `fc_${sanitizedItemId}`; + } + const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId; + const normalizedItemId = sanitizedItemId.length > 64 ? 
sanitizedItemId.slice(0, 64) : sanitizedItemId; + return `${normalizedCallId}|${normalizedItemId}`; + }; + + const transformed = transformMessages(context.messages, model, normalizeToolCallId); for (const msg of transformed) { if (msg.role === "user") { diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index 0e1a7d6e..18abbe42 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -33,8 +33,7 @@ import { transformMessages } from "./transform-messages.js"; * Normalize tool call ID for Mistral. * Mistral requires tool IDs to be exactly 9 alphanumeric characters (a-z, A-Z, 0-9). */ -function normalizeMistralToolId(id: string, isMistral: boolean): string { - if (!isMistral) return id; +function normalizeMistralToolId(id: string): string { // Remove non-alphanumeric characters let normalized = id.replace(/[^a-zA-Z0-9]/g, ""); // Mistral requires exactly 9 characters @@ -102,6 +101,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = ( const apiKey = options?.apiKey || getEnvApiKey(model.provider) || ""; const client = createClient(model, context, apiKey); const params = buildParams(model, context, options); + options?.onPayload?.(params); const openaiStream = await client.chat.completions.create(params, { signal: options?.signal }); stream.push({ type: "start", partial: output }); @@ -456,7 +456,17 @@ function convertMessages( ): ChatCompletionMessageParam[] { const params: ChatCompletionMessageParam[] = []; - const transformedMessages = transformMessages(context.messages, model); + const normalizeToolCallId = (id: string): string => { + if (compat.requiresMistralToolIds) return normalizeMistralToolId(id); + if (model.provider === "openai") return id.length > 40 ? 
id.slice(0, 40) : id; + // Copilot Claude models route to Claude backend which requires Anthropic ID format + if (model.provider === "github-copilot" && model.id.toLowerCase().includes("claude")) { + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); + } + return id; + }; + + const transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id)); if (context.systemPrompt) { const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole; @@ -555,7 +565,7 @@ function convertMessages( const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[]; if (toolCalls.length > 0) { assistantMsg.tool_calls = toolCalls.map((tc) => ({ - id: normalizeMistralToolId(tc.id, compat.requiresMistralToolIds), + id: tc.id, type: "function" as const, function: { name: tc.name, @@ -603,7 +613,7 @@ function convertMessages( const toolResultMsg: ChatCompletionToolMessageParam = { role: "tool", content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"), - tool_call_id: normalizeMistralToolId(msg.toolCallId, compat.requiresMistralToolIds), + tool_call_id: msg.toolCallId, }; if (compat.requiresToolResultName && msg.toolName) { (toolResultMsg as any).name = msg.toolName; diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index 17f7b360..d685126d 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -87,6 +87,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = ( const apiKey = options?.apiKey || getEnvApiKey(model.provider) || ""; const client = createClient(model, context, apiKey); const params = buildParams(model, context, options); + options?.onPayload?.(params); const openaiStream = await client.responses.create( params, options?.signal ? 
{ signal: options.signal } : undefined, @@ -417,7 +418,23 @@ function buildParams(model: Model<"openai-responses">, context: Context, options function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput { const messages: ResponseInput = []; - const transformedMessages = transformMessages(context.messages, model); + const normalizeToolCallId = (id: string): string => { + const allowedProviders = new Set(["openai", "openai-codex", "opencode"]); + if (!allowedProviders.has(model.provider)) return id; + if (!id.includes("|")) return id; + const [callId, itemId] = id.split("|"); + const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_"); + let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_"); + // OpenAI Responses API requires item id to start with "fc" + if (!sanitizedItemId.startsWith("fc")) { + sanitizedItemId = `fc_${sanitizedItemId}`; + } + const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId; + const normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId; + return `${normalizedCallId}|${normalizedItemId}`; + }; + + const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); if (context.systemPrompt) { const role = model.reasoning ? "developer" : "system"; diff --git a/packages/ai/src/providers/transform-messages.ts b/packages/ai/src/providers/transform-messages.ts index 08c4cffa..22cc35cb 100644 --- a/packages/ai/src/providers/transform-messages.ts +++ b/packages/ai/src/providers/transform-messages.ts @@ -5,12 +5,12 @@ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`. * Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars). 
*/ -function normalizeToolCallId(id: string): string { - return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40); -} - -export function transformMessages(messages: Message[], model: Model): Message[] { - // Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches +export function transformMessages( + messages: Message[], + model: Model, + normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, +): Message[] { + // Build a map of original tool call IDs to normalized IDs const toolCallIdMap = new Map(); // First pass: transform messages (thinking blocks, tool call ID normalization) @@ -32,48 +32,56 @@ export function transformMessages(messages: Message[], model: // Assistant messages need transformation check if (msg.role === "assistant") { const assistantMsg = msg as AssistantMessage; + const isSameModel = + assistantMsg.provider === model.provider && + assistantMsg.api === model.api && + assistantMsg.model === model.id; - // If message is from the same provider and API, keep as is - if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) { - return msg; - } - - // Check if we need to normalize tool call IDs - // Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars) - // OpenAI Responses API generates IDs with `|` and 450+ chars - // GitHub Copilot routes to Anthropic for Claude models - const targetRequiresStrictIds = model.api === "anthropic-messages" || model.provider === "github-copilot"; - const crossProviderSwitch = assistantMsg.provider !== model.provider; - const copilotCrossApiSwitch = - assistantMsg.provider === "github-copilot" && - model.provider === "github-copilot" && - assistantMsg.api !== model.api; - const needsToolCallIdNormalization = targetRequiresStrictIds && (crossProviderSwitch || copilotCrossApiSwitch); - - // Transform message from different provider/model const transformedContent = assistantMsg.content.flatMap((block) => { if (block.type 
=== "thinking") { + // For same model: keep thinking blocks with signatures (needed for replay) + // even if the thinking text is empty (OpenAI encrypted reasoning) + if (isSameModel && block.thinkingSignature) return block; // Skip empty thinking blocks, convert others to plain text if (!block.thinking || block.thinking.trim() === "") return []; + if (isSameModel) return block; return { type: "text" as const, text: block.thinking, }; } - // Normalize tool call IDs when target API requires strict format - if (block.type === "toolCall" && needsToolCallIdNormalization) { - const toolCall = block as ToolCall; - const normalizedId = normalizeToolCallId(toolCall.id); - if (normalizedId !== toolCall.id) { - toolCallIdMap.set(toolCall.id, normalizedId); - return { ...toolCall, id: normalizedId }; - } + + if (block.type === "text") { + if (isSameModel) return block; + return { + type: "text" as const, + text: block.text, + }; } - // All other blocks pass through unchanged + + if (block.type === "toolCall") { + const toolCall = block as ToolCall; + let normalizedToolCall: ToolCall = toolCall; + + if (!isSameModel && toolCall.thoughtSignature) { + normalizedToolCall = { ...toolCall }; + delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature; + } + + if (!isSameModel && normalizeToolCallId) { + const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg); + if (normalizedId !== toolCall.id) { + toolCallIdMap.set(toolCall.id, normalizedId); + normalizedToolCall = { ...normalizedToolCall, id: normalizedId }; + } + } + + return normalizedToolCall; + } + return block; }); - // Return transformed assistant message return { ...assistantMsg, content: transformedContent, diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 5e49eff3..2e32601a 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -86,6 +86,10 @@ export interface StreamOptions { * session-aware features. Ignored by providers that don't support it. 
*/ sessionId?: string; + /** + * Optional callback for inspecting provider payloads before sending. + */ + onPayload?: (payload: unknown) => void; } // Unified options with reasoning passed to streamSimple() and completeSimple() diff --git a/packages/ai/test/abort.test.ts b/packages/ai/test/abort.test.ts index e1e4d647..74486e9c 100644 --- a/packages/ai/test/abort.test.ts +++ b/packages/ai/test/abort.test.ts @@ -20,6 +20,7 @@ async function testAbortSignal(llm: Model, options: Opti timestamp: Date.now(), }, ], + systemPrompt: "You are a helpful assistant.", }; let abortFired = false; diff --git a/packages/ai/test/context-overflow.test.ts b/packages/ai/test/context-overflow.test.ts index 8ccf0cfb..fae10364 100644 --- a/packages/ai/test/context-overflow.test.ts +++ b/packages/ai/test/context-overflow.test.ts @@ -370,9 +370,11 @@ describe("Context overflow error handling", () => { // - Sometimes returns rate limit error // Either way, isContextOverflow should detect it (via usage check or we skip if rate limited) if (result.stopReason === "stop") { - expect(result.hasUsageData).toBe(true); - expect(result.usage.input).toBeGreaterThan(model.contextWindow); - expect(isContextOverflow(result.response, model.contextWindow)).toBe(true); + if (result.hasUsageData && result.usage.input > model.contextWindow) { + expect(isContextOverflow(result.response, model.contextWindow)).toBe(true); + } else { + console.log(" z.ai returned stop without overflow usage data, skipping overflow detection"); + } } else { // Rate limited or other error - just log and pass console.log(" z.ai returned error (possibly rate limited), skipping overflow detection"); diff --git a/packages/ai/test/cross-provider-handoff.test.ts b/packages/ai/test/cross-provider-handoff.test.ts new file mode 100644 index 00000000..e0f2d16e --- /dev/null +++ b/packages/ai/test/cross-provider-handoff.test.ts @@ -0,0 +1,423 @@ +/** + * Cross-Provider Handoff Test + * + * Tests that contexts generated by one provider/model 
can be consumed by another. + * This catches issues like: + * - Tool call ID format incompatibilities (e.g., OpenAI Codex pipe characters) + * - Thinking block transformation issues + * - Message format incompatibilities + * + * Strategy: + * 1. beforeAll: For each provider/model, generate a "small context" (if not cached): + * - User message asking to use a tool + * - Assistant response with thinking + tool call + * - Tool result + * - Final assistant response + * + * 2. Test: For each target provider/model: + * - Concatenate ALL other contexts into one + * - Ask the model to "say hi" + * - If it fails, there's a compatibility issue + * + * Fixtures are generated fresh on each run. + */ + +import { Type } from "@sinclair/typebox"; +import { writeFileSync } from "fs"; +import { beforeAll, describe, expect, it } from "vitest"; +import { getModel } from "../src/models.js"; +import { completeSimple, getEnvApiKey } from "../src/stream.js"; +import type { Api, AssistantMessage, Message, Model, Tool, ToolResultMessage } from "../src/types.js"; +import { resolveApiKey } from "./oauth.js"; + +// Simple tool for testing +const testToolSchema = Type.Object({ + value: Type.Number({ description: "A number to double" }), +}); + +const testTool: Tool = { + name: "double_number", + description: "Doubles a number and returns the result", + parameters: testToolSchema, +}; + +// Provider/model pairs to test +interface ProviderModelPair { + provider: string; + model: string; + label: string; + apiOverride?: Api; +} + +const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [ + // Anthropic + { provider: "anthropic", model: "claude-sonnet-4-5", label: "anthropic-claude-sonnet-4-5" }, + // Google + { provider: "google", model: "gemini-3-flash-preview", label: "google-gemini-3-flash-preview" }, + // OpenAI + { + provider: "openai", + model: "gpt-4o-mini", + label: "openai-completions-gpt-4o-mini", + apiOverride: "openai-completions", + }, + { provider: "openai", model: "gpt-5-mini", label: 
"openai-responses-gpt-5-mini" }, + // OpenAI Codex + { provider: "openai-codex", model: "gpt-5.2-codex", label: "openai-codex-gpt-5.2-codex" }, + // Google Antigravity + { provider: "google-antigravity", model: "gemini-3-flash", label: "antigravity-gemini-3-flash" }, + { provider: "google-antigravity", model: "claude-sonnet-4-5", label: "antigravity-claude-sonnet-4-5" }, + // GitHub Copilot + { provider: "github-copilot", model: "claude-sonnet-4.5", label: "copilot-claude-sonnet-4.5" }, + { provider: "github-copilot", model: "gpt-5.1-codex", label: "copilot-gpt-5.1-codex" }, + { provider: "github-copilot", model: "gemini-3-flash-preview", label: "copilot-gemini-3-flash-preview" }, + { provider: "github-copilot", model: "grok-code-fast-1", label: "copilot-grok-code-fast-1" }, + // Amazon Bedrock + { + provider: "amazon-bedrock", + model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + label: "bedrock-claude-sonnet-4-5", + }, + // xAI + { provider: "xai", model: "grok-code-fast-1", label: "xai-grok-code-fast-1" }, + // Cerebras + { provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" }, + // Groq + { provider: "groq", model: "openai/gpt-oss-120b", label: "groq-gpt-oss-120b" }, + // Mistral + { provider: "mistral", model: "devstral-medium-latest", label: "mistral-devstral-medium" }, + // MiniMax + { provider: "minimax", model: "MiniMax-M2.1", label: "minimax-m2.1" }, + // OpenCode Zen + { provider: "opencode", model: "big-pickle", label: "zen-big-pickle" }, + { provider: "opencode", model: "claude-sonnet-4-5", label: "zen-claude-sonnet-4-5" }, + { provider: "opencode", model: "gemini-3-flash", label: "zen-gemini-3-flash" }, + { provider: "opencode", model: "glm-4.7-free", label: "zen-glm-4.7-free" }, + { provider: "opencode", model: "gpt-5.2-codex", label: "zen-gpt-5.2-codex" }, + { provider: "opencode", model: "minimax-m2.1-free", label: "zen-minimax-m2.1-free" }, +]; + +// Cached context structure +interface CachedContext { + label: 
string; + provider: string; + model: string; + api: Api; + messages: Message[]; + generatedAt: string; +} + +/** + * Get API key for provider - checks OAuth storage first, then env vars + */ +async function getApiKey(provider: string): Promise<string | undefined> { + const oauthKey = await resolveApiKey(provider); + if (oauthKey) return oauthKey; + return getEnvApiKey(provider); +} + +function dumpFailurePayload(params: { label: string; error: string; payload?: unknown; messages: Message[] }): void { + const filename = `/tmp/pi-handoff-${params.label}-${Date.now()}.json`; + const body = { + label: params.label, + error: params.error, + payload: params.payload, + messages: params.messages, + }; + writeFileSync(filename, JSON.stringify(body, null, 2)); + console.log(`Wrote failure payload to ${filename}`); +} + +/** + * Generate a context from a provider/model pair. + * Makes a real API call to get authentic tool call IDs and thinking blocks. + */ +async function generateContext( + pair: ProviderModelPair, + apiKey: string, +): Promise<{ messages: Message[]; api: Api } | null> { + const baseModel = (getModel as (p: string, m: string) => Model | undefined)(pair.provider, pair.model); + if (!baseModel) { + console.log(` Model not found: ${pair.provider}/${pair.model}`); + return null; + } + + const model: Model = pair.apiOverride ? { ...baseModel, api: pair.apiOverride } : baseModel; + + const userMessage: Message = { + role: "user", + content: "Please double the number 21 using the double_number tool.", + timestamp: Date.now(), + }; + + const supportsReasoning = model.reasoning === true; + let lastPayload: unknown; + let assistantResponse: AssistantMessage; + try { + assistantResponse = await completeSimple( + model, + { + systemPrompt: "You are a helpful assistant. Use the provided tool to complete the task.", + messages: [userMessage], + tools: [testTool], + }, + { + apiKey, + reasoning: supportsReasoning ? 
"high" : undefined, + onPayload: (payload) => { + lastPayload = payload; + }, + }, + ); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(` Initial request failed: ${msg}`); + dumpFailurePayload({ + label: `${pair.label}-initial`, + error: msg, + payload: lastPayload, + messages: [userMessage], + }); + return null; + } + + if (assistantResponse.stopReason === "error") { + console.log(` Initial request error: ${assistantResponse.errorMessage}`); + dumpFailurePayload({ + label: `${pair.label}-initial`, + error: assistantResponse.errorMessage || "Unknown error", + payload: lastPayload, + messages: [userMessage], + }); + return null; + } + + const toolCall = assistantResponse.content.find((c) => c.type === "toolCall"); + if (!toolCall || toolCall.type !== "toolCall") { + console.log(` No tool call in response (stopReason: ${assistantResponse.stopReason})`); + return { + messages: [userMessage, assistantResponse], + api: model.api, + }; + } + + console.log(` Tool call ID: ${toolCall.id}`); + + const toolResult: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [{ type: "text", text: "42" }], + isError: false, + timestamp: Date.now(), + }; + + let finalResponse: AssistantMessage; + const messagesForFinal = [userMessage, assistantResponse, toolResult]; + try { + finalResponse = await completeSimple( + model, + { + systemPrompt: "You are a helpful assistant.", + messages: messagesForFinal, + tools: [testTool], + }, + { + apiKey, + reasoning: supportsReasoning ? "high" : undefined, + onPayload: (payload) => { + lastPayload = payload; + }, + }, + ); + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + console.log(` Final request failed: ${msg}`); + dumpFailurePayload({ + label: `${pair.label}-final`, + error: msg, + payload: lastPayload, + messages: messagesForFinal, + }); + return null; + } + + if (finalResponse.stopReason === "error") { + console.log(` Final request error: ${finalResponse.errorMessage}`); + dumpFailurePayload({ + label: `${pair.label}-final`, + error: finalResponse.errorMessage || "Unknown error", + payload: lastPayload, + messages: messagesForFinal, + }); + return null; + } + + return { + messages: [userMessage, assistantResponse, toolResult, finalResponse], + api: model.api, + }; +} + +describe("Cross-Provider Handoff", () => { + let contexts: Record; + let availablePairs: ProviderModelPair[]; + + beforeAll(async () => { + contexts = {}; + availablePairs = []; + + console.log("\n=== Generating Fixtures ===\n"); + + for (const pair of PROVIDER_MODEL_PAIRS) { + const apiKey = await getApiKey(pair.provider); + if (!apiKey) { + throw new Error(`Missing auth for ${pair.provider}`); + } + + console.log(`[${pair.label}] Generating fixture...`); + const result = await generateContext(pair, apiKey); + + if (!result || result.messages.length < 4) { + throw new Error(`Failed to generate fixture for ${pair.label}`); + } + + contexts[pair.label] = { + label: pair.label, + provider: pair.provider, + model: pair.model, + api: result.api, + messages: result.messages, + generatedAt: new Date().toISOString(), + }; + availablePairs.push(pair); + console.log(`[${pair.label}] Generated ${result.messages.length} messages`); + } + + console.log(`\n=== ${availablePairs.length}/${PROVIDER_MODEL_PAIRS.length} contexts available ===\n`); + }, 300000); + + it("should have at least 2 fixtures to test handoffs", () => { + expect(Object.keys(contexts).length).toBeGreaterThanOrEqual(2); + }); + + it("should handle cross-provider handoffs for each target", async () => { + const contextLabels = Object.keys(contexts); + + if 
(contextLabels.length < 2) { + throw new Error("Not enough fixtures for handoff test"); + } + + console.log("\n=== Testing Cross-Provider Handoffs ===\n"); + + const results: { target: string; success: boolean; error?: string }[] = []; + + for (const targetPair of availablePairs) { + const apiKey = await getApiKey(targetPair.provider); + if (!apiKey) { + console.log(`[Target: ${targetPair.label}] Skipping - no auth`); + continue; + } + + // Collect messages from ALL OTHER contexts + const otherMessages: Message[] = []; + for (const [label, ctx] of Object.entries(contexts)) { + if (label === targetPair.label) continue; + otherMessages.push(...ctx.messages); + } + + if (otherMessages.length === 0) { + console.log(`[Target: ${targetPair.label}] Skipping - no other contexts`); + continue; + } + + const allMessages: Message[] = [ + ...otherMessages, + { + role: "user", + content: + "Great, thanks for all that help! Now just say 'Hello, handoff successful!' to confirm you received everything.", + timestamp: Date.now(), + }, + ]; + + const baseModel = (getModel as (p: string, m: string) => Model | undefined)( + targetPair.provider, + targetPair.model, + ); + if (!baseModel) { + console.log(`[Target: ${targetPair.label}] Model not found`); + continue; + } + + const model: Model = targetPair.apiOverride ? { ...baseModel, api: targetPair.apiOverride } : baseModel; + const supportsReasoning = model.reasoning === true; + + console.log( + `[Target: ${targetPair.label}] Testing with ${otherMessages.length} messages from other providers...`, + ); + + let lastPayload: unknown; + try { + const response = await completeSimple( + model, + { + systemPrompt: "You are a helpful assistant.", + messages: allMessages, + tools: [testTool], + }, + { + apiKey, + reasoning: supportsReasoning ? 
"high" : undefined, + onPayload: (payload) => { + lastPayload = payload; + }, + }, + ); + + if (response.stopReason === "error") { + console.log(`[Target: ${targetPair.label}] FAILED: ${response.errorMessage}`); + dumpFailurePayload({ + label: targetPair.label, + error: response.errorMessage || "Unknown error", + payload: lastPayload, + messages: allMessages, + }); + results.push({ target: targetPair.label, success: false, error: response.errorMessage }); + } else { + const text = response.content + .filter((c) => c.type === "text") + .map((c) => c.text) + .join(" "); + const preview = text.slice(0, 100).replace(/\n/g, " "); + console.log(`[Target: ${targetPair.label}] SUCCESS: ${preview}...`); + results.push({ target: targetPair.label, success: true }); + } + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(`[Target: ${targetPair.label}] EXCEPTION: ${msg}`); + dumpFailurePayload({ + label: targetPair.label, + error: msg, + payload: lastPayload, + messages: allMessages, + }); + results.push({ target: targetPair.label, success: false, error: msg }); + } + } + + console.log("\n=== Results Summary ===\n"); + const successes = results.filter((r) => r.success); + const failures = results.filter((r) => !r.success); + + console.log(`Passed: ${successes.length}/${results.length}`); + if (failures.length > 0) { + console.log("\nFailures:"); + for (const f of failures) { + console.log(` - ${f.target}: ${f.error}`); + } + } + + expect(failures.length).toBe(0); + }, 600000); +}); diff --git a/packages/ai/test/image-limits.test.ts b/packages/ai/test/image-limits.test.ts deleted file mode 100644 index d556ab45..00000000 --- a/packages/ai/test/image-limits.test.ts +++ /dev/null @@ -1,1274 +0,0 @@ -/** - * Image limits test suite - * - * Tests provider-specific image limitations: - * - Maximum number of images in a context (with small 100x100 images) - * - Maximum image size (bytes) - * - Maximum image dimensions - * - Maximum 
payload (realistic large images stress test) - * - * ============================================================================ - * DISCOVERED LIMITS (Dec 2025): - * ============================================================================ - * - * BASIC LIMITS (small images): - * | Provider | Model | Max Images | Max Size | Max Dim | - * |-------------|--------------------|------------|----------|----------| - * | Anthropic | claude-3-5-haiku | 100 | 5MB | 8000px | - * | OpenAI | gpt-4o-mini | 500 | ≥25MB | ≥20000px | - * | Gemini | gemini-2.5-flash | ~2000* | ≥40MB | 8000px | - * | Mistral | pixtral-12b | 8 | ~15MB | 8000px | - * | xAI | grok-2-vision | ≥100 | 25MB | 8000px | - * | Groq | llama-4-scout-17b | 5 | ~5MB | ~5760px**| - * | zAI | glm-4.5v | *** | ≥20MB | 8000px | - * | OpenRouter | z-ai/glm-4.5v | *** | ~10MB | ≥20000px | - * - * REALISTIC PAYLOAD LIMITS (large images): - * | Provider | Image Size | Max Count | Total Payload | Limit Hit | - * |-------------|------------|-----------|---------------|---------------------| - * | Anthropic | ~3MB | 6 | ~18MB | Request too large | - * | OpenAI | ~15MB | 2 | ~30MB | Generic error | - * | Gemini | ~20MB | 10 | ~200MB | String length | - * | Mistral | ~10MB | 4 | ~40MB | 413 Payload too large| - * | xAI | ~20MB | 1 | ~20MB | 413 Entity too large| - * | Groq | 5760px | 5 | N/A | 5 image limit | - * | zAI | ~15MB | 2 | ~30MB | 50MB request limit | - * | OpenRouter | ~5MB | 2 | ~10MB | Provider error | - * - * Notes: - * - Anthropic: 100 image hard limit, 5MB per image, but ~18MB total request - * limit in practice (32MB documented but hit limit at ~24MB). - * - OpenAI: 500 image limit but total payload limited to ~30-45MB. - * - Gemini: * Very permissive. 10 x 20MB = 200MB worked! - * - Mistral: 8 images max, ~40MB total payload. - * - xAI: 25MB per image but strict request size limit (~20MB total). - * - Groq: ** Most restrictive. 5 images max, 33177600 pixels max (≈5760x5760). 
- * - zAI: 50MB request limit (explicit in error message). - * - OpenRouter: *** Context-window limited (65536 tokens). - * - * ============================================================================ - * PRACTICAL RECOMMENDATIONS FOR CODING AGENTS: - * ============================================================================ - * - * Conservative cross-provider safe limits: - * - Max 2 images per request at ~5MB each (~10MB total) - * - Max 5760px dimension (for Groq pixel limit) - * - * If excluding Groq: - * - Max 4 images per request at ~5MB each (~20MB total) - * - Max 8000px dimension - * - * For Anthropic-only (most common case): - * - Max 6 images at ~3MB each OR 100 images at <200KB each - * - Max 5MB per image - * - Max 8000px dimension - * - Stay under ~18MB total request size - * - * ============================================================================ - */ - -import { execSync } from "node:child_process"; -import { mkdirSync, rmSync } from "node:fs"; -import { dirname, join } from "node:path"; -import { fileURLToPath } from "node:url"; -import { afterAll, beforeAll, describe, expect, it } from "vitest"; -import { getModel } from "../src/models.js"; -import { complete } from "../src/stream.js"; -import type { Api, Context, ImageContent, Model, OptionsForApi, UserMessage } from "../src/types.js"; -import { hasBedrockCredentials } from "./bedrock-utils.js"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// Temp directory for generated images -const TEMP_DIR = join(__dirname, ".temp-images"); - -/** - * Generate a valid PNG image of specified dimensions using ImageMagick - */ -function generateImage(width: number, height: number, filename: string): string { - const filepath = join(TEMP_DIR, filename); - execSync(`magick -size ${width}x${height} xc:red "${filepath}"`, { stdio: "ignore" }); - const buffer = require("fs").readFileSync(filepath); - return buffer.toString("base64"); -} - -/** - * 
Generate a valid PNG image of approximately the specified size in bytes - */ -function generateImageWithSize(targetBytes: number, filename: string): string { - const filepath = join(TEMP_DIR, filename); - // Use uncompressed PNG to get predictable sizes - // Each pixel is 3 bytes (RGB), plus PNG overhead (~100 bytes) - // For a square image: side = sqrt(targetBytes / 3) - const side = Math.ceil(Math.sqrt(targetBytes / 3)); - // Use noise pattern to prevent compression from shrinking the file - execSync(`magick -size ${side}x${side} xc: +noise Random -depth 8 PNG24:"${filepath}"`, { stdio: "ignore" }); - - // Check actual size and adjust if needed - const stats = require("fs").statSync(filepath); - if (stats.size < targetBytes * 0.8) { - // If too small, increase dimensions - const newSide = Math.ceil(side * Math.sqrt(targetBytes / stats.size)); - execSync(`magick -size ${newSide}x${newSide} xc: +noise Random -depth 8 PNG24:"${filepath}"`, { - stdio: "ignore", - }); - } - - const buffer = require("fs").readFileSync(filepath); - return buffer.toString("base64"); -} - -/** - * Create a user message with multiple images - */ -function createMultiImageMessage(imageCount: number, imageBase64: string): UserMessage { - const content: (ImageContent | { type: "text"; text: string })[] = [ - { type: "text", text: `I am sending you ${imageCount} images. 
Just reply with "received ${imageCount}".` }, - ]; - - for (let i = 0; i < imageCount; i++) { - content.push({ - type: "image", - data: imageBase64, - mimeType: "image/png", - }); - } - - return { - role: "user", - content, - timestamp: Date.now(), - }; -} - -/** - * Test sending a specific number of images to a model - */ -async function testImageCount( - model: Model, - imageCount: number, - imageBase64: string, - options?: OptionsForApi, -): Promise<{ success: boolean; error?: string }> { - const context: Context = { - messages: [createMultiImageMessage(imageCount, imageBase64)], - }; - - try { - const response = await complete(model, context, options); - if (response.stopReason === "error") { - return { success: false, error: response.errorMessage }; - } - return { success: true }; - } catch (e) { - return { success: false, error: e instanceof Error ? e.message : String(e) }; - } -} - -/** - * Test sending an image of a specific size - */ -async function testImageSize( - model: Model, - imageBase64: string, - options?: OptionsForApi, -): Promise<{ success: boolean; error?: string }> { - const context: Context = { - messages: [ - { - role: "user", - content: [ - { type: "text", text: "I am sending you an image. Just reply with 'received'." }, - { type: "image", data: imageBase64, mimeType: "image/png" }, - ], - timestamp: Date.now(), - }, - ], - }; - - try { - const response = await complete(model, context, options); - if (response.stopReason === "error") { - return { success: false, error: response.errorMessage }; - } - return { success: true }; - } catch (e) { - return { success: false, error: e instanceof Error ? 
e.message : String(e) }; - } -} - -/** - * Test sending an image with specific dimensions - */ -async function testImageDimensions( - model: Model, - imageBase64: string, - options?: OptionsForApi, -): Promise<{ success: boolean; error?: string }> { - const context: Context = { - messages: [ - { - role: "user", - content: [ - { type: "text", text: "I am sending you an image. Just reply with 'received'." }, - { type: "image", data: imageBase64, mimeType: "image/png" }, - ], - timestamp: Date.now(), - }, - ], - }; - - try { - const response = await complete(model, context, options); - if (response.stopReason === "error") { - return { success: false, error: response.errorMessage }; - } - return { success: true }; - } catch (e) { - return { success: false, error: e instanceof Error ? e.message : String(e) }; - } -} - -/** - * Find the maximum value that succeeds using linear search - */ -async function findLimit( - testFn: (value: number) => Promise<{ success: boolean; error?: string }>, - min: number, - max: number, - step: number, -): Promise<{ limit: number; lastError?: string }> { - let lastSuccess = min; - let lastError: string | undefined; - - for (let value = min; value <= max; value += step) { - console.log(` Testing value: ${value}...`); - const result = await testFn(value); - if (result.success) { - lastSuccess = value; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - return { limit: lastSuccess, lastError }; -} - -// ============================================================================= -// Provider-specific test suites -// ============================================================================= - -describe("Image Limits E2E Tests", () => { - let smallImage: string; // 100x100 for count tests - - beforeAll(() => { - // Create temp directory - mkdirSync(TEMP_DIR, { recursive: true }); - - // Generate small test image for count tests - smallImage = 
generateImage(100, 100, "small.png"); - }); - - afterAll(() => { - // Clean up temp directory - rmSync(TEMP_DIR, { recursive: true, force: true }); - }); - - // ------------------------------------------------------------------------- - // Anthropic (claude-3-5-haiku-20241022) - // Limits: 100 images, 5MB per image, 8000px max dimension - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (claude-3-5-haiku-20241022)", () => { - const model = getModel("anthropic", "claude-3-5-haiku-20241022"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - // Known limit: 100 images - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 20, 120, 20); - console.log(`\n Anthropic max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(80); - expect(limit).toBeLessThanOrEqual(100); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - // Known limit: 5MB per image - const sizes = [1, 2, 3, 4, 5, 6]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Anthropic max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }); - - it("should find maximum 
image dimension limit", { timeout: 600000 }, async () => { - // Known limit: 8000px - const dimensions = [1000, 2000, 4000, 6000, 8000, 10000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Anthropic max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(6000); - expect(lastSuccess).toBeLessThanOrEqual(8000); - }); - }); - - // ------------------------------------------------------------------------- - // OpenAI (gpt-4o-mini via openai-completions) - // Limits: 500 images, ~20MB per image (documented) - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI (gpt-4o-mini)", () => { - const { compat: _compat, ...baseModel } = getModel("openai", "gpt-4o-mini"); - void _compat; - const model: Model<"openai-completions"> = { - ...baseModel, - api: "openai-completions", - }; - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - // Known limit: 500 images - const { limit, lastError } = await findLimit( - (count) => testImageCount(model, count, smallImage), - 100, - 600, - 100, - ); - console.log(`\n OpenAI max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(400); - expect(limit).toBeLessThanOrEqual(500); - }); - - it("should find maximum image size limit", 
{ timeout: 600000 }, async () => { - const MB = 1024 * 1024; - // Documented limit: 20MB - const sizes = [5, 10, 15, 20, 25]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n OpenAI max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(15); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n OpenAI max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // Google Gemini (gemini-2.5-flash) - // Limits: Very high (~2500 images), large size support - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini (gemini-2.5-flash)", () => { - const model = getModel("google", "gemini-2.5-flash"); - - it("should accept a small number of images (5)", async () => { - const result = 
await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 900000 }, async () => { - // Known to work up to ~2500, hits errors around 3000 - const { limit, lastError } = await findLimit( - (count) => testImageCount(model, count, smallImage), - 500, - 3000, - 500, - ); - console.log(`\n Gemini max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(500); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - // Very permissive, tested up to 60MB successfully - const sizes = [10, 20, 30, 40]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Gemini max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(20); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Gemini max dimension: ~${lastSuccess}px (last 
error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // Mistral (pixtral-12b) - // Limits: ~8 images, ~15MB per image - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral (pixtral-12b)", () => { - const model = getModel("mistral", "pixtral-12b"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - // Known to fail around 9 images - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 5, 15, 1); - console.log(`\n Mistral max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [5, 10, 15, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Mistral max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - 
console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Mistral max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // OpenRouter (z-ai/glm-4.5v) - // Limits: Context-window limited (~45 images at 100x100), ~15MB per image - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter (z-ai/glm-4.5v)", () => { - const model = getModel("openrouter", "z-ai/glm-4.5v"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - // Limited by context window, not explicit image limit - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 60, 10); - console.log(`\n OpenRouter max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(10); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [5, 10, 15, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` 
SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n OpenRouter max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n OpenRouter max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // xAI (grok-2-vision) - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.XAI_API_KEY)("xAI (grok-2-vision)", () => { - const model = getModel("xai", "grok-2-vision"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 100, 10); - console.log(`\n xAI max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [5, 10, 
15, 20, 25]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n xAI max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n xAI max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // Groq (llama-4-scout-17b) - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.GROQ_API_KEY)("Groq (llama-4-scout-17b)", () => { - const model = getModel("groq", "meta-llama/llama-4-scout-17b-16e-instruct"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 
}, async () => { - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 5, 50, 5); - console.log(`\n Groq max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [1, 5, 10, 15, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Groq max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Groq max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // zAI (glm-4.5v) - // ------------------------------------------------------------------------- - 
describe.skipIf(!process.env.ZAI_API_KEY)("zAI (glm-4.5v)", () => { - const model = getModel("zai", "glm-4.5v"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 100, 10); - console.log(`\n zAI max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [5, 10, 15, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n zAI max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - const dimensions = [2000, 4000, 8000, 16000, 20000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: ${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - 
console.log(`\n zAI max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(2000); - }); - }); - - // ------------------------------------------------------------------------- - // Vercel AI Gateway (google/gemini-2.5-flash) - // ------------------------------------------------------------------------- - describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway (google/gemini-2.5-flash)", () => { - const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 100, 10); - console.log(`\n Vercel AI Gateway max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(5); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - const sizes = [5, 10, 15, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Vercel AI Gateway max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(5); - }); - }); - - // ------------------------------------------------------------------------- - // Amazon Bedrock (claude-sonnet-4-5) - // Limits: 100 images (Anthropic), 5MB 
per image, 8000px max dimension - // ------------------------------------------------------------------------- - describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock (claude-sonnet-4-5)", () => { - const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0"); - - it("should accept a small number of images (5)", async () => { - const result = await testImageCount(model, 5, smallImage); - expect(result.success, result.error).toBe(true); - }); - - it("should find maximum image count limit", { timeout: 600000 }, async () => { - // Anthropic limit: 100 images - const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 20, 120, 20); - console.log(`\n Bedrock max images: ~${limit} (last error: ${lastError})`); - expect(limit).toBeGreaterThanOrEqual(80); - expect(limit).toBeLessThanOrEqual(100); - }); - - it("should find maximum image size limit", { timeout: 600000 }, async () => { - const MB = 1024 * 1024; - // Anthropic limit: 5MB per image - const sizes = [1, 2, 3, 4, 5, 6]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const sizeMB of sizes) { - console.log(` Testing size: ${sizeMB}MB...`); - const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`); - const result = await testImageSize(model, imageBase64); - if (result.success) { - lastSuccess = sizeMB; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Bedrock max image size: ~${lastSuccess}MB (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }); - - it("should find maximum image dimension limit", { timeout: 600000 }, async () => { - // Anthropic limit: 8000px - const dimensions = [1000, 2000, 4000, 6000, 8000, 10000]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const dim of dimensions) { - console.log(` Testing dimension: 
${dim}x${dim}...`); - const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`); - const result = await testImageDimensions(model, imageBase64); - if (result.success) { - lastSuccess = dim; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 100)}`); - break; - } - } - - console.log(`\n Bedrock max dimension: ~${lastSuccess}px (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(6000); - expect(lastSuccess).toBeLessThanOrEqual(8000); - }); - }); - - // ========================================================================= - // MAX SIZE IMAGES TEST - // ========================================================================= - // Tests how many images at (or near) max allowed size each provider can handle. - // This tests realistic payload limits, not just image count with tiny files. - // - // Note: A real 8kx8k noise PNG is ~183MB (exceeds all provider limits). - // So we test with images sized near each provider's actual size limit. 
- // ========================================================================= - - describe("Max Size Images (realistic payload stress test)", () => { - // Generate images at specific sizes for each provider's limit - const imageCache: Map = new Map(); - - function getImageAtSize(targetMB: number): string { - if (imageCache.has(targetMB)) { - return imageCache.get(targetMB)!; - } - console.log(` Generating ~${targetMB}MB noise image...`); - const imageBase64 = generateImageWithSize(targetMB * 1024 * 1024, `stress-${targetMB}mb.png`); - const actualSize = Buffer.from(imageBase64, "base64").length; - console.log(` Actual size: ${(actualSize / 1024 / 1024).toFixed(2)}MB`); - imageCache.set(targetMB, imageBase64); - return imageBase64; - } - - // Anthropic - 5MB per image limit, 32MB total request, 100 image count - // Using 3MB to stay under 5MB limit (generateImageWithSize has overhead) - it.skipIf(!process.env.ANTHROPIC_API_KEY)( - "Anthropic: max ~3MB images before rejection", - { timeout: 900000 }, - async () => { - const model = getModel("anthropic", "claude-3-5-haiku-20241022"); - const image3mb = getImageAtSize(3); - // 32MB total limit / ~4MB actual = ~8 images - const counts = [1, 2, 4, 6, 8, 10, 12]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~3MB images...`); - const result = await testImageCount(model, count, image3mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n Anthropic max ~3MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // Amazon Bedrock (Claude) - 5MB per image limit, same as Anthropic direct - // Using 3MB to stay under 5MB limit - it.skipIf(!hasBedrockCredentials())( - "Bedrock: max ~3MB images before rejection", - { timeout: 
900000 }, - async () => { - const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0"); - const image3mb = getImageAtSize(3); - // Similar to Anthropic, test progressively - const counts = [1, 2, 4, 6, 8, 10, 12]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~3MB images...`); - const result = await testImageCount(model, count, image3mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n Bedrock max ~3MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // OpenAI - 20MB per image documented, we found ≥25MB works - // Test with 15MB images to stay safely under limit - it.skipIf(!process.env.OPENAI_API_KEY)( - "OpenAI: max ~15MB images before rejection", - { timeout: 1800000 }, - async () => { - const model = getModel("openai", "gpt-4o-mini"); - const image15mb = getImageAtSize(15); - // Test progressively - const counts = [1, 2, 5, 10, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~15MB images...`); - const result = await testImageCount(model, count, image15mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n OpenAI max ~15MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // Gemini - very permissive, ≥40MB per image works - // Test with 20MB images - it.skipIf(!process.env.GEMINI_API_KEY)( - "Gemini: max ~20MB images before rejection", - { timeout: 1800000 }, - async () => { - const model = getModel("google", 
"gemini-2.5-flash"); - const image20mb = getImageAtSize(20); - // Test progressively - const counts = [1, 2, 5, 10, 20, 50]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~20MB images...`); - const result = await testImageCount(model, count, image20mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n Gemini max ~20MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // Mistral - 8 image limit, ~15MB per image - // Test with 10MB images (safely under limit) - it.skipIf(!process.env.MISTRAL_API_KEY)( - "Mistral: max ~10MB images before rejection", - { timeout: 600000 }, - async () => { - const model = getModel("mistral", "pixtral-12b"); - const image10mb = getImageAtSize(10); - // Known limit is 8 images - const counts = [1, 2, 4, 6, 8, 9]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~10MB images...`); - const result = await testImageCount(model, count, image10mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n Mistral max ~10MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // xAI - 25MB per image limit (26214400 bytes exact) - // Test with 20MB images (safely under limit) - it.skipIf(!process.env.XAI_API_KEY)("xAI: max ~20MB images before rejection", { timeout: 1200000 }, async () => { - const model = getModel("xai", "grok-2-vision"); - const image20mb = getImageAtSize(20); - // Test progressively - const counts = [1, 2, 5, 10, 20]; - - let 
lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~20MB images...`); - const result = await testImageCount(model, count, image20mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n xAI max ~20MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }); - - // Groq - very limited (5 images, ~5760px max due to 33M pixel limit) - // 8k images (64M pixels) exceed limit, so test with 5760px images instead - it.skipIf(!process.env.GROQ_API_KEY)( - "Groq: max 5760px images before rejection", - { timeout: 600000 }, - async () => { - const model = getModel("groq", "meta-llama/llama-4-scout-17b-16e-instruct"); - // Generate 5760x5760 image (33177600 pixels = Groq's limit) - console.log(" Generating 5760x5760 test image for Groq..."); - const image5760 = generateImage(5760, 5760, "stress-5760.png"); - - // Known limit is 5 images - const counts = [1, 2, 3, 4, 5, 6]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x 5760px images...`); - const result = await testImageCount(model, count, image5760); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n Groq max 5760px images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - - // zAI - ≥20MB per image, context-window limited (65k tokens) - // Test with 15MB images - it.skipIf(!process.env.ZAI_API_KEY)("zAI: max ~15MB images before rejection", { timeout: 1200000 }, async () => { - const model = getModel("zai", "glm-4.5v"); - const image15mb = 
getImageAtSize(15); - // Context-limited, test progressively - const counts = [1, 2, 5, 10, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~15MB images...`); - const result = await testImageCount(model, count, image15mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n zAI max ~15MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }); - - // OpenRouter - ~10MB per image, context-window limited (65k tokens) - // Test with 5MB images (safer size) - it.skipIf(!process.env.OPENROUTER_API_KEY)( - "OpenRouter: max ~5MB images before rejection", - { timeout: 900000 }, - async () => { - const model = getModel("openrouter", "z-ai/glm-4.5v"); - const image5mb = getImageAtSize(5); - // Context-limited, test progressively - const counts = [1, 2, 5, 10, 20]; - - let lastSuccess = 0; - let lastError: string | undefined; - - for (const count of counts) { - console.log(` Testing ${count} x ~5MB images...`); - const result = await testImageCount(model, count, image5mb); - if (result.success) { - lastSuccess = count; - console.log(` SUCCESS`); - } else { - lastError = result.error; - console.log(` FAILED: ${result.error?.substring(0, 150)}`); - break; - } - } - - console.log(`\n OpenRouter max ~5MB images: ${lastSuccess} (last error: ${lastError})`); - expect(lastSuccess).toBeGreaterThanOrEqual(1); - }, - ); - }); -}); diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index f2c9ff6e..2a140292 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -155,6 +155,7 @@ async function handleStreaming(model: Model, options?: O const context: Context = { messages: [{ role: "user", content: "Count from 1 to 3", timestamp: 
Date.now() }], + systemPrompt: "You are a helpful assistant.", }; const s = stream(model, context, options); @@ -190,6 +191,7 @@ async function handleThinking(model: Model, options?: Op timestamp: Date.now(), }, ], + systemPrompt: "You are a helpful assistant.", }; const s = stream(model, context, options); @@ -245,6 +247,7 @@ async function handleImage(model: Model, options?: Optio timestamp: Date.now(), }, ], + systemPrompt: "You are a helpful assistant.", }; const response = await complete(model, context, options); diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts index ed5cd918..dc897bb5 100644 --- a/packages/ai/test/tokens.test.ts +++ b/packages/ai/test/tokens.test.ts @@ -24,6 +24,7 @@ async function testTokensOnAbort(llm: Model, options: Op timestamp: Date.now(), }, ], + systemPrompt: "You are a helpful assistant.", }; const controller = new AbortController(); diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts index 7397034c..4087d306 100644 --- a/packages/ai/test/unicode-surrogate.test.ts +++ b/packages/ai/test/unicode-surrogate.test.ts @@ -31,6 +31,7 @@ const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken */ async function testEmojiInToolResults(llm: Model, options: OptionsForApi = {}) { + const toolCallId = llm.provider === "mistral" ? 
"testtool1" : "test_1"; // Simulate a tool that returns emoji const context: Context = { systemPrompt: "You are a helpful assistant.", @@ -45,7 +46,7 @@ async function testEmojiInToolResults(llm: Model, option content: [ { type: "toolCall", - id: "test_1", + id: toolCallId, name: "test_tool", arguments: {}, }, @@ -77,7 +78,7 @@ async function testEmojiInToolResults(llm: Model, option // Add tool result with various problematic Unicode characters const toolResult: ToolResultMessage = { role: "toolResult", - toolCallId: "test_1", + toolCallId: toolCallId, toolName: "test_tool", content: [ { @@ -117,6 +118,7 @@ async function testEmojiInToolResults(llm: Model, option } async function testRealWorldLinkedInData(llm: Model, options: OptionsForApi = {}) { + const toolCallId = llm.provider === "mistral" ? "linkedin1" : "linkedin_1"; const context: Context = { systemPrompt: "You are a helpful assistant.", messages: [ @@ -130,7 +132,7 @@ async function testRealWorldLinkedInData(llm: Model, opt content: [ { type: "toolCall", - id: "linkedin_1", + id: toolCallId, name: "linkedin_skill", arguments: {}, }, @@ -162,7 +164,7 @@ async function testRealWorldLinkedInData(llm: Model, opt // Real-world tool result from LinkedIn with emoji const toolResult: ToolResultMessage = { role: "toolResult", - toolCallId: "linkedin_1", + toolCallId: toolCallId, toolName: "linkedin_skill", content: [ { @@ -205,6 +207,7 @@ Unanswered Comments: 2 } async function testUnpairedHighSurrogate(llm: Model, options: OptionsForApi = {}) { + const toolCallId = llm.provider === "mistral" ? 
"testtool2" : "test_2"; const context: Context = { systemPrompt: "You are a helpful assistant.", messages: [ @@ -218,7 +221,7 @@ async function testUnpairedHighSurrogate(llm: Model, opt content: [ { type: "toolCall", - id: "test_2", + id: toolCallId, name: "test_tool", arguments: {}, }, @@ -253,7 +256,7 @@ async function testUnpairedHighSurrogate(llm: Model, opt const toolResult: ToolResultMessage = { role: "toolResult", - toolCallId: "test_2", + toolCallId: toolCallId, toolName: "test_tool", content: [{ type: "text", text: `Text with unpaired surrogate: ${unpairedSurrogate} <- should be sanitized` }], isError: false, diff --git a/packages/coding-agent/test/agent-session-branching.test.ts b/packages/coding-agent/test/agent-session-branching.test.ts index c6028e19..00aabbd1 100644 --- a/packages/coding-agent/test/agent-session-branching.test.ts +++ b/packages/coding-agent/test/agent-session-branching.test.ts @@ -90,9 +90,9 @@ describe.skipIf(!API_KEY)("AgentSession forking", () => { // After forking, conversation should be empty (forked before the first message) expect(session.messages.length).toBe(0); - // Session file should exist (new fork) + // Session file path should be set, but file is created lazily after first assistant message expect(session.sessionFile).not.toBeNull(); - expect(existsSync(session.sessionFile!)).toBe(true); + expect(existsSync(session.sessionFile!)).toBe(false); }); it("should support in-memory forking in --no-session mode", async () => {