move pi-mono into companion-cloud as apps/companion-os

- Copy all pi-mono source into apps/companion-os/
- Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases
- Update deploy-staging.yml to build pi from source (bun compile) before Docker build
- Add apps/companion-os/** to path triggers
- No more cross-repo dispatch needed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Harivansh Rathi 2026-03-07 09:22:50 -08:00
commit 0250f72976
579 changed files with 206942 additions and 0 deletions

View file

@@ -0,0 +1,339 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import type { Api, Context, Model, StreamOptions } from "../src/types.js";
// Loosened option bag: lets tests pass provider-specific extras (thinking
// budgets, Azure deployment names, OAuth apiKey, ...) on top of StreamOptions.
type StreamOptionsWithExtras = StreamOptions & Record<string, unknown>;
import {
hasAzureOpenAICredentials,
resolveAzureDeploymentName,
} from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests).
// Top-level await: vitest loads this file as an ES module, so both lookups
// finish before any describe/it below is registered.
const [geminiCliToken, openaiCodexToken] = await Promise.all([
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("openai-codex"),
]);
/**
 * Shared scenario: start a stream, abort it mid-generation, then verify the
 * aborted partial message can stay in context and the conversation resumes.
 *
 * Fix: the original loop used an `abortFired` flag and `return`ed on the
 * first event received after aborting, which skipped EVERY assertion below
 * (stopReason, content, follow-up request) whenever the provider delivered
 * buffered events post-abort — the test passed vacuously. We now `break` out
 * of the loop immediately after aborting so the assertions always run.
 */
async function testAbortSignal<TApi extends Api>(
  llm: Model<TApi>,
  options: StreamOptionsWithExtras = {},
) {
  const context: Context = {
    messages: [
      {
        role: "user",
        content:
          "What is 15 + 27? Think step by step. Then list 50 first names.",
        timestamp: Date.now(),
      },
    ],
    systemPrompt: "You are a helpful assistant.",
  };
  let text = "";
  const controller = new AbortController();
  const response = await stream(llm, context, {
    ...options,
    signal: controller.signal,
  });
  for await (const event of response) {
    if (event.type === "text_delta" || event.type === "thinking_delta") {
      text += event.delta;
    }
    // Abort once enough output arrived to prove generation actually started.
    if (text.length >= 50) {
      controller.abort();
      break;
    }
  }
  const msg = await response.result();
  // The aborted message must be flagged as such and keep the partial content.
  expect(msg.stopReason).toBe("aborted");
  expect(msg.content.length).toBeGreaterThan(0);
  // Continue the conversation with the aborted assistant turn in history.
  context.messages.push(msg);
  context.messages.push({
    role: "user",
    content: "Please continue, but only generate 5 names.",
    timestamp: Date.now(),
  });
  const followUp = await complete(llm, context, options);
  expect(followUp.stopReason).toBe("stop");
  expect(followUp.content.length).toBeGreaterThan(0);
}
/**
 * Shared scenario: a request whose signal is already aborted before the call
 * must resolve with stopReason "aborted" rather than throwing.
 */
async function testImmediateAbort<TApi extends Api>(
  llm: Model<TApi>,
  options: StreamOptionsWithExtras = {},
) {
  const preAborted = new AbortController();
  preAborted.abort();
  const ctx: Context = {
    messages: [{ role: "user", content: "Hello", timestamp: Date.now() }],
  };
  // Spread options first so our already-aborted signal always wins.
  const result = await complete(llm, ctx, {
    ...options,
    signal: preAborted.signal,
  });
  expect(result.stopReason).toBe("aborted");
}
/**
 * Shared scenario: abort before any content arrives, keep the (empty) aborted
 * assistant message in history, then confirm a fresh user turn still works.
 */
async function testAbortThenNewMessage<TApi extends Api>(
  llm: Model<TApi>,
  options: StreamOptionsWithExtras = {},
) {
  // Request #1: the signal is aborted up front, so nothing is generated.
  const preAborted = new AbortController();
  preAborted.abort();
  const convo: Context = {
    messages: [
      { role: "user", content: "Hello, how are you?", timestamp: Date.now() },
    ],
  };
  const aborted = await complete(llm, convo, {
    ...options,
    signal: preAborted.signal,
  });
  expect(aborted.stopReason).toBe("aborted");
  // Nothing arrived before the abort, so the message carries no content.
  expect(aborted.content.length).toBe(0);
  // Mirror the real coding agent: the aborted assistant turn stays in history.
  convo.messages.push(aborted);
  // Request #2: a brand-new user message on top of the aborted turn.
  convo.messages.push({
    role: "user",
    content: "What is 2 + 2?",
    timestamp: Date.now(),
  });
  const followUp = await complete(llm, convo, options);
  expect(followUp.stopReason).toBe("stop");
  expect(followUp.content.length).toBeGreaterThan(0);
}
// One suite per provider; each is skipped unless its credentials are present
// in the environment, and every test retries up to 3 times to absorb
// transient provider errors.
describe("AI Providers Abort Tests", () => {
  describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
    const llm = getModel("google", "gemini-2.5-flash");
    it("should abort mid-stream", { retry: 3 }, async () => {
      await testAbortSignal(llm, { thinking: { enabled: true } });
    });
    it("should handle immediate abort", { retry: 3 }, async () => {
      await testImmediateAbort(llm, { thinking: { enabled: true } });
    });
  });
  describe.skipIf(!process.env.OPENAI_API_KEY)(
    "OpenAI Completions Provider Abort",
    () => {
      // Drop `compat` and pin `api` so this suite exercises the completions
      // endpoint for this model rather than its default API.
      const { compat: _compat, ...baseModel } = getModel(
        "openai",
        "gpt-4o-mini",
      )!;
      void _compat;
      const llm: Model<"openai-completions"> = {
        ...baseModel,
        api: "openai-completions",
      };
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  describe.skipIf(!process.env.OPENAI_API_KEY)(
    "OpenAI Responses Provider Abort",
    () => {
      const llm = getModel("openai", "gpt-5-mini");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  describe.skipIf(!hasAzureOpenAICredentials())(
    "Azure OpenAI Responses Provider Abort",
    () => {
      const llm = getModel("azure-openai-responses", "gpt-4o-mini");
      // Azure may require a per-model deployment name, resolved from env.
      const azureDeploymentName = resolveAzureDeploymentName(llm.id);
      const azureOptions = azureDeploymentName ? { azureDeploymentName } : {};
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm, azureOptions);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm, azureOptions);
      });
    },
  );
  describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)(
    "Anthropic Provider Abort",
    () => {
      const llm = getModel("anthropic", "claude-opus-4-1-20250805");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm, {
          thinkingEnabled: true,
          thinkingBudgetTokens: 2048,
        });
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm, {
          thinkingEnabled: true,
          thinkingBudgetTokens: 2048,
        });
      });
    },
  );
  describe.skipIf(!process.env.MISTRAL_API_KEY)(
    "Mistral Provider Abort",
    () => {
      const llm = getModel("mistral", "devstral-medium-latest");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  describe.skipIf(!process.env.MINIMAX_API_KEY)(
    "MiniMax Provider Abort",
    () => {
      const llm = getModel("minimax", "MiniMax-M2.1");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  describe.skipIf(!process.env.KIMI_API_KEY)(
    "Kimi For Coding Provider Abort",
    () => {
      const llm = getModel("kimi-coding", "kimi-k2-thinking");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
    "Vercel AI Gateway Provider Abort",
    () => {
      const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm);
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
    },
  );
  // Google Gemini CLI / Antigravity share the same provider, so one test covers both
  describe("Google Gemini CLI Provider Abort", () => {
    // skipIf at the it level: the token was resolved via top-level await.
    it.skipIf(!geminiCliToken)(
      "should abort mid-stream",
      { retry: 3 },
      async () => {
        const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
        await testAbortSignal(llm, { apiKey: geminiCliToken });
      },
    );
    it.skipIf(!geminiCliToken)(
      "should handle immediate abort",
      { retry: 3 },
      async () => {
        const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
        await testImmediateAbort(llm, { apiKey: geminiCliToken });
      },
    );
  });
  describe("OpenAI Codex Provider Abort", () => {
    it.skipIf(!openaiCodexToken)(
      "should abort mid-stream",
      { retry: 3 },
      async () => {
        const llm = getModel("openai-codex", "gpt-5.2-codex");
        await testAbortSignal(llm, { apiKey: openaiCodexToken });
      },
    );
    it.skipIf(!openaiCodexToken)(
      "should handle immediate abort",
      { retry: 3 },
      async () => {
        const llm = getModel("openai-codex", "gpt-5.2-codex");
        await testImmediateAbort(llm, { apiKey: openaiCodexToken });
      },
    );
  });
  describe.skipIf(!hasBedrockCredentials())(
    "Amazon Bedrock Provider Abort",
    () => {
      const llm = getModel(
        "amazon-bedrock",
        "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
      );
      it("should abort mid-stream", { retry: 3 }, async () => {
        await testAbortSignal(llm, { reasoning: "medium" });
      });
      it("should handle immediate abort", { retry: 3 }, async () => {
        await testImmediateAbort(llm);
      });
      // Bedrock additionally verifies that an aborted (empty) assistant turn
      // left in history does not break the next request.
      it("should handle abort then new message", { retry: 3 }, async () => {
        await testAbortThenNewMessage(llm);
      });
    },
  );
});

View file

@@ -0,0 +1,217 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Context, Tool } from "../src/types.js";
import { resolveApiKey } from "./oauth.js";
// Resolved at module load (top-level await); the whole suite below is
// skipped when no Anthropic OAuth token is available.
const oauthToken = await resolveApiKey("anthropic");
/**
* Tests for Anthropic OAuth tool name normalization.
*
* When using Claude Code OAuth, tool names must match CC's canonical casing.
* The normalization should:
* 1. Convert tool names that match CC tools (case-insensitive) to CC casing on outbound
* 2. Convert tool names back to the original casing on inbound
*
* This is a simple case-insensitive lookup, NOT a mapping of different names.
* e.g., "todowrite" -> "TodoWrite" -> "todowrite" (round-trip works)
*
* The old `find -> Glob` mapping was WRONG because:
* - Outbound: "find" -> "Glob"
* - Inbound: "Glob" -> ??? (no tool named "glob" in context.tools, only "find")
* - Result: tool call has name "Glob" but no tool exists with that name
*/
describe.skipIf(!oauthToken)("Anthropic OAuth tool name normalization", () => {
  const model = getModel("anthropic", "claude-sonnet-4-20250514");
  it("should normalize user-defined tool matching CC name (todowrite -> TodoWrite -> todowrite)", async () => {
    // User defines a tool named "todowrite" (lowercase)
    // CC has "TodoWrite" - this should round-trip correctly
    const todoTool: Tool = {
      name: "todowrite",
      description: "Write a todo item",
      parameters: Type.Object({
        task: Type.String({ description: "The task to add" }),
      }),
    };
    const context: Context = {
      systemPrompt:
        "You are a helpful assistant. Use the todowrite tool when asked to add todos.",
      messages: [
        {
          role: "user",
          content: "Add a todo: buy milk. Use the todowrite tool.",
          timestamp: Date.now(),
        },
      ],
      tools: [todoTool],
    };
    const s = stream(model, context, { apiKey: oauthToken });
    // Drain the stream and capture the inbound tool name from the final
    // toolcall_end event — this is the name after inbound de-normalization.
    let toolCallName: string | undefined;
    for await (const event of s) {
      if (event.type === "toolcall_end") {
        const toolCall = event.partial.content[event.contentIndex];
        if (toolCall.type === "toolCall") {
          toolCallName = toolCall.name;
        }
      }
    }
    const response = await s.result();
    expect(response.stopReason, `Error: ${response.errorMessage}`).toBe(
      "toolUse",
    );
    // The tool call should come back with the ORIGINAL name "todowrite", not "TodoWrite"
    expect(toolCallName).toBe("todowrite");
  });
  it("should handle pi's built-in tools (read, write, edit, bash)", async () => {
    // Pi's tools use lowercase names, CC uses PascalCase
    const readTool: Tool = {
      name: "read",
      description: "Read a file",
      parameters: Type.Object({
        path: Type.String({ description: "File path" }),
      }),
    };
    const context: Context = {
      systemPrompt:
        "You are a helpful assistant. Use the read tool to read files.",
      messages: [
        {
          role: "user",
          content: "Read the file /tmp/test.txt using the read tool.",
          timestamp: Date.now(),
        },
      ],
      tools: [readTool],
    };
    const s = stream(model, context, { apiKey: oauthToken });
    let toolCallName: string | undefined;
    for await (const event of s) {
      if (event.type === "toolcall_end") {
        const toolCall = event.partial.content[event.contentIndex];
        if (toolCall.type === "toolCall") {
          toolCallName = toolCall.name;
        }
      }
    }
    const response = await s.result();
    expect(response.stopReason, `Error: ${response.errorMessage}`).toBe(
      "toolUse",
    );
    // The tool call should come back with the ORIGINAL name "read", not "Read"
    expect(toolCallName).toBe("read");
  });
  it("should NOT map find to Glob - find is not a CC tool name", async () => {
    // Pi has a "find" tool, CC has "Glob" - these are DIFFERENT tools
    // The old code incorrectly mapped find -> Glob, which broke the round-trip
    // because there's no tool named "glob" in context.tools
    const findTool: Tool = {
      name: "find",
      description: "Find files by pattern",
      parameters: Type.Object({
        pattern: Type.String({ description: "Glob pattern" }),
      }),
    };
    const context: Context = {
      systemPrompt:
        "You are a helpful assistant. Use the find tool to search for files.",
      messages: [
        {
          role: "user",
          content: "Find all .ts files using the find tool.",
          timestamp: Date.now(),
        },
      ],
      tools: [findTool],
    };
    const s = stream(model, context, { apiKey: oauthToken });
    let toolCallName: string | undefined;
    for await (const event of s) {
      if (event.type === "toolcall_end") {
        const toolCall = event.partial.content[event.contentIndex];
        if (toolCall.type === "toolCall") {
          toolCallName = toolCall.name;
        }
      }
    }
    const response = await s.result();
    expect(response.stopReason, `Error: ${response.errorMessage}`).toBe(
      "toolUse",
    );
    // With the BROKEN find -> Glob mapping:
    // - Sent as "Glob" to Anthropic
    // - Received back as "Glob"
    // - fromClaudeCodeName("Glob", tools) looks for tool.name.toLowerCase() === "glob"
    // - No match (tool is named "find"), returns "Glob"
    // - Test fails: toolCallName is "Glob" instead of "find"
    //
    // With the CORRECT implementation (no find->Glob mapping):
    // - Sent as "find" to Anthropic (no CC tool named "Find")
    // - Received back as "find"
    // - Test passes: toolCallName is "find"
    expect(toolCallName).toBe("find");
  });
  it("should handle custom tools that don't match any CC tool names", async () => {
    // A completely custom tool should pass through unchanged
    const customTool: Tool = {
      name: "my_custom_tool",
      description: "A custom tool",
      parameters: Type.Object({
        input: Type.String({ description: "Input value" }),
      }),
    };
    const context: Context = {
      systemPrompt:
        "You are a helpful assistant. Use my_custom_tool when asked.",
      messages: [
        {
          role: "user",
          content: "Use my_custom_tool with input 'hello'.",
          timestamp: Date.now(),
        },
      ],
      tools: [customTool],
    };
    const s = stream(model, context, { apiKey: oauthToken });
    let toolCallName: string | undefined;
    for await (const event of s) {
      if (event.type === "toolcall_end") {
        const toolCall = event.partial.content[event.contentIndex];
        if (toolCall.type === "toolCall") {
          toolCallName = toolCall.name;
        }
      }
    }
    const response = await s.result();
    expect(response.stopReason, `Error: ${response.errorMessage}`).toBe(
      "toolUse",
    );
    // Custom tool names should pass through unchanged
    expect(toolCallName).toBe("my_custom_tool");
  });
});

View file

@@ -0,0 +1,34 @@
/**
* Utility functions for Azure OpenAI tests
*/
/**
 * Parse AZURE_OPENAI_DEPLOYMENT_NAME_MAP-style values of the form
 * "modelId=deploymentName,modelId2=deploymentName2" into a lookup map.
 * Blank entries and entries missing either side of the "=" are skipped.
 */
function parseDeploymentNameMap(
  value: string | undefined,
): Map<string, string> {
  const result = new Map<string, string>();
  if (!value) return result;
  for (const rawEntry of value.split(",")) {
    const entry = rawEntry.trim();
    if (!entry) continue;
    // Destructuring the first two fields mirrors split("=", 2): anything
    // after a second "=" is ignored.
    const [id, deployment] = entry.split("=");
    if (!id || !deployment) continue;
    result.set(id.trim(), deployment.trim());
  }
  return result;
}
/**
 * True when both an Azure OpenAI API key and an endpoint (an explicit base
 * URL, or a resource name one can be derived from) are configured.
 */
export function hasAzureOpenAICredentials(): boolean {
  const env = process.env;
  if (!env.AZURE_OPENAI_API_KEY) return false;
  return Boolean(env.AZURE_OPENAI_BASE_URL || env.AZURE_OPENAI_RESOURCE_NAME);
}
/**
 * Look up the Azure deployment name for a model id from the
 * AZURE_OPENAI_DEPLOYMENT_NAME_MAP environment variable, if any.
 */
export function resolveAzureDeploymentName(
  modelId: string,
): string | undefined {
  const raw = process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP;
  return raw ? parseDeploymentNameMap(raw).get(modelId) : undefined;
}

View file

@@ -0,0 +1,72 @@
/**
* A test suite to ensure all configured Amazon Bedrock models are usable.
*
* This is here to make sure we got correct model identifiers from models.dev and other sources.
* Because Amazon Bedrock requires cross-region inference in some models,
* plain model identifiers are not always usable and it requires tweaking of model identifiers to use cross-region inference.
* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system for more details.
*
* This test suite is not enabled by default unless AWS credentials and `BEDROCK_EXTENSIVE_MODEL_TEST` environment variables are set.
* This test suite takes ~2 minutes to run. Because not all models are available in all regions,
* it's recommended to use `us-west-2` region for best coverage for running this test suite.
*
* You can run this test suite with:
* ```bash
* $ AWS_REGION=us-west-2 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=... npm test -- ./test/bedrock-models.test.ts
* ```
*/
import { describe, expect, it } from "vitest";
import { getModels } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
describe("Amazon Bedrock Models", () => {
  const models = getModels("amazon-bedrock");
  it("should get all available Bedrock models", () => {
    expect(models.length).toBeGreaterThan(0);
    console.log(`Found ${models.length} Bedrock models`);
  });
  // The per-model round-trips only run when credentials are configured AND
  // the extensive-test flag is set (they hit the live Bedrock API).
  const runExtensive =
    hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST;
  if (runExtensive) {
    models.forEach((model) => {
      it(
        `should make a simple request with ${model.id}`,
        { timeout: 10_000 },
        async () => {
          const ctx: Context = {
            systemPrompt: "You are a helpful assistant. Be extremely concise.",
            messages: [
              {
                role: "user",
                content: "Reply with exactly: 'OK'",
                timestamp: Date.now(),
              },
            ],
          };
          const reply = await complete(model, ctx);
          expect(reply.role).toBe("assistant");
          expect(reply.content).toBeTruthy();
          expect(reply.content.length).toBeGreaterThan(0);
          // Input may be served from cache, so cacheRead counts as input too.
          expect(reply.usage.input + reply.usage.cacheRead).toBeGreaterThan(0);
          expect(reply.usage.output).toBeGreaterThan(0);
          expect(reply.errorMessage).toBeFalsy();
          const textContent = reply.content
            .filter((b) => b.type === "text")
            .map((b) => (b.type === "text" ? b.text : ""))
            .join("")
            .trim();
          expect(textContent).toBeTruthy();
          console.log(`${model.id}: ${textContent.substring(0, 100)}`);
        },
      );
    });
  }
});

View file

@@ -0,0 +1,18 @@
/**
* Utility functions for Amazon Bedrock tests
*/
/**
* Check if any valid AWS credentials are configured for Bedrock.
* Returns true if any of the following are set:
* - AWS_PROFILE (named profile from ~/.aws/credentials)
* - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
* - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
*/
/**
 * Check whether any valid AWS credentials are configured for Bedrock.
 * Any one of these is sufficient:
 * - AWS_PROFILE (named profile from ~/.aws/credentials)
 * - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
 * - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
 */
export function hasBedrockCredentials(): boolean {
  const env = process.env;
  if (env.AWS_PROFILE) return true;
  if (env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY) return true;
  return Boolean(env.AWS_BEARER_TOKEN_BEDROCK);
}

View file

@@ -0,0 +1,352 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Context } from "../src/types.js";
// End-to-end checks that the PI_CACHE_RETENTION env var (and the explicit
// cacheRetention stream option) shape the provider request payloads —
// payloads are observed via the onPayload hook rather than mocking HTTP.
describe("Cache Retention (PI_CACHE_RETENTION)", () => {
  // Snapshot the developer's value at module load so afterEach can restore
  // exactly what was there (including "unset").
  const originalEnv = process.env.PI_CACHE_RETENTION;
  beforeEach(() => {
    delete process.env.PI_CACHE_RETENTION;
  });
  afterEach(() => {
    if (originalEnv !== undefined) {
      process.env.PI_CACHE_RETENTION = originalEnv;
    } else {
      delete process.env.PI_CACHE_RETENTION;
    }
  });
  // Minimal one-turn conversation reused by every test below.
  const context: Context = {
    systemPrompt: "You are a helpful assistant.",
    messages: [{ role: "user", content: "Hello", timestamp: Date.now() }],
  };
  describe("Anthropic Provider", () => {
    it.skipIf(!process.env.ANTHROPIC_API_KEY)(
      "should use default cache TTL (no ttl field) when PI_CACHE_RETENTION is not set",
      async () => {
        const model = getModel("anthropic", "claude-3-5-haiku-20241022");
        let capturedPayload: any = null;
        const s = stream(model, context, {
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // Consume the stream to trigger the request
        for await (const _ of s) {
          // Just consume
        }
        expect(capturedPayload).not.toBeNull();
        // System prompt should have cache_control without ttl
        expect(capturedPayload.system).toBeDefined();
        expect(capturedPayload.system[0].cache_control).toEqual({
          type: "ephemeral",
        });
      },
    );
    it.skipIf(!process.env.ANTHROPIC_API_KEY)(
      "should use 1h cache TTL when PI_CACHE_RETENTION=long",
      async () => {
        // Safe to mutate: beforeEach cleared it, afterEach restores it.
        process.env.PI_CACHE_RETENTION = "long";
        const model = getModel("anthropic", "claude-3-5-haiku-20241022");
        let capturedPayload: any = null;
        const s = stream(model, context, {
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // Consume the stream to trigger the request
        for await (const _ of s) {
          // Just consume
        }
        expect(capturedPayload).not.toBeNull();
        // System prompt should have cache_control with ttl: "1h"
        expect(capturedPayload.system).toBeDefined();
        expect(capturedPayload.system[0].cache_control).toEqual({
          type: "ephemeral",
          ttl: "1h",
        });
      },
    );
    it("should not add ttl when baseUrl is not api.anthropic.com", async () => {
      process.env.PI_CACHE_RETENTION = "long";
      // Create a model with a different baseUrl (simulating a proxy)
      const baseModel = getModel("anthropic", "claude-3-5-haiku-20241022");
      const proxyModel = {
        ...baseModel,
        baseUrl: "https://my-proxy.example.com/v1",
      };
      let capturedPayload: any = null;
      // We can't actually make the request (no proxy), but we can verify the payload
      // by using a mock or checking the logic directly
      // For this test, we'll import the helper directly
      // Since we can't easily test this without mocking, we'll skip the actual API call
      // and just verify the helper logic works correctly
      const { streamAnthropic } = await import("../src/providers/anthropic.js");
      try {
        const s = streamAnthropic(proxyModel, context, {
          apiKey: "fake-key",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // This will fail since we're using a fake key and fake proxy, but the payload should be captured
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      // The payload should have been captured before the error
      if (capturedPayload) {
        // System prompt should have cache_control WITHOUT ttl (proxy URL)
        expect(capturedPayload.system[0].cache_control).toEqual({
          type: "ephemeral",
        });
      }
    });
    it("should omit cache_control when cacheRetention is none", async () => {
      const baseModel = getModel("anthropic", "claude-3-5-haiku-20241022");
      let capturedPayload: any = null;
      const { streamAnthropic } = await import("../src/providers/anthropic.js");
      try {
        const s = streamAnthropic(baseModel, context, {
          apiKey: "fake-key",
          cacheRetention: "none",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      expect(capturedPayload).not.toBeNull();
      expect(capturedPayload.system[0].cache_control).toBeUndefined();
    });
    it("should add cache_control to string user messages", async () => {
      const baseModel = getModel("anthropic", "claude-3-5-haiku-20241022");
      let capturedPayload: any = null;
      const { streamAnthropic } = await import("../src/providers/anthropic.js");
      try {
        const s = streamAnthropic(baseModel, context, {
          apiKey: "fake-key",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      expect(capturedPayload).not.toBeNull();
      // The string content must have been converted to a block array so the
      // final block can carry the cache_control marker.
      const lastMessage =
        capturedPayload.messages[capturedPayload.messages.length - 1];
      expect(Array.isArray(lastMessage.content)).toBe(true);
      const lastBlock = lastMessage.content[lastMessage.content.length - 1];
      expect(lastBlock.cache_control).toEqual({ type: "ephemeral" });
    });
    it("should set 1h cache TTL when cacheRetention is long", async () => {
      const baseModel = getModel("anthropic", "claude-3-5-haiku-20241022");
      let capturedPayload: any = null;
      const { streamAnthropic } = await import("../src/providers/anthropic.js");
      try {
        const s = streamAnthropic(baseModel, context, {
          apiKey: "fake-key",
          cacheRetention: "long",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      expect(capturedPayload).not.toBeNull();
      expect(capturedPayload.system[0].cache_control).toEqual({
        type: "ephemeral",
        ttl: "1h",
      });
    });
  });
  describe("OpenAI Responses Provider", () => {
    it.skipIf(!process.env.OPENAI_API_KEY)(
      "should not set prompt_cache_retention when PI_CACHE_RETENTION is not set",
      async () => {
        const model = getModel("openai", "gpt-4o-mini");
        let capturedPayload: any = null;
        const s = stream(model, context, {
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // Consume the stream to trigger the request
        for await (const _ of s) {
          // Just consume
        }
        expect(capturedPayload).not.toBeNull();
        expect(capturedPayload.prompt_cache_retention).toBeUndefined();
      },
    );
    it.skipIf(!process.env.OPENAI_API_KEY)(
      "should set prompt_cache_retention to 24h when PI_CACHE_RETENTION=long",
      async () => {
        process.env.PI_CACHE_RETENTION = "long";
        const model = getModel("openai", "gpt-4o-mini");
        let capturedPayload: any = null;
        const s = stream(model, context, {
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // Consume the stream to trigger the request
        for await (const _ of s) {
          // Just consume
        }
        expect(capturedPayload).not.toBeNull();
        expect(capturedPayload.prompt_cache_retention).toBe("24h");
      },
    );
    it("should not set prompt_cache_retention when baseUrl is not api.openai.com", async () => {
      process.env.PI_CACHE_RETENTION = "long";
      // Create a model with a different baseUrl (simulating a proxy)
      const baseModel = getModel("openai", "gpt-4o-mini");
      const proxyModel = {
        ...baseModel,
        baseUrl: "https://my-proxy.example.com/v1",
      };
      let capturedPayload: any = null;
      const { streamOpenAIResponses } =
        await import("../src/providers/openai-responses.js");
      try {
        const s = streamOpenAIResponses(proxyModel, context, {
          apiKey: "fake-key",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // This will fail since we're using a fake key and fake proxy, but the payload should be captured
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      // The payload should have been captured before the error
      if (capturedPayload) {
        expect(capturedPayload.prompt_cache_retention).toBeUndefined();
      }
    });
    it("should omit prompt_cache_key when cacheRetention is none", async () => {
      const model = getModel("openai", "gpt-4o-mini");
      let capturedPayload: any = null;
      const { streamOpenAIResponses } =
        await import("../src/providers/openai-responses.js");
      try {
        const s = streamOpenAIResponses(model, context, {
          apiKey: "fake-key",
          cacheRetention: "none",
          sessionId: "session-1",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      expect(capturedPayload).not.toBeNull();
      // Even with a sessionId supplied, "none" must suppress both fields.
      expect(capturedPayload.prompt_cache_key).toBeUndefined();
      expect(capturedPayload.prompt_cache_retention).toBeUndefined();
    });
    it("should set prompt_cache_retention when cacheRetention is long", async () => {
      const model = getModel("openai", "gpt-4o-mini");
      let capturedPayload: any = null;
      const { streamOpenAIResponses } =
        await import("../src/providers/openai-responses.js");
      try {
        const s = streamOpenAIResponses(model, context, {
          apiKey: "fake-key",
          cacheRetention: "long",
          sessionId: "session-2",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        for await (const event of s) {
          if (event.type === "error") break;
        }
      } catch {
        // Expected to fail
      }
      expect(capturedPayload).not.toBeNull();
      expect(capturedPayload.prompt_cache_key).toBe("session-2");
      expect(capturedPayload.prompt_cache_retention).toBe("24h");
    });
  });
});

View file

@@ -0,0 +1,864 @@
/**
* Test context overflow error handling across providers.
*
* Context overflow occurs when the input (prompt + history) exceeds
* the model's context window. This is different from output token limits.
*
* Expected behavior: All providers should return stopReason: "error"
* with an errorMessage that indicates the context was too large,
* OR (for z.ai) return successfully with usage.input > contextWindow.
*
* The isContextOverflow() function must return true for all providers.
*/
import type { ChildProcess } from "child_process";
import { execSync, spawn } from "child_process";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js";
import { hasAzureOpenAICredentials } from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
  resolveApiKey("github-copilot"),
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("google-antigravity"),
  resolveApiKey("openai-codex"),
]);
// Destructured in the same order the lookups were queued above.
const [githubCopilotToken, geminiCliToken, antigravityToken, openaiCodexToken] =
  oauthTokens;
// Lorem ipsum paragraph for realistic token estimation
const LOREM_IPSUM = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. `;
/**
 * Build a prompt guaranteed to blow past the model's context window.
 * Estimates ~4 chars/token (varied text estimates better than repeated
 * chars), overshoots the window by 10k tokens, and pads by 1.5x for safety.
 */
function generateOverflowContent(contextWindow: number): string {
  const tokensNeeded = contextWindow + 10000;
  const charsNeeded = tokensNeeded * 4 * 1.5;
  return LOREM_IPSUM.repeat(Math.ceil(charsNeeded / LOREM_IPSUM.length));
}
// Snapshot of a single provider/model overflow attempt, used both for
// assertions and for human-readable logging via logResult().
interface OverflowResult {
  provider: string;
  model: string;
  // The model's advertised context window (tokens) that the prompt exceeded.
  contextWindow: number;
  stopReason: string;
  errorMessage: string | undefined;
  usage: Usage;
  // True when the provider reported any input or cache-read token usage.
  hasUsageData: boolean;
  response: AssistantMessage;
}
/**
 * Send a prompt larger than the model's context window and collect everything
 * needed to assert on (and log) how the provider reported the overflow.
 */
async function testContextOverflow(
  model: Model<any>,
  apiKey: string,
): Promise<OverflowResult> {
  const prompt = generateOverflowContent(model.contextWindow);
  const ctx: Context = {
    systemPrompt: "You are a helpful assistant.",
    messages: [{ role: "user", content: prompt, timestamp: Date.now() }],
  };
  const response = await complete(model, ctx, { apiKey });
  return {
    provider: model.provider,
    model: model.id,
    contextWindow: model.contextWindow,
    stopReason: response.stopReason,
    errorMessage: response.errorMessage,
    usage: response.usage,
    // Some providers still report token usage on an overflow error.
    hasUsageData: response.usage.input > 0 || response.usage.cacheRead > 0,
    response,
  };
}
/** Pretty-print one overflow attempt for quick scanning of CI output. */
function logResult(result: OverflowResult) {
  console.log(`\n${result.provider} / ${result.model}:`);
  const fields: Array<[string, unknown]> = [
    ["contextWindow", result.contextWindow],
    ["stopReason", result.stopReason],
    ["errorMessage", result.errorMessage],
    ["usage", JSON.stringify(result.usage)],
    ["hasUsageData", result.hasUsageData],
  ];
  for (const [label, value] of fields) {
    console.log(`  ${label}: ${value}`);
  }
}
// =============================================================================
// Anthropic
// Expected pattern: "prompt is too long: X tokens > Y maximum"
// =============================================================================
// Every provider suite below follows the same recipe: send an oversized
// prompt via testContextOverflow(), log the outcome, then assert both the
// provider-specific error surface and that isContextOverflow() detects it.
describe("Context overflow error handling", () => {
  describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
    it("claude-3-5-haiku - should detect overflow via isContextOverflow", async () => {
      const model = getModel("anthropic", "claude-3-5-haiku-20241022");
      const result = await testContextOverflow(
        model,
        process.env.ANTHROPIC_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(/prompt is too long/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)(
    "Anthropic (OAuth)",
    () => {
      it("claude-sonnet-4 - should detect overflow via isContextOverflow", async () => {
        const model = getModel("anthropic", "claude-sonnet-4-20250514");
        const result = await testContextOverflow(
          model,
          process.env.ANTHROPIC_OAUTH_TOKEN!,
        );
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(result.errorMessage).toMatch(/prompt is too long/i);
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      }, 120000);
    },
  );
  // =============================================================================
  // GitHub Copilot (OAuth)
  // Tests both OpenAI and Anthropic models via Copilot
  // =============================================================================
  describe("GitHub Copilot (OAuth)", () => {
    // OpenAI model via Copilot
    it.skipIf(!githubCopilotToken)(
      "gpt-4o - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("github-copilot", "gpt-4o");
        const result = await testContextOverflow(model, githubCopilotToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(result.errorMessage).toMatch(/exceeds the limit of \d+/i);
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
    // Anthropic model via Copilot
    it.skipIf(!githubCopilotToken)(
      "claude-sonnet-4 - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("github-copilot", "claude-sonnet-4");
        const result = await testContextOverflow(model, githubCopilotToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        // Copilot may surface either its own limit message or the upstream
        // Anthropic one, depending on which backend rejects first.
        expect(result.errorMessage).toMatch(
          /exceeds the limit of \d+|input is too long/i,
        );
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
  });
  // =============================================================================
  // OpenAI
  // Expected pattern: "exceeds the context window"
  // =============================================================================
  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
    it("gpt-4o-mini - should detect overflow via isContextOverflow", async () => {
      // Copy the model and force the completions API flavor for this test.
      const model = { ...getModel("openai", "gpt-4o-mini") };
      model.api = "openai-completions" as any;
      const result = await testContextOverflow(
        model,
        process.env.OPENAI_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(/maximum context length/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
    it("gpt-4o - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openai", "gpt-4o");
      const result = await testContextOverflow(
        model,
        process.env.OPENAI_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(/exceeds the context window/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  describe.skipIf(!hasAzureOpenAICredentials())(
    "Azure OpenAI Responses",
    () => {
      it("gpt-4o-mini - should detect overflow via isContextOverflow", async () => {
        const model = getModel("azure-openai-responses", "gpt-4o-mini");
        const result = await testContextOverflow(
          model,
          process.env.AZURE_OPENAI_API_KEY!,
        );
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(result.errorMessage).toMatch(/context|maximum/i);
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      }, 120000);
    },
  );
  // =============================================================================
  // Google
  // Expected pattern: "input token count (X) exceeds the maximum"
  // =============================================================================
  describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
    it("gemini-2.0-flash - should detect overflow via isContextOverflow", async () => {
      const model = getModel("google", "gemini-2.0-flash");
      const result = await testContextOverflow(
        model,
        process.env.GEMINI_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /input token count.*exceeds the maximum/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Google Gemini CLI (OAuth)
  // Uses same API as Google, expects same error pattern
  // =============================================================================
  describe("Google Gemini CLI (OAuth)", () => {
    it.skipIf(!geminiCliToken)(
      "gemini-2.5-flash - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("google-gemini-cli", "gemini-2.5-flash");
        const result = await testContextOverflow(model, geminiCliToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(result.errorMessage).toMatch(
          /input token count.*exceeds the maximum/i,
        );
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
  });
  // =============================================================================
  // Google Antigravity (OAuth)
  // Tests both Gemini and Anthropic models via Antigravity
  // =============================================================================
  describe("Google Antigravity (OAuth)", () => {
    // Gemini model
    it.skipIf(!antigravityToken)(
      "gemini-3-flash - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("google-antigravity", "gemini-3-flash");
        const result = await testContextOverflow(model, antigravityToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(result.errorMessage).toMatch(
          /input token count.*exceeds the maximum/i,
        );
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
    // Anthropic model via Antigravity
    it.skipIf(!antigravityToken)(
      "claude-sonnet-4-5 - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("google-antigravity", "claude-sonnet-4-5");
        const result = await testContextOverflow(model, antigravityToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        // Anthropic models return "prompt is too long" pattern
        expect(result.errorMessage).toMatch(/prompt is too long/i);
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
  });
  // =============================================================================
  // OpenAI Codex (OAuth)
  // Uses ChatGPT Plus/Pro subscription via OAuth
  // =============================================================================
  describe("OpenAI Codex (OAuth)", () => {
    it.skipIf(!openaiCodexToken)(
      "gpt-5.2-codex - should detect overflow via isContextOverflow",
      async () => {
        const model = getModel("openai-codex", "gpt-5.2-codex");
        const result = await testContextOverflow(model, openaiCodexToken!);
        logResult(result);
        expect(result.stopReason).toBe("error");
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      },
      120000,
    );
  });
  // =============================================================================
  // Amazon Bedrock
  // Expected pattern: "Input is too long for requested model"
  // =============================================================================
  describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
    it("claude-sonnet-4-5 - should detect overflow via isContextOverflow", async () => {
      const model = getModel(
        "amazon-bedrock",
        "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
      );
      // Bedrock authenticates via the environment, so no API key is passed.
      const result = await testContextOverflow(model, "");
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // xAI
  // Expected pattern: "maximum prompt length is X but the request contains Y"
  // =============================================================================
  describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
    it("grok-3-fast - should detect overflow via isContextOverflow", async () => {
      const model = getModel("xai", "grok-3-fast");
      const result = await testContextOverflow(model, process.env.XAI_API_KEY!);
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(/maximum prompt length is \d+/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Groq
  // Expected pattern: "reduce the length of the messages"
  // =============================================================================
  describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
    it("llama-3.3-70b-versatile - should detect overflow via isContextOverflow", async () => {
      const model = getModel("groq", "llama-3.3-70b-versatile");
      const result = await testContextOverflow(
        model,
        process.env.GROQ_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(/reduce the length of the messages/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Cerebras
  // Expected: 400/413 status code with no body
  // =============================================================================
  describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
    it("qwen-3-235b - should detect overflow via isContextOverflow", async () => {
      const model = getModel("cerebras", "qwen-3-235b-a22b-instruct-2507");
      const result = await testContextOverflow(
        model,
        process.env.CEREBRAS_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      // Cerebras returns status code with no body (400, 413, or 429 for token rate limit)
      expect(result.errorMessage).toMatch(/4(00|13|29).*\(no body\)/i);
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Hugging Face
  // Uses OpenAI-compatible Inference Router
  // =============================================================================
  describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => {
    it("Kimi-K2.5 - should detect overflow via isContextOverflow", async () => {
      const model = getModel("huggingface", "moonshotai/Kimi-K2.5");
      const result = await testContextOverflow(model, process.env.HF_TOKEN!);
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // z.ai
  // Special case: Sometimes accepts overflow silently, sometimes rate limits
  // Detection via usage.input > contextWindow when successful
  // =============================================================================
  describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
    it("glm-4.5-flash - should detect overflow via isContextOverflow (silent overflow or rate limit)", async () => {
      const model = getModel("zai", "glm-4.5-flash");
      const result = await testContextOverflow(model, process.env.ZAI_API_KEY!);
      logResult(result);
      // z.ai behavior is inconsistent:
      // - Sometimes accepts overflow and returns successfully with usage.input > contextWindow
      // - Sometimes returns rate limit error
      // Either way, isContextOverflow should detect it (via usage check or we skip if rate limited)
      if (result.stopReason === "stop") {
        if (result.hasUsageData && result.usage.input > model.contextWindow) {
          expect(isContextOverflow(result.response, model.contextWindow)).toBe(
            true,
          );
        } else {
          console.log(
            "  z.ai returned stop without overflow usage data, skipping overflow detection",
          );
        }
      } else {
        // Rate limited or other error - just log and pass
        console.log(
          "  z.ai returned error (possibly rate limited), skipping overflow detection",
        );
      }
    }, 120000);
  });
  // =============================================================================
  // Mistral
  // =============================================================================
  describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => {
    it("devstral-medium-latest - should detect overflow via isContextOverflow", async () => {
      const model = getModel("mistral", "devstral-medium-latest");
      const result = await testContextOverflow(
        model,
        process.env.MISTRAL_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /too large for model with \d+ maximum context length/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // MiniMax
  // Expected pattern: TBD - need to test actual error message
  // =============================================================================
  describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
    it("MiniMax-M2.1 - should detect overflow via isContextOverflow", async () => {
      const model = getModel("minimax", "MiniMax-M2.1");
      const result = await testContextOverflow(
        model,
        process.env.MINIMAX_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Kimi For Coding
  // =============================================================================
  describe.skipIf(!process.env.KIMI_API_KEY)("Kimi For Coding", () => {
    it("kimi-k2-thinking - should detect overflow via isContextOverflow", async () => {
      const model = getModel("kimi-coding", "kimi-k2-thinking");
      const result = await testContextOverflow(
        model,
        process.env.KIMI_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Vercel AI Gateway - Unified API for multiple providers
  // =============================================================================
  describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
    it("google/gemini-2.5-flash via AI Gateway - should detect overflow via isContextOverflow", async () => {
      const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
      const result = await testContextOverflow(
        model,
        process.env.AI_GATEWAY_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // OpenRouter - Multiple backend providers
  // Expected pattern: "maximum context length is X tokens"
  // =============================================================================
  describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
    // Anthropic backend
    it("anthropic/claude-sonnet-4 via OpenRouter - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openrouter", "anthropic/claude-sonnet-4");
      const result = await testContextOverflow(
        model,
        process.env.OPENROUTER_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /maximum context length is \d+ tokens/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
    // DeepSeek backend
    it("deepseek/deepseek-v3.2 via OpenRouter - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openrouter", "deepseek/deepseek-v3.2");
      const result = await testContextOverflow(
        model,
        process.env.OPENROUTER_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /maximum context length is \d+ tokens/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
    // Mistral backend
    it("mistralai/mistral-large-2512 via OpenRouter - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openrouter", "mistralai/mistral-large-2512");
      const result = await testContextOverflow(
        model,
        process.env.OPENROUTER_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /maximum context length is \d+ tokens/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
    // Google backend
    it("google/gemini-2.5-flash via OpenRouter - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openrouter", "google/gemini-2.5-flash");
      const result = await testContextOverflow(
        model,
        process.env.OPENROUTER_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /maximum context length is \d+ tokens/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
    // Meta/Llama backend
    it("meta-llama/llama-4-maverick via OpenRouter - should detect overflow via isContextOverflow", async () => {
      const model = getModel("openrouter", "meta-llama/llama-4-maverick");
      const result = await testContextOverflow(
        model,
        process.env.OPENROUTER_API_KEY!,
      );
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(result.errorMessage).toMatch(
        /maximum context length is \d+ tokens/i,
      );
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // Ollama (local)
  // =============================================================================
  // Check if ollama is installed and local LLM tests are enabled
  let ollamaInstalled = false;
  if (!process.env.PI_NO_LOCAL_LLM) {
    try {
      execSync("which ollama", { stdio: "ignore" });
      ollamaInstalled = true;
    } catch {
      ollamaInstalled = false;
    }
  }
  describe.skipIf(!ollamaInstalled)("Ollama (local)", () => {
    let ollamaProcess: ChildProcess | null = null;
    let model: Model<"openai-completions">;
    beforeAll(async () => {
      // Check if model is available, if not pull it
      try {
        execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
      } catch {
        console.log("Pulling gpt-oss:20b model for Ollama overflow tests...");
        try {
          execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
        } catch (_e) {
          console.warn(
            "Failed to pull gpt-oss:20b model, tests will be skipped",
          );
          return;
        }
      }
      // Start ollama server
      ollamaProcess = spawn("ollama", ["serve"], {
        detached: false,
        stdio: "ignore",
      });
      // Wait for server to be ready
      // NOTE(review): this promise only ever resolves — if the server never
      // comes up, the poll loops until the 60s beforeAll timeout aborts it.
      await new Promise<void>((resolve) => {
        const checkServer = async () => {
          try {
            const response = await fetch("http://localhost:11434/api/tags");
            if (response.ok) {
              resolve();
            } else {
              setTimeout(checkServer, 500);
            }
          } catch {
            setTimeout(checkServer, 500);
          }
        };
        setTimeout(checkServer, 1000);
      });
      // Hand-built model descriptor pointing at the local OpenAI-compatible API.
      model = {
        id: "gpt-oss:20b",
        api: "openai-completions",
        provider: "ollama",
        baseUrl: "http://localhost:11434/v1",
        reasoning: true,
        input: ["text"],
        contextWindow: 128000,
        maxTokens: 16000,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        name: "Ollama GPT-OSS 20B",
      };
    }, 60000);
    afterAll(() => {
      // Tear down the locally spawned server.
      if (ollamaProcess) {
        ollamaProcess.kill("SIGTERM");
        ollamaProcess = null;
      }
    });
    it("gpt-oss:20b - should detect overflow via isContextOverflow (ollama silently truncates)", async () => {
      const result = await testContextOverflow(model, "ollama");
      logResult(result);
      // Ollama silently truncates input instead of erroring
      // It returns stopReason "stop" with truncated usage
      // We cannot detect overflow via error message, only via usage comparison
      if (result.stopReason === "stop" && result.hasUsageData) {
        // Ollama truncated - check if reported usage is less than what we sent
        // This is a "silent overflow" - we can detect it if we know expected input size
        console.log(
          "  Ollama silently truncated input to",
          result.usage.input,
          "tokens",
        );
        // For now, we accept this behavior - Ollama doesn't give us a way to detect overflow
      } else if (result.stopReason === "error") {
        expect(isContextOverflow(result.response, model.contextWindow)).toBe(
          true,
        );
      }
    }, 300000); // 5 min timeout for local model
  });
  // =============================================================================
  // LM Studio (local) - Skip if not running or local LLM tests disabled
  // =============================================================================
  let lmStudioRunning = false;
  if (!process.env.PI_NO_LOCAL_LLM) {
    try {
      execSync(
        "curl -s --max-time 1 http://localhost:1234/v1/models > /dev/null",
        { stdio: "ignore" },
      );
      lmStudioRunning = true;
    } catch {
      lmStudioRunning = false;
    }
  }
  describe.skipIf(!lmStudioRunning)("LM Studio (local)", () => {
    it("should detect overflow via isContextOverflow", async () => {
      const model: Model<"openai-completions"> = {
        id: "local-model",
        api: "openai-completions",
        provider: "lm-studio",
        baseUrl: "http://localhost:1234/v1",
        reasoning: false,
        input: ["text"],
        contextWindow: 8192,
        maxTokens: 2048,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        name: "LM Studio Local Model",
      };
      const result = await testContextOverflow(model, "lm-studio");
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
  // =============================================================================
  // llama.cpp server (local) - Skip if not running
  // =============================================================================
  let llamaCppRunning = false;
  try {
    execSync("curl -s --max-time 1 http://localhost:8081/health > /dev/null", {
      stdio: "ignore",
    });
    llamaCppRunning = true;
  } catch {
    llamaCppRunning = false;
  }
  describe.skipIf(!llamaCppRunning)("llama.cpp (local)", () => {
    it("should detect overflow via isContextOverflow", async () => {
      // Using small context (4096) to match server --ctx-size setting
      const model: Model<"openai-completions"> = {
        id: "local-model",
        api: "openai-completions",
        provider: "llama.cpp",
        baseUrl: "http://localhost:8081/v1",
        reasoning: false,
        input: ["text"],
        contextWindow: 4096,
        maxTokens: 2048,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        name: "llama.cpp Local Model",
      };
      const result = await testContextOverflow(model, "llama.cpp");
      logResult(result);
      expect(result.stopReason).toBe("error");
      expect(isContextOverflow(result.response, model.contextWindow)).toBe(
        true,
      );
    }, 120000);
  });
});

View file

@ -0,0 +1,568 @@
/**
* Cross-Provider Handoff Test
*
* Tests that contexts generated by one provider/model can be consumed by another.
* This catches issues like:
* - Tool call ID format incompatibilities (e.g., OpenAI Codex pipe characters)
* - Thinking block transformation issues
* - Message format incompatibilities
*
* Strategy:
* 1. beforeAll: For each provider/model, generate a "small context" (if not cached):
* - User message asking to use a tool
* - Assistant response with thinking + tool call
* - Tool result
* - Final assistant response
*
* 2. Test: For each target provider/model:
* - Concatenate ALL other contexts into one
* - Ask the model to "say hi"
* - If it fails, there's a compatibility issue
*
* Fixtures are generated fresh on each run.
*/
import { Type } from "@sinclair/typebox";
import { writeFileSync } from "fs";
import { beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { completeSimple, getEnvApiKey } from "../src/stream.js";
import type {
Api,
AssistantMessage,
Message,
Model,
Tool,
ToolResultMessage,
} from "../src/types.js";
import { hasAzureOpenAICredentials } from "./azure-utils.js";
import { resolveApiKey } from "./oauth.js";
// Simple tool for testing
// Schema: a single numeric argument the "tool" is asked to double.
const testToolSchema = Type.Object({
  value: Type.Number({ description: "A number to double" }),
});
// Minimal tool definition used to elicit authentic tool calls (and thus
// provider-specific tool call ID formats) from each model.
const testTool: Tool<typeof testToolSchema> = {
  name: "double_number",
  description: "Doubles a number and returns the result",
  parameters: testToolSchema,
};
// Provider/model pairs to test
// One entry per provider/model combination in the handoff matrix.
interface ProviderModelPair {
  provider: string; // provider id understood by getModel()
  model: string; // model id within that provider
  label: string; // unique label used in logs and failure-dump filenames
  apiOverride?: Api; // force a specific API flavor (e.g. "openai-completions")
}
// Full matrix of provider/model pairs whose generated contexts are
// cross-checked against each other. Pairs whose provider has no credentials
// at runtime are skipped during fixture generation, not failed.
const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [
  // Anthropic
  {
    provider: "anthropic",
    model: "claude-sonnet-4-5",
    label: "anthropic-claude-sonnet-4-5",
  },
  // Google
  {
    provider: "google",
    model: "gemini-3-flash-preview",
    label: "google-gemini-3-flash-preview",
  },
  // OpenAI
  {
    provider: "openai",
    model: "gpt-4o-mini",
    label: "openai-completions-gpt-4o-mini",
    apiOverride: "openai-completions",
  },
  {
    provider: "openai",
    model: "gpt-5-mini",
    label: "openai-responses-gpt-5-mini",
  },
  {
    provider: "azure-openai-responses",
    model: "gpt-4o-mini",
    label: "azure-openai-responses-gpt-4o-mini",
  },
  // OpenAI Codex
  {
    provider: "openai-codex",
    model: "gpt-5.2-codex",
    label: "openai-codex-gpt-5.2-codex",
  },
  // Google Antigravity
  {
    provider: "google-antigravity",
    model: "gemini-3-flash",
    label: "antigravity-gemini-3-flash",
  },
  {
    provider: "google-antigravity",
    model: "claude-sonnet-4-5",
    label: "antigravity-claude-sonnet-4-5",
  },
  // GitHub Copilot
  {
    provider: "github-copilot",
    model: "claude-sonnet-4.5",
    label: "copilot-claude-sonnet-4.5",
  },
  {
    provider: "github-copilot",
    model: "gpt-5.1-codex",
    label: "copilot-gpt-5.1-codex",
  },
  {
    provider: "github-copilot",
    model: "gemini-3-flash-preview",
    label: "copilot-gemini-3-flash-preview",
  },
  {
    provider: "github-copilot",
    model: "grok-code-fast-1",
    label: "copilot-grok-code-fast-1",
  },
  // Amazon Bedrock
  {
    provider: "amazon-bedrock",
    model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
    label: "bedrock-claude-sonnet-4-5",
  },
  // xAI
  { provider: "xai", model: "grok-code-fast-1", label: "xai-grok-code-fast-1" },
  // Cerebras
  { provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" },
  // Groq
  {
    provider: "groq",
    model: "openai/gpt-oss-120b",
    label: "groq-gpt-oss-120b",
  },
  // Hugging Face
  {
    provider: "huggingface",
    model: "moonshotai/Kimi-K2.5",
    label: "huggingface-kimi-k2.5",
  },
  // Kimi For Coding
  {
    provider: "kimi-coding",
    model: "kimi-k2-thinking",
    label: "kimi-coding-k2-thinking",
  },
  // Mistral
  {
    provider: "mistral",
    model: "devstral-medium-latest",
    label: "mistral-devstral-medium",
  },
  // MiniMax
  { provider: "minimax", model: "MiniMax-M2.1", label: "minimax-m2.1" },
  // OpenCode Zen
  { provider: "opencode", model: "big-pickle", label: "zen-big-pickle" },
  {
    provider: "opencode",
    model: "claude-sonnet-4-5",
    label: "zen-claude-sonnet-4-5",
  },
  {
    provider: "opencode",
    model: "gemini-3-flash",
    label: "zen-gemini-3-flash",
  },
  { provider: "opencode", model: "glm-4.7-free", label: "zen-glm-4.7-free" },
  { provider: "opencode", model: "gpt-5.2-codex", label: "zen-gpt-5.2-codex" },
  {
    provider: "opencode",
    model: "minimax-m2.1-free",
    label: "zen-minimax-m2.1-free",
  },
  // OpenCode Go
  { provider: "opencode-go", model: "kimi-k2.5", label: "go-kimi-k2.5" },
  { provider: "opencode-go", model: "minimax-m2.5", label: "go-minimax-m2.5" },
];
// Cached context structure
// A generated conversation fixture (user -> assistant tool call -> tool
// result -> final assistant turn) tagged with its producing provider/model.
interface CachedContext {
  label: string; // matches ProviderModelPair.label
  provider: string; // provider that produced the messages
  model: string; // model that produced the messages
  api: Api; // API flavor used during generation
  messages: Message[]; // full conversation, including authentic tool call IDs
  generatedAt: string; // ISO timestamp of generation
}
/**
 * Resolve a credential for `provider`: OAuth token storage takes priority,
 * with environment variables as the fallback.
 */
async function getApiKey(provider: string): Promise<string | undefined> {
  return (await resolveApiKey(provider)) || getEnvApiKey(provider);
}
/**
 * Synchronous credential availability check for skipIf guards.
 * Consults env vars only (plus Azure's dedicated credential check) —
 * OAuth storage is resolved asynchronously elsewhere.
 */
function hasApiKey(provider: string): boolean {
  return provider === "azure-openai-responses"
    ? hasAzureOpenAICredentials()
    : Boolean(getEnvApiKey(provider));
}
/**
 * True when at least one pair in the test matrix has credentials available;
 * used to skip the entire suite when nothing could run.
 */
function hasAnyApiKey(): boolean {
  for (const { provider } of PROVIDER_MODEL_PAIRS) {
    if (hasApiKey(provider)) return true;
  }
  return false;
}
/**
 * Write a failed request's last payload and conversation to /tmp for offline
 * debugging. The filename embeds the pair label and a timestamp so repeated
 * failures do not overwrite each other.
 *
 * @param params.label    Label of the failing provider/model pair (plus phase).
 * @param params.error    Error message that triggered the dump.
 * @param params.payload  Last request payload captured via onPayload, if any.
 * @param params.messages Conversation that was being sent.
 */
function dumpFailurePayload(params: {
  label: string;
  error: string;
  payload?: unknown;
  messages: Message[];
}): void {
  const filename = `/tmp/pi-handoff-${params.label}-${Date.now()}.json`;
  const body = {
    label: params.label,
    error: params.error,
    payload: params.payload,
    messages: params.messages,
  };
  writeFileSync(filename, JSON.stringify(body, null, 2));
  // Fix: previously logged the literal text "$(unknown)" instead of
  // interpolating the actual file path.
  console.log(`Wrote failure payload to ${filename}`);
}
/**
 * Generate a context from a provider/model pair.
 * Makes a real API call to get authentic tool call IDs and thinking blocks.
 *
 * Flow: one request that should produce a tool call, then a follow-up with
 * the tool result to get a final assistant turn. On any failure the last
 * request payload is dumped via dumpFailurePayload and null is returned so
 * the pair is skipped rather than failing the suite.
 */
async function generateContext(
  pair: ProviderModelPair,
  apiKey: string,
): Promise<{ messages: Message[]; api: Api } | null> {
  // Cast getModel so unknown provider/model combinations yield undefined
  // instead of a type error.
  const baseModel = (
    getModel as (p: string, m: string) => Model<Api> | undefined
  )(pair.provider, pair.model);
  if (!baseModel) {
    console.log(`  Model not found: ${pair.provider}/${pair.model}`);
    return null;
  }
  // Apply the pair's API override (e.g. force openai-completions) if set.
  const model: Model<Api> = pair.apiOverride
    ? { ...baseModel, api: pair.apiOverride }
    : baseModel;
  const userMessage: Message = {
    role: "user",
    content: "Please double the number 21 using the double_number tool.",
    timestamp: Date.now(),
  };
  const supportsReasoning = model.reasoning === true;
  // Track the last outgoing payload so failures can be dumped for debugging.
  let lastPayload: unknown;
  let assistantResponse: AssistantMessage;
  try {
    assistantResponse = await completeSimple(
      model,
      {
        systemPrompt:
          "You are a helpful assistant. Use the provided tool to complete the task.",
        messages: [userMessage],
        tools: [testTool],
      },
      {
        apiKey,
        reasoning: supportsReasoning ? "high" : undefined,
        onPayload: (payload) => {
          lastPayload = payload;
        },
      },
    );
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    console.log(`  Initial request failed: ${msg}`);
    dumpFailurePayload({
      label: `${pair.label}-initial`,
      error: msg,
      payload: lastPayload,
      messages: [userMessage],
    });
    return null;
  }
  if (assistantResponse.stopReason === "error") {
    console.log(`  Initial request error: ${assistantResponse.errorMessage}`);
    dumpFailurePayload({
      label: `${pair.label}-initial`,
      error: assistantResponse.errorMessage || "Unknown error",
      payload: lastPayload,
      messages: [userMessage],
    });
    return null;
  }
  const toolCall = assistantResponse.content.find((c) => c.type === "toolCall");
  if (!toolCall || toolCall.type !== "toolCall") {
    // Model answered without calling the tool: return the short context
    // anyway (the caller requires >= 4 messages, so this pair gets skipped).
    console.log(
      `  No tool call in response (stopReason: ${assistantResponse.stopReason})`,
    );
    return {
      messages: [userMessage, assistantResponse],
      api: model.api,
    };
  }
  console.log(`  Tool call ID: ${toolCall.id}`);
  // Fabricate the tool's result ("42") and feed it back for the final turn.
  const toolResult: ToolResultMessage = {
    role: "toolResult",
    toolCallId: toolCall.id,
    toolName: toolCall.name,
    content: [{ type: "text", text: "42" }],
    isError: false,
    timestamp: Date.now(),
  };
  let finalResponse: AssistantMessage;
  const messagesForFinal = [userMessage, assistantResponse, toolResult];
  try {
    finalResponse = await completeSimple(
      model,
      {
        systemPrompt: "You are a helpful assistant.",
        messages: messagesForFinal,
        tools: [testTool],
      },
      {
        apiKey,
        reasoning: supportsReasoning ? "high" : undefined,
        onPayload: (payload) => {
          lastPayload = payload;
        },
      },
    );
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    console.log(`  Final request failed: ${msg}`);
    dumpFailurePayload({
      label: `${pair.label}-final`,
      error: msg,
      payload: lastPayload,
      messages: messagesForFinal,
    });
    return null;
  }
  if (finalResponse.stopReason === "error") {
    console.log(`  Final request error: ${finalResponse.errorMessage}`);
    dumpFailurePayload({
      label: `${pair.label}-final`,
      error: finalResponse.errorMessage || "Unknown error",
      payload: lastPayload,
      messages: messagesForFinal,
    });
    return null;
  }
  // Success: a complete 4-message conversation with authentic IDs/formats.
  return {
    messages: [userMessage, assistantResponse, toolResult, finalResponse],
    api: model.api,
  };
}
// Cross-provider handoff suite: each provider/model pair first produces a
// short tool-use transcript ("fixture"), then every pair is asked to continue
// a conversation stitched together from all OTHER pairs' transcripts. This
// exercises cross-API message conversion (tool calls/results, provider
// metadata) in every direction at once.
describe.skipIf(!hasAnyApiKey())("Cross-Provider Handoff", () => {
  // Fixture transcripts keyed by pair label; populated once in beforeAll.
  let contexts: Record<string, CachedContext>;
  // Pairs that successfully produced a fixture (auth present, requests OK).
  let availablePairs: ProviderModelPair[];
  beforeAll(async () => {
    contexts = {};
    availablePairs = [];
    console.log("\n=== Generating Fixtures ===\n");
    for (const pair of PROVIDER_MODEL_PAIRS) {
      const apiKey = await getApiKey(pair.provider);
      if (!apiKey) {
        console.log(`[${pair.label}] Skipping - no auth for ${pair.provider}`);
        continue;
      }
      console.log(`[${pair.label}] Generating fixture...`);
      const result = await generateContext(pair, apiKey);
      // A usable fixture is user -> assistant(toolCall) -> toolResult ->
      // assistant, i.e. at least 4 messages; shorter transcripts are dropped.
      if (!result || result.messages.length < 4) {
        console.log(`[${pair.label}] Failed to generate fixture, skipping`);
        continue;
      }
      contexts[pair.label] = {
        label: pair.label,
        provider: pair.provider,
        model: pair.model,
        api: result.api,
        messages: result.messages,
        generatedAt: new Date().toISOString(),
      };
      availablePairs.push(pair);
      console.log(
        `[${pair.label}] Generated ${result.messages.length} messages`,
      );
    }
    console.log(
      `\n=== ${availablePairs.length}/${PROVIDER_MODEL_PAIRS.length} contexts available ===\n`,
    );
  }, 300000); // 5 min: live round-trips for every configured pair
  it.skipIf(!hasAnyApiKey())(
    "should have at least 2 fixtures to test handoffs",
    () => {
      // With fewer than 2 fixtures there is nothing to hand off between.
      expect(Object.keys(contexts).length).toBeGreaterThanOrEqual(2);
    },
  );
  it.skipIf(!hasAnyApiKey())(
    "should handle cross-provider handoffs for each target",
    async () => {
      const contextLabels = Object.keys(contexts);
      if (contextLabels.length < 2) {
        console.log("Not enough fixtures for handoff test, skipping");
        return;
      }
      console.log("\n=== Testing Cross-Provider Handoffs ===\n");
      const results: { target: string; success: boolean; error?: string }[] =
        [];
      for (const targetPair of availablePairs) {
        const apiKey = await getApiKey(targetPair.provider);
        if (!apiKey) {
          console.log(`[Target: ${targetPair.label}] Skipping - no auth`);
          continue;
        }
        // Collect messages from ALL OTHER contexts
        const otherMessages: Message[] = [];
        for (const [label, ctx] of Object.entries(contexts)) {
          if (label === targetPair.label) continue;
          otherMessages.push(...ctx.messages);
        }
        if (otherMessages.length === 0) {
          console.log(
            `[Target: ${targetPair.label}] Skipping - no other contexts`,
          );
          continue;
        }
        const allMessages: Message[] = [
          ...otherMessages,
          {
            role: "user",
            content:
              "Great, thanks for all that help! Now just say 'Hello, handoff successful!' to confirm you received everything.",
            timestamp: Date.now(),
          },
        ];
        // getModel's overloads are keyed by literal provider ids; cast to a
        // generic signature because pair.provider/model are plain strings.
        const baseModel = (
          getModel as (p: string, m: string) => Model<Api> | undefined
        )(targetPair.provider, targetPair.model);
        if (!baseModel) {
          console.log(`[Target: ${targetPair.label}] Model not found`);
          continue;
        }
        // Some pairs route the model through a different API (apiOverride).
        const model: Model<Api> = targetPair.apiOverride
          ? { ...baseModel, api: targetPair.apiOverride }
          : baseModel;
        const supportsReasoning = model.reasoning === true;
        console.log(
          `[Target: ${targetPair.label}] Testing with ${otherMessages.length} messages from other providers...`,
        );
        // Keep the last request payload around so failures can be dumped.
        let lastPayload: unknown;
        try {
          const response = await completeSimple(
            model,
            {
              systemPrompt: "You are a helpful assistant.",
              messages: allMessages,
              tools: [testTool],
            },
            {
              apiKey,
              reasoning: supportsReasoning ? "high" : undefined,
              onPayload: (payload) => {
                lastPayload = payload;
              },
            },
          );
          if (response.stopReason === "error") {
            console.log(
              `[Target: ${targetPair.label}] FAILED: ${response.errorMessage}`,
            );
            dumpFailurePayload({
              label: targetPair.label,
              error: response.errorMessage || "Unknown error",
              payload: lastPayload,
              messages: allMessages,
            });
            results.push({
              target: targetPair.label,
              success: false,
              error: response.errorMessage,
            });
          } else {
            const text = response.content
              .filter((c) => c.type === "text")
              .map((c) => c.text)
              .join(" ");
            const preview = text.slice(0, 100).replace(/\n/g, " ");
            console.log(`[Target: ${targetPair.label}] SUCCESS: ${preview}...`);
            results.push({ target: targetPair.label, success: true });
          }
        } catch (error) {
          const msg = error instanceof Error ? error.message : String(error);
          console.log(`[Target: ${targetPair.label}] EXCEPTION: ${msg}`);
          dumpFailurePayload({
            label: targetPair.label,
            error: msg,
            payload: lastPayload,
            messages: allMessages,
          });
          results.push({
            target: targetPair.label,
            success: false,
            error: msg,
          });
        }
      }
      console.log("\n=== Results Summary ===\n");
      const successes = results.filter((r) => r.success);
      const failures = results.filter((r) => !r.success);
      console.log(`Passed: ${successes.length}/${results.length}`);
      if (failures.length > 0) {
        console.log("\nFailures:");
        for (const f of failures) {
          console.log(`  - ${f.target}: ${f.error}`);
        }
      }
      // A single failed handoff fails the whole test.
      expect(failures.length).toBe(0);
    },
    600000, // 10 min: one live request per available target pair
  );
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,115 @@
import { describe, expect, it, vi } from "vitest";
import { getModel } from "../src/models.js";
import type { Context } from "../src/types.js";
// Captured call data from the mocked Anthropic SDK. vi.hoisted() is required
// so this object exists before the hoisted vi.mock factory below executes.
const mockState = vi.hoisted(() => ({
  constructorOpts: undefined as Record<string, unknown> | undefined,
  streamParams: undefined as Record<string, unknown> | undefined,
}));
// Replace @anthropic-ai/sdk with a stub that records the constructor options
// and stream parameters, and yields a minimal two-event message stream.
vi.mock("@anthropic-ai/sdk", () => {
  const fakeStream = {
    // Minimal Anthropic streaming shape: message_start carrying input usage,
    // then a message_delta carrying the stop reason and output usage.
    async *[Symbol.asyncIterator]() {
      yield {
        type: "message_start",
        message: {
          usage: { input_tokens: 10, output_tokens: 0 },
        },
      };
      yield {
        type: "message_delta",
        delta: { stop_reason: "end_turn" },
        usage: { output_tokens: 5 },
      };
    },
    // Final aggregate usage, including zeroed cache counters.
    finalMessage: async () => ({
      usage: {
        input_tokens: 10,
        output_tokens: 5,
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: 0,
      },
    }),
  };
  class FakeAnthropic {
    constructor(opts: Record<string, unknown>) {
      // Capture client options (auth + default headers) for assertions.
      mockState.constructorOpts = opts;
    }
    messages = {
      stream: (params: Record<string, unknown>) => {
        // Capture the request payload for assertions.
        mockState.streamParams = params;
        return fakeStream;
      },
    };
  }
  return { default: FakeAnthropic };
});
// Verifies that GitHub Copilot's Claude models go through the Anthropic
// Messages API with Copilot-specific auth and headers (using the mocked SDK
// above — no network traffic).
describe("Copilot Claude via Anthropic Messages", () => {
  const context: Context = {
    systemPrompt: "You are a helpful assistant.",
    messages: [{ role: "user", content: "Hello", timestamp: Date.now() }],
  };
  it("uses Bearer auth, Copilot headers, and valid Anthropic Messages payload", async () => {
    const model = getModel("github-copilot", "claude-sonnet-4");
    expect(model.api).toBe("anthropic-messages");
    // Dynamic import so the module is loaded after vi.mock is in effect.
    const { streamAnthropic } = await import("../src/providers/anthropic.js");
    const s = streamAnthropic(model, context, {
      apiKey: "tid_copilot_session_test_token",
    });
    // Drain the stream so the stub client gets constructed and called.
    for await (const event of s) {
      if (event.type === "error") break;
    }
    const opts = mockState.constructorOpts!;
    expect(opts).toBeDefined();
    // Auth: apiKey null, authToken for Bearer
    expect(opts.apiKey).toBeNull();
    expect(opts.authToken).toBe("tid_copilot_session_test_token");
    const headers = opts.defaultHeaders as Record<string, string>;
    // Copilot static headers from model.headers
    expect(headers["User-Agent"]).toContain("GitHubCopilotChat");
    expect(headers["Copilot-Integration-Id"]).toBe("vscode-chat");
    // Dynamic headers
    expect(headers["X-Initiator"]).toBe("user");
    expect(headers["Openai-Intent"]).toBe("conversation-edits");
    // No fine-grained-tool-streaming (Copilot doesn't support it)
    const beta = headers["anthropic-beta"] ?? "";
    expect(beta).not.toContain("fine-grained-tool-streaming");
    // Payload is valid Anthropic Messages format
    const params = mockState.streamParams!;
    expect(params.model).toBe("claude-sonnet-4");
    expect(params.stream).toBe(true);
    expect(params.max_tokens).toBeGreaterThan(0);
    expect(Array.isArray(params.messages)).toBe(true);
  });
  it("includes interleaved-thinking beta when reasoning is enabled", async () => {
    const model = getModel("github-copilot", "claude-sonnet-4");
    const { streamAnthropic } = await import("../src/providers/anthropic.js");
    const s = streamAnthropic(model, context, {
      apiKey: "tid_copilot_session_test_token",
      interleavedThinking: true,
    });
    for await (const event of s) {
      if (event.type === "error") break;
    }
    // The beta header must advertise interleaved thinking support.
    const headers = mockState.constructorOpts!.defaultHeaders as Record<
      string,
      string
    >;
    expect(headers["anthropic-beta"]).toContain(
      "interleaved-thinking-2025-05-14",
    );
  });
});

View file

@ -0,0 +1,109 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Real fetch, restored after each test (tests stub global.fetch).
const originalFetch = global.fetch;
// gemini-cli credentials travel as a JSON-encoded { token, projectId } pair.
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
/**
 * Builds a one-chunk text/event-stream Response containing a single model
 * turn ("Hello") with a STOP finish reason, mimicking a minimal successful
 * cloudcode SSE reply.
 */
const createSseResponse = () => {
  const payload = {
    response: {
      candidates: [
        {
          content: { role: "model", parts: [{ text: "Hello" }] },
          finishReason: "STOP",
        },
      ],
    },
  };
  // One SSE event: "data: <json>" followed by the blank-line terminator.
  const bytes = new TextEncoder().encode(`data: ${JSON.stringify(payload)}\n\n`);
  const body = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(bytes);
      controller.close();
    },
  });
  return new Response(body, {
    status: 200,
    headers: { "content-type": "text/event-stream" },
  });
};
afterEach(() => {
  // Undo the per-test fetch stub and clear any vi spies/mocks.
  global.fetch = originalFetch;
  vi.restoreAllMocks();
});
// Claude models routed through the gemini-cli endpoint need the Anthropic
// interleaved-thinking beta header; plain Gemini models must not get it.
describe("google-gemini-cli Claude thinking header", () => {
  const context: Context = {
    messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
  };
  it("adds anthropic-beta for Claude thinking models", async () => {
    // Assert inside the fetch stub so the actual request headers are checked.
    const fetchMock = vi.fn(
      async (_input: string | URL, init?: RequestInit) => {
        const headers = new Headers(init?.headers);
        expect(headers.get("anthropic-beta")).toBe(
          "interleaved-thinking-2025-05-14",
        );
        return createSseResponse();
      },
    );
    global.fetch = fetchMock as typeof fetch;
    // Claude-on-Antigravity model served via the gemini-cli API.
    const model: Model<"google-gemini-cli"> = {
      id: "claude-opus-4-5-thinking",
      name: "Claude Opus 4.5 Thinking",
      api: "google-gemini-cli",
      provider: "google-antigravity",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const stream = streamGoogleGeminiCli(model, context, { apiKey });
    for await (const _event of stream) {
      // exhaust stream
    }
    await stream.result();
  });
  it("does not add anthropic-beta for Gemini models", async () => {
    const fetchMock = vi.fn(
      async (_input: string | URL, init?: RequestInit) => {
        const headers = new Headers(init?.headers);
        expect(headers.has("anthropic-beta")).toBe(false);
        return createSseResponse();
      },
    );
    global.fetch = fetchMock as typeof fetch;
    const model: Model<"google-gemini-cli"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const stream = streamGoogleGeminiCli(model, context, { apiKey });
    for await (const _event of stream) {
      // exhaust stream
    }
    await stream.result();
  });
});

View file

@ -0,0 +1,108 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Real fetch, restored after each test (tests stub global.fetch).
const originalFetch = global.fetch;
afterEach(() => {
  global.fetch = originalFetch;
  vi.restoreAllMocks();
});
// The provider should transparently retry when the SSE stream closes without
// emitting any events, and must not emit duplicate start/done events when
// the retried request succeeds.
describe("google-gemini-cli empty stream retry", () => {
  it("retries empty SSE responses without duplicate start", async () => {
    // First response body: closes immediately, yielding zero SSE events.
    const emptyStream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.close();
      },
    });
    // Second response body: one complete model turn with usage metadata.
    const sse = `${[
      `data: ${JSON.stringify({
        response: {
          candidates: [
            {
              content: { role: "model", parts: [{ text: "Hello" }] },
              finishReason: "STOP",
            },
          ],
          usageMetadata: {
            promptTokenCount: 1,
            candidatesTokenCount: 1,
            totalTokenCount: 2,
          },
        },
      })}`,
    ].join("\n\n")}\n\n`;
    const encoder = new TextEncoder();
    const dataStream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(encoder.encode(sse));
        controller.close();
      },
    });
    // Serve the empty body on the first call, real data on the second.
    let callCount = 0;
    const fetchMock = vi.fn(async () => {
      callCount += 1;
      if (callCount === 1) {
        return new Response(emptyStream, {
          status: 200,
          headers: { "content-type": "text/event-stream" },
        });
      }
      return new Response(dataStream, {
        status: 200,
        headers: { "content-type": "text/event-stream" },
      });
    });
    global.fetch = fetchMock as typeof fetch;
    const model: Model<"google-gemini-cli"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const context: Context = {
      messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
    };
    const stream = streamGoogleGeminiCli(model, context, {
      apiKey: JSON.stringify({ token: "token", projectId: "project" }),
    });
    // Count lifecycle events to prove the retry is invisible to consumers.
    let startCount = 0;
    let doneCount = 0;
    let text = "";
    for await (const event of stream) {
      if (event.type === "start") {
        startCount += 1;
      }
      if (event.type === "done") {
        doneCount += 1;
      }
      if (event.type === "text_delta") {
        text += event.delta;
      }
    }
    const result = await stream.result();
    expect(text).toBe("Hello");
    expect(result.stopReason).toBe("stop");
    // Exactly one start and one done despite two underlying fetches.
    expect(startCount).toBe(1);
    expect(doneCount).toBe(1);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});

View file

@ -0,0 +1,57 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
// extractRetryDelay should honor rate-limit headers (with a 1s cushion added
// to each parsed value) ahead of any delay hinted at in the message body.
describe("extractRetryDelay header parsing", () => {
  afterEach(() => {
    vi.useRealTimers();
  });

  /** Freezes the mocked clock at the given instant and returns that instant. */
  const freezeClock = (iso: string): Date => {
    const instant = new Date(iso);
    vi.useFakeTimers();
    vi.setSystemTime(instant);
    return instant;
  };

  const EPOCH = "2025-01-01T00:00:00Z";

  it("prefers Retry-After seconds header", () => {
    freezeClock(EPOCH);
    const response = new Response("", { headers: { "Retry-After": "5" } });
    // Header (5s + 1s cushion) wins over the 1s hint in the message.
    expect(extractRetryDelay("Please retry in 1s", response)).toBe(6000);
  });

  it("parses Retry-After HTTP date header", () => {
    const now = freezeClock(EPOCH);
    const retryAt = new Date(now.getTime() + 12000).toUTCString();
    const response = new Response("", { headers: { "Retry-After": retryAt } });
    expect(extractRetryDelay("", response)).toBe(13000);
  });

  it("parses x-ratelimit-reset header", () => {
    const now = freezeClock(EPOCH);
    // Header carries an absolute epoch-seconds reset timestamp.
    const resetSeconds = Math.floor((now.getTime() + 20000) / 1000).toString();
    const response = new Response("", {
      headers: { "x-ratelimit-reset": resetSeconds },
    });
    expect(extractRetryDelay("", response)).toBe(21000);
  });

  it("parses x-ratelimit-reset-after header", () => {
    freezeClock(EPOCH);
    const response = new Response("", {
      headers: { "x-ratelimit-reset-after": "30" },
    });
    expect(extractRetryDelay("", response)).toBe(31000);
  });
});

View file

@ -0,0 +1,195 @@
import { describe, expect, it } from "vitest";
import { convertMessages } from "../src/providers/google-shared.js";
import type { Context, Model } from "../src/types.js";
// Expected sentinel signature for unsigned tool calls on Gemini 3 models;
// must match the constant used by convertMessages in google-shared.
const SKIP_THOUGHT_SIGNATURE = "skip_thought_signature_validator";
/**
 * Builds a Gemini 3 model fixture for the convertMessages tests below.
 *
 * @param id Model id to embed; defaults to the Gemini 3 Pro preview id.
 * @returns A reasoning-enabled, text-only model with zeroed costs.
 */
function makeGemini3Model(
  id = "gemini-3-pro-preview",
): Model<"google-generative-ai"> {
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
  return {
    id,
    api: "google-generative-ai",
    provider: "google",
    name: "Gemini 3 Pro Preview",
    baseUrl: "https://generativelanguage.googleapis.com",
    input: ["text"],
    reasoning: true,
    cost: zeroCost,
    contextWindow: 128000,
    maxTokens: 8192,
  };
}
// Gemini 3 rejects replayed tool calls that lack a thought signature; the
// converter is expected to attach a skip-validation sentinel to unsigned
// calls (e.g. from other providers) while preserving real signatures, and
// to leave non-Gemini-3 models untouched.
describe("google-shared convertMessages — Gemini 3 unsigned tool calls", () => {
  it("uses skip_thought_signature_validator for unsigned tool calls on Gemini 3", () => {
    const model = makeGemini3Model();
    const now = Date.now();
    // Transcript contains a tool call recorded from a DIFFERENT provider
    // (Claude via Antigravity), so no thoughtSignature is present.
    const context: Context = {
      messages: [
        { role: "user", content: "Hi", timestamp: now },
        {
          role: "assistant",
          content: [
            {
              type: "toolCall",
              id: "call_1",
              name: "bash",
              arguments: { command: "ls -la" },
              // No thoughtSignature: simulates Claude via Antigravity.
            },
          ],
          api: "google-gemini-cli",
          provider: "google-antigravity",
          model: "claude-sonnet-4-20250514",
          usage: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
            totalTokens: 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          },
          stopReason: "stop",
          timestamp: now,
        },
      ],
    };
    const contents = convertMessages(model, context);
    const modelTurn = contents.find((c) => c.role === "model");
    expect(modelTurn).toBeTruthy();
    // Should be a structured functionCall, NOT text fallback
    const fcPart = modelTurn?.parts?.find((p) => p.functionCall !== undefined);
    expect(fcPart).toBeTruthy();
    expect(fcPart?.functionCall?.name).toBe("bash");
    expect(fcPart?.functionCall?.args).toEqual({ command: "ls -la" });
    expect(fcPart?.thoughtSignature).toBe(SKIP_THOUGHT_SIGNATURE);
    // No text fallback should exist
    const textParts =
      modelTurn?.parts?.filter((p) => p.text !== undefined) ?? [];
    const historicalText = textParts.filter((p) =>
      p.text?.includes("Historical context"),
    );
    expect(historicalText).toHaveLength(0);
  });
  it("preserves valid thoughtSignature when present (same provider/model)", () => {
    const model = makeGemini3Model();
    const now = Date.now();
    // Valid base64 signature (16 bytes = 24 chars base64)
    const validSig = "AAAAAAAAAAAAAAAAAAAAAA==";
    const context: Context = {
      messages: [
        { role: "user", content: "Hi", timestamp: now },
        {
          role: "assistant",
          content: [
            {
              type: "toolCall",
              id: "call_1",
              name: "bash",
              arguments: { command: "echo hi" },
              thoughtSignature: validSig,
            },
          ],
          api: "google-generative-ai",
          provider: "google",
          model: "gemini-3-pro-preview",
          usage: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
            totalTokens: 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          },
          stopReason: "stop",
          timestamp: now,
        },
      ],
    };
    const contents = convertMessages(model, context);
    const modelTurn = contents.find((c) => c.role === "model");
    const fcPart = modelTurn?.parts?.find((p) => p.functionCall !== undefined);
    expect(fcPart).toBeTruthy();
    // The recorded signature must pass through unchanged, not be replaced.
    expect(fcPart?.thoughtSignature).toBe(validSig);
  });
  it("does not add sentinel for non-Gemini-3 models", () => {
    // Gemini 2.5 target: signature validation does not apply, so unsigned
    // tool calls should stay unsigned.
    const model: Model<"google-generative-ai"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-generative-ai",
      provider: "google",
      baseUrl: "https://generativelanguage.googleapis.com",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const now = Date.now();
    const context: Context = {
      messages: [
        { role: "user", content: "Hi", timestamp: now },
        {
          role: "assistant",
          content: [
            {
              type: "toolCall",
              id: "call_1",
              name: "bash",
              arguments: { command: "ls" },
              // No thoughtSignature
            },
          ],
          api: "google-gemini-cli",
          provider: "google-antigravity",
          model: "claude-sonnet-4-20250514",
          usage: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
            totalTokens: 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          },
          stopReason: "stop",
          timestamp: now,
        },
      ],
    };
    const contents = convertMessages(model, context);
    const modelTurn = contents.find((c) => c.role === "model");
    const fcPart = modelTurn?.parts?.find((p) => p.functionCall !== undefined);
    expect(fcPart).toBeTruthy();
    // No sentinel, no thoughtSignature at all
    expect(fcPart?.thoughtSignature).toBeUndefined();
  });
});

View file

@ -0,0 +1,56 @@
import { describe, expect, it } from "vitest";
import {
isThinkingPart,
retainThoughtSignature,
} from "../src/providers/google-shared.js";
// Only thought === true marks a part as thinking content; thoughtSignature is
// replay metadata and may appear on any part type without implying thinking.
describe("Google thinking detection (thoughtSignature)", () => {
  it("treats part.thought === true as thinking", () => {
    for (const thoughtSignature of [undefined, "opaque-signature"]) {
      expect(isThinkingPart({ thought: true, thoughtSignature })).toBe(true);
    }
  });
  it("does not treat thoughtSignature alone as thinking", () => {
    // Per Google docs, thoughtSignature exists for context replay and can be
    // attached to any part type; it does not by itself indicate thinking.
    // See: https://ai.google.dev/gemini-api/docs/thought-signatures
    for (const thought of [undefined, false] as const) {
      expect(
        isThinkingPart({ thought, thoughtSignature: "opaque-signature" }),
      ).toBe(false);
    }
  });
  it("does not treat empty/missing signatures as thinking if thought is not set", () => {
    expect(
      isThinkingPart({ thought: undefined, thoughtSignature: undefined }),
    ).toBe(false);
    expect(isThinkingPart({ thought: false, thoughtSignature: "" })).toBe(
      false,
    );
  });
  it("preserves the existing signature when subsequent deltas omit thoughtSignature", () => {
    // Once a signature is seen, later deltas with undefined/empty values
    // must not clobber it.
    let sig = retainThoughtSignature(undefined, "sig-1");
    expect(sig).toBe("sig-1");
    sig = retainThoughtSignature(sig, undefined);
    expect(sig).toBe("sig-1");
    sig = retainThoughtSignature(sig, "");
    expect(sig).toBe("sig-1");
  });
  it("updates the signature when a new non-empty signature arrives", () => {
    expect(retainThoughtSignature("sig-1", "sig-2")).toBe("sig-2");
  });
});

View file

@ -0,0 +1,107 @@
import { Type } from "@sinclair/typebox";
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model, ToolCall } from "../src/types.js";
// Parameter schema for a tool that accepts no arguments.
const emptySchema = Type.Object({});
// Real fetch, restored after each test (tests stub global.fetch).
const originalFetch = global.fetch;
afterEach(() => {
  global.fetch = originalFetch;
  vi.restoreAllMocks();
});
// When the provider emits a functionCall without an args field (legal for
// no-argument tools), the stream must surface the tool call with `{}` as its
// arguments rather than undefined.
describe("google providers tool call missing args", () => {
  it("defaults arguments to empty object when provider omits args field", async () => {
    // Simulate a tool call response where args is missing (no-arg tool)
    const sse = `${[
      `data: ${JSON.stringify({
        response: {
          candidates: [
            {
              content: {
                role: "model",
                parts: [
                  {
                    functionCall: {
                      name: "get_status",
                      // args intentionally omitted
                    },
                  },
                ],
              },
              finishReason: "STOP",
            },
          ],
          usageMetadata: {
            promptTokenCount: 10,
            candidatesTokenCount: 5,
            totalTokenCount: 15,
          },
        },
      })}`,
    ].join("\n\n")}\n\n`;
    const encoder = new TextEncoder();
    const dataStream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(encoder.encode(sse));
        controller.close();
      },
    });
    const fetchMock = vi.fn(async () => {
      return new Response(dataStream, {
        status: 200,
        headers: { "content-type": "text/event-stream" },
      });
    });
    global.fetch = fetchMock as typeof fetch;
    const model: Model<"google-gemini-cli"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const context: Context = {
      messages: [
        { role: "user", content: "Check status", timestamp: Date.now() },
      ],
      tools: [
        {
          name: "get_status",
          description: "Get current status",
          parameters: emptySchema,
        },
      ],
    };
    const stream = streamGoogleGeminiCli(model, context, {
      apiKey: JSON.stringify({ token: "token", projectId: "project" }),
    });
    for await (const _ of stream) {
      // consume stream
    }
    const result = await stream.result();
    expect(result.stopReason).toBe("toolUse");
    expect(result.content).toHaveLength(1);
    const toolCall = result.content[0] as ToolCall;
    expect(toolCall.type).toBe("toolCall");
    expect(toolCall.name).toBe("get_status");
    // Missing args must normalize to an empty object.
    expect(toolCall.arguments).toEqual({});
  });
});

View file

@ -0,0 +1,630 @@
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import type {
Api,
Context,
Model,
Tool,
ToolResultMessage,
} from "../src/index.js";
import { complete, getModel } from "../src/index.js";
import type { StreamOptions } from "../src/types.js";
// StreamOptions plus arbitrary provider-specific extras
// (e.g. azureDeploymentName, passed through by some suites below).
type StreamOptionsWithExtras = StreamOptions & Record<string, unknown>;
import {
hasAzureOpenAICredentials,
resolveAzureDeploymentName,
} from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
  resolveApiKey("anthropic"),
  resolveApiKey("github-copilot"),
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("google-antigravity"),
  resolveApiKey("openai-codex"),
]);
// Destructure in the same order as the Promise.all input above.
const [
  anthropicOAuthToken,
  githubCopilotToken,
  geminiCliToken,
  antigravityToken,
  openaiCodexToken,
] = oauthTokens;
/**
* Test that tool results containing only images work correctly across all providers.
* This verifies that:
* 1. Tool results can contain image content blocks
* 2. Providers correctly pass images from tool results to the LLM
* 3. The LLM can see and describe images returned by tools
*/
/**
 * Drives a two-turn tool conversation where the tool result contains ONLY an
 * image block, then asserts the model can describe the image (proving the
 * provider forwarded tool-result images to the LLM).
 *
 * @param model   Model under test; skipped if it lacks image input support.
 * @param options Extra stream options forwarded to `complete`.
 */
async function handleToolWithImageResult<TApi extends Api>(
  model: Model<TApi>,
  options?: StreamOptionsWithExtras,
) {
  if (!model.input.includes("image")) {
    console.log(
      `Skipping tool image result test - model ${model.id} doesn't support images`,
    );
    return;
  }

  // Load the red-circle fixture as base64 for the image content block.
  const base64Image = readFileSync(
    join(__dirname, "data", "red-circle.png"),
  ).toString("base64");

  // Tool whose (simulated) output is an image and nothing else.
  const getImageSchema = Type.Object({});
  const getImageTool: Tool<typeof getImageSchema> = {
    name: "get_circle",
    description: "Returns a circle image for visualization",
    parameters: getImageSchema,
  };

  const context: Context = {
    systemPrompt: "You are a helpful assistant that uses tools when asked.",
    messages: [
      {
        role: "user",
        content:
          "Call the get_circle tool to get an image, and describe what you see, shapes, colors, etc.",
        timestamp: Date.now(),
      },
    ],
    tools: [getImageTool],
  };

  // Turn 1: the model is expected to invoke the tool.
  const firstResponse = await complete(model, context, options);
  expect(firstResponse.stopReason).toBe("toolUse");

  const toolCall = firstResponse.content.find((b) => b.type === "toolCall");
  expect(toolCall).toBeTruthy();
  if (!toolCall || toolCall.type !== "toolCall") {
    throw new Error("Expected tool call");
  }
  expect(toolCall.name).toBe("get_circle");

  context.messages.push(firstResponse);

  // Tool result carries ONLY an image block — deliberately no text.
  const toolResult: ToolResultMessage = {
    role: "toolResult",
    toolCallId: toolCall.id,
    toolName: toolCall.name,
    content: [{ type: "image", data: base64Image, mimeType: "image/png" }],
    isError: false,
    timestamp: Date.now(),
  };
  context.messages.push(toolResult);

  // Turn 2: the model should now describe what the tool returned.
  const secondResponse = await complete(model, context, options);
  expect(secondResponse.stopReason).toBe("stop");
  expect(secondResponse.errorMessage).toBeFalsy();

  const textContent = secondResponse.content.find((b) => b.type === "text");
  expect(textContent).toBeTruthy();
  if (textContent && textContent.type === "text") {
    // The fixture is a red circle, so the description must mention both.
    const lowerContent = textContent.text.toLowerCase();
    expect(lowerContent).toContain("red");
    expect(lowerContent).toContain("circle");
  }
}
/**
* Test that tool results containing both text and images work correctly across all providers.
* This verifies that:
* 1. Tool results can contain mixed content blocks (text + images)
* 2. Providers correctly pass both text and images from tool results to the LLM
* 3. The LLM can see both the text and images in tool results
*/
async function handleToolWithTextAndImageResult<TApi extends Api>(
  model: Model<TApi>,
  options?: StreamOptionsWithExtras,
) {
  // Check if the model supports images
  if (!model.input.includes("image")) {
    console.log(
      `Skipping tool text+image result test - model ${model.id} doesn't support images`,
    );
    return;
  }
  // Read the test image (a red circle) and encode it for the content block.
  const imagePath = join(__dirname, "data", "red-circle.png");
  const imageBuffer = readFileSync(imagePath);
  const base64Image = imageBuffer.toString("base64");
  // Define a tool that returns both text and an image
  const getImageSchema = Type.Object({});
  const getImageTool: Tool<typeof getImageSchema> = {
    name: "get_circle_with_description",
    description: "Returns a circle image with a text description",
    parameters: getImageSchema,
  };
  const context: Context = {
    systemPrompt: "You are a helpful assistant that uses tools when asked.",
    messages: [
      {
        role: "user",
        content:
          "Use the get_circle_with_description tool and tell me what you learned. Also say what color the shape is.",
        timestamp: Date.now(),
      },
    ],
    tools: [getImageTool],
  };
  // First request - LLM should call the tool
  const firstResponse = await complete(model, context, options);
  expect(firstResponse.stopReason).toBe("toolUse");
  // Find the tool call
  const toolCall = firstResponse.content.find((b) => b.type === "toolCall");
  expect(toolCall).toBeTruthy();
  if (!toolCall || toolCall.type !== "toolCall") {
    throw new Error("Expected tool call");
  }
  expect(toolCall.name).toBe("get_circle_with_description");
  // Add the tool call to context
  context.messages.push(firstResponse);
  // Create tool result with BOTH text and image
  const toolResult: ToolResultMessage = {
    role: "toolResult",
    toolCallId: toolCall.id,
    toolName: toolCall.name,
    content: [
      {
        type: "text",
        text: "This is a geometric shape with specific properties: it has a diameter of 100 pixels.",
      },
      {
        type: "image",
        data: base64Image,
        mimeType: "image/png",
      },
    ],
    isError: false,
    timestamp: Date.now(),
  };
  context.messages.push(toolResult);
  // Second request - LLM should describe both the text and image from the tool result
  const secondResponse = await complete(model, context, options);
  expect(secondResponse.stopReason).toBe("stop");
  expect(secondResponse.errorMessage).toBeFalsy();
  // Verify the LLM can see both text and image
  const textContent = secondResponse.content.find((b) => b.type === "text");
  expect(textContent).toBeTruthy();
  if (textContent && textContent.type === "text") {
    const lowerContent = textContent.text.toLowerCase();
    // Should mention details from the text (diameter/pixels)
    expect(lowerContent.match(/diameter|100|pixel/)).toBeTruthy();
    // Should also mention the visual properties (red and circle)
    expect(lowerContent).toContain("red");
    expect(lowerContent).toContain("circle");
  }
}
describe("Tool Results with Images", () => {
// Live Google Generative AI run; skipped without GEMINI_API_KEY.
describe.skipIf(!process.env.GEMINI_API_KEY)(
  "Google Provider (gemini-2.5-flash)",
  () => {
    const llm = getModel("google", "gemini-2.5-flash");
    it(
      "should handle tool result with only image",
      { retry: 3, timeout: 30000 }, // live API: retry flaky calls
      async () => {
        await handleToolWithImageResult(llm);
      },
    );
    it(
      "should handle tool result with text and image",
      { retry: 3, timeout: 30000 },
      async () => {
        await handleToolWithTextAndImageResult(llm);
      },
    );
  },
);
// Live OpenAI run forced onto the Chat Completions API path.
describe.skipIf(!process.env.OPENAI_API_KEY)(
  "OpenAI Completions Provider (gpt-4o-mini)",
  () => {
    // Strip `compat` and override api to "openai-completions" so this suite
    // exercises the Completions code path for the same model.
    const { compat: _compat, ...baseModel } = getModel(
      "openai",
      "gpt-4o-mini",
    );
    void _compat;
    const llm: Model<"openai-completions"> = {
      ...baseModel,
      api: "openai-completions",
    };
    it(
      "should handle tool result with only image",
      { retry: 3, timeout: 30000 }, // live API: retry flaky calls
      async () => {
        await handleToolWithImageResult(llm);
      },
    );
    it(
      "should handle tool result with text and image",
      { retry: 3, timeout: 30000 },
      async () => {
        await handleToolWithTextAndImageResult(llm);
      },
    );
  },
);
// Live OpenAI run using the model's default (Responses) API.
describe.skipIf(!process.env.OPENAI_API_KEY)(
  "OpenAI Responses Provider (gpt-5-mini)",
  () => {
    const llm = getModel("openai", "gpt-5-mini");
    it(
      "should handle tool result with only image",
      { retry: 3, timeout: 30000 }, // live API: retry flaky calls
      async () => {
        await handleToolWithImageResult(llm);
      },
    );
    it(
      "should handle tool result with text and image",
      { retry: 3, timeout: 30000 },
      async () => {
        await handleToolWithTextAndImageResult(llm);
      },
    );
  },
);
describe.skipIf(!hasAzureOpenAICredentials())(
"Azure OpenAI Responses Provider (gpt-4o-mini)",
() => {
const llm = getModel("azure-openai-responses", "gpt-4o-mini");
const azureDeploymentName = resolveAzureDeploymentName(llm.id);
const azureOptions = azureDeploymentName ? { azureDeploymentName } : {};
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm, azureOptions);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm, azureOptions);
},
);
},
);
describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
"Anthropic Provider (claude-haiku-4-5)",
() => {
const model = getModel("anthropic", "claude-haiku-4-5");
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(model);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(model);
},
);
},
);
describe.skipIf(!process.env.OPENROUTER_API_KEY)(
"OpenRouter Provider (glm-4.5v)",
() => {
const llm = getModel("openrouter", "z-ai/glm-4.5v");
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm);
},
);
},
);
describe.skipIf(!process.env.MISTRAL_API_KEY)(
"Mistral Provider (pixtral-12b)",
() => {
const llm = getModel("mistral", "pixtral-12b");
it(
"should handle tool result with only image",
{ retry: 5, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm);
},
);
it(
"should handle tool result with text and image",
{ retry: 5, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm);
},
);
},
);
describe.skipIf(!process.env.KIMI_API_KEY)(
"Kimi For Coding Provider (k2p5)",
() => {
const llm = getModel("kimi-coding", "k2p5");
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm);
},
);
},
);
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
"Vercel AI Gateway Provider (google/gemini-2.5-flash)",
() => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm);
},
);
},
);
describe.skipIf(!hasBedrockCredentials())(
"Amazon Bedrock Provider (claude-sonnet-4-5)",
() => {
const llm = getModel(
"amazon-bedrock",
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
);
it(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(llm);
},
);
it(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(llm);
},
);
},
);
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider (claude-sonnet-4-5)", () => {
const model = getModel("anthropic", "claude-sonnet-4-5");
it.skipIf(!anthropicOAuthToken)(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(model, { apiKey: anthropicOAuthToken });
},
);
it.skipIf(!anthropicOAuthToken)(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(model, {
apiKey: anthropicOAuthToken,
});
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await handleToolWithTextAndImageResult(llm, {
apiKey: githubCopilotToken,
});
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await handleToolWithTextAndImageResult(llm, {
apiKey: githubCopilotToken,
});
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await handleToolWithImageResult(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await handleToolWithTextAndImageResult(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await handleToolWithImageResult(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await handleToolWithTextAndImageResult(llm, {
apiKey: antigravityToken,
});
},
);
/** These two don't work, the model simply won't call the tool, works in pi
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await handleToolWithImageResult(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken });
},
);**/
// Note: gpt-oss-120b-medium does not support images, so not tested here
});
describe("OpenAI Codex Provider", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("openai-codex", "gpt-5.2-codex");
await handleToolWithImageResult(llm, { apiKey: openaiCodexToken });
},
);
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("openai-codex", "gpt-5.2-codex");
await handleToolWithTextAndImageResult(llm, {
apiKey: openaiCodexToken,
});
},
);
});
});

View file

@ -0,0 +1,206 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getEnvApiKey } from "../src/env-api-keys.js";
import { getModel } from "../src/models.js";
import { completeSimple } from "../src/stream.js";
import type {
Api,
Context,
Model,
StopReason,
Tool,
ToolCall,
ToolResultMessage,
} from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
// JSON schema for the calculator tool's arguments: two numeric operands and
// a string-enum operation.
const calculatorSchema = Type.Object({
  a: Type.Number({ description: "First number" }),
  b: Type.Number({ description: "Second number" }),
  operation: StringEnum(["add", "subtract", "multiply", "divide"], {
    description: "The operation to perform.",
  }),
});
// Tool definition advertised to the model; the tests execute it locally.
const calculatorTool: Tool<typeof calculatorSchema> = {
  name: "calculator",
  description: "Perform basic arithmetic operations",
  parameters: calculatorSchema,
};
type CalculatorOperation = "add" | "subtract" | "multiply" | "divide";
type CalculatorArguments = {
  a: number;
  b: number;
  operation: CalculatorOperation;
};
/**
 * Narrows raw tool-call arguments into CalculatorArguments, throwing when the
 * payload is not an object or any field fails validation.
 */
function asCalculatorArguments(
  args: ToolCall["arguments"],
): CalculatorArguments {
  if (typeof args !== "object" || args === null) {
    throw new Error("Tool arguments must be an object");
  }
  const record = args as Record<string, unknown>;
  const a = record.a;
  const b = record.b;
  const operation = record.operation;
  const validOperation =
    operation === "add" ||
    operation === "subtract" ||
    operation === "multiply" ||
    operation === "divide";
  if (typeof a !== "number" || typeof b !== "number" || !validOperation) {
    throw new Error("Invalid calculator arguments");
  }
  return { a, b, operation };
}
/** Executes a calculator tool call locally and returns the numeric result. */
function evaluateCalculatorCall(toolCall: ToolCall): number {
  const { a, b, operation } = asCalculatorArguments(toolCall.arguments);
  const apply: Record<CalculatorOperation, (x: number, y: number) => number> = {
    add: (x, y) => x + y,
    subtract: (x, y) => x - y,
    multiply: (x, y) => x * y,
    divide: (x, y) => x / y,
  };
  return apply[operation](a, b);
}
/**
 * Drives a two-turn tool conversation and asserts that the model emits
 * thinking blocks on BOTH turns (interleaved thinking), not just the first.
 *
 * Turn 1 must stop for tool use with at least one thinking block; turn 2
 * (after a deliberately ambiguous tool result) must stop normally with both
 * thinking and text blocks.
 */
async function assertSecondToolCallWithInterleavedThinking<TApi extends Api>(
  llm: Model<TApi>,
  reasoning: "high" | "xhigh",
) {
  const context: Context = {
    systemPrompt: [
      "You are a helpful assistant that must use tools for arithmetic.",
      "Always think before every tool call, not just the first one.",
      "Do not answer with plain text when a tool call is required.",
    ].join(" "),
    messages: [
      {
        role: "user",
        content: [
          "Use calculator to calculate 328 * 29.",
          "You must call the calculator tool exactly once.",
          "Provide the final answer based on the best guess given the tool result, even if it seems unreliable.",
          "Start by thinking about the steps you will take to solve the problem.",
        ].join(" "),
        timestamp: Date.now(),
      },
    ],
    tools: [calculatorTool],
  };
  // Turn 1: the model should think, then request the calculator tool.
  const firstResponse = await completeSimple(llm, context, { reasoning });
  expect(firstResponse.stopReason, `Error: ${firstResponse.errorMessage}`).toBe(
    "toolUse" satisfies StopReason,
  );
  expect(firstResponse.content.some((block) => block.type === "thinking")).toBe(
    true,
  );
  expect(firstResponse.content.some((block) => block.type === "toolCall")).toBe(
    true,
  );
  const firstToolCall = firstResponse.content.find(
    (block) => block.type === "toolCall",
  );
  expect(firstToolCall?.type).toBe("toolCall");
  // Runtime guard so TypeScript narrows firstToolCall below.
  if (!firstToolCall || firstToolCall.type !== "toolCall") {
    throw new Error("Expected first response to include a tool call");
  }
  context.messages.push(firstResponse);
  // Feed back an ambiguous result ("X or 2X") so the model must reason again
  // on the second turn instead of parroting the tool output.
  const correctAnswer = evaluateCalculatorCall(firstToolCall);
  const firstToolResult: ToolResultMessage = {
    role: "toolResult",
    toolCallId: firstToolCall.id,
    toolName: firstToolCall.name,
    content: [
      {
        type: "text",
        text: `The answer is ${correctAnswer} or ${correctAnswer * 2}.`,
      },
    ],
    isError: false,
    timestamp: Date.now(),
  };
  context.messages.push(firstToolResult);
  // Turn 2: interleaved thinking means a fresh thinking block plus final text.
  const secondResponse = await completeSimple(llm, context, { reasoning });
  expect(
    secondResponse.stopReason,
    `Error: ${secondResponse.errorMessage}`,
  ).toBe("stop" satisfies StopReason);
  expect(
    secondResponse.content.some((block) => block.type === "thinking"),
  ).toBe(true);
  expect(secondResponse.content.some((block) => block.type === "text")).toBe(
    true,
  );
}
// Direct Anthropic tests only run when an API key is available via env vars.
const hasAnthropicCredentials = !!getEnvApiKey("anthropic");
describe.skipIf(!hasBedrockCredentials())(
  "Amazon Bedrock interleaved thinking",
  () => {
    // One entry per Bedrock inference profile under test: [title label, id].
    const bedrockModels = [
      ["Claude Opus 4.5", "global.anthropic.claude-opus-4-5-20251101-v1:0"],
      ["Claude Opus 4.6", "global.anthropic.claude-opus-4-6-v1"],
    ] as const;
    for (const [label, bedrockId] of bedrockModels) {
      it(
        `should do interleaved thinking on ${label}`,
        { retry: 3 },
        async () => {
          await assertSecondToolCallWithInterleavedThinking(
            getModel("amazon-bedrock", bedrockId),
            "high",
          );
        },
      );
    }
  },
);
describe.skipIf(!hasAnthropicCredentials)(
  "Anthropic interleaved thinking",
  () => {
    // One entry per Anthropic model under test: [title label, model id].
    const anthropicModels = [
      ["Claude Opus 4.5", "claude-opus-4-5"],
      ["Claude Opus 4.6", "claude-opus-4-6"],
    ] as const;
    for (const [label, modelId] of anthropicModels) {
      it(
        `should do interleaved thinking on ${label}`,
        { retry: 3 },
        async () => {
          await assertSecondToolCallWithInterleavedThinking(
            getModel("anthropic", modelId),
            "high",
          );
        },
      );
    }
  },
);

103
packages/ai/test/oauth.ts Normal file
View file

@ -0,0 +1,103 @@
/**
* Test helper for resolving API keys from ~/.pi/agent/auth.json
*
* Supports both API key and OAuth credentials.
* OAuth tokens are automatically refreshed if expired and saved back to auth.json.
*/
import {
chmodSync,
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
} from "fs";
import { homedir } from "os";
import { dirname, join } from "path";
import { getOAuthApiKey } from "../src/utils/oauth/index.js";
import type {
OAuthCredentials,
OAuthProvider,
} from "../src/utils/oauth/types.js";
// On-disk location of the shared pi-agent credential store.
const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");
// A plain API-key credential entry.
type ApiKeyCredential = {
  type: "api_key";
  key: string;
};
// An OAuth entry: discriminant tag plus the provider's OAuth fields.
type OAuthCredentialEntry = {
  type: "oauth";
} & OAuthCredentials;
type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;
// Provider name -> credential.
type AuthStorage = Record<string, AuthCredential>;
/**
 * Loads the credential store from disk.
 * A missing or unparsable auth.json yields an empty store rather than throwing.
 */
function loadAuthStorage(): AuthStorage {
  if (!existsSync(AUTH_PATH)) {
    return {};
  }
  try {
    return JSON.parse(readFileSync(AUTH_PATH, "utf-8")) as AuthStorage;
  } catch {
    return {};
  }
}
/**
 * Persists the credential store with restrictive permissions:
 * the directory is created 0700 when missing, and the file is chmod'd 0600.
 */
function saveAuthStorage(storage: AuthStorage): void {
  const configDir = dirname(AUTH_PATH);
  if (!existsSync(configDir)) {
    mkdirSync(configDir, { recursive: true, mode: 0o700 });
  }
  const serialized = JSON.stringify(storage, null, 2);
  writeFileSync(AUTH_PATH, serialized, "utf-8");
  chmodSync(AUTH_PATH, 0o600);
}
/**
 * Resolve API key for a provider from ~/.pi/agent/auth.json
 *
 * For API key credentials, returns the key directly.
 * For OAuth credentials, returns the access token (refreshing if expired and saving back).
 *
 * For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
 *
 * Returns undefined when the provider has no stored credential or the OAuth
 * refresh fails.
 */
export async function resolveApiKey(
  provider: string,
): Promise<string | undefined> {
  const storage = loadAuthStorage();
  const entry = storage[provider];
  if (!entry) return undefined;
  if (entry.type === "api_key") {
    return entry.key;
  }
  if (entry.type === "oauth") {
    // Build OAuthCredentials record for getOAuthApiKey
    const oauthCredentials: Record<string, OAuthCredentials> = {};
    for (const [key, value] of Object.entries(storage)) {
      if (value.type === "oauth") {
        // Strip the discriminant tag; getOAuthApiKey expects bare credentials.
        const { type: _, ...creds } = value;
        oauthCredentials[key] = creds;
      }
    }
    const result = await getOAuthApiKey(
      provider as OAuthProvider,
      oauthCredentials,
    );
    if (!result) return undefined;
    // Save refreshed credentials back to auth.json
    storage[provider] = { type: "oauth", ...result.newCredentials };
    saveAuthStorage(storage);
    return result.apiKey;
  }
  return undefined;
}

View file

@ -0,0 +1,506 @@
import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamOpenAICodexResponses } from "../src/providers/openai-codex-responses.js";
import type { Context, Model } from "../src/types.js";
// Saved globals so each test's fetch/env mutations can be rolled back.
const originalFetch = global.fetch;
const originalAgentDir = process.env.PI_CODING_AGENT_DIR;
afterEach(() => {
  global.fetch = originalFetch;
  // Restore PI_CODING_AGENT_DIR exactly: delete it if it was unset before.
  if (originalAgentDir === undefined) {
    delete process.env.PI_CODING_AGENT_DIR;
  } else {
    process.env.PI_CODING_AGENT_DIR = originalAgentDir;
  }
  vi.restoreAllMocks();
});
describe("openai-codex streaming", () => {
  /** Builds a fake ChatGPT OAuth JWT whose (unsigned) payload carries the account id. */
  function makeToken(): string {
    const payload = Buffer.from(
      JSON.stringify({
        "https://api.openai.com/auth": { chatgpt_account_id: "acc_test" },
      }),
      "utf8",
    ).toString("base64");
    return `aaa.${payload}.bbb`;
  }

  /** Minimal SSE transcript: one assistant message streaming the text "Hello". */
  function makeSseBody(): string {
    return `${[
      `data: ${JSON.stringify({
        type: "response.output_item.added",
        item: {
          type: "message",
          id: "msg_1",
          role: "assistant",
          status: "in_progress",
          content: [],
        },
      })}`,
      `data: ${JSON.stringify({ type: "response.content_part.added", part: { type: "output_text", text: "" } })}`,
      `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Hello" })}`,
      `data: ${JSON.stringify({
        type: "response.output_item.done",
        item: {
          type: "message",
          id: "msg_1",
          role: "assistant",
          status: "completed",
          content: [{ type: "output_text", text: "Hello" }],
        },
      })}`,
      `data: ${JSON.stringify({
        type: "response.completed",
        response: {
          status: "completed",
          usage: {
            input_tokens: 5,
            output_tokens: 3,
            total_tokens: 8,
            input_tokens_details: { cached_tokens: 0 },
          },
        },
      })}`,
    ].join("\n\n")}\n\n`;
  }

  /** Wraps the SSE transcript into a single-chunk body stream, as fetch would. */
  function makeSseStream(): ReadableStream<Uint8Array> {
    const encoder = new TextEncoder();
    const body = makeSseBody();
    return new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(encoder.encode(body));
        controller.close();
      },
    });
  }

  /** A fresh 200 text/event-stream response carrying the SSE transcript. */
  function sseResponse(): Response {
    return new Response(makeSseStream(), {
      status: 200,
      headers: { "content-type": "text/event-stream" },
    });
  }

  /** Points PI_CODING_AGENT_DIR at a fresh temp dir so cached prompts do not leak between tests. */
  function useTempAgentDir(): void {
    process.env.PI_CODING_AGENT_DIR = mkdtempSync(
      join(tmpdir(), "pi-codex-stream-"),
    );
  }

  /** Model fixture for the Codex responses API; id/name default to gpt-5.1-codex. */
  function makeModel(
    id = "gpt-5.1-codex",
    name = "GPT-5.1 Codex",
  ): Model<"openai-codex-responses"> {
    return {
      id,
      name,
      api: "openai-codex-responses",
      provider: "openai-codex",
      baseUrl: "https://chatgpt.com/backend-api",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 400000,
      maxTokens: 128000,
    };
  }

  /** One-user-message context fixture shared by all tests. */
  function makeContext(): Context {
    return {
      systemPrompt: "You are a helpful assistant.",
      messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
    };
  }

  /**
   * Installs a global fetch mock that serves the GitHub release metadata and
   * cached prompt that the provider fetches on startup, and delegates the
   * actual /codex/responses request to `onResponses` so each test can assert
   * on headers/body and return the SSE response.
   */
  function installFetchMock(
    onResponses: (init: RequestInit | undefined) => Response,
  ): void {
    const fetchMock = vi.fn(async (input: string | URL, init?: RequestInit) => {
      const url = typeof input === "string" ? input : input.toString();
      if (url === "https://api.github.com/repos/openai/codex/releases/latest") {
        return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), {
          status: 200,
        });
      }
      if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) {
        return new Response("PROMPT", {
          status: 200,
          headers: { etag: '"etag"' },
        });
      }
      if (url === "https://chatgpt.com/backend-api/codex/responses") {
        return onResponses(init);
      }
      return new Response("not found", { status: 404 });
    });
    global.fetch = fetchMock as typeof fetch;
  }

  /** Extracts the Headers object from a fetch init, when present. */
  function headersOf(init: RequestInit | undefined): Headers | undefined {
    return init?.headers instanceof Headers ? init.headers : undefined;
  }

  /** Parses the JSON request body from a fetch init, when present. */
  function bodyOf(
    init: RequestInit | undefined,
  ): Record<string, unknown> | null {
    return typeof init?.body === "string"
      ? (JSON.parse(init.body) as Record<string, unknown>)
      : null;
  }

  it("streams SSE responses into AssistantMessageEventStream", async () => {
    useTempAgentDir();
    const token = makeToken();
    installFetchMock((init) => {
      const headers = headersOf(init);
      expect(headers?.get("Authorization")).toBe(`Bearer ${token}`);
      expect(headers?.get("chatgpt-account-id")).toBe("acc_test");
      expect(headers?.get("OpenAI-Beta")).toBe("responses=experimental");
      expect(headers?.get("originator")).toBe("pi");
      expect(headers?.get("accept")).toBe("text/event-stream");
      expect(headers?.has("x-api-key")).toBe(false);
      return sseResponse();
    });
    const streamResult = streamOpenAICodexResponses(
      makeModel(),
      makeContext(),
      { apiKey: token },
    );
    let sawTextDelta = false;
    let sawDone = false;
    for await (const event of streamResult) {
      if (event.type === "text_delta") {
        sawTextDelta = true;
      }
      if (event.type === "done") {
        sawDone = true;
        expect(event.message.content.find((c) => c.type === "text")?.text).toBe(
          "Hello",
        );
      }
    }
    expect(sawTextDelta).toBe(true);
    expect(sawDone).toBe(true);
  });

  it("sets conversation_id/session_id headers and prompt_cache_key when sessionId is provided", async () => {
    useTempAgentDir();
    const token = makeToken();
    const sessionId = "test-session-123";
    installFetchMock((init) => {
      // Verify sessionId is set in headers
      const headers = headersOf(init);
      expect(headers?.get("conversation_id")).toBe(sessionId);
      expect(headers?.get("session_id")).toBe(sessionId);
      // Verify sessionId is set in request body as prompt_cache_key
      const body = bodyOf(init);
      expect(body?.prompt_cache_key).toBe(sessionId);
      expect(body?.prompt_cache_retention).toBe("in-memory");
      return sseResponse();
    });
    await streamOpenAICodexResponses(makeModel(), makeContext(), {
      apiKey: token,
      sessionId,
    }).result();
  });

  it.each(["gpt-5.3-codex", "gpt-5.4"])(
    "clamps %s minimal reasoning effort to low",
    async (modelId) => {
      useTempAgentDir();
      const token = makeToken();
      installFetchMock((init) => {
        expect(bodyOf(init)?.reasoning).toEqual({
          effort: "low",
          summary: "auto",
        });
        return sseResponse();
      });
      await streamOpenAICodexResponses(
        makeModel(modelId, modelId),
        makeContext(),
        { apiKey: token, reasoningEffort: "minimal" },
      ).result();
    },
  );

  it("does not set conversation_id/session_id headers when sessionId is not provided", async () => {
    useTempAgentDir();
    const token = makeToken();
    installFetchMock((init) => {
      // Verify headers are not set when sessionId is not provided
      const headers = headersOf(init);
      expect(headers?.has("conversation_id")).toBe(false);
      expect(headers?.has("session_id")).toBe(false);
      return sseResponse();
    });
    // No sessionId provided
    await streamOpenAICodexResponses(makeModel(), makeContext(), {
      apiKey: token,
    }).result();
  });
});

View file

@ -0,0 +1,193 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it, vi } from "vitest";
import { getModel } from "../src/models.js";
import { streamSimple } from "../src/stream.js";
import type { Tool } from "../src/types.js";
// Captures the last params passed to the mocked chat.completions.create call;
// hoisted so the vi.mock factory below can reference it.
const mockState = vi.hoisted(() => ({ lastParams: undefined as unknown }));
// Replace the OpenAI SDK with a stub that records the request payload and
// yields a single empty "stop" chunk carrying token usage.
vi.mock("openai", () => {
  class FakeOpenAI {
    chat = {
      completions: {
        create: async (params: unknown) => {
          mockState.lastParams = params;
          return {
            async *[Symbol.asyncIterator]() {
              yield {
                choices: [{ delta: {}, finish_reason: "stop" }],
                usage: {
                  prompt_tokens: 1,
                  completion_tokens: 1,
                  prompt_tokens_details: { cached_tokens: 0 },
                  completion_tokens_details: { reasoning_tokens: 0 },
                },
              };
            },
          };
        },
      },
    };
  }
  return { default: FakeOpenAI };
});
describe("openai-completions tool_choice", () => {
  it("forwards toolChoice from simple options to payload", async () => {
    // Rebuild the catalog model as an openai-completions model (drop compat).
    const { compat: _compat, ...baseModel } = getModel(
      "openai",
      "gpt-4o-mini",
    )!;
    const model = { ...baseModel, api: "openai-completions" } as const;
    const tools: Tool[] = [
      {
        name: "ping",
        description: "Ping tool",
        parameters: Type.Object({
          ok: Type.Boolean(),
        }),
      },
    ];
    let payload: unknown;
    await streamSimple(
      model,
      {
        messages: [
          {
            role: "user",
            content: "Call ping with ok=true",
            timestamp: Date.now(),
          },
        ],
        tools,
      },
      {
        apiKey: "test",
        toolChoice: "required",
        onPayload: (params: unknown) => {
          payload = params;
        },
      } as unknown as Parameters<typeof streamSimple>[2],
    ).result();
    // Prefer the onPayload capture; fall back to the mock's recorded params.
    const params = (payload ?? mockState.lastParams) as {
      tool_choice?: string;
      tools?: unknown[];
    };
    expect(params.tool_choice).toBe("required");
    expect(Array.isArray(params.tools)).toBe(true);
    expect(params.tools?.length ?? 0).toBeGreaterThan(0);
  });
  it("omits strict when compat disables strict mode", async () => {
    const { compat: _compat, ...baseModel } = getModel(
      "openai",
      "gpt-4o-mini",
    )!;
    // Override compat so strict tool schemas are disabled for this model.
    const model = {
      ...baseModel,
      api: "openai-completions",
      compat: { supportsStrictMode: false },
    } as const;
    const tools: Tool[] = [
      {
        name: "ping",
        description: "Ping tool",
        parameters: Type.Object({
          ok: Type.Boolean(),
        }),
      },
    ];
    let payload: unknown;
    await streamSimple(
      model,
      {
        messages: [
          {
            role: "user",
            content: "Call ping with ok=true",
            timestamp: Date.now(),
          },
        ],
        tools,
      },
      {
        apiKey: "test",
        onPayload: (params: unknown) => {
          payload = params;
        },
      } as unknown as Parameters<typeof streamSimple>[2],
    ).result();
    const params = (payload ?? mockState.lastParams) as {
      tools?: Array<{ function?: Record<string, unknown> }>;
    };
    const tool = params.tools?.[0]?.function;
    expect(tool).toBeTruthy();
    // strict must be entirely absent from the payload, not undefined-valued.
    expect(tool?.strict).toBeUndefined();
    expect("strict" in (tool ?? {})).toBe(false);
  });
  it("maps groq qwen3 reasoning levels to default reasoning_effort", async () => {
    const model = getModel("groq", "qwen/qwen3-32b")!;
    let payload: unknown;
    await streamSimple(
      model,
      {
        messages: [
          {
            role: "user",
            content: "Hi",
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: "test",
        reasoning: "medium",
        onPayload: (params: unknown) => {
          payload = params;
        },
      },
    ).result();
    const params = (payload ?? mockState.lastParams) as {
      reasoning_effort?: string;
    };
    // Per this model's compat mapping, "medium" becomes Groq's "default".
    expect(params.reasoning_effort).toBe("default");
  });
  it("keeps normal reasoning_effort for groq models without compat mapping", async () => {
    const model = getModel("groq", "openai/gpt-oss-20b")!;
    let payload: unknown;
    await streamSimple(
      model,
      {
        messages: [
          {
            role: "user",
            content: "Hi",
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: "test",
        reasoning: "medium",
        onPayload: (params: unknown) => {
          payload = params;
        },
      },
    ).result();
    const params = (payload ?? mockState.lastParams) as {
      reasoning_effort?: string;
    };
    expect(params.reasoning_effort).toBe("medium");
  });
});

View file

@ -0,0 +1,111 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { convertMessages } from "../src/providers/openai-completions.js";
import type {
AssistantMessage,
Context,
Model,
OpenAICompletionsCompat,
ToolResultMessage,
Usage,
} from "../src/types.js";
// A Usage record with every token counter and cost component zeroed out,
// shared by the fixtures below so they don't repeat the shape.
const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
const emptyUsage: Usage = {
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
  totalTokens: 0,
  cost: zeroCost,
};
// Fully-populated openai-completions compat configuration used as the
// baseline for convertMessages tests. `Required<>` forces every flag to be
// spelled out so new compat options can't silently default in tests.
const compat: Required<OpenAICompletionsCompat> = {
  supportsStore: true,
  supportsDeveloperRole: true,
  supportsReasoningEffort: true,
  reasoningEffortMap: {},
  supportsUsageInStreaming: true,
  maxTokensField: "max_completion_tokens",
  requiresToolResultName: false,
  requiresAssistantAfterToolResult: false,
  requiresThinkingAsText: false,
  thinkingFormat: "openai",
  openRouterRouting: {},
  vercelGatewayRouting: {},
  supportsStrictMode: true,
};
/**
 * Builds a successful `read` tool result whose content mixes a text part
 * with a base64 PNG image part ("ZmFrZQ==" is just the word "fake"),
 * keyed to the given tool call id.
 */
function buildToolResult(
  toolCallId: string,
  timestamp: number,
): ToolResultMessage {
  const content: ToolResultMessage["content"] = [
    { type: "text", text: "Read image file [image/png]" },
    { type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
  ];
  return {
    role: "toolResult",
    toolCallId,
    toolName: "read",
    content,
    isError: false,
    timestamp,
  };
}
describe("openai-completions convertMessages", () => {
it("batches tool-result images after consecutive tool results", () => {
const baseModel = getModel("openai", "gpt-4o-mini");
const model: Model<"openai-completions"> = {
...baseModel,
api: "openai-completions",
input: ["text", "image"],
};
const now = Date.now();
const assistantMessage: AssistantMessage = {
role: "assistant",
content: [
{
type: "toolCall",
id: "tool-1",
name: "read",
arguments: { path: "img-1.png" },
},
{
type: "toolCall",
id: "tool-2",
name: "read",
arguments: { path: "img-2.png" },
},
],
api: model.api,
provider: model.provider,
model: model.id,
usage: emptyUsage,
stopReason: "toolUse",
timestamp: now,
};
const context: Context = {
messages: [
{ role: "user", content: "Read the images", timestamp: now - 2 },
assistantMessage,
buildToolResult("tool-1", now + 1),
buildToolResult("tool-2", now + 2),
],
};
const messages = convertMessages(model, context, compat);
const roles = messages.map((message) => message.role);
expect(roles).toEqual(["user", "assistant", "tool", "tool", "user"]);
const imageMessage = messages[messages.length - 1];
expect(imageMessage.role).toBe("user");
expect(Array.isArray(imageMessage.content)).toBe(true);
const imageParts = (
imageMessage.content as Array<{ type?: string }>
).filter((part) => part?.type === "image_url");
expect(imageParts.length).toBe(2);
});
});

View file

@ -0,0 +1,326 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete, getEnvApiKey } from "../src/stream.js";
import type {
AssistantMessage,
Context,
Message,
Tool,
ToolCall,
} from "../src/types.js";
// Schema for the single-argument double_number tool used by these e2e tests.
const testToolSchema = Type.Object({
  value: Type.Number({ description: "A number to double" }),
});

// Tool definition handed to the model; results are faked inline by the tests.
const testTool: Tool<typeof testToolSchema> = {
  parameters: testToolSchema,
  name: "double_number",
  description: "Doubles a number and returns the result",
};
// Live end-to-end tests for replaying OpenAI Responses reasoning items:
// an aborted turn that leaves reasoning-only history, a same-provider
// model switch, and a cross-provider (Anthropic -> Codex) handoff.
// Requires both OPENAI_API_KEY and ANTHROPIC_API_KEY; skipped otherwise.
describe.skipIf(!process.env.OPENAI_API_KEY || !process.env.ANTHROPIC_API_KEY)(
  "OpenAI Responses reasoning replay e2e",
  () => {
    it(
      "skips reasoning-only history after an aborted turn",
      { retry: 2 },
      async () => {
        const model = getModel("openai", "gpt-5-mini");
        const apiKey = getEnvApiKey("openai");
        if (!apiKey) {
          throw new Error("Missing OPENAI_API_KEY");
        }
        const userMessage: Message = {
          role: "user",
          content: "Use the double_number tool to double 21.",
          timestamp: Date.now(),
        };
        // First turn: elicit a real response carrying a signed thinking block.
        const assistantResponse = await complete(
          model,
          {
            systemPrompt: "You are a helpful assistant. Use the tool.",
            messages: [userMessage],
            tools: [testTool],
          },
          {
            apiKey,
            reasoningEffort: "high",
          },
        );
        const thinkingBlock = assistantResponse.content.find(
          (block) => block.type === "thinking" && block.thinkingSignature,
        );
        if (!thinkingBlock || thinkingBlock.type !== "thinking") {
          throw new Error("Missing thinking signature from OpenAI Responses");
        }
        // Simulate an aborted turn whose only surviving content is the
        // reasoning block (no paired function_call).
        const corruptedAssistant: AssistantMessage = {
          ...assistantResponse,
          content: [thinkingBlock],
          stopReason: "aborted",
        };
        const followUp: Message = {
          role: "user",
          content: "Say hello to confirm you can continue.",
          timestamp: Date.now(),
        };
        const context: Context = {
          systemPrompt: "You are a helpful assistant.",
          messages: [userMessage, corruptedAssistant, followUp],
          tools: [testTool],
        };
        const response = await complete(model, context, {
          apiKey,
          reasoningEffort: "high",
        });
        // The key assertion: no 400 error from orphaned reasoning item
        expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe(
          "error",
        );
        expect(response.errorMessage).toBeFalsy();
        // Model should respond (text or tool call)
        expect(response.content.length).toBeGreaterThan(0);
      },
    );
    it(
      "handles same-provider different-model handoff with tool calls",
      { retry: 2 },
      async () => {
        // This tests the scenario where:
        // 1. Model A (gpt-5-mini) generates reasoning + function_call
        // 2. User switches to Model B (gpt-5.2-codex) - same provider, different model
        // 3. transform-messages: isSameModel=false, thinking converted to text
        // 4. But tool call ID still has OpenAI pairing history (fc_xxx paired with rs_xxx)
        // 5. Without fix: OpenAI returns 400 "function_call without required reasoning item"
        // 6. With fix: tool calls/results converted to text, conversation continues
        const modelA = getModel("openai", "gpt-5-mini");
        const modelB = getModel("openai", "gpt-5.2-codex");
        const apiKey = getEnvApiKey("openai");
        if (!apiKey) {
          throw new Error("Missing OPENAI_API_KEY");
        }
        const userMessage: Message = {
          role: "user",
          content: "Use the double_number tool to double 21.",
          timestamp: Date.now(),
        };
        // Get a real response from Model A with reasoning + tool call
        const assistantResponse = await complete(
          modelA,
          {
            systemPrompt:
              "You are a helpful assistant. Always use the tool when asked.",
            messages: [userMessage],
            tools: [testTool],
          },
          {
            apiKey,
            reasoningEffort: "high",
          },
        );
        const toolCallBlock = assistantResponse.content.find(
          (block) => block.type === "toolCall",
        ) as ToolCall | undefined;
        if (!toolCallBlock) {
          throw new Error(
            "Missing tool call from OpenAI Responses - model did not use the tool",
          );
        }
        // Provide a tool result
        const toolResult: Message = {
          role: "toolResult",
          toolCallId: toolCallBlock.id,
          toolName: toolCallBlock.name,
          content: [{ type: "text", text: "42" }],
          isError: false,
          timestamp: Date.now(),
        };
        const followUp: Message = {
          role: "user",
          content: "What was the result? Answer with just the number.",
          timestamp: Date.now(),
        };
        // Now continue with Model B (different model, same provider)
        const context: Context = {
          systemPrompt: "You are a helpful assistant. Answer concisely.",
          messages: [userMessage, assistantResponse, toolResult, followUp],
          tools: [testTool],
        };
        let capturedPayload: any = null;
        const response = await complete(modelB, context, {
          apiKey,
          reasoningEffort: "high",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // The key assertion: no 400 error from orphaned function_call
        expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe(
          "error",
        );
        expect(response.errorMessage).toBeFalsy();
        expect(response.content.length).toBeGreaterThan(0);
        // Log what was sent for debugging
        const input = capturedPayload?.input as any[];
        const functionCalls =
          input?.filter((item: any) => item.type === "function_call") || [];
        const reasoningItems =
          input?.filter((item: any) => item.type === "reasoning") || [];
        console.log("Payload sent to API:");
        console.log("- function_calls:", functionCalls.length);
        console.log("- reasoning items:", reasoningItems.length);
        console.log("- full input:", JSON.stringify(input, null, 2));
        // Verify the model understood the context
        const responseText = response.content
          .filter((b) => b.type === "text")
          .map((b) => (b as any).text)
          .join("");
        expect(responseText).toContain("42");
      },
    );
    it(
      "handles cross-provider handoff from Anthropic to OpenAI Codex",
      { retry: 2 },
      async () => {
        // This tests cross-provider handoff:
        // 1. Anthropic model generates thinking + function_call (toolu_xxx ID)
        // 2. User switches to OpenAI Codex
        // 3. transform-messages: isSameModel=false, thinking converted to text
        // 4. Tool call ID is Anthropic format (toolu_xxx), no OpenAI pairing history
        // 5. Should work because foreign IDs have no pairing expectation
        const anthropicModel = getModel("anthropic", "claude-sonnet-4-5");
        const codexModel = getModel("openai", "gpt-5.2-codex");
        const anthropicApiKey = getEnvApiKey("anthropic");
        const openaiApiKey = getEnvApiKey("openai");
        if (!anthropicApiKey || !openaiApiKey) {
          throw new Error("Missing API keys");
        }
        const userMessage: Message = {
          role: "user",
          content: "Use the double_number tool to double 21.",
          timestamp: Date.now(),
        };
        // Get a real response from Anthropic with thinking + tool call
        const assistantResponse = await complete(
          anthropicModel,
          {
            systemPrompt:
              "You are a helpful assistant. Always use the tool when asked.",
            messages: [userMessage],
            tools: [testTool],
          },
          {
            apiKey: anthropicApiKey,
            thinkingEnabled: true,
            thinkingBudgetTokens: 5000,
          },
        );
        const toolCallBlock = assistantResponse.content.find(
          (block) => block.type === "toolCall",
        ) as ToolCall | undefined;
        if (!toolCallBlock) {
          throw new Error(
            "Missing tool call from Anthropic - model did not use the tool",
          );
        }
        console.log("Anthropic tool call ID:", toolCallBlock.id);
        // Provide a tool result
        const toolResult: Message = {
          role: "toolResult",
          toolCallId: toolCallBlock.id,
          toolName: toolCallBlock.name,
          content: [{ type: "text", text: "42" }],
          isError: false,
          timestamp: Date.now(),
        };
        const followUp: Message = {
          role: "user",
          content: "What was the result? Answer with just the number.",
          timestamp: Date.now(),
        };
        // Now continue with Codex (different provider)
        const context: Context = {
          systemPrompt: "You are a helpful assistant. Answer concisely.",
          messages: [userMessage, assistantResponse, toolResult, followUp],
          tools: [testTool],
        };
        let capturedPayload: any = null;
        const response = await complete(codexModel, context, {
          apiKey: openaiApiKey,
          reasoningEffort: "high",
          onPayload: (payload) => {
            capturedPayload = payload;
          },
        });
        // Log what was sent
        const input = capturedPayload?.input as any[];
        const functionCalls =
          input?.filter((item: any) => item.type === "function_call") || [];
        const reasoningItems =
          input?.filter((item: any) => item.type === "reasoning") || [];
        console.log("Payload sent to Codex:");
        console.log("- function_calls:", functionCalls.length);
        console.log("- reasoning items:", reasoningItems.length);
        if (functionCalls.length > 0) {
          console.log(
            "- function_call IDs:",
            functionCalls.map((fc: any) => fc.id),
          );
        }
        // The key assertion: no 400 error
        expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe(
          "error",
        );
        expect(response.errorMessage).toBeFalsy();
        expect(response.content.length).toBeGreaterThan(0);
        // Verify the model understood the context
        const responseText = response.content
          .filter((b) => b.type === "text")
          .map((b) => (b as any).text)
          .join("");
        expect(responseText).toContain("42");
      },
    );
  },
);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";
import { getModel, supportsXhigh } from "../src/models.js";
describe("supportsXhigh", () => {
it("returns true for Anthropic Opus 4.6 on anthropic-messages API", () => {
const model = getModel("anthropic", "claude-opus-4-6");
expect(model).toBeDefined();
expect(supportsXhigh(model!)).toBe(true);
});
it("returns false for non-Opus Anthropic models", () => {
const model = getModel("anthropic", "claude-sonnet-4-5");
expect(model).toBeDefined();
expect(supportsXhigh(model!)).toBe(false);
});
it("returns true for GPT-5.4 models", () => {
const model = getModel("openai-codex", "gpt-5.4");
expect(model).toBeDefined();
expect(supportsXhigh(model!)).toBe(true);
});
it("returns false for OpenRouter Opus 4.6 (openai-completions API)", () => {
const model = getModel("openrouter", "anthropic/claude-opus-4.6");
expect(model).toBeDefined();
expect(supportsXhigh(model!)).toBe(false);
});
});

View file

@ -0,0 +1,397 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Api, Context, Model, StreamOptions } from "../src/types.js";
type StreamOptionsWithExtras = StreamOptions & Record<string, unknown>;
import {
hasAzureOpenAICredentials,
resolveAzureDeploymentName,
} from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests).
// Destructure straight from Promise.all — positions match the lookups below.
const [
  anthropicOAuthToken,
  githubCopilotToken,
  geminiCliToken,
  antigravityToken,
  openaiCodexToken,
] = await Promise.all([
  resolveApiKey("anthropic"),
  resolveApiKey("github-copilot"),
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("google-antigravity"),
  resolveApiKey("openai-codex"),
]);
/**
 * Drives a streaming completion, aborts once ~1000 characters of text or
 * thinking deltas have arrived, then asserts which providers still report
 * token usage on the aborted result.
 *
 * NOTE(review): `stream(...)` is not awaited here, while the sibling abort
 * test in this suite awaits it before iterating — confirm `stream` returns
 * the iterable/result object synchronously (or a thenable hybrid) so both
 * call forms behave the same.
 */
async function testTokensOnAbort<TApi extends Api>(
  llm: Model<TApi>,
  options: StreamOptionsWithExtras = {},
) {
  // Prompt chosen to reliably produce a long stream so the abort fires
  // mid-generation rather than after completion.
  const context: Context = {
    messages: [
      {
        role: "user",
        content:
          "Write a long poem with 20 stanzas about the beauty of nature.",
        timestamp: Date.now(),
      },
    ],
    systemPrompt: "You are a helpful assistant.",
  };
  const controller = new AbortController();
  const response = stream(llm, context, {
    ...options,
    signal: controller.signal,
  });
  let abortFired = false;
  let text = "";
  for await (const event of response) {
    if (
      !abortFired &&
      (event.type === "text_delta" || event.type === "thinking_delta")
    ) {
      text += event.delta;
      if (text.length >= 1000) {
        abortFired = true;
        controller.abort();
      }
    }
  }
  const msg = await response.result();
  expect(msg.stopReason).toBe("aborted");
  // OpenAI providers, OpenAI Codex, Gemini CLI, zai, Amazon Bedrock, and the GPT-OSS model on Antigravity only send usage in the final chunk,
  // so when aborted they have no token stats. Anthropic and Google send usage information early in the stream.
  // MiniMax reports input tokens but not output tokens when aborted.
  if (
    llm.api === "openai-completions" ||
    llm.api === "mistral-conversations" ||
    llm.api === "openai-responses" ||
    llm.api === "azure-openai-responses" ||
    llm.api === "openai-codex-responses" ||
    llm.provider === "google-gemini-cli" ||
    llm.provider === "zai" ||
    llm.provider === "amazon-bedrock" ||
    llm.provider === "vercel-ai-gateway" ||
    (llm.provider === "google-antigravity" && llm.id.includes("gpt-oss"))
  ) {
    expect(msg.usage.input).toBe(0);
    expect(msg.usage.output).toBe(0);
  } else if (llm.provider === "minimax") {
    // MiniMax reports input tokens early but output tokens only in final chunk
    expect(msg.usage.input).toBeGreaterThan(0);
    expect(msg.usage.output).toBe(0);
  } else {
    // Early-usage providers: both counters must be populated on abort.
    expect(msg.usage.input).toBeGreaterThan(0);
    expect(msg.usage.output).toBeGreaterThan(0);
    // Some providers (Antigravity, Copilot) have zero cost rates
    if (llm.cost.input > 0) {
      expect(msg.usage.cost.input).toBeGreaterThan(0);
      expect(msg.usage.cost.total).toBeGreaterThan(0);
    }
  }
}
describe("Token Statistics on Abort", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
const llm = getModel("google", "gemini-2.5-flash");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm, { thinking: { enabled: true } });
},
);
});
describe.skipIf(!process.env.OPENAI_API_KEY)(
"OpenAI Completions Provider",
() => {
const { compat: _compat, ...baseModel } = getModel(
"openai",
"gpt-4o-mini",
)!;
void _compat;
const llm: Model<"openai-completions"> = {
...baseModel,
api: "openai-completions",
};
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
},
);
describe.skipIf(!process.env.OPENAI_API_KEY)(
"OpenAI Responses Provider",
() => {
const llm = getModel("openai", "gpt-5-mini");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
},
);
describe.skipIf(!hasAzureOpenAICredentials())(
"Azure OpenAI Responses Provider",
() => {
const llm = getModel("azure-openai-responses", "gpt-4o-mini");
const azureDeploymentName = resolveAzureDeploymentName(llm.id);
const azureOptions = azureDeploymentName ? { azureDeploymentName } : {};
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm, azureOptions);
},
);
},
);
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const llm = getModel("xai", "grok-3-fast");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
const llm = getModel("groq", "openai/gpt-oss-20b");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => {
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
const llm = getModel("zai", "glm-4.5-flash");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
const llm = getModel("mistral", "devstral-medium-latest");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.KIMI_API_KEY)("Kimi For Coding Provider", () => {
const llm = getModel("kimi-coding", "kimi-k2-thinking");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
"Vercel AI Gateway Provider",
() => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
},
);
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testTokensOnAbort(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
});
describe("OpenAI Codex Provider", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("openai-codex", "gpt-5.2-codex");
await testTokensOnAbort(llm, { apiKey: openaiCodexToken });
},
);
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
const llm = getModel(
"amazon-bedrock",
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
);
it(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm);
},
);
});
});

View file

@ -0,0 +1,320 @@
/**
* Tool Call ID Normalization Tests
*
* Tests that tool call IDs from OpenAI Responses API (github-copilot, openai-codex, opencode)
* are properly normalized when sent to other providers.
*
* OpenAI Responses API generates IDs in format: {call_id}|{id}
* where {id} can be 400+ chars with special characters (+, /, =).
*
* Regression test for: https://github.com/badlogic/pi-mono/issues/1022
*/
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { completeSimple, getEnvApiKey } from "../src/stream.js";
import type {
AssistantMessage,
Message,
Tool,
ToolResultMessage,
} from "../src/types.js";
import { resolveApiKey } from "./oauth.js";
// Resolve API keys. The two OAuth lookups are independent async calls, so
// run them in parallel with Promise.all (matching the pattern used by the
// sibling test modules) instead of awaiting them one after the other at
// module top level. openrouterKey is a synchronous env lookup.
const [copilotToken, codexToken] = await Promise.all([
  resolveApiKey("github-copilot"),
  resolveApiKey("openai-codex"),
]);
const openrouterKey = getEnvApiKey("openrouter");
// Minimal one-argument tool for testing: the model is asked to echo a string.
const echoToolSchema = Type.Object({
  message: Type.String({ description: "Message to echo back" }),
});
const echoTool: Tool<typeof echoToolSchema> = {
  parameters: echoToolSchema,
  name: "echo",
  description: "Echoes the message back",
};
/**
* Test 1: Live cross-provider handoff
*
* 1. Use github-copilot gpt-5.2-codex to generate a tool call
* 2. Switch to openrouter openai/gpt-5.2-codex and complete
* 3. Switch to openai-codex gpt-5.2-codex and complete
*
* Both should succeed without "call_id too long" errors.
*/
describe("Tool Call ID Normalization - Live Handoff", () => {
it.skipIf(!copilotToken || !openrouterKey)(
"github-copilot -> openrouter should normalize pipe-separated IDs",
async () => {
const copilotModel = getModel("github-copilot", "gpt-5.2-codex");
const openrouterModel = getModel("openrouter", "openai/gpt-5.2-codex");
// Step 1: Generate tool call with github-copilot
const userMessage: Message = {
role: "user",
content: "Use the echo tool to echo 'hello world'",
timestamp: Date.now(),
};
const assistantResponse = await completeSimple(
copilotModel,
{
systemPrompt:
"You are a helpful assistant. Use the echo tool when asked.",
messages: [userMessage],
tools: [echoTool],
},
{ apiKey: copilotToken },
);
expect(
assistantResponse.stopReason,
`Copilot error: ${assistantResponse.errorMessage}`,
).toBe("toolUse");
const toolCall = assistantResponse.content.find(
(c) => c.type === "toolCall",
);
expect(toolCall).toBeDefined();
expect(toolCall!.type).toBe("toolCall");
// Verify it's a pipe-separated ID (OpenAI Responses format)
if (toolCall?.type === "toolCall") {
expect(toolCall.id).toContain("|");
console.log(
`Tool call ID from github-copilot: ${toolCall.id.slice(0, 80)}...`,
);
}
// Create tool result
const toolResult: ToolResultMessage = {
role: "toolResult",
toolCallId: (toolCall as any).id,
toolName: "echo",
content: [{ type: "text", text: "hello world" }],
isError: false,
timestamp: Date.now(),
};
// Step 2: Complete with openrouter (uses openai-completions API)
const openrouterResponse = await completeSimple(
openrouterModel,
{
systemPrompt: "You are a helpful assistant.",
messages: [
userMessage,
assistantResponse,
toolResult,
{ role: "user", content: "Say hi", timestamp: Date.now() },
],
tools: [echoTool],
},
{ apiKey: openrouterKey },
);
// Should NOT fail with "call_id too long" error
expect(
openrouterResponse.stopReason,
`OpenRouter error: ${openrouterResponse.errorMessage}`,
).not.toBe("error");
expect(openrouterResponse.errorMessage).toBeUndefined();
},
60000,
);
it.skipIf(!copilotToken || !codexToken)(
"github-copilot -> openai-codex should normalize pipe-separated IDs",
async () => {
const copilotModel = getModel("github-copilot", "gpt-5.2-codex");
const codexModel = getModel("openai-codex", "gpt-5.2-codex");
// Step 1: Generate tool call with github-copilot
const userMessage: Message = {
role: "user",
content: "Use the echo tool to echo 'test message'",
timestamp: Date.now(),
};
const assistantResponse = await completeSimple(
copilotModel,
{
systemPrompt:
"You are a helpful assistant. Use the echo tool when asked.",
messages: [userMessage],
tools: [echoTool],
},
{ apiKey: copilotToken },
);
expect(
assistantResponse.stopReason,
`Copilot error: ${assistantResponse.errorMessage}`,
).toBe("toolUse");
const toolCall = assistantResponse.content.find(
(c) => c.type === "toolCall",
);
expect(toolCall).toBeDefined();
// Create tool result
const toolResult: ToolResultMessage = {
role: "toolResult",
toolCallId: (toolCall as any).id,
toolName: "echo",
content: [{ type: "text", text: "test message" }],
isError: false,
timestamp: Date.now(),
};
// Step 2: Complete with openai-codex (uses openai-codex-responses API)
const codexResponse = await completeSimple(
codexModel,
{
systemPrompt: "You are a helpful assistant.",
messages: [
userMessage,
assistantResponse,
toolResult,
{ role: "user", content: "Say hi", timestamp: Date.now() },
],
tools: [echoTool],
},
{ apiKey: codexToken },
);
// Should NOT fail with ID validation error
expect(
codexResponse.stopReason,
`Codex error: ${codexResponse.errorMessage}`,
).not.toBe("error");
expect(codexResponse.errorMessage).toBeUndefined();
},
60000,
);
});
/**
* Test 2: Prefilled context with exact failing IDs from issue #1022
*
* Uses the exact tool call ID format that caused the error:
* "call_xxx|very_long_base64_with_special_chars+/="
*/
describe("Tool Call ID Normalization - Prefilled Context", () => {
// Exact tool call ID from issue #1022 JSONL
const FAILING_TOOL_CALL_ID =
"call_pAYbIr76hXIjncD9UE4eGfnS|t5nnb2qYMFWGSsr13fhCd1CaCu3t3qONEPuOudu4HSVEtA8YJSL6FAZUxvoOoD792VIJWl91g87EdqsCWp9krVsdBysQoDaf9lMCLb8BS4EYi4gQd5kBQBYLlgD71PYwvf+TbMD9J9/5OMD42oxSRj8H+vRf78/l2Xla33LWz4nOgsddBlbvabICRs8GHt5C9PK5keFtzyi3lsyVKNlfduK3iphsZqs4MLv4zyGJnvZo/+QzShyk5xnMSQX/f98+aEoNflEApCdEOXipipgeiNWnpFSHbcwmMkZoJhURNu+JEz3xCh1mrXeYoN5o+trLL3IXJacSsLYXDrYTipZZbJFRPAucgbnjYBC+/ZzJOfkwCs+Gkw7EoZR7ZQgJ8ma+9586n4tT4cI8DEhBSZsWMjrCt8dxKg==";
// Build prefilled context with the failing ID
function buildPrefilledMessages(): Message[] {
const userMessage: Message = {
role: "user",
content: "Use the echo tool to echo 'hello'",
timestamp: Date.now() - 2000,
};
const assistantMessage: AssistantMessage = {
role: "assistant",
content: [
{
type: "toolCall",
id: FAILING_TOOL_CALL_ID,
name: "echo",
arguments: { message: "hello" },
},
],
api: "openai-responses",
provider: "github-copilot",
model: "gpt-5.2-codex",
usage: {
input: 100,
output: 50,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 150,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
timestamp: Date.now() - 1500,
};
const toolResult: ToolResultMessage = {
role: "toolResult",
toolCallId: FAILING_TOOL_CALL_ID,
toolName: "echo",
content: [{ type: "text", text: "hello" }],
isError: false,
timestamp: Date.now() - 1000,
};
const followUpUser: Message = {
role: "user",
content: "Say hi",
timestamp: Date.now(),
};
return [userMessage, assistantMessage, toolResult, followUpUser];
}
it.skipIf(!openrouterKey)(
"openrouter should handle prefilled context with long pipe-separated IDs",
async () => {
const model = getModel("openrouter", "openai/gpt-5.2-codex");
const messages = buildPrefilledMessages();
const response = await completeSimple(
model,
{
systemPrompt: "You are a helpful assistant.",
messages,
tools: [echoTool],
},
{ apiKey: openrouterKey },
);
// Should NOT fail with "call_id too long" error
expect(
response.stopReason,
`OpenRouter error: ${response.errorMessage}`,
).not.toBe("error");
if (response.errorMessage) {
expect(response.errorMessage).not.toContain("call_id");
expect(response.errorMessage).not.toContain("too long");
}
},
30000,
);
it.skipIf(!codexToken)(
"openai-codex should handle prefilled context with long pipe-separated IDs",
async () => {
const model = getModel("openai-codex", "gpt-5.2-codex");
const messages = buildPrefilledMessages();
const response = await completeSimple(
model,
{
systemPrompt: "You are a helpful assistant.",
messages,
tools: [echoTool],
},
{ apiKey: codexToken },
);
// Should NOT fail with ID validation error
expect(
response.stopReason,
`Codex error: ${response.errorMessage}`,
).not.toBe("error");
if (response.errorMessage) {
expect(response.errorMessage).not.toContain("id");
expect(response.errorMessage).not.toContain("additional characters");
}
},
30000,
);
});

View file

@ -0,0 +1,412 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, StreamOptions, Tool } from "../src/types.js";
type StreamOptionsWithExtras = StreamOptions & Record<string, unknown>;
import {
hasAzureOpenAICredentials,
resolveAzureDeploymentName,
} from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests).
// Destructure straight from Promise.all — positions match the lookups below.
const [
  anthropicOAuthToken,
  githubCopilotToken,
  geminiCliToken,
  antigravityToken,
  openaiCodexToken,
] = await Promise.all([
  resolveApiKey("anthropic"),
  resolveApiKey("github-copilot"),
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("google-antigravity"),
  resolveApiKey("openai-codex"),
]);
// One-field calculator tool; tests only verify that the model invokes it.
const calculateSchema = Type.Object({
  expression: Type.String({
    description: "The mathematical expression to evaluate",
  }),
});
const calculateTool: Tool = {
  parameters: calculateSchema,
  name: "calculate",
  description: "Evaluate mathematical expressions",
};
/**
 * Shared scenario: provoke a tool call, then continue the conversation
 * WITHOUT ever supplying the tool result (as happens when a call is
 * aborted/cancelled). The provider adapter is expected to filter out the
 * orphaned tool call so the follow-up request succeeds.
 */
async function testToolCallWithoutResult<TApi extends Api>(
  model: Model<TApi>,
  options: StreamOptionsWithExtras = {},
) {
  // Conversation that offers the calculate tool and asks for a calculation.
  const context: Context = {
    systemPrompt:
      "You are a helpful assistant. Use the calculate tool when asked to perform calculations.",
    messages: [
      {
        role: "user",
        content: "Please calculate 25 * 18 using the calculate tool.",
        timestamp: Date.now(),
      },
    ],
    tools: [calculateTool],
  };
  // The assistant's first reply should contain at least one tool call.
  const firstResponse = await complete(model, context, options);
  context.messages.push(firstResponse);
  console.log("First response:", JSON.stringify(firstResponse, null, 2));
  const hasToolCall = firstResponse.content.some(
    (block) => block.type === "toolCall",
  );
  expect(hasToolCall).toBe(true);
  if (!hasToolCall) {
    throw new Error(
      "Expected assistant to make a tool call, but none was found",
    );
  }
  // Continue with a plain user message while the tool call is still
  // unanswered — no tool result is ever added to the context.
  context.messages.push({
    role: "user",
    content: "Never mind, just tell me what is 2+2?",
    timestamp: Date.now(),
  });
  // With the orphaned tool call filtered out, this request must not error.
  const secondResponse = await complete(model, context, options);
  console.log("Second response:", JSON.stringify(secondResponse, null, 2));
  expect(secondResponse.stopReason).not.toBe("error");
  expect(secondResponse.content.length).toBeGreaterThan(0);
  // The model may answer directly or issue a fresh tool call — both are fine;
  // the point is that the request itself succeeded.
  const textParts: string[] = [];
  let toolCallCount = 0;
  for (const block of secondResponse.content) {
    if (block.type === "text") {
      textParts.push(block.text);
    } else if (block.type === "toolCall") {
      toolCallCount += 1;
    }
  }
  const textContent = textParts.join(" ");
  expect(toolCallCount || textContent.length).toBeGreaterThan(0);
  console.log("Answer:", textContent);
  expect(["stop", "toolUse"]).toContain(secondResponse.stopReason);
}
/**
 * Runs the orphaned-tool-call scenario against every configured provider.
 * Each provider group is skipped unless its API key (env var) or OAuth token
 * (~/.pi/agent/oauth.json) is present.
 */
describe("Tool Call Without Result Tests", () => {
  // =========================================================================
  // API Key-based providers
  // =========================================================================
  describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
    const model = getModel("google", "gemini-2.5-flash");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.OPENAI_API_KEY)(
    "OpenAI Completions Provider",
    () => {
      // Force the completions API: strip `compat` and override `api` on the
      // registry entry (which defaults to the responses API for this model).
      const { compat: _compat, ...baseModel } = getModel(
        "openai",
        "gpt-4o-mini",
      )!;
      void _compat;
      const model: Model<"openai-completions"> = {
        ...baseModel,
        api: "openai-completions",
      };
      it(
        "should filter out tool calls without corresponding tool results",
        { retry: 3, timeout: 30000 },
        async () => {
          await testToolCallWithoutResult(model);
        },
      );
    },
  );
  describe.skipIf(!process.env.OPENAI_API_KEY)(
    "OpenAI Responses Provider",
    () => {
      const model = getModel("openai", "gpt-5-mini");
      it(
        "should filter out tool calls without corresponding tool results",
        { retry: 3, timeout: 30000 },
        async () => {
          await testToolCallWithoutResult(model);
        },
      );
    },
  );
  describe.skipIf(!hasAzureOpenAICredentials())(
    "Azure OpenAI Responses Provider",
    () => {
      const model = getModel("azure-openai-responses", "gpt-4o-mini");
      // Azure routes by deployment name, not model id.
      const azureDeploymentName = resolveAzureDeploymentName(model.id);
      const azureOptions = azureDeploymentName ? { azureDeploymentName } : {};
      it(
        "should filter out tool calls without corresponding tool results",
        { retry: 3, timeout: 30000 },
        async () => {
          await testToolCallWithoutResult(model, azureOptions);
        },
      );
    },
  );
  describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
    const model = getModel("anthropic", "claude-3-5-haiku-20241022");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
    const model = getModel("xai", "grok-3-fast");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
    const model = getModel("groq", "openai/gpt-oss-20b");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
    const model = getModel("cerebras", "gpt-oss-120b");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => {
    const model = getModel("huggingface", "moonshotai/Kimi-K2.5");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
    const model = getModel("zai", "glm-4.5-flash");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
    const model = getModel("mistral", "devstral-medium-latest");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
    const model = getModel("minimax", "MiniMax-M2.1");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.KIMI_API_KEY)("Kimi For Coding Provider", () => {
    const model = getModel("kimi-coding", "kimi-k2-thinking");
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
    "Vercel AI Gateway Provider",
    () => {
      const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
      it(
        "should filter out tool calls without corresponding tool results",
        { retry: 3, timeout: 30000 },
        async () => {
          await testToolCallWithoutResult(model);
        },
      );
    },
  );
  describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
    const model = getModel(
      "amazon-bedrock",
      "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
    );
    it(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model);
      },
    );
  });
  // =========================================================================
  // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
  // =========================================================================
  describe("Anthropic OAuth Provider", () => {
    const model = getModel("anthropic", "claude-3-5-haiku-20241022");
    it.skipIf(!anthropicOAuthToken)(
      "should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        await testToolCallWithoutResult(model, { apiKey: anthropicOAuthToken });
      },
    );
  });
  describe("GitHub Copilot Provider", () => {
    it.skipIf(!githubCopilotToken)(
      "gpt-4o - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("github-copilot", "gpt-4o");
        await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
      },
    );
    it.skipIf(!githubCopilotToken)(
      "claude-sonnet-4 - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("github-copilot", "claude-sonnet-4");
        await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
      },
    );
  });
  describe("Google Gemini CLI Provider", () => {
    it.skipIf(!geminiCliToken)(
      "gemini-2.5-flash - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("google-gemini-cli", "gemini-2.5-flash");
        await testToolCallWithoutResult(model, { apiKey: geminiCliToken });
      },
    );
  });
  describe("Google Antigravity Provider", () => {
    it.skipIf(!antigravityToken)(
      "gemini-3-flash - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("google-antigravity", "gemini-3-flash");
        await testToolCallWithoutResult(model, { apiKey: antigravityToken });
      },
    );
    it.skipIf(!antigravityToken)(
      "claude-sonnet-4-5 - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("google-antigravity", "claude-sonnet-4-5");
        await testToolCallWithoutResult(model, { apiKey: antigravityToken });
      },
    );
    it.skipIf(!antigravityToken)(
      "gpt-oss-120b-medium - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("google-antigravity", "gpt-oss-120b-medium");
        await testToolCallWithoutResult(model, { apiKey: antigravityToken });
      },
    );
  });
  describe("OpenAI Codex Provider", () => {
    it.skipIf(!openaiCodexToken)(
      "gpt-5.2-codex - should filter out tool calls without corresponding tool results",
      { retry: 3, timeout: 30000 },
      async () => {
        const model = getModel("openai-codex", "gpt-5.2-codex");
        await testToolCallWithoutResult(model, { apiKey: openaiCodexToken });
      },
    );
  });
});

View file

@ -0,0 +1,785 @@
/**
* Test totalTokens field across all providers.
*
* totalTokens represents the total number of tokens processed by the LLM,
* including input (with cache) and output (with thinking). This is the
* base for calculating context size for the next request.
*
* - OpenAI Completions: Uses native total_tokens field
* - OpenAI Responses: Uses native total_tokens field
* - Google: Uses native totalTokenCount field
* - Anthropic: Computed as input + output + cacheRead + cacheWrite
* - Other OpenAI-compatible providers: Uses native total_tokens field
*/
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type {
Api,
Context,
Model,
StreamOptions,
Usage,
} from "../src/types.js";
type StreamOptionsWithExtras = StreamOptions & Record<string, unknown>;
import {
hasAzureOpenAICredentials,
resolveAzureDeploymentName,
} from "./azure-utils.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// OAuth credentials are resolved once at module load (top-level await), so
// every token is available before any test in this file runs.
const [
  anthropicOAuthToken,
  githubCopilotToken,
  geminiCliToken,
  antigravityToken,
  openaiCodexToken,
] = await Promise.all([
  resolveApiKey("anthropic"),
  resolveApiKey("github-copilot"),
  resolveApiKey("google-gemini-cli"),
  resolveApiKey("google-antigravity"),
  resolveApiKey("openai-codex"),
]);
// A deliberately long system prompt (>2k bytes for most providers) so that
// repeated requests engage the provider's prompt cache.
const LOREM_PARAGRAPH =
  "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.";
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
Here is some additional context that makes this system prompt long enough to trigger caching:
${new Array(50).fill(LOREM_PARAGRAPH).join("\n\n")}
Remember: Always be helpful and concise.`;
/**
 * Issues two sequential completions that share the same long system prompt
 * and returns the usage from each. The second request replays the first turn
 * so the provider can serve the shared prefix from its prompt cache.
 */
async function testTotalTokensWithCache<TApi extends Api>(
  llm: Model<TApi>,
  options: StreamOptionsWithExtras = {},
): Promise<{ first: Usage; second: Usage }> {
  // First request — nothing cached yet.
  const firstContext: Context = {
    systemPrompt: LONG_SYSTEM_PROMPT,
    messages: [
      {
        role: "user",
        content: "What is 2 + 2? Reply with just the number.",
        timestamp: Date.now(),
      },
    ],
  };
  const firstResponse = await complete(llm, firstContext, options);
  expect(firstResponse.stopReason).toBe("stop");
  // Second request — same system prompt plus the prior exchange, which
  // should trigger a cache read on providers that support it.
  const secondContext: Context = {
    systemPrompt: LONG_SYSTEM_PROMPT,
    messages: [
      ...firstContext.messages,
      firstResponse,
      {
        role: "user",
        content: "What is 3 + 3? Reply with just the number.",
        timestamp: Date.now(),
      },
    ],
  };
  const secondResponse = await complete(llm, secondContext, options);
  expect(secondResponse.stopReason).toBe("stop");
  return { first: firstResponse.usage, second: secondResponse.usage };
}
// Pretty-prints one Usage record alongside the locally computed component sum,
// so a failing totalTokens assertion is easy to diagnose from the log.
function logUsage(label: string, usage: Usage) {
  const { input, output, cacheRead, cacheWrite, totalTokens } = usage;
  const computed = input + output + cacheRead + cacheWrite;
  console.log(`  ${label}:`);
  console.log(
    `    input: ${input}, output: ${output}, cacheRead: ${cacheRead}, cacheWrite: ${cacheWrite}`,
  );
  console.log(`    totalTokens: ${totalTokens}, computed: ${computed}`);
}
// Core invariant under test: totalTokens must equal the sum of all four
// usage components (input + output + cacheRead + cacheWrite).
function assertTotalTokensEqualsComponents(usage: Usage) {
  const { input, output, cacheRead, cacheWrite, totalTokens } = usage;
  expect(totalTokens).toBe(input + output + cacheRead + cacheWrite);
}
describe("totalTokens field", () => {
// =========================================================================
// Anthropic
// =========================================================================
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
it(
"claude-3-5-haiku - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
console.log(`\nAnthropic / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.ANTHROPIC_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache =
second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
describe("Anthropic (OAuth)", () => {
it.skipIf(!anthropicOAuthToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
console.log(`\nAnthropic OAuth / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: anthropicOAuthToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache =
second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
// =========================================================================
// OpenAI
// =========================================================================
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
it(
"gpt-4o-mini - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const { compat: _compat, ...baseModel } = getModel(
"openai",
"gpt-4o-mini",
)!;
void _compat;
const llm: Model<"openai-completions"> = {
...baseModel,
api: "openai-completions",
};
console.log(`\nOpenAI Completions / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
it(
"gpt-4o - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openai", "gpt-4o");
console.log(`\nOpenAI Responses / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
describe.skipIf(!hasAzureOpenAICredentials())(
"Azure OpenAI Responses",
() => {
it(
"gpt-4o-mini - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("azure-openai-responses", "gpt-4o-mini");
const azureDeploymentName = resolveAzureDeploymentName(llm.id);
const azureOptions = azureDeploymentName
? { azureDeploymentName }
: {};
console.log(`\nAzure OpenAI Responses / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(
llm,
azureOptions,
);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
},
);
// =========================================================================
// Google
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
it(
"gemini-2.0-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google", "gemini-2.0-flash");
console.log(`\nGoogle / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// xAI
// =========================================================================
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it(
"grok-3-fast - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("xai", "grok-3-fast");
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.XAI_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Groq
// =========================================================================
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
it(
"openai/gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("groq", "openai/gpt-oss-120b");
console.log(`\nGroq / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.GROQ_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Cerebras
// =========================================================================
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
it(
"gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("cerebras", "gpt-oss-120b");
console.log(`\nCerebras / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.CEREBRAS_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Hugging Face
// =========================================================================
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => {
it(
"Kimi-K2.5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
console.log(`\nHugging Face / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.HF_TOKEN,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// z.ai
// =========================================================================
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it(
"glm-4.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("zai", "glm-4.5-flash");
console.log(`\nz.ai / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.ZAI_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Mistral
// =========================================================================
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => {
it(
"devstral-medium-latest - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("mistral", "devstral-medium-latest");
console.log(`\nMistral / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.MISTRAL_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// MiniMax
// =========================================================================
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
it(
"MiniMax-M2.1 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("minimax", "MiniMax-M2.1");
console.log(`\nMiniMax / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.MINIMAX_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Kimi For Coding
// =========================================================================
describe.skipIf(!process.env.KIMI_API_KEY)("Kimi For Coding", () => {
it(
"kimi-k2-thinking - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("kimi-coding", "kimi-k2-thinking");
console.log(`\nKimi For Coding / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.KIMI_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Vercel AI Gateway
// =========================================================================
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
it(
"google/gemini-2.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
console.log(`\nVercel AI Gateway / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.AI_GATEWAY_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// OpenRouter - Multiple backend providers
// =========================================================================
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
it(
"anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.OPENROUTER_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it(
"deepseek/deepseek-chat - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "deepseek/deepseek-chat");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.OPENROUTER_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it(
"mistralai/mistral-small-3.2-24b-instruct - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel(
"openrouter",
"mistralai/mistral-small-3.2-24b-instruct",
);
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.OPENROUTER_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it(
"google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.OPENROUTER_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it(
"meta-llama/llama-4-maverick - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: process.env.OPENROUTER_API_KEY,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// GitHub Copilot (OAuth)
// =========================================================================
describe("GitHub Copilot (OAuth)", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: githubCopilotToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: githubCopilotToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Gemini CLI (OAuth)
// =========================================================================
describe("Google Gemini CLI (OAuth)", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
console.log(`\nGoogle Gemini CLI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: geminiCliToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Antigravity (OAuth)
// =========================================================================
describe("Google Antigravity (OAuth)", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: antigravityToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: antigravityToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, {
apiKey: antigravityToken,
});
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
it(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel(
"amazon-bedrock",
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
);
console.log(`\nAmazon Bedrock / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// OpenAI Codex (OAuth)
// =========================================================================
describe("OpenAI Codex (OAuth)", () => {
  it.skipIf(!openaiCodexToken)(
    "gpt-5.2-codex - should return totalTokens equal to sum of components",
    { retry: 3, timeout: 60000 },
    async () => {
      // Uses the OAuth token resolved at module load; skipped when absent.
      const llm = getModel("openai-codex", "gpt-5.2-codex");
      console.log(`\nOpenAI Codex / ${llm.id}:`);
      const usage = await testTotalTokensWithCache(llm, {
        apiKey: openaiCodexToken,
      });
      logUsage("First request", usage.first);
      logUsage("Second request", usage.second);
      assertTotalTokensEqualsComponents(usage.first);
      assertTotalTokensEqualsComponents(usage.second);
    },
  );
});
});

View file

@ -0,0 +1,140 @@
import { describe, expect, it } from "vitest";
import { transformMessages } from "../src/providers/transform-messages.js";
import type {
AssistantMessage,
Message,
Model,
ToolCall,
} from "../src/types.js";
// Normalize function matching what anthropic.ts uses
/**
 * Mirrors the tool-call id normalization used by anthropic.ts: keep only
 * [a-zA-Z0-9_-] characters (others become "_") and cap the length at 64.
 * Model and source message are part of the callback signature but unused here.
 */
function anthropicNormalizeToolCallId(
  id: string,
  _model: Model<"anthropic-messages">,
  _source: AssistantMessage,
): string {
  const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
  return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
}
/** Builds the Copilot-hosted Claude model fixture used as the migration target. */
function makeCopilotClaudeModel(): Model<"anthropic-messages"> {
  // Zeroed pricing: these tests never compute real cost.
  const freeCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
  return {
    id: "claude-sonnet-4",
    name: "Claude Sonnet 4",
    provider: "github-copilot",
    api: "anthropic-messages",
    baseUrl: "https://api.individual.githubcopilot.com",
    input: ["text", "image"],
    reasoning: true,
    contextWindow: 128000,
    maxTokens: 16000,
    cost: freeCost,
  };
}
describe("OpenAI to Anthropic session migration for Copilot Claude", () => {
  // Fresh zeroed usage stats for each synthetic assistant message
  // (a factory, not a shared constant, so fixtures never alias state).
  const emptyUsage = () => ({
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: 0,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
  });

  it("converts thinking blocks to plain text when source model differs", () => {
    const target = makeCopilotClaudeModel();
    const history: Message[] = [
      { role: "user", content: "hello", timestamp: Date.now() },
      {
        role: "assistant",
        api: "openai-completions",
        provider: "github-copilot",
        model: "gpt-4o",
        content: [
          {
            type: "thinking",
            thinking: "Let me think about this...",
            thinkingSignature: "reasoning_content",
          },
          { type: "text", text: "Hi there!" },
        ],
        usage: emptyUsage(),
        stopReason: "stop",
        timestamp: Date.now(),
      },
    ];

    const transformed = transformMessages(
      history,
      target,
      anthropicNormalizeToolCallId,
    );
    const assistant = transformed.find(
      (m) => m.role === "assistant",
    ) as AssistantMessage;

    // The gpt-4o thinking block must be downgraded to plain text because the
    // source model differs from the Anthropic target.
    const thinkingBlocks = assistant.content.filter(
      (b) => b.type === "thinking",
    );
    const textBlocks = assistant.content.filter((b) => b.type === "text");
    expect(thinkingBlocks).toHaveLength(0);
    expect(textBlocks.length).toBeGreaterThanOrEqual(2);
  });

  it("removes thoughtSignature from tool calls when migrating between models", () => {
    const target = makeCopilotClaudeModel();
    const history: Message[] = [
      { role: "user", content: "run a command", timestamp: Date.now() },
      {
        role: "assistant",
        api: "openai-responses",
        provider: "github-copilot",
        model: "gpt-5",
        content: [
          {
            type: "toolCall",
            id: "call_123",
            name: "bash",
            arguments: { command: "ls" },
            thoughtSignature: JSON.stringify({
              type: "reasoning.encrypted",
              id: "call_123",
              data: "encrypted",
            }),
          },
        ],
        usage: emptyUsage(),
        stopReason: "toolUse",
        timestamp: Date.now(),
      },
      {
        role: "toolResult",
        toolCallId: "call_123",
        toolName: "bash",
        content: [{ type: "text", text: "output" }],
        isError: false,
        timestamp: Date.now(),
      },
    ];

    const transformed = transformMessages(
      history,
      target,
      anthropicNormalizeToolCallId,
    );
    const assistant = transformed.find(
      (m) => m.role === "assistant",
    ) as AssistantMessage;
    const call = assistant.content.find(
      (b) => b.type === "toolCall",
    ) as ToolCall;

    // Encrypted reasoning signatures are model-specific and must be dropped.
    expect(call.thoughtSignature).toBeUndefined();
  });
});

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,81 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Context, Model } from "../src/types.js";
/**
 * Builds a one-message context with a randomized arithmetic prompt so
 * repeated runs don't hit provider-side prompt caches.
 */
function makeContext(): Context {
  const a = Math.floor(Math.random() * 100);
  const b = Math.floor(Math.random() * 100);
  return {
    messages: [
      {
        role: "user",
        content: `What is ${a} + ${b}? Think step by step.`,
        timestamp: Date.now(),
      },
    ],
  };
}
describe.skipIf(!process.env.OPENAI_API_KEY)("xhigh reasoning", () => {
  describe("codex-max (supports xhigh)", () => {
    // Note: codex models only support the responses API, not chat completions
    // Explicit 60s timeout: a live xhigh reasoning stream routinely exceeds
    // vitest's 5s default, matching the other network suites in this repo.
    it("should work with openai-responses", async () => {
      const model = getModel("openai", "gpt-5.1-codex-max");
      const s = stream(model, makeContext(), { reasoningEffort: "xhigh" });
      let hasThinking = false;
      for await (const event of s) {
        if (
          event.type === "thinking_start" ||
          event.type === "thinking_delta"
        ) {
          hasThinking = true;
        }
      }
      const response = await s.result();
      expect(response.stopReason, `Error: ${response.errorMessage}`).toBe(
        "stop",
      );
      expect(response.content.some((b) => b.type === "text")).toBe(true);
      expect(
        hasThinking || response.content.some((b) => b.type === "thinking"),
      ).toBe(true);
    }, 60000);
  });
  describe("gpt-5-mini (does not support xhigh)", () => {
    it("should error with openai-responses when using xhigh", async () => {
      const model = getModel("openai", "gpt-5-mini");
      const s = stream(model, makeContext(), { reasoningEffort: "xhigh" });
      for await (const _ of s) {
        // drain events
      }
      const response = await s.result();
      // The provider should reject the unsupported effort level.
      expect(response.stopReason).toBe("error");
      expect(response.errorMessage).toContain("xhigh");
    }, 60000);
    it("should error with openai-completions when using xhigh", async () => {
      // Strip compat so the model can be re-targeted at the completions API.
      const { compat: _compat, ...baseModel } = getModel(
        "openai",
        "gpt-5-mini",
      );
      void _compat;
      const model: Model<"openai-completions"> = {
        ...baseModel,
        api: "openai-completions",
      };
      const s = stream(model, makeContext(), { reasoningEffort: "xhigh" });
      for await (const _ of s) {
        // drain events
      }
      const response = await s.result();
      expect(response.stopReason).toBe("error");
      expect(response.errorMessage).toContain("xhigh");
    }, 60000);
  });
});

View file

@ -0,0 +1,30 @@
import { describe, expect, it } from "vitest";
import { MODELS } from "../src/models.generated.js";
import { complete } from "../src/stream.js";
import type { Model } from "../src/types.js";
describe.skipIf(!process.env.OPENCODE_API_KEY)(
"OpenCode Models Smoke Test",
() => {
const providers = [
{ key: "opencode", label: "OpenCode Zen" },
{ key: "opencode-go", label: "OpenCode Go" },
] as const;
providers.forEach(({ key, label }) => {
const providerModels = Object.values(MODELS[key]);
providerModels.forEach((model) => {
it(`${label}: ${model.id}`, async () => {
const response = await complete(model as Model<any>, {
messages: [
{ role: "user", content: "Say hello.", timestamp: Date.now() },
],
});
expect(response.content).toBeTruthy();
expect(response.stopReason).toBe("stop");
}, 60000);
});
});
},
);