mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-20 20:01:10 +00:00
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/ - Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases - Update deploy-staging.yml to build pi from source (bun compile) before Docker build - Add apps/companion-os/** to path triggers - No more cross-repo dispatch needed Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
0250f72976
579 changed files with 206942 additions and 0 deletions
894
packages/ai/src/providers/amazon-bedrock.ts
Normal file
894
packages/ai/src/providers/amazon-bedrock.ts
Normal file
|
|
@ -0,0 +1,894 @@
|
|||
import {
|
||||
BedrockRuntimeClient,
|
||||
type BedrockRuntimeClientConfig,
|
||||
StopReason as BedrockStopReason,
|
||||
type Tool as BedrockTool,
|
||||
CachePointType,
|
||||
CacheTTL,
|
||||
type ContentBlock,
|
||||
type ContentBlockDeltaEvent,
|
||||
type ContentBlockStartEvent,
|
||||
type ContentBlockStopEvent,
|
||||
ConversationRole,
|
||||
ConverseStreamCommand,
|
||||
type ConverseStreamMetadataEvent,
|
||||
ImageFormat,
|
||||
type Message,
|
||||
type SystemContentBlock,
|
||||
type ToolChoice,
|
||||
type ToolConfiguration,
|
||||
ToolResultStatus,
|
||||
} from "@aws-sdk/client-bedrock-runtime";
|
||||
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ThinkingLevel,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import {
|
||||
adjustMaxTokensForThinking,
|
||||
buildBaseOptions,
|
||||
clampReasoning,
|
||||
} from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/** Options accepted by {@link streamBedrock} on top of the common StreamOptions. */
export interface BedrockOptions extends StreamOptions {
  /* AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION, then "us-east-1"
     (unless AWS_PROFILE is set, in which case the SDK resolves it). */
  region?: string;
  /* Named AWS credentials profile forwarded to the Bedrock runtime client. */
  profile?: string;
  /* Tool selection policy: "any" forces some tool, { type: "tool" } forces a
     specific tool, "none" disables tools entirely. */
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
  /* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
  reasoning?: ThinkingLevel;
  /* Custom token budgets per thinking level. Overrides default budgets. */
  thinkingBudgets?: ThinkingBudgets;
  /* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
  interleavedThinking?: boolean;
}

/* Streaming-internal content block: `index` ties the block to Bedrock's
   contentBlockIndex and `partialJson` accumulates tool-call input JSON.
   Both fields are deleted before the message is surfaced to callers
   (see handleContentBlockStop and the catch block in streamBedrock). */
type Block = (TextContent | ThinkingContent | ToolCall) & {
  index?: number;
  partialJson?: string;
};
|
||||
|
||||
/**
 * Stream an assistant response from AWS Bedrock via the ConverseStream API.
 *
 * Returns an AssistantMessageEventStream immediately; the request itself runs
 * in a detached async IIFE that pushes start/delta/end events as stream items
 * arrive and finishes with a "done" or "error" event. Cancellation is driven
 * by `options.signal`.
 */
export const streamBedrock: StreamFunction<
  "bedrock-converse-stream",
  BedrockOptions
> = (
  model: Model<"bedrock-converse-stream">,
  context: Context,
  options: BedrockOptions = {},
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the full message; also used as the "partial" payload on
    // every intermediate stream event.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "bedrock-converse-stream" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    // Same array as output.content, viewed with the streaming-only fields
    // (index / partialJson) visible.
    const blocks = output.content as Block[];

    const config: BedrockRuntimeClientConfig = {
      profile: options.profile,
    };

    // in Node.js/Bun environment only
    if (
      typeof process !== "undefined" &&
      (process.versions?.node || process.versions?.bun)
    ) {
      // Region resolution: explicit option > env vars > SDK default chain.
      // When AWS_PROFILE is set, we leave region undefined so the SDK can
      // resolve it from aws profile configs. Otherwise fall back to us-east-1.
      const explicitRegion =
        options.region ||
        process.env.AWS_REGION ||
        process.env.AWS_DEFAULT_REGION;
      if (explicitRegion) {
        config.region = explicitRegion;
      } else if (!process.env.AWS_PROFILE) {
        config.region = "us-east-1";
      }

      // Support proxies that don't need authentication
      if (process.env.AWS_BEDROCK_SKIP_AUTH === "1") {
        config.credentials = {
          accessKeyId: "dummy-access-key",
          secretAccessKey: "dummy-secret-key",
        };
      }

      if (
        process.env.HTTP_PROXY ||
        process.env.HTTPS_PROXY ||
        process.env.NO_PROXY ||
        process.env.http_proxy ||
        process.env.https_proxy ||
        process.env.no_proxy
      ) {
        // Lazy imports keep these optional deps out of non-proxy code paths.
        const nodeHttpHandler = await import("@smithy/node-http-handler");
        const proxyAgent = await import("proxy-agent");

        const agent = new proxyAgent.ProxyAgent();

        // Bedrock runtime uses NodeHttp2Handler by default since v3.798.0, which is based
        // on `http2` module and has no support for http agent.
        // Use NodeHttpHandler to support http agent.
        config.requestHandler = new nodeHttpHandler.NodeHttpHandler({
          httpAgent: agent,
          httpsAgent: agent,
        });
      } else if (process.env.AWS_BEDROCK_FORCE_HTTP1 === "1") {
        // Some custom endpoints require HTTP/1.1 instead of HTTP/2
        const nodeHttpHandler = await import("@smithy/node-http-handler");
        config.requestHandler = new nodeHttpHandler.NodeHttpHandler();
      }
    } else {
      // Non-Node environment (browser): fall back to us-east-1 since
      // there's no config file resolution available.
      config.region = options.region || "us-east-1";
    }

    try {
      const client = new BedrockRuntimeClient(config);

      const cacheRetention = resolveCacheRetention(options.cacheRetention);
      const commandInput = {
        modelId: model.id,
        messages: convertMessages(context, model, cacheRetention),
        system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
        inferenceConfig: {
          maxTokens: options.maxTokens,
          temperature: options.temperature,
        },
        toolConfig: convertToolConfig(context.tools, options.toolChoice),
        additionalModelRequestFields: buildAdditionalModelRequestFields(
          model,
          options,
        ),
      };
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(commandInput);
      const command = new ConverseStreamCommand(commandInput);

      const response = await client.send(command, {
        abortSignal: options.signal,
      });

      // Dispatch each union member of the Converse stream to its handler.
      for await (const item of response.stream!) {
        if (item.messageStart) {
          if (item.messageStart.role !== ConversationRole.ASSISTANT) {
            throw new Error(
              "Unexpected assistant message start but got user message start instead",
            );
          }
          stream.push({ type: "start", partial: output });
        } else if (item.contentBlockStart) {
          handleContentBlockStart(
            item.contentBlockStart,
            blocks,
            output,
            stream,
          );
        } else if (item.contentBlockDelta) {
          handleContentBlockDelta(
            item.contentBlockDelta,
            blocks,
            output,
            stream,
          );
        } else if (item.contentBlockStop) {
          handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
        } else if (item.messageStop) {
          output.stopReason = mapStopReason(item.messageStop.stopReason);
        } else if (item.metadata) {
          handleMetadata(item.metadata, model, output);
        } else if (item.internalServerException) {
          throw new Error(
            `Internal server error: ${item.internalServerException.message}`,
          );
        } else if (item.modelStreamErrorException) {
          throw new Error(
            `Model stream error: ${item.modelStreamErrorException.message}`,
          );
        } else if (item.validationException) {
          throw new Error(
            `Validation error: ${item.validationException.message}`,
          );
        } else if (item.throttlingException) {
          throw new Error(
            `Throttling error: ${item.throttlingException.message}`,
          );
        } else if (item.serviceUnavailableException) {
          throw new Error(
            `Service unavailable: ${item.serviceUnavailableException.message}`,
          );
        }
      }

      // The SDK may end the stream quietly on abort; surface it as an error.
      if (options.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "error" || output.stopReason === "aborted") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip streaming-only bookkeeping so the partial message is clean.
      for (const block of output.content) {
        delete (block as Block).index;
        delete (block as Block).partialJson;
      }
      output.stopReason = options.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleBedrock: StreamFunction<
|
||||
"bedrock-converse-stream",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const base = buildBaseOptions(model, options, undefined);
|
||||
if (!options?.reasoning) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: undefined,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
if (
|
||||
model.id.includes("anthropic.claude") ||
|
||||
model.id.includes("anthropic/claude")
|
||||
) {
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: {
|
||||
...(options.thinkingBudgets || {}),
|
||||
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
|
||||
},
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
};
|
||||
|
||||
function handleContentBlockStart(
|
||||
event: ContentBlockStartEvent,
|
||||
blocks: Block[],
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
): void {
|
||||
const index = event.contentBlockIndex!;
|
||||
const start = event.start;
|
||||
|
||||
if (start?.toolUse) {
|
||||
const block: Block = {
|
||||
type: "toolCall",
|
||||
id: start.toolUse.toolUseId || "",
|
||||
name: start.toolUse.name || "",
|
||||
arguments: {},
|
||||
partialJson: "",
|
||||
index,
|
||||
};
|
||||
output.content.push(block);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: blocks.length - 1,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Apply a streaming content delta (text, tool-call JSON, or reasoning) to the
 * in-progress message and emit the matching stream events. Text and thinking
 * blocks are created lazily here, since contentBlockStart is only observed
 * for tool-use blocks (see comment below).
 */
function handleContentBlockDelta(
  event: ContentBlockDeltaEvent,
  blocks: Block[],
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void {
  const contentBlockIndex = event.contentBlockIndex!;
  const delta = event.delta;
  // Locate the block previously created for this Bedrock content index.
  let index = blocks.findIndex((b) => b.index === contentBlockIndex);
  let block = blocks[index];

  if (delta?.text !== undefined) {
    // If no text block exists yet, create one, as `handleContentBlockStart` is not sent for text blocks
    if (!block) {
      const newBlock: Block = {
        type: "text",
        text: "",
        index: contentBlockIndex,
      };
      output.content.push(newBlock);
      index = blocks.length - 1;
      block = blocks[index];
      stream.push({ type: "text_start", contentIndex: index, partial: output });
    }
    if (block.type === "text") {
      block.text += delta.text;
      stream.push({
        type: "text_delta",
        contentIndex: index,
        delta: delta.text,
        partial: output,
      });
    }
  } else if (delta?.toolUse && block?.type === "toolCall") {
    // Accumulate raw JSON and keep a best-effort parse of the partial input.
    block.partialJson = (block.partialJson || "") + (delta.toolUse.input || "");
    block.arguments = parseStreamingJson(block.partialJson);
    stream.push({
      type: "toolcall_delta",
      contentIndex: index,
      delta: delta.toolUse.input || "",
      partial: output,
    });
  } else if (delta?.reasoningContent) {
    let thinkingBlock = block;
    let thinkingIndex = index;

    // Thinking blocks are also created lazily on their first delta.
    if (!thinkingBlock) {
      const newBlock: Block = {
        type: "thinking",
        thinking: "",
        thinkingSignature: "",
        index: contentBlockIndex,
      };
      output.content.push(newBlock);
      thinkingIndex = blocks.length - 1;
      thinkingBlock = blocks[thinkingIndex];
      stream.push({
        type: "thinking_start",
        contentIndex: thinkingIndex,
        partial: output,
      });
    }

    if (thinkingBlock?.type === "thinking") {
      if (delta.reasoningContent.text) {
        thinkingBlock.thinking += delta.reasoningContent.text;
        stream.push({
          type: "thinking_delta",
          contentIndex: thinkingIndex,
          delta: delta.reasoningContent.text,
          partial: output,
        });
      }
      // Signature chunks are accumulated silently (no dedicated event).
      if (delta.reasoningContent.signature) {
        thinkingBlock.thinkingSignature =
          (thinkingBlock.thinkingSignature || "") +
          delta.reasoningContent.signature;
      }
    }
  }
}
|
||||
|
||||
function handleMetadata(
|
||||
event: ConverseStreamMetadataEvent,
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
output: AssistantMessage,
|
||||
): void {
|
||||
if (event.usage) {
|
||||
output.usage.input = event.usage.inputTokens || 0;
|
||||
output.usage.output = event.usage.outputTokens || 0;
|
||||
output.usage.cacheRead = event.usage.cacheReadInputTokens || 0;
|
||||
output.usage.cacheWrite = event.usage.cacheWriteInputTokens || 0;
|
||||
output.usage.totalTokens =
|
||||
event.usage.totalTokens || output.usage.input + output.usage.output;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
||||
function handleContentBlockStop(
|
||||
event: ContentBlockStopEvent,
|
||||
blocks: Block[],
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
): void {
|
||||
const index = blocks.findIndex((b) => b.index === event.contentBlockIndex);
|
||||
const block = blocks[index];
|
||||
if (!block) return;
|
||||
delete (block as Block).index;
|
||||
|
||||
switch (block.type) {
|
||||
case "text":
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: index,
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
case "thinking":
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: index,
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
case "toolCall":
|
||||
block.arguments = parseStreamingJson(block.partialJson);
|
||||
delete (block as Block).partialJson;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: index,
|
||||
toolCall: block,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports adaptive thinking (Opus 4.6 and Sonnet 4.6).
|
||||
*/
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
function mapThinkingLevelToEffort(
|
||||
level: SimpleStreamOptions["reasoning"],
|
||||
modelId: string,
|
||||
): "low" | "medium" | "high" | "max" {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6")
|
||||
? "max"
|
||||
: "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports prompt caching.
|
||||
* Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
|
||||
*/
|
||||
function supportsPromptCaching(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
): boolean {
|
||||
if (model.cost.cacheRead || model.cost.cacheWrite) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const id = model.id.toLowerCase();
|
||||
// Claude 4.x models (opus-4, sonnet-4, haiku-4)
|
||||
if (id.includes("claude") && (id.includes("-4-") || id.includes("-4.")))
|
||||
return true;
|
||||
// Claude 3.7 Sonnet
|
||||
if (id.includes("claude-3-7-sonnet")) return true;
|
||||
// Claude 3.5 Haiku
|
||||
if (id.includes("claude-3-5-haiku")) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports thinking signatures in reasoningContent.
|
||||
* Only Anthropic Claude models support the signature field.
|
||||
* Other models (OpenAI, Qwen, Minimax, Moonshot, etc.) reject it with:
|
||||
* "This model doesn't support the reasoningContent.reasoningText.signature field"
|
||||
*/
|
||||
function supportsThinkingSignature(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
): boolean {
|
||||
const id = model.id.toLowerCase();
|
||||
return id.includes("anthropic.claude") || id.includes("anthropic/claude");
|
||||
}
|
||||
|
||||
function buildSystemPrompt(
|
||||
systemPrompt: string | undefined,
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
cacheRetention: CacheRetention,
|
||||
): SystemContentBlock[] | undefined {
|
||||
if (!systemPrompt) return undefined;
|
||||
|
||||
const blocks: SystemContentBlock[] = [
|
||||
{ text: sanitizeSurrogates(systemPrompt) },
|
||||
];
|
||||
|
||||
// Add cache point for supported Claude models when caching is enabled
|
||||
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
||||
blocks.push({
|
||||
cachePoint: {
|
||||
type: CachePointType.DEFAULT,
|
||||
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
function normalizeToolCallId(id: string): string {
|
||||
const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
|
||||
return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
|
||||
}
|
||||
|
||||
/**
 * Convert the provider-agnostic context messages into Bedrock Converse
 * `Message`s. Tool-call ids are normalized, consecutive toolResult messages
 * are merged into one user message (Bedrock requires this), empty assistant
 * content is dropped, and a cache point is appended to a trailing user
 * message when prompt caching applies.
 */
function convertMessages(
  context: Context,
  model: Model<"bedrock-converse-stream">,
  cacheRetention: CacheRetention,
): Message[] {
  const result: Message[] = [];
  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  // Indexed loop (not for-of) because the toolResult branch advances `i`
  // past the look-ahead it consumes.
  for (let i = 0; i < transformedMessages.length; i++) {
    const m = transformedMessages[i];

    switch (m.role) {
      case "user":
        result.push({
          role: ConversationRole.USER,
          content:
            typeof m.content === "string"
              ? [{ text: sanitizeSurrogates(m.content) }]
              : m.content.map((c) => {
                  switch (c.type) {
                    case "text":
                      return { text: sanitizeSurrogates(c.text) };
                    case "image":
                      return { image: createImageBlock(c.mimeType, c.data) };
                    default:
                      throw new Error("Unknown user content type");
                  }
                }),
        });
        break;
      case "assistant": {
        // Skip assistant messages with empty content (e.g., from aborted requests)
        // Bedrock rejects messages with empty content arrays
        if (m.content.length === 0) {
          continue;
        }
        const contentBlocks: ContentBlock[] = [];
        for (const c of m.content) {
          switch (c.type) {
            case "text":
              // Skip empty text blocks (continue targets the inner for loop)
              if (c.text.trim().length === 0) continue;
              contentBlocks.push({ text: sanitizeSurrogates(c.text) });
              break;
            case "toolCall":
              contentBlocks.push({
                toolUse: { toolUseId: c.id, name: c.name, input: c.arguments },
              });
              break;
            case "thinking":
              // Skip empty thinking blocks
              if (c.thinking.trim().length === 0) continue;
              // Only Anthropic models support the signature field in reasoningText.
              // For other models, we omit the signature to avoid errors like:
              // "This model doesn't support the reasoningContent.reasoningText.signature field"
              if (supportsThinkingSignature(model)) {
                contentBlocks.push({
                  reasoningContent: {
                    reasoningText: {
                      text: sanitizeSurrogates(c.thinking),
                      signature: c.thinkingSignature,
                    },
                  },
                });
              } else {
                contentBlocks.push({
                  reasoningContent: {
                    reasoningText: { text: sanitizeSurrogates(c.thinking) },
                  },
                });
              }
              break;
            default:
              throw new Error("Unknown assistant content type");
          }
        }
        // Skip if all content blocks were filtered out
        if (contentBlocks.length === 0) {
          continue;
        }
        result.push({
          role: ConversationRole.ASSISTANT,
          content: contentBlocks,
        });
        break;
      }
      case "toolResult": {
        // Collect all consecutive toolResult messages into a single user message
        // Bedrock requires all tool results to be in one message
        const toolResults: ContentBlock.ToolResultMember[] = [];

        // Add current tool result with all content blocks combined
        toolResults.push({
          toolResult: {
            toolUseId: m.toolCallId,
            content: m.content.map((c) =>
              c.type === "image"
                ? { image: createImageBlock(c.mimeType, c.data) }
                : { text: sanitizeSurrogates(c.text) },
            ),
            status: m.isError
              ? ToolResultStatus.ERROR
              : ToolResultStatus.SUCCESS,
          },
        });

        // Look ahead for consecutive toolResult messages
        let j = i + 1;
        while (
          j < transformedMessages.length &&
          transformedMessages[j].role === "toolResult"
        ) {
          const nextMsg = transformedMessages[j] as ToolResultMessage;
          toolResults.push({
            toolResult: {
              toolUseId: nextMsg.toolCallId,
              content: nextMsg.content.map((c) =>
                c.type === "image"
                  ? { image: createImageBlock(c.mimeType, c.data) }
                  : { text: sanitizeSurrogates(c.text) },
              ),
              status: nextMsg.isError
                ? ToolResultStatus.ERROR
                : ToolResultStatus.SUCCESS,
            },
          });
          j++;
        }

        // Skip the messages we've already processed
        i = j - 1;

        result.push({
          role: ConversationRole.USER,
          content: toolResults,
        });
        break;
      }
      default:
        throw new Error("Unknown message role");
    }
  }

  // Add cache point to the last user message for supported Claude models when caching is enabled
  if (
    cacheRetention !== "none" &&
    supportsPromptCaching(model) &&
    result.length > 0
  ) {
    const lastMessage = result[result.length - 1];
    if (lastMessage.role === ConversationRole.USER && lastMessage.content) {
      (lastMessage.content as ContentBlock[]).push({
        cachePoint: {
          type: CachePointType.DEFAULT,
          ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
        },
      });
    }
  }

  return result;
}
|
||||
|
||||
function convertToolConfig(
|
||||
tools: Tool[] | undefined,
|
||||
toolChoice: BedrockOptions["toolChoice"],
|
||||
): ToolConfiguration | undefined {
|
||||
if (!tools?.length || toolChoice === "none") return undefined;
|
||||
|
||||
const bedrockTools: BedrockTool[] = tools.map((tool) => ({
|
||||
toolSpec: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
inputSchema: { json: tool.parameters },
|
||||
},
|
||||
}));
|
||||
|
||||
let bedrockToolChoice: ToolChoice | undefined;
|
||||
switch (toolChoice) {
|
||||
case "auto":
|
||||
bedrockToolChoice = { auto: {} };
|
||||
break;
|
||||
case "any":
|
||||
bedrockToolChoice = { any: {} };
|
||||
break;
|
||||
default:
|
||||
if (toolChoice?.type === "tool") {
|
||||
bedrockToolChoice = { tool: { name: toolChoice.name } };
|
||||
}
|
||||
}
|
||||
|
||||
return { tools: bedrockTools, toolChoice: bedrockToolChoice };
|
||||
}
|
||||
|
||||
function mapStopReason(reason: string | undefined): StopReason {
|
||||
switch (reason) {
|
||||
case BedrockStopReason.END_TURN:
|
||||
case BedrockStopReason.STOP_SEQUENCE:
|
||||
return "stop";
|
||||
case BedrockStopReason.MAX_TOKENS:
|
||||
case BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED:
|
||||
return "length";
|
||||
case BedrockStopReason.TOOL_USE:
|
||||
return "toolUse";
|
||||
default:
|
||||
return "error";
|
||||
}
|
||||
}
|
||||
|
||||
function buildAdditionalModelRequestFields(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
options: BedrockOptions,
|
||||
): Record<string, any> | undefined {
|
||||
if (!options.reasoning || !model.reasoning) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (
|
||||
model.id.includes("anthropic.claude") ||
|
||||
model.id.includes("anthropic/claude")
|
||||
) {
|
||||
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
||||
? {
|
||||
thinking: { type: "adaptive" },
|
||||
output_config: {
|
||||
effort: mapThinkingLevelToEffort(options.reasoning, model.id),
|
||||
},
|
||||
}
|
||||
: (() => {
|
||||
const defaultBudgets: Record<ThinkingLevel, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
|
||||
};
|
||||
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
const level =
|
||||
options.reasoning === "xhigh" ? "high" : options.reasoning;
|
||||
const budget =
|
||||
options.thinkingBudgets?.[level] ??
|
||||
defaultBudgets[options.reasoning];
|
||||
|
||||
return {
|
||||
thinking: {
|
||||
type: "enabled",
|
||||
budget_tokens: budget,
|
||||
},
|
||||
};
|
||||
})();
|
||||
|
||||
if (
|
||||
!supportsAdaptiveThinking(model.id) &&
|
||||
(options.interleavedThinking ?? true)
|
||||
) {
|
||||
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function createImageBlock(mimeType: string, data: string) {
|
||||
let format: ImageFormat;
|
||||
switch (mimeType) {
|
||||
case "image/jpeg":
|
||||
case "image/jpg":
|
||||
format = ImageFormat.JPEG;
|
||||
break;
|
||||
case "image/png":
|
||||
format = ImageFormat.PNG;
|
||||
break;
|
||||
case "image/gif":
|
||||
format = ImageFormat.GIF;
|
||||
break;
|
||||
case "image/webp":
|
||||
format = ImageFormat.WEBP;
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unknown image type: ${mimeType}`);
|
||||
}
|
||||
|
||||
const binaryString = atob(data);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
|
||||
return { source: { bytes }, format };
|
||||
}
|
||||
989
packages/ai/src/providers/anthropic.ts
Normal file
989
packages/ai/src/providers/anthropic.ts
Normal file
|
|
@ -0,0 +1,989 @@
|
|||
import Anthropic from "@anthropic-ai/sdk";
|
||||
import type {
|
||||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
ImageContent,
|
||||
Message,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import {
|
||||
adjustMaxTokensForThinking,
|
||||
buildBaseOptions,
|
||||
} from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
function getCacheControl(
|
||||
baseUrl: string,
|
||||
cacheRetention?: CacheRetention,
|
||||
): {
|
||||
retention: CacheRetention;
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" };
|
||||
} {
|
||||
const retention = resolveCacheRetention(cacheRetention);
|
||||
if (retention === "none") {
|
||||
return { retention };
|
||||
}
|
||||
const ttl =
|
||||
retention === "long" && baseUrl.includes("api.anthropic.com")
|
||||
? "1h"
|
||||
: undefined;
|
||||
return {
|
||||
retention,
|
||||
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
||||
};
|
||||
}
|
||||
|
||||
// Stealth mode: Mimic Claude Code's tool naming exactly
// (used when authenticating with an OAuth token, which requires the
// request to look like it came from the Claude Code CLI).
const claudeCodeVersion = "2.1.62";

// Claude Code 2.x tool names (canonical casing)
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
// To update: https://github.com/badlogic/cchistory
const claudeCodeTools = [
  "Read",
  "Write",
  "Edit",
  "Bash",
  "Grep",
  "Glob",
  "AskUserQuestion",
  "EnterPlanMode",
  "ExitPlanMode",
  "KillShell",
  "NotebookEdit",
  "Skill",
  "Task",
  "TaskOutput",
  "TodoWrite",
  "WebFetch",
  "WebSearch",
];

// Case-insensitive lookup: lowercased tool name -> canonical Claude Code casing.
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));

// Convert tool name to CC canonical casing if it matches (case-insensitive);
// unknown names pass through unchanged.
const toClaudeCodeName = (name: string) =>
  ccToolLookup.get(name.toLowerCase()) ?? name;
|
||||
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
|
||||
if (tools && tools.length > 0) {
|
||||
const lowerName = name.toLowerCase();
|
||||
const matchedTool = tools.find(
|
||||
(tool) => tool.name.toLowerCase() === lowerName,
|
||||
);
|
||||
if (matchedTool) return matchedTool.name;
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert content blocks to Anthropic API format
|
||||
*/
|
||||
function convertContentBlocks(content: (TextContent | ImageContent)[]):
|
||||
| string
|
||||
| Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: {
|
||||
type: "base64";
|
||||
media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
|
||||
data: string;
|
||||
};
|
||||
}
|
||||
> {
|
||||
// If only text blocks, return as concatenated string for simplicity
|
||||
const hasImages = content.some((c) => c.type === "image");
|
||||
if (!hasImages) {
|
||||
return sanitizeSurrogates(
|
||||
content.map((c) => (c as TextContent).text).join("\n"),
|
||||
);
|
||||
}
|
||||
|
||||
// If we have images, convert to content block array
|
||||
const blocks = content.map((block) => {
|
||||
if (block.type === "text") {
|
||||
return {
|
||||
type: "text" as const,
|
||||
text: sanitizeSurrogates(block.text),
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: "image" as const,
|
||||
source: {
|
||||
type: "base64" as const,
|
||||
media_type: block.mimeType as
|
||||
| "image/jpeg"
|
||||
| "image/png"
|
||||
| "image/gif"
|
||||
| "image/webp",
|
||||
data: block.data,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// If only images (no text), add placeholder text block
|
||||
const hasText = blocks.some((b) => b.type === "text");
|
||||
if (!hasText) {
|
||||
blocks.unshift({
|
||||
type: "text" as const,
|
||||
text: "(see attached image)",
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/** Effort levels accepted by Anthropic's adaptive-thinking output config. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";

/** Options specific to the Anthropic Messages streaming API. */
export interface AnthropicOptions extends StreamOptions {
  /**
   * Enable extended thinking.
   * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
   * For older models: uses budget-based thinking with thinkingBudgetTokens.
   */
  thinkingEnabled?: boolean;
  /**
   * Token budget for extended thinking (older models only).
   * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
   */
  thinkingBudgetTokens?: number;
  /**
   * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
   * Controls how much thinking Claude allocates:
   * - "max": Always thinks with no constraints (Opus 4.6 only)
   * - "high": Always thinks, deep reasoning (default)
   * - "medium": Moderate thinking, may skip for simple queries
   * - "low": Minimal thinking, skips for simple tasks
   * Ignored for older models.
   */
  effort?: AnthropicEffort;
  /**
   * Request the interleaved-thinking beta on older models (defaults to true
   * at the call site). Adaptive-thinking models have this built in, so the
   * beta header is skipped for them.
   */
  interleavedThinking?: boolean;
  /**
   * Tool selection strategy passed through to the API: "auto"/"any"/"none",
   * or force a specific tool by name.
   */
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
|
||||
|
||||
function mergeHeaders(
|
||||
...headerSources: (Record<string, string> | undefined)[]
|
||||
): Record<string, string> {
|
||||
const merged: Record<string, string> = {};
|
||||
for (const headers of headerSources) {
|
||||
if (headers) {
|
||||
Object.assign(merged, headers);
|
||||
}
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
 * Stream a completion from the Anthropic Messages API.
 *
 * Starts an async worker immediately and returns an event stream; the worker
 * pushes start/delta/end events as SSE events arrive and finishes with either
 * a "done" or an "error" event. Content blocks are accumulated into the
 * returned partial AssistantMessage as they stream in.
 */
export const streamAnthropic: StreamFunction<
  "anthropic-messages",
  AnthropicOptions
> = (
  model: Model<"anthropic-messages">,
  context: Context,
  options?: AnthropicOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Fire-and-forget worker; all results are delivered through `stream`.
  (async () => {
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";

      // GitHub Copilot requires per-request headers derived from the payload.
      let copilotDynamicHeaders: Record<string, string> | undefined;
      if (model.provider === "github-copilot") {
        const hasImages = hasCopilotVisionInput(context.messages);
        copilotDynamicHeaders = buildCopilotDynamicHeaders({
          messages: context.messages,
          hasImages,
        });
      }

      const { client, isOAuthToken } = createClient(
        model,
        apiKey,
        options?.interleavedThinking ?? true,
        options?.headers,
        copilotDynamicHeaders,
      );
      const params = buildParams(model, context, isOAuthToken, options);
      options?.onPayload?.(params);
      const anthropicStream = client.messages.stream(
        { ...params, stream: true },
        { signal: options?.signal },
      );
      stream.push({ type: "start", partial: output });

      // In-flight blocks carry the server's block `index` (and, for tool
      // calls, the partial JSON buffer); both are stripped on block stop.
      type Block = (
        | ThinkingContent
        | TextContent
        | (ToolCall & { partialJson: string })
      ) & { index: number };
      const blocks = output.content as Block[];

      for await (const event of anthropicStream) {
        if (event.type === "message_start") {
          // Capture initial token usage from message_start event
          // This ensures we have input token counts even if the stream is aborted early
          output.usage.input = event.message.usage.input_tokens || 0;
          output.usage.output = event.message.usage.output_tokens || 0;
          output.usage.cacheRead =
            event.message.usage.cache_read_input_tokens || 0;
          output.usage.cacheWrite =
            event.message.usage.cache_creation_input_tokens || 0;
          // Anthropic doesn't provide total_tokens, compute from components
          output.usage.totalTokens =
            output.usage.input +
            output.usage.output +
            output.usage.cacheRead +
            output.usage.cacheWrite;
          calculateCost(model, output.usage);
        } else if (event.type === "content_block_start") {
          if (event.content_block.type === "text") {
            const block: Block = {
              type: "text",
              text: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "text_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "thinking") {
            const block: Block = {
              type: "thinking",
              thinking: "",
              thinkingSignature: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "redacted_thinking") {
            // Redacted thinking arrives complete: keep the opaque payload in
            // thinkingSignature so it can be round-tripped on a later turn.
            const block: Block = {
              type: "thinking",
              thinking: "[Reasoning redacted]",
              thinkingSignature: event.content_block.data,
              redacted: true,
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "tool_use") {
            const block: Block = {
              type: "toolCall",
              id: event.content_block.id,
              name: isOAuthToken
                ? fromClaudeCodeName(event.content_block.name, context.tools)
                : event.content_block.name,
              arguments:
                (event.content_block.input as Record<string, any>) ?? {},
              partialJson: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "toolcall_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          }
        } else if (event.type === "content_block_delta") {
          if (event.delta.type === "text_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "text") {
              block.text += event.delta.text;
              stream.push({
                type: "text_delta",
                contentIndex: index,
                delta: event.delta.text,
                partial: output,
              });
            }
          } else if (event.delta.type === "thinking_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinking += event.delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex: index,
                delta: event.delta.thinking,
                partial: output,
              });
            }
          } else if (event.delta.type === "input_json_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "toolCall") {
              // Re-parse the accumulated partial JSON on every delta so
              // `arguments` always reflects the best-effort current state.
              block.partialJson += event.delta.partial_json;
              block.arguments = parseStreamingJson(block.partialJson);
              stream.push({
                type: "toolcall_delta",
                contentIndex: index,
                delta: event.delta.partial_json,
                partial: output,
              });
            }
          } else if (event.delta.type === "signature_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinkingSignature = block.thinkingSignature || "";
              block.thinkingSignature += event.delta.signature;
            }
          }
        } else if (event.type === "content_block_stop") {
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (block) {
            // Strip the transient stream-bookkeeping fields before emitting.
            delete (block as any).index;
            if (block.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex: index,
                content: block.text,
                partial: output,
              });
            } else if (block.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex: index,
                content: block.thinking,
                partial: output,
              });
            } else if (block.type === "toolCall") {
              block.arguments = parseStreamingJson(block.partialJson);
              delete (block as any).partialJson;
              stream.push({
                type: "toolcall_end",
                contentIndex: index,
                toolCall: block,
                partial: output,
              });
            }
          }
        } else if (event.type === "message_delta") {
          if (event.delta.stop_reason) {
            output.stopReason = mapStopReason(event.delta.stop_reason);
          }
          // Only update usage fields if present (not null).
          // Preserves input_tokens from message_start when proxies omit it in message_delta.
          if (event.usage.input_tokens != null) {
            output.usage.input = event.usage.input_tokens;
          }
          if (event.usage.output_tokens != null) {
            output.usage.output = event.usage.output_tokens;
          }
          if (event.usage.cache_read_input_tokens != null) {
            output.usage.cacheRead = event.usage.cache_read_input_tokens;
          }
          if (event.usage.cache_creation_input_tokens != null) {
            output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
          }
          // Anthropic doesn't provide total_tokens, compute from components
          output.usage.totalTokens =
            output.usage.input +
            output.usage.output +
            output.usage.cacheRead +
            output.usage.cacheWrite;
          calculateCost(model, output.usage);
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Clean up any transient `index` fields left on partial blocks.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
|
||||
* Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6)
|
||||
*/
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
// Opus 4.6 and Sonnet 4.6 model IDs (with or without date suffix)
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
|
||||
* Note: effort "max" is only valid on Opus 4.6.
|
||||
*/
|
||||
function mapThinkingLevelToEffort(
|
||||
level: SimpleStreamOptions["reasoning"],
|
||||
modelId: string,
|
||||
): AnthropicEffort {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
return "low";
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6")
|
||||
? "max"
|
||||
: "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
export const streamSimpleAnthropic: StreamFunction<
|
||||
"anthropic-messages",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: false,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||
// For older models: use budget-based thinking
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
effort,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||
} satisfies AnthropicOptions);
|
||||
};
|
||||
|
||||
function isOAuthToken(apiKey: string): boolean {
|
||||
return apiKey.includes("sk-ant-oat");
|
||||
}
|
||||
|
||||
/**
 * Build an Anthropic SDK client configured for one of three auth modes:
 * GitHub Copilot (Bearer token, reduced beta set), Anthropic OAuth (Bearer
 * token plus Claude Code identity headers), or plain API key. Also reports
 * whether the key is an OAuth token so callers can apply stealth tool naming.
 * Header precedence (last wins): base headers < model.headers <
 * dynamicHeaders < optionsHeaders.
 */
function createClient(
  model: Model<"anthropic-messages">,
  apiKey: string,
  interleavedThinking: boolean,
  optionsHeaders?: Record<string, string>,
  dynamicHeaders?: Record<string, string>,
): { client: Anthropic; isOAuthToken: boolean } {
  // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
  // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
  const needsInterleavedBeta =
    interleavedThinking && !supportsAdaptiveThinking(model.id);

  // Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)
  if (model.provider === "github-copilot") {
    const betaFeatures: string[] = [];
    if (needsInterleavedBeta) {
      betaFeatures.push("interleaved-thinking-2025-05-14");
    }

    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          ...(betaFeatures.length > 0
            ? { "anthropic-beta": betaFeatures.join(",") }
            : {}),
        },
        model.headers,
        dynamicHeaders,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: false };
  }

  // Non-Copilot requests always get fine-grained tool streaming.
  const betaFeatures = ["fine-grained-tool-streaming-2025-05-14"];
  if (needsInterleavedBeta) {
    betaFeatures.push("interleaved-thinking-2025-05-14");
  }

  // OAuth: Bearer auth, Claude Code identity headers
  if (isOAuthToken(apiKey)) {
    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}`,
          "user-agent": `claude-cli/${claudeCodeVersion}`,
          "x-app": "cli",
        },
        model.headers,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: true };
  }

  // API key auth
  const client = new Anthropic({
    apiKey,
    baseURL: model.baseUrl,
    dangerouslyAllowBrowser: true,
    defaultHeaders: mergeHeaders(
      {
        accept: "application/json",
        "anthropic-dangerous-direct-browser-access": "true",
        "anthropic-beta": betaFeatures.join(","),
      },
      model.headers,
      optionsHeaders,
    ),
  });

  return { client, isOAuthToken: false };
}
|
||||
|
||||
/**
 * Assemble the Messages API request payload: converted messages, system
 * prompt (with Claude Code identity prepended for OAuth tokens), cache
 * control, tools, thinking configuration, metadata, and tool choice.
 */
function buildParams(
  model: Model<"anthropic-messages">,
  context: Context,
  isOAuthToken: boolean,
  options?: AnthropicOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(
    model.baseUrl,
    options?.cacheRetention,
  );
  const params: MessageCreateParamsStreaming = {
    model: model.id,
    messages: convertMessages(
      context.messages,
      model,
      isOAuthToken,
      cacheControl,
    ),
    // Default budget: a third of the model's max output, truncated to int
    // via the `| 0` bitwise coercion.
    max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
    stream: true,
  };

  // For OAuth tokens, we MUST include Claude Code identity
  if (isOAuthToken) {
    params.system = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
    if (context.systemPrompt) {
      params.system.push({
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      });
    }
  } else if (context.systemPrompt) {
    // Add cache control to system prompt for non-OAuth tokens
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }

  // Temperature is incompatible with extended thinking (adaptive or budget-based).
  if (options?.temperature !== undefined && !options?.thinkingEnabled) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools, isOAuthToken);
  }

  // Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      // Adaptive thinking: Claude decides when and how much to think
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      // Budget-based thinking for older models
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens || 1024,
      };
    }
  }

  // Only forward metadata.user_id, and only when it is a string.
  if (options?.metadata) {
    const userId = options.metadata.user_id;
    if (typeof userId === "string") {
      params.metadata = { user_id: userId };
    }
  }

  // String shorthand ("auto"/"any"/"none") is wrapped into the object form.
  if (options?.toolChoice) {
    if (typeof options.toolChoice === "string") {
      params.tool_choice = { type: options.toolChoice };
    } else {
      params.tool_choice = options.toolChoice;
    }
  }

  return params;
}
|
||||
|
||||
// Normalize tool call IDs to match Anthropic's required pattern and length
|
||||
function normalizeToolCallId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
|
||||
}
|
||||
|
||||
/**
 * Convert internal messages to Anthropic MessageParam format.
 *
 * Drops empty text content, filters images on models without image input,
 * round-trips thinking blocks (including redacted ones), merges consecutive
 * toolResult messages into a single user message, and attaches cache_control
 * to the last user message so the conversation prefix is cached.
 */
function convertMessages(
  messages: Message[],
  model: Model<"anthropic-messages">,
  isOAuthToken: boolean,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const params: MessageParam[] = [];

  // Transform messages for cross-provider compatibility
  const transformedMessages = transformMessages(
    messages,
    model,
    normalizeToolCallId,
  );

  for (let i = 0; i < transformedMessages.length; i++) {
    const msg = transformedMessages[i];

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        // Skip whitespace-only user messages entirely.
        if (msg.content.trim().length > 0) {
          params.push({
            role: "user",
            content: sanitizeSurrogates(msg.content),
          });
        }
      } else {
        const blocks: ContentBlockParam[] = msg.content.map((item) => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            };
          } else {
            return {
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as
                  | "image/jpeg"
                  | "image/png"
                  | "image/gif"
                  | "image/webp",
                data: item.data,
              },
            };
          }
        });
        // Drop images for models that don't accept image input, then drop
        // whitespace-only text blocks.
        let filteredBlocks = !model?.input.includes("image")
          ? blocks.filter((b) => b.type !== "image")
          : blocks;
        filteredBlocks = filteredBlocks.filter((b) => {
          if (b.type === "text") {
            return b.text.trim().length > 0;
          }
          return true;
        });
        if (filteredBlocks.length === 0) continue;
        params.push({
          role: "user",
          content: filteredBlocks,
        });
      }
    } else if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];

      for (const block of msg.content) {
        if (block.type === "text") {
          if (block.text.trim().length === 0) continue;
          blocks.push({
            type: "text",
            text: sanitizeSurrogates(block.text),
          });
        } else if (block.type === "thinking") {
          // Redacted thinking: pass the opaque payload back as redacted_thinking
          if (block.redacted) {
            blocks.push({
              type: "redacted_thinking",
              data: block.thinkingSignature!,
            });
            continue;
          }
          if (block.thinking.trim().length === 0) continue;
          // If thinking signature is missing/empty (e.g., from aborted stream),
          // convert to plain text block without <thinking> tags to avoid API rejection
          // and prevent Claude from mimicking the tags in responses
          if (
            !block.thinkingSignature ||
            block.thinkingSignature.trim().length === 0
          ) {
            blocks.push({
              type: "text",
              text: sanitizeSurrogates(block.thinking),
            });
          } else {
            blocks.push({
              type: "thinking",
              thinking: sanitizeSurrogates(block.thinking),
              signature: block.thinkingSignature,
            });
          }
        } else if (block.type === "toolCall") {
          blocks.push({
            type: "tool_use",
            id: block.id,
            // OAuth (stealth) mode: present tool names in Claude Code casing.
            name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
            input: block.arguments ?? {},
          });
        }
      }
      if (blocks.length === 0) continue;
      params.push({
        role: "assistant",
        content: blocks,
      });
    } else if (msg.role === "toolResult") {
      // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
      const toolResults: ContentBlockParam[] = [];

      // Add the current tool result
      toolResults.push({
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: convertContentBlocks(msg.content),
        is_error: msg.isError,
      });

      // Look ahead for consecutive toolResult messages
      let j = i + 1;
      while (
        j < transformedMessages.length &&
        transformedMessages[j].role === "toolResult"
      ) {
        const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
        toolResults.push({
          type: "tool_result",
          tool_use_id: nextMsg.toolCallId,
          content: convertContentBlocks(nextMsg.content),
          is_error: nextMsg.isError,
        });
        j++;
      }

      // Skip the messages we've already processed
      i = j - 1;

      // Add a single user message with all tool results
      params.push({
        role: "user",
        content: toolResults,
      });
    }
  }

  // Add cache_control to the last user message to cache conversation history
  if (cacheControl && params.length > 0) {
    const lastMessage = params[params.length - 1];
    if (lastMessage.role === "user") {
      if (Array.isArray(lastMessage.content)) {
        const lastBlock = lastMessage.content[lastMessage.content.length - 1];
        if (
          lastBlock &&
          (lastBlock.type === "text" ||
            lastBlock.type === "image" ||
            lastBlock.type === "tool_result")
        ) {
          (lastBlock as any).cache_control = cacheControl;
        }
      } else if (typeof lastMessage.content === "string") {
        // String content can't carry cache_control; wrap in a text block.
        lastMessage.content = [
          {
            type: "text",
            text: lastMessage.content,
            cache_control: cacheControl,
          },
        ] as any;
      }
    }
  }

  return params;
}
|
||||
|
||||
function convertTools(
|
||||
tools: Tool[],
|
||||
isOAuthToken: boolean,
|
||||
): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema
|
||||
|
||||
return {
|
||||
name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: jsonSchema.properties || {},
|
||||
required: jsonSchema.required || [],
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
reason: Anthropic.Messages.StopReason | string,
|
||||
): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "error";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
|
||||
return "error";
|
||||
default:
|
||||
// Handle unknown stop reasons gracefully (API may add new values)
|
||||
throw new Error(`Unhandled stop reason: ${reason}`);
|
||||
}
|
||||
}
|
||||
297
packages/ai/src/providers/azure-openai-responses.ts
Normal file
297
packages/ai/src/providers/azure-openai-responses.ts
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
import { AzureOpenAI } from "openai";
|
||||
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import {
|
||||
convertResponsesMessages,
|
||||
convertResponsesTools,
|
||||
processResponsesStream,
|
||||
} from "./openai-responses-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
// Azure OpenAI API version used when the caller does not supply one.
const DEFAULT_AZURE_API_VERSION = "v1";
// NOTE(review): usage of this set is not visible in this chunk — presumably
// the providers whose tool-call conventions are accepted by the Azure
// Responses path; confirm against the rest of this file.
const AZURE_TOOL_CALL_PROVIDERS = new Set([
  "openai",
  "openai-codex",
  "opencode",
  "azure-openai-responses",
]);
|
||||
|
||||
function parseDeploymentNameMap(
|
||||
value: string | undefined,
|
||||
): Map<string, string> {
|
||||
const map = new Map<string, string>();
|
||||
if (!value) return map;
|
||||
for (const entry of value.split(",")) {
|
||||
const trimmed = entry.trim();
|
||||
if (!trimmed) continue;
|
||||
const [modelId, deploymentName] = trimmed.split("=", 2);
|
||||
if (!modelId || !deploymentName) continue;
|
||||
map.set(modelId.trim(), deploymentName.trim());
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
function resolveDeploymentName(
|
||||
model: Model<"azure-openai-responses">,
|
||||
options?: AzureOpenAIResponsesOptions,
|
||||
): string {
|
||||
if (options?.azureDeploymentName) {
|
||||
return options.azureDeploymentName;
|
||||
}
|
||||
const mappedDeployment = parseDeploymentNameMap(
|
||||
process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP,
|
||||
).get(model.id);
|
||||
return mappedDeployment || model.id;
|
||||
}
|
||||
|
||||
// Azure OpenAI Responses-specific options
export interface AzureOpenAIResponsesOptions extends StreamOptions {
  /** Reasoning effort forwarded to the Responses API. */
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
  /** Reasoning summary verbosity; null disables summaries. */
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
  /** Override for the Azure API version (defaults to DEFAULT_AZURE_API_VERSION). */
  azureApiVersion?: string;
  /** Azure resource name used to derive the endpoint when no base URL is given. */
  azureResourceName?: string;
  /** Fully-specified Azure endpoint base URL; takes precedence over the resource name. */
  azureBaseUrl?: string;
  /** Explicit deployment name; overrides the env mapping and the model id. */
  azureDeploymentName?: string;
}
|
||||
|
||||
/**
 * Generate function for Azure OpenAI Responses API.
 *
 * Resolves the deployment name, starts an async worker immediately, and
 * returns an event stream; the worker delegates SSE processing to
 * processResponsesStream and finishes with a "done" or "error" event.
 */
export const streamAzureOpenAIResponses: StreamFunction<
  "azure-openai-responses",
  AzureOpenAIResponsesOptions
> = (
  model: Model<"azure-openai-responses">,
  context: Context,
  options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Start async processing
  (async () => {
    const deploymentName = resolveDeploymentName(model, options);

    // Accumulator for the streamed assistant message; usage/cost are filled
    // in by processResponsesStream as events arrive.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "azure-openai-responses" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      // Create Azure OpenAI client
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, apiKey, options);
      const params = buildParams(model, context, options, deploymentName);
      options?.onPayload?.(params);
      const openaiStream = await client.responses.create(
        params,
        options?.signal ? { signal: options.signal } : undefined,
      );
      stream.push({ type: "start", partial: output });

      await processResponsesStream(openaiStream, output, stream, model);

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip transient stream-bookkeeping fields from partial blocks.
      for (const block of output.content)
        delete (block as { index?: number }).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
 * Simplified entry point: maps generic SimpleStreamOptions onto the Azure
 * Responses options, clamping the reasoning effort for models that do not
 * support the "xhigh" level.
 *
 * Throws if no API key is available from options or the environment.
 */
export const streamSimpleAzureOpenAIResponses: StreamFunction<
  "azure-openai-responses",
  SimpleStreamOptions
> = (
  model: Model<"azure-openai-responses">,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
  const apiKey = options?.apiKey || getEnvApiKey(model.provider);
  if (!apiKey) {
    throw new Error(`No API key for provider: ${model.provider}`);
  }

  const base = buildBaseOptions(model, options, apiKey);
  // "xhigh" is only forwarded verbatim when the model supports it; otherwise clamp.
  const reasoningEffort = supportsXhigh(model)
    ? options?.reasoning
    : clampReasoning(options?.reasoning);

  return streamAzureOpenAIResponses(model, context, {
    ...base,
    reasoningEffort,
  } satisfies AzureOpenAIResponsesOptions);
};
|
||||
|
||||
function normalizeAzureBaseUrl(baseUrl: string): string {
|
||||
return baseUrl.replace(/\/+$/, "");
|
||||
}
|
||||
|
||||
function buildDefaultBaseUrl(resourceName: string): string {
|
||||
return `https://${resourceName}.openai.azure.com/openai/v1`;
|
||||
}
|
||||
|
||||
/**
 * Resolve the Azure endpoint configuration for a request.
 *
 * API version precedence: option > AZURE_OPENAI_API_VERSION > default.
 * Base URL precedence: explicit base URL (option/env) > resource-name-derived
 * default URL > model.baseUrl. Throws if none of these yields a base URL.
 */
function resolveAzureConfig(
  model: Model<"azure-openai-responses">,
  options?: AzureOpenAIResponsesOptions,
): { baseUrl: string; apiVersion: string } {
  const apiVersion =
    options?.azureApiVersion ||
    process.env.AZURE_OPENAI_API_VERSION ||
    DEFAULT_AZURE_API_VERSION;

  // trim() guards against whitespace-only env values collapsing to falsy later.
  const baseUrl =
    options?.azureBaseUrl?.trim() ||
    process.env.AZURE_OPENAI_BASE_URL?.trim() ||
    undefined;
  const resourceName =
    options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;

  let resolvedBaseUrl = baseUrl;

  // Derive the conventional endpoint from the resource name when no URL given.
  if (!resolvedBaseUrl && resourceName) {
    resolvedBaseUrl = buildDefaultBaseUrl(resourceName);
  }

  // Last resort: a base URL configured on the model itself.
  if (!resolvedBaseUrl && model.baseUrl) {
    resolvedBaseUrl = model.baseUrl;
  }

  if (!resolvedBaseUrl) {
    throw new Error(
      "Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.",
    );
  }

  return {
    baseUrl: normalizeAzureBaseUrl(resolvedBaseUrl),
    apiVersion,
  };
}
|
||||
|
||||
/**
 * Construct an AzureOpenAI client for the resolved endpoint.
 *
 * Falls back to AZURE_OPENAI_API_KEY when no key is passed; option headers
 * override model headers on key collisions.
 *
 * @throws When neither an explicit key nor AZURE_OPENAI_API_KEY is set.
 */
function createClient(
  model: Model<"azure-openai-responses">,
  apiKey: string,
  options?: AzureOpenAIResponsesOptions,
) {
  if (!apiKey) {
    if (!process.env.AZURE_OPENAI_API_KEY) {
      throw new Error(
        "Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
      );
    }
    apiKey = process.env.AZURE_OPENAI_API_KEY;
  }

  // Merge headers: per-request options win over model-level defaults.
  const headers = { ...model.headers };

  if (options?.headers) {
    Object.assign(headers, options.headers);
  }

  const { baseUrl, apiVersion } = resolveAzureConfig(model, options);

  return new AzureOpenAI({
    apiKey,
    apiVersion,
    // Allow use from browser-like runtimes; the SDK blocks this by default.
    dangerouslyAllowBrowser: true,
    defaultHeaders: headers,
    baseURL: baseUrl,
  });
}
|
||||
|
||||
/**
 * Build the streaming request payload for the Azure Responses API.
 *
 * Converts messages/tools to Responses format, applies optional sampling
 * limits, and wires up reasoning: when an effort or summary is requested,
 * it is sent along with `include: ["reasoning.encrypted_content"]`; when the
 * model supports reasoning but none is requested, GPT-5 models get a special
 * developer message that suppresses reasoning.
 */
function buildParams(
  model: Model<"azure-openai-responses">,
  context: Context,
  options: AzureOpenAIResponsesOptions | undefined,
  deploymentName: string,
) {
  const messages = convertResponsesMessages(
    model,
    context,
    AZURE_TOOL_CALL_PROVIDERS,
  );

  const params: ResponseCreateParamsStreaming = {
    // Azure routes by deployment name rather than the raw model id.
    model: deploymentName,
    input: messages,
    stream: true,
    prompt_cache_key: options?.sessionId,
  };

  if (options?.maxTokens) {
    params.max_output_tokens = options?.maxTokens;
  }

  if (options?.temperature !== undefined) {
    params.temperature = options?.temperature;
  }

  if (context.tools) {
    params.tools = convertResponsesTools(context.tools);
  }

  if (model.reasoning) {
    if (options?.reasoningEffort || options?.reasoningSummary) {
      params.reasoning = {
        effort: options?.reasoningEffort || "medium",
        summary: options?.reasoningSummary || "auto",
      };
      // Needed so encrypted reasoning can be replayed in later turns.
      params.include = ["reasoning.encrypted_content"];
    } else {
      // NOTE(review): this checks model.name while other checks here use
      // model.id — confirm the display name reliably starts with "gpt-5".
      if (model.name.toLowerCase().startsWith("gpt-5")) {
        // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
        messages.push({
          role: "developer",
          content: [
            {
              type: "input_text",
              text: "# Juice: 0 !important",
            },
          ],
        });
      }
    }
  }

  return params;
}
|
||||
37
packages/ai/src/providers/github-copilot-headers.ts
Normal file
37
packages/ai/src/providers/github-copilot-headers.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import type { Message } from "../types.js";
|
||||
|
||||
// Copilot expects X-Initiator to indicate whether the request is user-initiated
|
||||
// or agent-initiated (e.g. follow-up after assistant/tool messages).
|
||||
export function inferCopilotInitiator(messages: Message[]): "user" | "agent" {
|
||||
const last = messages[messages.length - 1];
|
||||
return last && last.role !== "user" ? "agent" : "user";
|
||||
}
|
||||
|
||||
// Copilot requires Copilot-Vision-Request header when sending images
|
||||
export function hasCopilotVisionInput(messages: Message[]): boolean {
|
||||
return messages.some((msg) => {
|
||||
if (msg.role === "user" && Array.isArray(msg.content)) {
|
||||
return msg.content.some((c) => c.type === "image");
|
||||
}
|
||||
if (msg.role === "toolResult" && Array.isArray(msg.content)) {
|
||||
return msg.content.some((c) => c.type === "image");
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
export function buildCopilotDynamicHeaders(params: {
|
||||
messages: Message[];
|
||||
hasImages: boolean;
|
||||
}): Record<string, string> {
|
||||
const headers: Record<string, string> = {
|
||||
"X-Initiator": inferCopilotInitiator(params.messages),
|
||||
"Openai-Intent": "conversation-edits",
|
||||
};
|
||||
|
||||
if (params.hasImages) {
|
||||
headers["Copilot-Vision-Request"] = "true";
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
1074
packages/ai/src/providers/google-gemini-cli.ts
Normal file
1074
packages/ai/src/providers/google-gemini-cli.ts
Normal file
File diff suppressed because it is too large
Load diff
373
packages/ai/src/providers/google-shared.ts
Normal file
373
packages/ai/src/providers/google-shared.ts
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
/**
|
||||
* Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
|
||||
*/
|
||||
|
||||
import {
|
||||
type Content,
|
||||
FinishReason,
|
||||
FunctionCallingConfigMode,
|
||||
type Part,
|
||||
} from "@google/genai";
|
||||
import type {
|
||||
Context,
|
||||
ImageContent,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
Tool,
|
||||
} from "../types.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
type GoogleApiType =
|
||||
| "google-generative-ai"
|
||||
| "google-gemini-cli"
|
||||
| "google-vertex";
|
||||
|
||||
/**
|
||||
* Determines whether a streamed Gemini `Part` should be treated as "thinking".
|
||||
*
|
||||
* Protocol note (Gemini / Vertex AI thought signatures):
|
||||
* - `thought: true` is the definitive marker for thinking content (thought summaries).
|
||||
* - `thoughtSignature` is an encrypted representation of the model's internal thought process
|
||||
* used to preserve reasoning context across multi-turn interactions.
|
||||
* - `thoughtSignature` can appear on ANY part type (text, functionCall, etc.) - it does NOT
|
||||
* indicate the part itself is thinking content.
|
||||
* - For non-functionCall responses, the signature appears on the last part for context replay.
|
||||
* - When persisting/replaying model outputs, signature-bearing parts must be preserved as-is;
|
||||
* do not merge/move signatures across parts.
|
||||
*
|
||||
* See: https://ai.google.dev/gemini-api/docs/thought-signatures
|
||||
*/
|
||||
export function isThinkingPart(
|
||||
part: Pick<Part, "thought" | "thoughtSignature">,
|
||||
): boolean {
|
||||
return part.thought === true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retain thought signatures during streaming.
|
||||
*
|
||||
* Some backends only send `thoughtSignature` on the first delta for a given part/block; later deltas may omit it.
|
||||
* This helper preserves the last non-empty signature for the current block.
|
||||
*
|
||||
* Note: this does NOT merge or move signatures across distinct response parts. It only prevents
|
||||
* a signature from being overwritten with `undefined` within the same streamed block.
|
||||
*/
|
||||
export function retainThoughtSignature(
|
||||
existing: string | undefined,
|
||||
incoming: string | undefined,
|
||||
): string | undefined {
|
||||
if (typeof incoming === "string" && incoming.length > 0) return incoming;
|
||||
return existing;
|
||||
}
|
||||
|
||||
// Thought signatures must be base64 for Google APIs (TYPE_BYTES).
|
||||
const base64SignaturePattern = /^[A-Za-z0-9+/]+={0,2}$/;
|
||||
|
||||
// Sentinel value that tells the Gemini API to skip thought signature validation.
|
||||
// Used for unsigned function call parts (e.g. replayed from providers without thought signatures).
|
||||
// See: https://ai.google.dev/gemini-api/docs/thought-signatures
|
||||
const SKIP_THOUGHT_SIGNATURE = "skip_thought_signature_validator";
|
||||
|
||||
function isValidThoughtSignature(signature: string | undefined): boolean {
|
||||
if (!signature) return false;
|
||||
if (signature.length % 4 !== 0) return false;
|
||||
return base64SignaturePattern.test(signature);
|
||||
}
|
||||
|
||||
/**
|
||||
* Only keep signatures from the same provider/model and with valid base64.
|
||||
*/
|
||||
function resolveThoughtSignature(
|
||||
isSameProviderAndModel: boolean,
|
||||
signature: string | undefined,
|
||||
): string | undefined {
|
||||
return isSameProviderAndModel && isValidThoughtSignature(signature)
|
||||
? signature
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Models via Google APIs that require explicit tool call IDs in function calls/responses.
|
||||
*/
|
||||
export function requiresToolCallId(modelId: string): boolean {
|
||||
return modelId.startsWith("claude-") || modelId.startsWith("gpt-oss-");
|
||||
}
|
||||
|
||||
/**
 * Convert internal messages to Gemini Content[] format.
 *
 * - user messages become "user" contents (images dropped for non-vision models);
 * - assistant messages become "model" contents, keeping thinking blocks and
 *   thought signatures only when they came from the same provider AND model;
 * - toolResult messages become "user" contents with functionResponse parts,
 *   merged into the previous user turn when it already holds function responses.
 * Empty messages are skipped entirely.
 */
export function convertMessages<T extends GoogleApiType>(
  model: Model<T>,
  context: Context,
): Content[] {
  const contents: Content[] = [];
  // Some models (see requiresToolCallId) need IDs restricted to [a-zA-Z0-9_-], max 64 chars.
  const normalizeToolCallId = (id: string): string => {
    if (!requiresToolCallId(model.id)) return id;
    return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
  };

  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  for (const msg of transformedMessages) {
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        contents.push({
          role: "user",
          parts: [{ text: sanitizeSurrogates(msg.content) }],
        });
      } else {
        // Block array: map text blocks to text parts, everything else to inlineData.
        const parts: Part[] = msg.content.map((item) => {
          if (item.type === "text") {
            return { text: sanitizeSurrogates(item.text) };
          } else {
            return {
              inlineData: {
                mimeType: item.mimeType,
                data: item.data,
              },
            };
          }
        });
        // Drop non-text parts for models without image input support.
        const filteredParts = !model.input.includes("image")
          ? parts.filter((p) => p.text !== undefined)
          : parts;
        if (filteredParts.length === 0) continue;
        contents.push({
          role: "user",
          parts: filteredParts,
        });
      }
    } else if (msg.role === "assistant") {
      const parts: Part[] = [];
      // Check if message is from same provider and model - only then keep thinking blocks
      const isSameProviderAndModel =
        msg.provider === model.provider && msg.model === model.id;

      for (const block of msg.content) {
        if (block.type === "text") {
          // Skip empty text blocks - they can cause issues with some models (e.g. Claude via Antigravity)
          if (!block.text || block.text.trim() === "") continue;
          const thoughtSignature = resolveThoughtSignature(
            isSameProviderAndModel,
            block.textSignature,
          );
          parts.push({
            text: sanitizeSurrogates(block.text),
            ...(thoughtSignature && { thoughtSignature }),
          });
        } else if (block.type === "thinking") {
          // Skip empty thinking blocks
          if (!block.thinking || block.thinking.trim() === "") continue;
          // Only keep as thinking block if same provider AND same model
          // Otherwise convert to plain text (no tags to avoid model mimicking them)
          if (isSameProviderAndModel) {
            const thoughtSignature = resolveThoughtSignature(
              isSameProviderAndModel,
              block.thinkingSignature,
            );
            parts.push({
              thought: true,
              text: sanitizeSurrogates(block.thinking),
              ...(thoughtSignature && { thoughtSignature }),
            });
          } else {
            parts.push({
              text: sanitizeSurrogates(block.thinking),
            });
          }
        } else if (block.type === "toolCall") {
          const thoughtSignature = resolveThoughtSignature(
            isSameProviderAndModel,
            block.thoughtSignature,
          );
          // Gemini 3 requires thoughtSignature on all function calls when thinking mode is enabled.
          // Use the skip_thought_signature_validator sentinel for unsigned function calls
          // (e.g. replayed from providers without thought signatures like Claude via Antigravity).
          const isGemini3 = model.id.toLowerCase().includes("gemini-3");
          const effectiveSignature =
            thoughtSignature ||
            (isGemini3 ? SKIP_THOUGHT_SIGNATURE : undefined);
          const part: Part = {
            functionCall: {
              name: block.name,
              args: block.arguments ?? {},
              ...(requiresToolCallId(model.id) ? { id: block.id } : {}),
            },
            ...(effectiveSignature && { thoughtSignature: effectiveSignature }),
          };
          parts.push(part);
        }
      }

      if (parts.length === 0) continue;
      contents.push({
        role: "model",
        parts,
      });
    } else if (msg.role === "toolResult") {
      // Extract text and image content
      const textContent = msg.content.filter(
        (c): c is TextContent => c.type === "text",
      );
      const textResult = textContent.map((c) => c.text).join("\n");
      const imageContent = model.input.includes("image")
        ? msg.content.filter((c): c is ImageContent => c.type === "image")
        : [];

      const hasText = textResult.length > 0;
      const hasImages = imageContent.length > 0;

      // Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
      // See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
      // Older models don't support this, so we put images in a separate user message.
      const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");

      // Use "output" key for success, "error" key for errors as per SDK documentation
      const responseValue = hasText
        ? sanitizeSurrogates(textResult)
        : hasImages
          ? "(see attached image)"
          : "";

      const imageParts: Part[] = imageContent.map((imageBlock) => ({
        inlineData: {
          mimeType: imageBlock.mimeType,
          data: imageBlock.data,
        },
      }));

      const includeId = requiresToolCallId(model.id);
      const functionResponsePart: Part = {
        functionResponse: {
          name: msg.toolName,
          response: msg.isError
            ? { error: responseValue }
            : { output: responseValue },
          // Nest images inside functionResponse.parts for Gemini 3
          ...(hasImages &&
            supportsMultimodalFunctionResponse && { parts: imageParts }),
          ...(includeId ? { id: msg.toolCallId } : {}),
        },
      };

      // Cloud Code Assist API requires all function responses to be in a single user turn.
      // Check if the last content is already a user turn with function responses and merge.
      const lastContent = contents[contents.length - 1];
      if (
        lastContent?.role === "user" &&
        lastContent.parts?.some((p) => p.functionResponse)
      ) {
        lastContent.parts.push(functionResponsePart);
      } else {
        contents.push({
          role: "user",
          parts: [functionResponsePart],
        });
      }

      // For older models, add images in a separate user message
      if (hasImages && !supportsMultimodalFunctionResponse) {
        contents.push({
          role: "user",
          parts: [{ text: "Tool result image:" }, ...imageParts],
        });
      }
    }
  }

  return contents;
}
|
||||
|
||||
/**
|
||||
* Convert tools to Gemini function declarations format.
|
||||
*
|
||||
* By default uses `parametersJsonSchema` which supports full JSON Schema (including
|
||||
* anyOf, oneOf, const, etc.). Set `useParameters` to true to use the legacy `parameters`
|
||||
* field instead (OpenAPI 3.03 Schema). This is needed for Cloud Code Assist with Claude
|
||||
* models, where the API translates `parameters` into Anthropic's `input_schema`.
|
||||
*/
|
||||
export function convertTools(
|
||||
tools: Tool[],
|
||||
useParameters = false,
|
||||
): { functionDeclarations: Record<string, unknown>[] }[] | undefined {
|
||||
if (tools.length === 0) return undefined;
|
||||
return [
|
||||
{
|
||||
functionDeclarations: tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
...(useParameters
|
||||
? { parameters: tool.parameters }
|
||||
: { parametersJsonSchema: tool.parameters }),
|
||||
})),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Map tool choice string to Gemini FunctionCallingConfigMode.
|
||||
*/
|
||||
export function mapToolChoice(choice: string): FunctionCallingConfigMode {
|
||||
switch (choice) {
|
||||
case "auto":
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
case "none":
|
||||
return FunctionCallingConfigMode.NONE;
|
||||
case "any":
|
||||
return FunctionCallingConfigMode.ANY;
|
||||
default:
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Map Gemini FinishReason to our StopReason.
 *
 * STOP → "stop", MAX_TOKENS → "length"; every other known finish reason
 * (safety blocks, recitation, malformed calls, etc.) maps to "error".
 * The default branch is an exhaustiveness check: adding a new FinishReason
 * enum member without handling it fails to compile and throws at runtime.
 */
export function mapStopReason(reason: FinishReason): StopReason {
  switch (reason) {
    case FinishReason.STOP:
      return "stop";
    case FinishReason.MAX_TOKENS:
      return "length";
    case FinishReason.BLOCKLIST:
    case FinishReason.PROHIBITED_CONTENT:
    case FinishReason.SPII:
    case FinishReason.SAFETY:
    case FinishReason.IMAGE_SAFETY:
    case FinishReason.IMAGE_PROHIBITED_CONTENT:
    case FinishReason.IMAGE_RECITATION:
    case FinishReason.IMAGE_OTHER:
    case FinishReason.RECITATION:
    case FinishReason.FINISH_REASON_UNSPECIFIED:
    case FinishReason.OTHER:
    case FinishReason.LANGUAGE:
    case FinishReason.MALFORMED_FUNCTION_CALL:
    case FinishReason.UNEXPECTED_TOOL_CALL:
    case FinishReason.NO_IMAGE:
      return "error";
    default: {
      const _exhaustive: never = reason;
      throw new Error(`Unhandled stop reason: ${_exhaustive}`);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Map string finish reason to our StopReason (for raw API responses).
|
||||
*/
|
||||
export function mapStopReasonString(reason: string): StopReason {
|
||||
switch (reason) {
|
||||
case "STOP":
|
||||
return "stop";
|
||||
case "MAX_TOKENS":
|
||||
return "length";
|
||||
default:
|
||||
return "error";
|
||||
}
|
||||
}
|
||||
529
packages/ai/src/providers/google-vertex.ts
Normal file
529
packages/ai/src/providers/google-vertex.ts
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
import {
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type ThinkingConfig,
|
||||
ThinkingLevel,
|
||||
} from "@google/genai";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
ThinkingLevel as PiThinkingLevel,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import type { GoogleThinkingLevel } from "./google-gemini-cli.js";
|
||||
import {
|
||||
convertMessages,
|
||||
convertTools,
|
||||
isThinkingPart,
|
||||
mapStopReason,
|
||||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
/** Options specific to the Google Vertex AI provider. */
export interface GoogleVertexOptions extends StreamOptions {
  // Function-calling mode; mapped onto FunctionCallingConfigMode.
  toolChoice?: "auto" | "none" | "any";
  // Thinking/reasoning configuration for models that support it.
  thinking?: {
    enabled: boolean;
    budgetTokens?: number; // -1 for dynamic, 0 to disable
    // Discrete thinking level (Gemini 3-style models).
    level?: GoogleThinkingLevel;
  };
  // GCP project ID; falls back to GOOGLE_CLOUD_PROJECT / GCLOUD_PROJECT (see resolveProject).
  project?: string;
  // Vertex AI region; falls back to GOOGLE_CLOUD_LOCATION (see resolveLocation).
  location?: string;
}
|
||||
|
||||
// Vertex AI API version passed to the GoogleGenAI client.
const API_VERSION = "v1";

// Maps our GoogleThinkingLevel strings onto the @google/genai ThinkingLevel enum.
const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
  THINKING_LEVEL_UNSPECIFIED: ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
  MINIMAL: ThinkingLevel.MINIMAL,
  LOW: ThinkingLevel.LOW,
  MEDIUM: ThinkingLevel.MEDIUM,
  HIGH: ThinkingLevel.HIGH,
};

// Counter for generating unique tool call IDs
// (module-level so IDs stay unique across concurrent streams in one process).
let toolCallCounter = 0;
|
||||
|
||||
/**
 * Stream a completion from Google Vertex AI.
 *
 * Returns an event stream immediately and fills it from an async IIFE.
 * Text and thinking parts are accumulated into contiguous blocks (a block is
 * closed and a new one opened whenever the part kind flips); function calls
 * close the current block and emit start/delta/end in one burst. The partial
 * AssistantMessage `output` is mutated in place as chunks arrive.
 */
export const streamGoogleVertex: StreamFunction<
  "google-vertex",
  GoogleVertexOptions
> = (
  model: Model<"google-vertex">,
  context: Context,
  options?: GoogleVertexOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the final assistant message; starts empty with zeroed usage.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "google-vertex" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const project = resolveProject(options);
      const location = resolveLocation(options);
      const client = createClient(model, project, location, options?.headers);
      const params = buildParams(model, context, options);
      options?.onPayload?.(params);
      const googleStream = await client.models.generateContentStream(params);

      stream.push({ type: "start", partial: output });
      // The text/thinking block currently being appended to, if any.
      let currentBlock: TextContent | ThinkingContent | null = null;
      const blocks = output.content;
      // Index of the most recently pushed content block.
      const blockIndex = () => blocks.length - 1;
      for await (const chunk of googleStream) {
        const candidate = chunk.candidates?.[0];
        if (candidate?.content?.parts) {
          for (const part of candidate.content.parts) {
            if (part.text !== undefined) {
              const isThinking = isThinkingPart(part);
              // Open a new block when there is none or the part kind changed.
              if (
                !currentBlock ||
                (isThinking && currentBlock.type !== "thinking") ||
                (!isThinking && currentBlock.type !== "text")
              ) {
                // Close the previous block first (end event carries its full content).
                if (currentBlock) {
                  if (currentBlock.type === "text") {
                    stream.push({
                      type: "text_end",
                      contentIndex: blocks.length - 1,
                      content: currentBlock.text,
                      partial: output,
                    });
                  } else {
                    stream.push({
                      type: "thinking_end",
                      contentIndex: blockIndex(),
                      content: currentBlock.thinking,
                      partial: output,
                    });
                  }
                }
                if (isThinking) {
                  currentBlock = {
                    type: "thinking",
                    thinking: "",
                    thinkingSignature: undefined,
                  };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "thinking_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                } else {
                  currentBlock = { type: "text", text: "" };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "text_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                }
              }
              if (currentBlock.type === "thinking") {
                currentBlock.thinking += part.text;
                // Keep the last non-empty signature; later deltas may omit it.
                currentBlock.thinkingSignature = retainThoughtSignature(
                  currentBlock.thinkingSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "thinking_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              } else {
                currentBlock.text += part.text;
                currentBlock.textSignature = retainThoughtSignature(
                  currentBlock.textSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "text_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              }
            }

            if (part.functionCall) {
              // A function call terminates any open text/thinking block.
              if (currentBlock) {
                if (currentBlock.type === "text") {
                  stream.push({
                    type: "text_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.text,
                    partial: output,
                  });
                } else {
                  stream.push({
                    type: "thinking_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.thinking,
                    partial: output,
                  });
                }
                currentBlock = null;
              }

              // Generate an ID when the API gave none or the given one collides
              // with an earlier tool call in this message.
              const providedId = part.functionCall.id;
              const needsNewId =
                !providedId ||
                output.content.some(
                  (b) => b.type === "toolCall" && b.id === providedId,
                );
              const toolCallId = needsNewId
                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
                : providedId;

              const toolCall: ToolCall = {
                type: "toolCall",
                id: toolCallId,
                name: part.functionCall.name || "",
                arguments:
                  (part.functionCall.args as Record<string, any>) ?? {},
                ...(part.thoughtSignature && {
                  thoughtSignature: part.thoughtSignature,
                }),
              };

              // Function call args arrive whole, so emit start/delta/end together.
              output.content.push(toolCall);
              stream.push({
                type: "toolcall_start",
                contentIndex: blockIndex(),
                partial: output,
              });
              stream.push({
                type: "toolcall_delta",
                contentIndex: blockIndex(),
                delta: JSON.stringify(toolCall.arguments),
                partial: output,
              });
              stream.push({
                type: "toolcall_end",
                contentIndex: blockIndex(),
                toolCall,
                partial: output,
              });
            }
          }
        }

        if (candidate?.finishReason) {
          output.stopReason = mapStopReason(candidate.finishReason);
          // Any tool call in the message overrides the reported finish reason.
          if (output.content.some((b) => b.type === "toolCall")) {
            output.stopReason = "toolUse";
          }
        }

        // Usage metadata is cumulative; keep overwriting with the latest chunk.
        if (chunk.usageMetadata) {
          output.usage = {
            input: chunk.usageMetadata.promptTokenCount || 0,
            output:
              (chunk.usageMetadata.candidatesTokenCount || 0) +
              (chunk.usageMetadata.thoughtsTokenCount || 0),
            cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
            cacheWrite: 0,
            totalTokens: chunk.usageMetadata.totalTokenCount || 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }
      }

      // Close the trailing block once the stream is exhausted.
      if (currentBlock) {
        if (currentBlock.type === "text") {
          stream.push({
            type: "text_end",
            contentIndex: blockIndex(),
            content: currentBlock.text,
            partial: output,
          });
        } else {
          stream.push({
            type: "thinking_end",
            contentIndex: blockIndex(),
            content: currentBlock.thinking,
            partial: output,
          });
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Remove internal index property used during streaming
      for (const block of output.content) {
        if ("index" in block) {
          delete (block as { index?: number }).index;
        }
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
 * Simplified entry point: maps generic SimpleStreamOptions onto Vertex
 * thinking configuration.
 *
 * - no reasoning requested → thinking disabled;
 * - Gemini 3 Pro/Flash → discrete thinking level;
 * - other reasoning models → a token budget derived from the effort.
 */
export const streamSimpleGoogleVertex: StreamFunction<
  "google-vertex",
  SimpleStreamOptions
> = (
  model: Model<"google-vertex">,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
  const base = buildBaseOptions(model, options, undefined);
  if (!options?.reasoning) {
    return streamGoogleVertex(model, context, {
      ...base,
      thinking: { enabled: false },
    } satisfies GoogleVertexOptions);
  }

  const effort = clampReasoning(options.reasoning)!;
  // The Gemini helpers below are typed for the generative-ai API; the cast
  // reuses them for the Vertex-flavored model descriptor.
  const geminiModel = model as unknown as Model<"google-generative-ai">;

  if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
    return streamGoogleVertex(model, context, {
      ...base,
      thinking: {
        enabled: true,
        level: getGemini3ThinkingLevel(effort, geminiModel),
      },
    } satisfies GoogleVertexOptions);
  }

  return streamGoogleVertex(model, context, {
    ...base,
    thinking: {
      enabled: true,
      budgetTokens: getGoogleBudget(
        geminiModel,
        effort,
        options.thinkingBudgets,
      ),
    },
  } satisfies GoogleVertexOptions);
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"google-vertex">,
|
||||
project: string,
|
||||
location: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
): GoogleGenAI {
|
||||
const httpOptions: { headers?: Record<string, string> } = {};
|
||||
|
||||
if (model.headers || optionsHeaders) {
|
||||
httpOptions.headers = { ...model.headers, ...optionsHeaders };
|
||||
}
|
||||
|
||||
const hasHttpOptions = Object.values(httpOptions).some(Boolean);
|
||||
|
||||
return new GoogleGenAI({
|
||||
vertexai: true,
|
||||
project,
|
||||
location,
|
||||
apiVersion: API_VERSION,
|
||||
httpOptions: hasHttpOptions ? httpOptions : undefined,
|
||||
});
|
||||
}
|
||||
|
||||
function resolveProject(options?: GoogleVertexOptions): string {
|
||||
const project =
|
||||
options?.project ||
|
||||
process.env.GOOGLE_CLOUD_PROJECT ||
|
||||
process.env.GCLOUD_PROJECT;
|
||||
if (!project) {
|
||||
throw new Error(
|
||||
"Vertex AI requires a project ID. Set GOOGLE_CLOUD_PROJECT/GCLOUD_PROJECT or pass project in options.",
|
||||
);
|
||||
}
|
||||
return project;
|
||||
}
|
||||
|
||||
function resolveLocation(options?: GoogleVertexOptions): string {
|
||||
const location = options?.location || process.env.GOOGLE_CLOUD_LOCATION;
|
||||
if (!location) {
|
||||
throw new Error(
|
||||
"Vertex AI requires a location. Set GOOGLE_CLOUD_LOCATION or pass location in options.",
|
||||
);
|
||||
}
|
||||
return location;
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"google-vertex">,
|
||||
context: Context,
|
||||
options: GoogleVertexOptions = {},
|
||||
): GenerateContentParameters {
|
||||
const contents = convertMessages(model, context);
|
||||
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && {
|
||||
systemInstruction: sanitizeSurrogates(context.systemPrompt),
|
||||
}),
|
||||
...(context.tools &&
|
||||
context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
if (context.tools && context.tools.length > 0 && options.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
} else {
|
||||
config.toolConfig = undefined;
|
||||
}
|
||||
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
const thinkingConfig: ThinkingConfig = { includeThoughts: true };
|
||||
if (options.thinking.level !== undefined) {
|
||||
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level];
|
||||
} else if (options.thinking.budgetTokens !== undefined) {
|
||||
thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
|
||||
}
|
||||
config.thinkingConfig = thinkingConfig;
|
||||
}
|
||||
|
||||
if (options.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
const params: GenerateContentParameters = {
|
||||
model: model.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Effort levels handled by the level/budget mappers below; "xhigh" is excluded
// because these Gemini mappings only cover minimal/low/medium/high.
type ClampedThinkingLevel = Exclude<PiThinkingLevel, "xhigh">;
|
||||
|
||||
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-flash/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function getGemini3ThinkingLevel(
|
||||
effort: ClampedThinkingLevel,
|
||||
model: Model<"google-generative-ai">,
|
||||
): GoogleThinkingLevel {
|
||||
if (isGemini3ProModel(model)) {
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
return "MINIMAL";
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
return "MEDIUM";
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
|
||||
function getGoogleBudget(
|
||||
model: Model<"google-generative-ai">,
|
||||
effort: ClampedThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): number {
|
||||
if (customBudgets?.[effort] !== undefined) {
|
||||
return customBudgets[effort]!;
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-pro")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 32768,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-flash")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 24576,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
501
packages/ai/src/providers/google.ts
Normal file
501
packages/ai/src/providers/google.ts
Normal file
|
|
@ -0,0 +1,501 @@
|
|||
import {
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type ThinkingConfig,
|
||||
} from "@google/genai";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ThinkingLevel,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import type { GoogleThinkingLevel } from "./google-gemini-cli.js";
|
||||
import {
|
||||
convertMessages,
|
||||
convertTools,
|
||||
isThinkingPart,
|
||||
mapStopReason,
|
||||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
/**
 * Provider-specific options for the Google Generative AI (Gemini) API.
 */
export interface GoogleOptions extends StreamOptions {
  /** Function-calling mode: let the model decide, forbid tools, or force a call. */
  toolChoice?: "auto" | "none" | "any";
  /** Reasoning ("thinking") configuration for models that support it. */
  thinking?: {
    enabled: boolean;
    budgetTokens?: number; // -1 for dynamic, 0 to disable
    /** Gemini 3 named thinking level; takes precedence over budgetTokens when set. */
    level?: GoogleThinkingLevel;
  };
}
|
||||
|
||||
// Counter for generating unique tool call IDs when the API omits one or
// returns a duplicate; combined with the call name and Date.now() to build
// a fresh ID. Module-level so IDs stay unique across concurrent streams.
let toolCallCounter = 0;
|
||||
|
||||
/**
 * Streams a completion from the Google Generative AI (Gemini) API.
 *
 * Returns immediately with an AssistantMessageEventStream; the request runs
 * in a detached async task that pushes start/delta/end events and finishes
 * with either a `done` event or an `error` event (on failure or abort).
 * Text, thinking, and tool-call blocks are accumulated into `output.content`
 * as chunks arrive.
 */
export const streamGoogle: StreamFunction<
  "google-generative-ai",
  GoogleOptions
> = (
  model: Model<"google-generative-ai">,
  context: Context,
  options?: GoogleOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Skeleton assistant message, filled in incrementally while streaming.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "google-generative-ai" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(params);
      const googleStream = await client.models.generateContentStream(params);

      stream.push({ type: "start", partial: output });
      // The block currently being appended to (text or thinking); tool calls
      // are emitted atomically and never become the current block.
      let currentBlock: TextContent | ThinkingContent | null = null;
      const blocks = output.content;
      const blockIndex = () => blocks.length - 1;
      for await (const chunk of googleStream) {
        const candidate = chunk.candidates?.[0];
        if (candidate?.content?.parts) {
          for (const part of candidate.content.parts) {
            if (part.text !== undefined) {
              const isThinking = isThinkingPart(part);
              // Open a new block when there is none, or when the part kind
              // (text vs thinking) differs from the open block — closing the
              // previous block first.
              if (
                !currentBlock ||
                (isThinking && currentBlock.type !== "thinking") ||
                (!isThinking && currentBlock.type !== "text")
              ) {
                if (currentBlock) {
                  if (currentBlock.type === "text") {
                    stream.push({
                      type: "text_end",
                      contentIndex: blocks.length - 1,
                      content: currentBlock.text,
                      partial: output,
                    });
                  } else {
                    stream.push({
                      type: "thinking_end",
                      contentIndex: blockIndex(),
                      content: currentBlock.thinking,
                      partial: output,
                    });
                  }
                }
                if (isThinking) {
                  currentBlock = {
                    type: "thinking",
                    thinking: "",
                    thinkingSignature: undefined,
                  };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "thinking_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                } else {
                  currentBlock = { type: "text", text: "" };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "text_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                }
              }
              // Append the delta and carry over any thought signature
              // attached to this part via retainThoughtSignature.
              if (currentBlock.type === "thinking") {
                currentBlock.thinking += part.text;
                currentBlock.thinkingSignature = retainThoughtSignature(
                  currentBlock.thinkingSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "thinking_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              } else {
                currentBlock.text += part.text;
                currentBlock.textSignature = retainThoughtSignature(
                  currentBlock.textSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "text_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              }
            }

            if (part.functionCall) {
              // A function call closes any open text/thinking block first.
              if (currentBlock) {
                if (currentBlock.type === "text") {
                  stream.push({
                    type: "text_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.text,
                    partial: output,
                  });
                } else {
                  stream.push({
                    type: "thinking_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.thinking,
                    partial: output,
                  });
                }
                currentBlock = null;
              }

              // Generate unique ID if not provided or if it's a duplicate
              const providedId = part.functionCall.id;
              const needsNewId =
                !providedId ||
                output.content.some(
                  (b) => b.type === "toolCall" && b.id === providedId,
                );
              const toolCallId = needsNewId
                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
                : providedId;

              const toolCall: ToolCall = {
                type: "toolCall",
                id: toolCallId,
                name: part.functionCall.name || "",
                arguments:
                  (part.functionCall.args as Record<string, any>) ?? {},
                ...(part.thoughtSignature && {
                  thoughtSignature: part.thoughtSignature,
                }),
              };

              // Tool calls arrive whole, so start/delta/end are emitted
              // back-to-back for the same content index.
              output.content.push(toolCall);
              stream.push({
                type: "toolcall_start",
                contentIndex: blockIndex(),
                partial: output,
              });
              stream.push({
                type: "toolcall_delta",
                contentIndex: blockIndex(),
                delta: JSON.stringify(toolCall.arguments),
                partial: output,
              });
              stream.push({
                type: "toolcall_end",
                contentIndex: blockIndex(),
                toolCall,
                partial: output,
              });
            }
          }
        }

        // The presence of any tool call overrides the API's finish reason.
        if (candidate?.finishReason) {
          output.stopReason = mapStopReason(candidate.finishReason);
          if (output.content.some((b) => b.type === "toolCall")) {
            output.stopReason = "toolUse";
          }
        }

        // Usage metadata may arrive on multiple chunks; the latest wins.
        // Output tokens include thinking tokens.
        if (chunk.usageMetadata) {
          output.usage = {
            input: chunk.usageMetadata.promptTokenCount || 0,
            output:
              (chunk.usageMetadata.candidatesTokenCount || 0) +
              (chunk.usageMetadata.thoughtsTokenCount || 0),
            cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
            cacheWrite: 0,
            totalTokens: chunk.usageMetadata.totalTokenCount || 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }
      }

      // Close whatever block was still open when the stream ended.
      if (currentBlock) {
        if (currentBlock.type === "text") {
          stream.push({
            type: "text_end",
            contentIndex: blockIndex(),
            content: currentBlock.text,
            partial: output,
          });
        } else {
          stream.push({
            type: "thinking_end",
            contentIndex: blockIndex(),
            content: currentBlock.thinking,
            partial: output,
          });
        }
      }

      // Abort may have fired while draining without raising an exception.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // A terminal aborted/error stop reason without a thrown exception is
      // still treated as a failure.
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Remove internal index property used during streaming
      for (const block of output.content) {
        if ("index" in block) {
          delete (block as { index?: number }).index;
        }
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleGoogle: StreamFunction<
|
||||
"google-generative-ai",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: { enabled: false },
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(options.reasoning)!;
|
||||
const googleModel = model as Model<"google-generative-ai">;
|
||||
|
||||
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
level: getGemini3ThinkingLevel(effort, googleModel),
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: getGoogleBudget(
|
||||
googleModel,
|
||||
effort,
|
||||
options.thinkingBudgets,
|
||||
),
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"google-generative-ai">,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
): GoogleGenAI {
|
||||
const httpOptions: {
|
||||
baseUrl?: string;
|
||||
apiVersion?: string;
|
||||
headers?: Record<string, string>;
|
||||
} = {};
|
||||
if (model.baseUrl) {
|
||||
httpOptions.baseUrl = model.baseUrl;
|
||||
httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
|
||||
}
|
||||
if (model.headers || optionsHeaders) {
|
||||
httpOptions.headers = { ...model.headers, ...optionsHeaders };
|
||||
}
|
||||
|
||||
return new GoogleGenAI({
|
||||
apiKey,
|
||||
httpOptions: Object.keys(httpOptions).length > 0 ? httpOptions : undefined,
|
||||
});
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options: GoogleOptions = {},
|
||||
): GenerateContentParameters {
|
||||
const contents = convertMessages(model, context);
|
||||
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && {
|
||||
systemInstruction: sanitizeSurrogates(context.systemPrompt),
|
||||
}),
|
||||
...(context.tools &&
|
||||
context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
if (context.tools && context.tools.length > 0 && options.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
} else {
|
||||
config.toolConfig = undefined;
|
||||
}
|
||||
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
const thinkingConfig: ThinkingConfig = { includeThoughts: true };
|
||||
if (options.thinking.level !== undefined) {
|
||||
// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
|
||||
thinkingConfig.thinkingLevel = options.thinking.level as any;
|
||||
} else if (options.thinking.budgetTokens !== undefined) {
|
||||
thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
|
||||
}
|
||||
config.thinkingConfig = thinkingConfig;
|
||||
}
|
||||
|
||||
if (options.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
const params: GenerateContentParameters = {
|
||||
model: model.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Effort levels handled by the level/budget mappers below; "xhigh" is excluded
// because these Gemini mappings only cover minimal/low/medium/high.
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
||||
|
||||
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-flash/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function getGemini3ThinkingLevel(
|
||||
effort: ClampedThinkingLevel,
|
||||
model: Model<"google-generative-ai">,
|
||||
): GoogleThinkingLevel {
|
||||
if (isGemini3ProModel(model)) {
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
return "MINIMAL";
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
return "MEDIUM";
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
|
||||
function getGoogleBudget(
|
||||
model: Model<"google-generative-ai">,
|
||||
effort: ClampedThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): number {
|
||||
if (customBudgets?.[effort] !== undefined) {
|
||||
return customBudgets[effort]!;
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-pro")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 32768,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-flash")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 24576,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
688
packages/ai/src/providers/mistral.ts
Normal file
688
packages/ai/src/providers/mistral.ts
Normal file
|
|
@ -0,0 +1,688 @@
|
|||
import { Mistral } from "@mistralai/mistralai";
|
||||
import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js";
|
||||
import type {
|
||||
ChatCompletionStreamRequest,
|
||||
ChatCompletionStreamRequestMessages,
|
||||
CompletionEvent,
|
||||
ContentChunk,
|
||||
FunctionTool,
|
||||
} from "@mistralai/mistralai/models/components/index.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Message,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
// Mistral accepts tool-call IDs of exactly this many alphanumeric characters;
// IDs from other providers are normalized to this length before being sent.
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
// Cap on how much of an error response body is embedded in error messages.
const MAX_MISTRAL_ERROR_BODY_CHARS = 4000;
|
||||
|
||||
/**
 * Provider-specific options for the Mistral API.
 */
export interface MistralOptions extends StreamOptions {
  /** Function-calling mode, or a specific function the model must call. */
  toolChoice?:
    | "auto"
    | "none"
    | "any"
    | "required"
    | { type: "function"; function: { name: string } };
  /** Set to "reasoning" to request Mistral's reasoning prompt mode. */
  promptMode?: "reasoning";
}
|
||||
|
||||
/**
 * Stream responses from Mistral using `chat.stream`.
 *
 * Returns immediately with an event stream; the request runs in a detached
 * async task that pushes a `start` event, delegates chunk handling to
 * `consumeChatStream`, and finishes with `done` (or `error` on failure or
 * abort). Tool-call IDs in the outgoing history are normalized to Mistral's
 * required format before the payload is built.
 */
export const streamMistral: StreamFunction<
  "mistral-conversations",
  MistralOptions
> = (
  model: Model<"mistral-conversations">,
  context: Context,
  options?: MistralOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Skeleton assistant message, filled in as stream events arrive.
    const output = createOutput(model);

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider);
      if (!apiKey) {
        throw new Error(`No API key for provider: ${model.provider}`);
      }

      // Intentionally per-request: avoids shared SDK mutable state across concurrent consumers.
      const mistral = new Mistral({
        apiKey,
        serverURL: model.baseUrl,
      });

      // Rewrite tool-call IDs to Mistral's alphanumeric format with a mapping
      // that stays stable for the duration of this request.
      const normalizeMistralToolCallId = createMistralToolCallIdNormalizer();
      const transformedMessages = transformMessages(
        context.messages,
        model,
        (id) => normalizeMistralToolCallId(id),
      );

      const payload = buildChatPayload(
        model,
        context,
        transformedMessages,
        options,
      );
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(payload);
      const mistralStream = await mistral.chat.stream(
        payload,
        buildRequestOptions(model, options),
      );
      stream.push({ type: "start", partial: output });
      await consumeChatStream(model, output, stream, mistralStream);

      // Abort may have fired while the stream drained without raising.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // A terminal aborted/error stop reason without a thrown exception is
      // still treated as a failure.
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = formatMistralError(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
|
||||
* Maps provider-agnostic `SimpleStreamOptions` to Mistral options.
|
||||
*/
|
||||
export const streamSimpleMistral: StreamFunction<
|
||||
"mistral-conversations",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"mistral-conversations">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoning = clampReasoning(options?.reasoning);
|
||||
|
||||
return streamMistral(model, context, {
|
||||
...base,
|
||||
promptMode: model.reasoning && reasoning ? "reasoning" : undefined,
|
||||
} satisfies MistralOptions);
|
||||
};
|
||||
|
||||
function createOutput(model: Model<"mistral-conversations">): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function createMistralToolCallIdNormalizer(): (id: string) => string {
|
||||
const idMap = new Map<string, string>();
|
||||
const reverseMap = new Map<string, string>();
|
||||
|
||||
return (id: string): string => {
|
||||
const existing = idMap.get(id);
|
||||
if (existing) return existing;
|
||||
|
||||
let attempt = 0;
|
||||
while (true) {
|
||||
const candidate = deriveMistralToolCallId(id, attempt);
|
||||
const owner = reverseMap.get(candidate);
|
||||
if (!owner || owner === id) {
|
||||
idMap.set(id, candidate);
|
||||
reverseMap.set(candidate, id);
|
||||
return candidate;
|
||||
}
|
||||
attempt++;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function deriveMistralToolCallId(id: string, attempt: number): string {
|
||||
const normalized = id.replace(/[^a-zA-Z0-9]/g, "");
|
||||
if (attempt === 0 && normalized.length === MISTRAL_TOOL_CALL_ID_LENGTH)
|
||||
return normalized;
|
||||
const seedBase = normalized || id;
|
||||
const seed = attempt === 0 ? seedBase : `${seedBase}:${attempt}`;
|
||||
return shortHash(seed)
|
||||
.replace(/[^a-zA-Z0-9]/g, "")
|
||||
.slice(0, MISTRAL_TOOL_CALL_ID_LENGTH);
|
||||
}
|
||||
|
||||
function formatMistralError(error: unknown): string {
|
||||
if (error instanceof Error) {
|
||||
const sdkError = error as Error & { statusCode?: unknown; body?: unknown };
|
||||
const statusCode =
|
||||
typeof sdkError.statusCode === "number" ? sdkError.statusCode : undefined;
|
||||
const bodyText =
|
||||
typeof sdkError.body === "string" ? sdkError.body.trim() : undefined;
|
||||
if (statusCode !== undefined && bodyText) {
|
||||
return `Mistral API error (${statusCode}): ${truncateErrorText(bodyText, MAX_MISTRAL_ERROR_BODY_CHARS)}`;
|
||||
}
|
||||
if (statusCode !== undefined)
|
||||
return `Mistral API error (${statusCode}): ${error.message}`;
|
||||
return error.message;
|
||||
}
|
||||
return safeJsonStringify(error);
|
||||
}
|
||||
|
||||
function truncateErrorText(text: string, maxChars: number): string {
|
||||
if (text.length <= maxChars) return text;
|
||||
return `${text.slice(0, maxChars)}... [truncated ${text.length - maxChars} chars]`;
|
||||
}
|
||||
|
||||
function safeJsonStringify(value: unknown): string {
|
||||
try {
|
||||
const serialized = JSON.stringify(value);
|
||||
return serialized === undefined ? String(value) : serialized;
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
}
|
||||
|
||||
function buildRequestOptions(
|
||||
model: Model<"mistral-conversations">,
|
||||
options?: MistralOptions,
|
||||
): RequestOptions {
|
||||
const requestOptions: RequestOptions = {};
|
||||
if (options?.signal) requestOptions.signal = options.signal;
|
||||
requestOptions.retries = { strategy: "none" };
|
||||
|
||||
const headers: Record<string, string> = {};
|
||||
if (model.headers) Object.assign(headers, model.headers);
|
||||
if (options?.headers) Object.assign(headers, options.headers);
|
||||
|
||||
// Mistral infrastructure uses `x-affinity` for KV-cache reuse (prefix caching).
|
||||
// Respect explicit caller-provided header values.
|
||||
if (options?.sessionId && !headers["x-affinity"]) {
|
||||
headers["x-affinity"] = options.sessionId;
|
||||
}
|
||||
|
||||
if (Object.keys(headers).length > 0) {
|
||||
requestOptions.headers = headers;
|
||||
}
|
||||
|
||||
return requestOptions;
|
||||
}
|
||||
|
||||
function buildChatPayload(
|
||||
model: Model<"mistral-conversations">,
|
||||
context: Context,
|
||||
messages: Message[],
|
||||
options?: MistralOptions,
|
||||
): ChatCompletionStreamRequest {
|
||||
const payload: ChatCompletionStreamRequest = {
|
||||
model: model.id,
|
||||
stream: true,
|
||||
messages: toChatMessages(messages, model.input.includes("image")),
|
||||
};
|
||||
|
||||
if (context.tools?.length) payload.tools = toFunctionTools(context.tools);
|
||||
if (options?.temperature !== undefined)
|
||||
payload.temperature = options.temperature;
|
||||
if (options?.maxTokens !== undefined) payload.maxTokens = options.maxTokens;
|
||||
if (options?.toolChoice)
|
||||
payload.toolChoice = mapToolChoice(options.toolChoice);
|
||||
if (options?.promptMode) payload.promptMode = options.promptMode as any;
|
||||
|
||||
if (context.systemPrompt) {
|
||||
payload.messages.unshift({
|
||||
role: "system",
|
||||
content: sanitizeSurrogates(context.systemPrompt),
|
||||
});
|
||||
}
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
async function consumeChatStream(
|
||||
model: Model<"mistral-conversations">,
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
mistralStream: AsyncIterable<CompletionEvent>,
|
||||
): Promise<void> {
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
const toolBlocksByKey = new Map<string, number>();
|
||||
|
||||
const finishCurrentBlock = (block?: typeof currentBlock) => {
|
||||
if (!block) return;
|
||||
if (block.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (block.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
for await (const event of mistralStream) {
|
||||
const chunk = event.data;
|
||||
|
||||
if (chunk.usage) {
|
||||
output.usage.input = chunk.usage.promptTokens || 0;
|
||||
output.usage.output = chunk.usage.completionTokens || 0;
|
||||
output.usage.cacheRead = 0;
|
||||
output.usage.cacheWrite = 0;
|
||||
output.usage.totalTokens =
|
||||
chunk.usage.totalTokens || output.usage.input + output.usage.output;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
if (choice.finishReason) {
|
||||
output.stopReason = mapChatStopReason(choice.finishReason);
|
||||
}
|
||||
|
||||
const delta = choice.delta;
|
||||
if (delta.content !== null && delta.content !== undefined) {
|
||||
const contentItems =
|
||||
typeof delta.content === "string" ? [delta.content] : delta.content;
|
||||
for (const item of contentItems) {
|
||||
if (typeof item === "string") {
|
||||
const textDelta = sanitizeSurrogates(item);
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.text += textDelta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: textDelta,
|
||||
partial: output,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item.type === "thinking") {
|
||||
const deltaText = item.thinking
|
||||
.map((part) => ("text" in part ? part.text : ""))
|
||||
.filter((text) => text.length > 0)
|
||||
.join("");
|
||||
const thinkingDelta = sanitizeSurrogates(deltaText);
|
||||
if (!thinkingDelta) continue;
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "thinking_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.thinking += thinkingDelta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: thinkingDelta,
|
||||
partial: output,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item.type === "text") {
|
||||
const textDelta = sanitizeSurrogates(item.text);
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.text += textDelta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: textDelta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = delta.toolCalls || [];
|
||||
for (const toolCall of toolCalls) {
|
||||
if (currentBlock) {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = null;
|
||||
}
|
||||
const callId =
|
||||
toolCall.id && toolCall.id !== "null"
|
||||
? toolCall.id
|
||||
: deriveMistralToolCallId(`toolcall:${toolCall.index ?? 0}`, 0);
|
||||
const key = `${callId}:${toolCall.index || 0}`;
|
||||
const existingIndex = toolBlocksByKey.get(key);
|
||||
let block: (ToolCall & { partialArgs?: string }) | undefined;
|
||||
|
||||
if (existingIndex !== undefined) {
|
||||
const existing = output.content[existingIndex];
|
||||
if (existing?.type === "toolCall") {
|
||||
block = existing as ToolCall & { partialArgs?: string };
|
||||
}
|
||||
}
|
||||
|
||||
if (!block) {
|
||||
block = {
|
||||
type: "toolCall",
|
||||
id: callId,
|
||||
name: toolCall.function.name,
|
||||
arguments: {},
|
||||
partialArgs: "",
|
||||
};
|
||||
output.content.push(block);
|
||||
toolBlocksByKey.set(key, output.content.length - 1);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: output.content.length - 1,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
|
||||
const argsDelta =
|
||||
typeof toolCall.function.arguments === "string"
|
||||
? toolCall.function.arguments
|
||||
: JSON.stringify(toolCall.function.arguments || {});
|
||||
block.partialArgs = (block.partialArgs || "") + argsDelta;
|
||||
block.arguments = parseStreamingJson<Record<string, unknown>>(
|
||||
block.partialArgs,
|
||||
);
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: toolBlocksByKey.get(key)!,
|
||||
delta: argsDelta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
finishCurrentBlock(currentBlock);
|
||||
for (const index of toolBlocksByKey.values()) {
|
||||
const block = output.content[index];
|
||||
if (block.type !== "toolCall") continue;
|
||||
const toolBlock = block as ToolCall & { partialArgs?: string };
|
||||
toolBlock.arguments = parseStreamingJson<Record<string, unknown>>(
|
||||
toolBlock.partialArgs,
|
||||
);
|
||||
delete toolBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: index,
|
||||
toolCall: toolBlock,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function toFunctionTools(
|
||||
tools: Tool[],
|
||||
): Array<FunctionTool & { type: "function" }> {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as unknown as Record<string, unknown>,
|
||||
strict: false,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Converts provider-agnostic conversation messages into Mistral
 * chat-completion request messages.
 *
 * - User messages keep text; images are kept only when `supportsImages`,
 *   otherwise a placeholder string is substituted (and empty messages are
 *   dropped entirely).
 * - Assistant messages carry text/thinking parts plus tool calls; messages
 *   with neither are dropped.
 * - Any other role is treated as a tool result and emitted as a `tool`
 *   message with a text summary (see buildToolResultText) plus any images.
 *
 * @param messages       Conversation history to convert.
 * @param supportsImages Whether the target model accepts image input.
 * @returns Messages in Mistral wire format, in the original order.
 */
function toChatMessages(
  messages: Message[],
  supportsImages: boolean,
): ChatCompletionStreamRequestMessages[] {
  const result: ChatCompletionStreamRequestMessages[] = [];

  for (const msg of messages) {
    if (msg.role === "user") {
      // Plain string content maps directly to a string message.
      if (typeof msg.content === "string") {
        result.push({ role: "user", content: sanitizeSurrogates(msg.content) });
        continue;
      }
      // Remember whether images were present before filtering, so a message
      // that consisted only of unsupported images still leaves a trace.
      const hadImages = msg.content.some((item) => item.type === "image");
      const content: ContentChunk[] = msg.content
        .filter((item) => item.type === "text" || supportsImages)
        .map((item) => {
          if (item.type === "text")
            return { type: "text", text: sanitizeSurrogates(item.text) };
          // Images are inlined as data URLs.
          return {
            type: "image_url",
            imageUrl: `data:${item.mimeType};base64,${item.data}`,
          };
        });
      if (content.length > 0) {
        result.push({ role: "user", content });
        continue;
      }
      // Nothing survived filtering: substitute a placeholder if images were
      // dropped; otherwise drop the (empty) message entirely.
      if (hadImages && !supportsImages) {
        result.push({
          role: "user",
          content: "(image omitted: model does not support images)",
        });
      }
      continue;
    }

    if (msg.role === "assistant") {
      const contentParts: ContentChunk[] = [];
      const toolCalls: Array<{
        id: string;
        type: "function";
        function: { name: string; arguments: string };
      }> = [];

      for (const block of msg.content) {
        if (block.type === "text") {
          // Skip whitespace-only text to avoid API validation errors.
          if (block.text.trim().length > 0) {
            contentParts.push({
              type: "text",
              text: sanitizeSurrogates(block.text),
            });
          }
          continue;
        }
        if (block.type === "thinking") {
          // Thinking is preserved as a dedicated chunk wrapping text parts.
          if (block.thinking.trim().length > 0) {
            contentParts.push({
              type: "thinking",
              thinking: [
                { type: "text", text: sanitizeSurrogates(block.thinking) },
              ],
            });
          }
          continue;
        }
        // Remaining block type is a tool call; arguments are serialized JSON.
        toolCalls.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            arguments: JSON.stringify(block.arguments || {}),
          },
        });
      }

      const assistantMessage: ChatCompletionStreamRequestMessages = {
        role: "assistant",
      };
      if (contentParts.length > 0) assistantMessage.content = contentParts;
      if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls;
      // Drop assistant messages that ended up completely empty.
      if (contentParts.length > 0 || toolCalls.length > 0)
        result.push(assistantMessage);
      continue;
    }

    // Tool-result message: summarize text (and image availability) into a
    // single text chunk, then append supported images as data URLs.
    const toolContent: ContentChunk[] = [];
    const textResult = msg.content
      .filter((part) => part.type === "text")
      .map((part) =>
        part.type === "text" ? sanitizeSurrogates(part.text) : "",
      )
      .join("\n");
    const hasImages = msg.content.some((part) => part.type === "image");
    const toolText = buildToolResultText(
      textResult,
      hasImages,
      supportsImages,
      msg.isError,
    );
    toolContent.push({ type: "text", text: toolText });
    for (const part of msg.content) {
      if (!supportsImages) continue;
      if (part.type !== "image") continue;
      toolContent.push({
        type: "image_url",
        imageUrl: `data:${part.mimeType};base64,${part.data}`,
      });
    }
    result.push({
      role: "tool",
      toolCallId: msg.toolCallId,
      name: msg.toolName,
      content: toolContent,
    });
  }

  return result;
}
|
||||
|
||||
function buildToolResultText(
|
||||
text: string,
|
||||
hasImages: boolean,
|
||||
supportsImages: boolean,
|
||||
isError: boolean,
|
||||
): string {
|
||||
const trimmed = text.trim();
|
||||
const errorPrefix = isError ? "[tool error] " : "";
|
||||
|
||||
if (trimmed.length > 0) {
|
||||
const imageSuffix =
|
||||
hasImages && !supportsImages
|
||||
? "\n[tool image omitted: model does not support images]"
|
||||
: "";
|
||||
return `${errorPrefix}${trimmed}${imageSuffix}`;
|
||||
}
|
||||
|
||||
if (hasImages) {
|
||||
if (supportsImages) {
|
||||
return isError
|
||||
? "[tool error] (see attached image)"
|
||||
: "(see attached image)";
|
||||
}
|
||||
return isError
|
||||
? "[tool error] (image omitted: model does not support images)"
|
||||
: "(image omitted: model does not support images)";
|
||||
}
|
||||
|
||||
return isError ? "[tool error] (no tool output)" : "(no tool output)";
|
||||
}
|
||||
|
||||
function mapToolChoice(
|
||||
choice: MistralOptions["toolChoice"],
|
||||
):
|
||||
| "auto"
|
||||
| "none"
|
||||
| "any"
|
||||
| "required"
|
||||
| { type: "function"; function: { name: string } }
|
||||
| undefined {
|
||||
if (!choice) return undefined;
|
||||
if (
|
||||
choice === "auto" ||
|
||||
choice === "none" ||
|
||||
choice === "any" ||
|
||||
choice === "required"
|
||||
) {
|
||||
return choice as any;
|
||||
}
|
||||
return {
|
||||
type: "function",
|
||||
function: { name: choice.function.name },
|
||||
};
|
||||
}
|
||||
|
||||
function mapChatStopReason(reason: string | null): StopReason {
|
||||
if (reason === null) return "stop";
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
case "model_length":
|
||||
return "length";
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "error":
|
||||
return "error";
|
||||
default:
|
||||
return "stop";
|
||||
}
|
||||
}
|
||||
1016
packages/ai/src/providers/openai-codex-responses.ts
Normal file
1016
packages/ai/src/providers/openai-codex-responses.ts
Normal file
File diff suppressed because it is too large
Load diff
949
packages/ai/src/providers/openai-completions.ts
Normal file
949
packages/ai/src/providers/openai-completions.ts
Normal file
|
|
@ -0,0 +1,949 @@
|
|||
import OpenAI from "openai";
|
||||
import type {
|
||||
ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionContentPart,
|
||||
ChatCompletionContentPartImage,
|
||||
ChatCompletionContentPartText,
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionToolMessageParam,
|
||||
} from "openai/resources/chat/completions.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost, supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Message,
|
||||
Model,
|
||||
OpenAICompletionsCompat,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/**
|
||||
* Check if conversation messages contain tool calls or tool results.
|
||||
* This is needed because Anthropic (via proxy) requires the tools param
|
||||
* to be present when messages include tool_calls or tool role messages.
|
||||
*/
|
||||
function hasToolHistory(messages: Message[]): boolean {
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "toolResult") {
|
||||
return true;
|
||||
}
|
||||
if (msg.role === "assistant") {
|
||||
if (msg.content.some((block) => block.type === "toolCall")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Options accepted by the OpenAI chat-completions stream functions,
 * extending the generic StreamOptions.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
  // Forwarded as the request's `tool_choice`: one of the string modes, or a
  // specific function selection.
  toolChoice?:
    | "auto"
    | "none"
    | "required"
    | { type: "function"; function: { name: string } };
  // Forwarded as `reasoning_effort` for reasoning-capable models; "xhigh" is
  // only valid for models where supportsXhigh() is true.
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
}
|
||||
|
||||
/**
 * Streams a chat-completions request and translates raw SDK chunks into the
 * shared assistant-message event model (start / text / thinking / toolcall
 * deltas / done / error).
 *
 * The returned stream is hot: the request is started immediately in a
 * detached async IIFE, and all outcomes — including failures — are reported
 * as events on the stream rather than thrown to the caller.
 */
export const streamOpenAICompletions: StreamFunction<
  "openai-completions",
  OpenAICompletionsOptions
> = (
  model: Model<"openai-completions">,
  context: Context,
  options?: OpenAICompletionsOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the final assistant message; mutated as chunks arrive
    // and referenced by every event as `partial`.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, context, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      // Give the caller a chance to observe the outgoing payload.
      options?.onPayload?.(params);
      const openaiStream = await client.chat.completions.create(params, {
        signal: options?.signal,
      });
      stream.push({ type: "start", partial: output });

      // The content block currently being accumulated; tool calls carry a
      // transient partialArgs buffer of not-yet-complete JSON.
      let currentBlock:
        | TextContent
        | ThinkingContent
        | (ToolCall & { partialArgs?: string })
        | null = null;
      const blocks = output.content;
      const blockIndex = () => blocks.length - 1;
      // Emits the matching *_end event for the given block; for tool calls,
      // parses the final argument JSON and drops the scratch buffer.
      const finishCurrentBlock = (block?: typeof currentBlock) => {
        if (block) {
          if (block.type === "text") {
            stream.push({
              type: "text_end",
              contentIndex: blockIndex(),
              content: block.text,
              partial: output,
            });
          } else if (block.type === "thinking") {
            stream.push({
              type: "thinking_end",
              contentIndex: blockIndex(),
              content: block.thinking,
              partial: output,
            });
          } else if (block.type === "toolCall") {
            block.arguments = parseStreamingJson(block.partialArgs);
            delete block.partialArgs;
            stream.push({
              type: "toolcall_end",
              contentIndex: blockIndex(),
              toolCall: block,
              partial: output,
            });
          }
        }
      };

      for await (const chunk of openaiStream) {
        // Usage typically arrives on the final chunk; overwrite wholesale.
        if (chunk.usage) {
          const cachedTokens =
            chunk.usage.prompt_tokens_details?.cached_tokens || 0;
          const reasoningTokens =
            chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
          const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
          const outputTokens =
            (chunk.usage.completion_tokens || 0) + reasoningTokens;
          output.usage = {
            // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
            input,
            output: outputTokens,
            cacheRead: cachedTokens,
            cacheWrite: 0,
            // Compute totalTokens ourselves since we add reasoning_tokens to output
            // and some providers (e.g., Groq) don't include them in total_tokens
            totalTokens: input + outputTokens + cachedTokens,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }

        const choice = chunk.choices?.[0];
        if (!choice) continue;

        if (choice.finish_reason) {
          output.stopReason = mapStopReason(choice.finish_reason);
        }

        if (choice.delta) {
          // Plain text delta: open a text block if needed, then append.
          if (
            choice.delta.content !== null &&
            choice.delta.content !== undefined &&
            choice.delta.content.length > 0
          ) {
            if (!currentBlock || currentBlock.type !== "text") {
              finishCurrentBlock(currentBlock);
              currentBlock = { type: "text", text: "" };
              output.content.push(currentBlock);
              stream.push({
                type: "text_start",
                contentIndex: blockIndex(),
                partial: output,
              });
            }

            if (currentBlock.type === "text") {
              currentBlock.text += choice.delta.content;
              stream.push({
                type: "text_delta",
                contentIndex: blockIndex(),
                delta: choice.delta.content,
                partial: output,
              });
            }
          }

          // Some endpoints return reasoning in reasoning_content (llama.cpp),
          // or reasoning (other openai compatible endpoints)
          // Use the first non-empty reasoning field to avoid duplication
          // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
          const reasoningFields = [
            "reasoning_content",
            "reasoning",
            "reasoning_text",
          ];
          let foundReasoningField: string | null = null;
          for (const field of reasoningFields) {
            if (
              (choice.delta as any)[field] !== null &&
              (choice.delta as any)[field] !== undefined &&
              (choice.delta as any)[field].length > 0
            ) {
              if (!foundReasoningField) {
                foundReasoningField = field;
                break;
              }
            }
          }

          if (foundReasoningField) {
            if (!currentBlock || currentBlock.type !== "thinking") {
              finishCurrentBlock(currentBlock);
              // Record which field carried the reasoning so the round-trip
              // conversion can write it back to the same field.
              currentBlock = {
                type: "thinking",
                thinking: "",
                thinkingSignature: foundReasoningField,
              };
              output.content.push(currentBlock);
              stream.push({
                type: "thinking_start",
                contentIndex: blockIndex(),
                partial: output,
              });
            }

            if (currentBlock.type === "thinking") {
              const delta = (choice.delta as any)[foundReasoningField];
              currentBlock.thinking += delta;
              stream.push({
                type: "thinking_delta",
                contentIndex: blockIndex(),
                delta,
                partial: output,
              });
            }
          }

          if (choice?.delta?.tool_calls) {
            for (const toolCall of choice.delta.tool_calls) {
              // A new tool call starts when no tool block is open, or when a
              // chunk carries a different id than the block in progress.
              if (
                !currentBlock ||
                currentBlock.type !== "toolCall" ||
                (toolCall.id && currentBlock.id !== toolCall.id)
              ) {
                finishCurrentBlock(currentBlock);
                currentBlock = {
                  type: "toolCall",
                  id: toolCall.id || "",
                  name: toolCall.function?.name || "",
                  arguments: {},
                  partialArgs: "",
                };
                output.content.push(currentBlock);
                stream.push({
                  type: "toolcall_start",
                  contentIndex: blockIndex(),
                  partial: output,
                });
              }

              if (currentBlock.type === "toolCall") {
                // id/name may trickle in on later chunks; keep the latest.
                if (toolCall.id) currentBlock.id = toolCall.id;
                if (toolCall.function?.name)
                  currentBlock.name = toolCall.function.name;
                let delta = "";
                if (toolCall.function?.arguments) {
                  delta = toolCall.function.arguments;
                  currentBlock.partialArgs += toolCall.function.arguments;
                  // Best-effort parse of the JSON accumulated so far.
                  currentBlock.arguments = parseStreamingJson(
                    currentBlock.partialArgs,
                  );
                }
                stream.push({
                  type: "toolcall_delta",
                  contentIndex: blockIndex(),
                  delta,
                  partial: output,
                });
              }
            }
          }

          // OpenRouter-style encrypted reasoning blobs: attach the detail to
          // the tool call with a matching id as a thought signature.
          const reasoningDetails = (choice.delta as any).reasoning_details;
          if (reasoningDetails && Array.isArray(reasoningDetails)) {
            for (const detail of reasoningDetails) {
              if (
                detail.type === "reasoning.encrypted" &&
                detail.id &&
                detail.data
              ) {
                const matchingToolCall = output.content.find(
                  (b) => b.type === "toolCall" && b.id === detail.id,
                ) as ToolCall | undefined;
                if (matchingToolCall) {
                  matchingToolCall.thoughtSignature = JSON.stringify(detail);
                }
              }
            }
          }
        }
      }

      finishCurrentBlock(currentBlock);
      // Surface aborts/errors through the catch path so the error event fires.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip any transient index bookkeeping before reporting.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      // Some providers via OpenRouter give additional information in this field.
      const rawMetadata = (error as any)?.error?.metadata?.raw;
      if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleOpenAICompletions: StreamFunction<
|
||||
"openai-completions",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model)
|
||||
? options?.reasoning
|
||||
: clampReasoning(options?.reasoning);
|
||||
const toolChoice = (options as OpenAICompletionsOptions | undefined)
|
||||
?.toolChoice;
|
||||
|
||||
return streamOpenAICompletions(model, context, {
|
||||
...base,
|
||||
reasoningEffort,
|
||||
toolChoice,
|
||||
} satisfies OpenAICompletionsOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
|
||||
const headers = { ...model.headers };
|
||||
if (model.provider === "github-copilot") {
|
||||
const hasImages = hasCopilotVisionInput(context.messages);
|
||||
const copilotHeaders = buildCopilotDynamicHeaders({
|
||||
messages: context.messages,
|
||||
hasImages,
|
||||
});
|
||||
Object.assign(headers, copilotHeaders);
|
||||
}
|
||||
|
||||
// Merge options headers last so they can override defaults
|
||||
if (optionsHeaders) {
|
||||
Object.assign(headers, optionsHeaders);
|
||||
}
|
||||
|
||||
return new OpenAI({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: headers,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assembles the streaming chat-completions request payload for `model`,
 * applying per-provider compatibility quirks from getCompat(): max-tokens
 * field naming, thinking/reasoning flags, and router-specific routing
 * options for OpenRouter and the Vercel AI Gateway.
 */
function buildParams(
  model: Model<"openai-completions">,
  context: Context,
  options?: OpenAICompletionsOptions,
) {
  const compat = getCompat(model);
  const messages = convertMessages(model, context, compat);
  // OpenRouter + Anthropic models need an explicit cache_control breakpoint.
  maybeAddOpenRouterAnthropicCacheControl(model, messages);

  const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
    model: model.id,
    messages,
    stream: true,
  };

  // Request usage stats in the final stream chunk unless the provider
  // rejects the stream_options field.
  if (compat.supportsUsageInStreaming !== false) {
    (params as any).stream_options = { include_usage: true };
  }

  // Opt out of server-side response storage where the API supports `store`.
  if (compat.supportsStore) {
    params.store = false;
  }

  if (options?.maxTokens) {
    // Legacy providers expect max_tokens; OpenAI uses max_completion_tokens.
    if (compat.maxTokensField === "max_tokens") {
      (params as any).max_tokens = options.maxTokens;
    } else {
      params.max_completion_tokens = options.maxTokens;
    }
  }

  if (options?.temperature !== undefined) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools, compat);
  } else if (hasToolHistory(context.messages)) {
    // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
    params.tools = [];
  }

  if (options?.toolChoice) {
    params.tool_choice = options.toolChoice;
  }

  if (
    (compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") &&
    model.reasoning
  ) {
    // Both Z.ai and Qwen use enable_thinking: boolean
    (params as any).enable_thinking = !!options?.reasoningEffort;
  } else if (
    options?.reasoningEffort &&
    model.reasoning &&
    compat.supportsReasoningEffort
  ) {
    // OpenAI-style reasoning_effort
    (params as any).reasoning_effort = mapReasoningEffort(
      options.reasoningEffort,
      compat.reasoningEffortMap,
    );
  }

  // OpenRouter provider routing preferences
  if (
    model.baseUrl.includes("openrouter.ai") &&
    model.compat?.openRouterRouting
  ) {
    (params as any).provider = model.compat.openRouterRouting;
  }

  // Vercel AI Gateway provider routing preferences
  if (
    model.baseUrl.includes("ai-gateway.vercel.sh") &&
    model.compat?.vercelGatewayRouting
  ) {
    const routing = model.compat.vercelGatewayRouting;
    if (routing.only || routing.order) {
      const gatewayOptions: Record<string, string[]> = {};
      if (routing.only) gatewayOptions.only = routing.only;
      if (routing.order) gatewayOptions.order = routing.order;
      (params as any).providerOptions = { gateway: gatewayOptions };
    }
  }

  return params;
}
|
||||
|
||||
function mapReasoningEffort(
|
||||
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
|
||||
reasoningEffortMap: Partial<
|
||||
Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>
|
||||
>,
|
||||
): string {
|
||||
return reasoningEffortMap[effort] ?? effort;
|
||||
}
|
||||
|
||||
function maybeAddOpenRouterAnthropicCacheControl(
|
||||
model: Model<"openai-completions">,
|
||||
messages: ChatCompletionMessageParam[],
|
||||
): void {
|
||||
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
|
||||
return;
|
||||
|
||||
// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
|
||||
// on the last user/assistant message (walking backwards until we find text content).
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (msg.role !== "user" && msg.role !== "assistant") continue;
|
||||
|
||||
const content = msg.content;
|
||||
if (typeof content === "string") {
|
||||
msg.content = [
|
||||
Object.assign(
|
||||
{ type: "text" as const, text: content },
|
||||
{ cache_control: { type: "ephemeral" } },
|
||||
),
|
||||
];
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) continue;
|
||||
|
||||
// Find last text part and add cache_control
|
||||
for (let j = content.length - 1; j >= 0; j--) {
|
||||
const part = content[j];
|
||||
if (part?.type === "text") {
|
||||
Object.assign(part, { cache_control: { type: "ephemeral" } });
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function convertMessages(
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
compat: Required<OpenAICompletionsCompat>,
|
||||
): ChatCompletionMessageParam[] {
|
||||
const params: ChatCompletionMessageParam[] = [];
|
||||
|
||||
const normalizeToolCallId = (id: string): string => {
|
||||
// Handle pipe-separated IDs from OpenAI Responses API
|
||||
// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
|
||||
// These come from providers like github-copilot, openai-codex, opencode
|
||||
// Extract just the call_id part and normalize it
|
||||
if (id.includes("|")) {
|
||||
const [callId] = id.split("|");
|
||||
// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
|
||||
return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
|
||||
}
|
||||
|
||||
if (model.provider === "openai")
|
||||
return id.length > 40 ? id.slice(0, 40) : id;
|
||||
return id;
|
||||
};
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model, (id) =>
|
||||
normalizeToolCallId(id),
|
||||
);
|
||||
|
||||
if (context.systemPrompt) {
|
||||
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
||||
const role = useDeveloperRole ? "developer" : "system";
|
||||
params.push({
|
||||
role: role,
|
||||
content: sanitizeSurrogates(context.systemPrompt),
|
||||
});
|
||||
}
|
||||
|
||||
let lastRole: string | null = null;
|
||||
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
// Some providers don't allow user messages directly after tool results
|
||||
// Insert a synthetic assistant message to bridge the gap
|
||||
if (
|
||||
compat.requiresAssistantAfterToolResult &&
|
||||
lastRole === "toolResult" &&
|
||||
msg.role === "user"
|
||||
) {
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: "I have processed the tool results.",
|
||||
});
|
||||
}
|
||||
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: sanitizeSurrogates(msg.content),
|
||||
});
|
||||
} else {
|
||||
const content: ChatCompletionContentPart[] = msg.content.map(
|
||||
(item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(item.text),
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
} else {
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${item.mimeType};base64,${item.data}`,
|
||||
},
|
||||
} satisfies ChatCompletionContentPartImage;
|
||||
}
|
||||
},
|
||||
);
|
||||
const filteredContent = !model.input.includes("image")
|
||||
? content.filter((c) => c.type !== "image_url")
|
||||
: content;
|
||||
if (filteredContent.length === 0) continue;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
// Some providers don't accept null content, use empty string instead
|
||||
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
||||
role: "assistant",
|
||||
content: compat.requiresAssistantAfterToolResult ? "" : null,
|
||||
};
|
||||
|
||||
const textBlocks = msg.content.filter(
|
||||
(b) => b.type === "text",
|
||||
) as TextContent[];
|
||||
// Filter out empty text blocks to avoid API validation errors
|
||||
const nonEmptyTextBlocks = textBlocks.filter(
|
||||
(b) => b.text && b.text.trim().length > 0,
|
||||
);
|
||||
if (nonEmptyTextBlocks.length > 0) {
|
||||
// GitHub Copilot requires assistant content as a string, not an array.
|
||||
// Sending as array causes Claude models to re-answer all previous prompts.
|
||||
if (model.provider === "github-copilot") {
|
||||
assistantMsg.content = nonEmptyTextBlocks
|
||||
.map((b) => sanitizeSurrogates(b.text))
|
||||
.join("");
|
||||
} else {
|
||||
assistantMsg.content = nonEmptyTextBlocks.map((b) => {
|
||||
return { type: "text", text: sanitizeSurrogates(b.text) };
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle thinking blocks
|
||||
const thinkingBlocks = msg.content.filter(
|
||||
(b) => b.type === "thinking",
|
||||
) as ThinkingContent[];
|
||||
// Filter out empty thinking blocks to avoid API validation errors
|
||||
const nonEmptyThinkingBlocks = thinkingBlocks.filter(
|
||||
(b) => b.thinking && b.thinking.trim().length > 0,
|
||||
);
|
||||
if (nonEmptyThinkingBlocks.length > 0) {
|
||||
if (compat.requiresThinkingAsText) {
|
||||
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
||||
const thinkingText = nonEmptyThinkingBlocks
|
||||
.map((b) => b.thinking)
|
||||
.join("\n\n");
|
||||
const textContent = assistantMsg.content as Array<{
|
||||
type: "text";
|
||||
text: string;
|
||||
}> | null;
|
||||
if (textContent) {
|
||||
textContent.unshift({ type: "text", text: thinkingText });
|
||||
} else {
|
||||
assistantMsg.content = [{ type: "text", text: thinkingText }];
|
||||
}
|
||||
} else {
|
||||
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
|
||||
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
||||
if (signature && signature.length > 0) {
|
||||
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks
|
||||
.map((b) => b.thinking)
|
||||
.join("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = msg.content.filter(
|
||||
(b) => b.type === "toolCall",
|
||||
) as ToolCall[];
|
||||
if (toolCalls.length > 0) {
|
||||
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
||||
id: tc.id,
|
||||
type: "function" as const,
|
||||
function: {
|
||||
name: tc.name,
|
||||
arguments: JSON.stringify(tc.arguments),
|
||||
},
|
||||
}));
|
||||
const reasoningDetails = toolCalls
|
||||
.filter((tc) => tc.thoughtSignature)
|
||||
.map((tc) => {
|
||||
try {
|
||||
return JSON.parse(tc.thoughtSignature!);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter(Boolean);
|
||||
if (reasoningDetails.length > 0) {
|
||||
(assistantMsg as any).reasoning_details = reasoningDetails;
|
||||
}
|
||||
}
|
||||
// Skip assistant messages that have no content and no tool calls.
|
||||
// Some providers require "either content or tool_calls, but not none".
|
||||
// Other providers also don't accept empty assistant messages.
|
||||
// This handles aborted assistant responses that got no content.
|
||||
const content = assistantMsg.content;
|
||||
const hasContent =
|
||||
content !== null &&
|
||||
content !== undefined &&
|
||||
(typeof content === "string" ? content.length > 0 : content.length > 0);
|
||||
if (!hasContent && !assistantMsg.tool_calls) {
|
||||
continue;
|
||||
}
|
||||
params.push(assistantMsg);
|
||||
} else if (msg.role === "toolResult") {
|
||||
const imageBlocks: Array<{
|
||||
type: "image_url";
|
||||
image_url: { url: string };
|
||||
}> = [];
|
||||
let j = i;
|
||||
|
||||
for (
|
||||
;
|
||||
j < transformedMessages.length &&
|
||||
transformedMessages[j].role === "toolResult";
|
||||
j++
|
||||
) {
|
||||
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
||||
|
||||
// Extract text and image content
|
||||
const textResult = toolMsg.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => (c as any).text)
|
||||
.join("\n");
|
||||
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
||||
|
||||
// Always send tool result with text (or placeholder if only images)
|
||||
const hasText = textResult.length > 0;
|
||||
// Some providers require the 'name' field in tool results
|
||||
const toolResultMsg: ChatCompletionToolMessageParam = {
|
||||
role: "tool",
|
||||
content: sanitizeSurrogates(
|
||||
hasText ? textResult : "(see attached image)",
|
||||
),
|
||||
tool_call_id: toolMsg.toolCallId,
|
||||
};
|
||||
if (compat.requiresToolResultName && toolMsg.toolName) {
|
||||
(toolResultMsg as any).name = toolMsg.toolName;
|
||||
}
|
||||
params.push(toolResultMsg);
|
||||
|
||||
if (hasImages && model.input.includes("image")) {
|
||||
for (const block of toolMsg.content) {
|
||||
if (block.type === "image") {
|
||||
imageBlocks.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i = j - 1;
|
||||
|
||||
if (imageBlocks.length > 0) {
|
||||
if (compat.requiresAssistantAfterToolResult) {
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: "I have processed the tool results.",
|
||||
});
|
||||
}
|
||||
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "Attached image(s) from tool result:",
|
||||
},
|
||||
...imageBlocks,
|
||||
],
|
||||
});
|
||||
lastRole = "user";
|
||||
} else {
|
||||
lastRole = "toolResult";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
lastRole = msg.role;
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
function convertTools(
|
||||
tools: Tool[],
|
||||
compat: Required<OpenAICompletionsCompat>,
|
||||
): OpenAI.Chat.Completions.ChatCompletionTool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
|
||||
// Only include strict if provider supports it. Some reject unknown fields.
|
||||
...(compat.supportsStrictMode !== false && { strict: false }),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
reason: ChatCompletionChunk.Choice["finish_reason"],
|
||||
): StopReason {
|
||||
if (reason === null) return "stop";
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
return "length";
|
||||
case "function_call":
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "content_filter":
|
||||
return "error";
|
||||
default: {
|
||||
const _exhaustive: never = reason;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect compatibility settings from provider and baseUrl for known providers.
|
||||
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
||||
* Returns a fully resolved OpenAICompletionsCompat object with all fields set.
|
||||
*/
|
||||
function detectCompat(
|
||||
model: Model<"openai-completions">,
|
||||
): Required<OpenAICompletionsCompat> {
|
||||
const provider = model.provider;
|
||||
const baseUrl = model.baseUrl;
|
||||
|
||||
const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
|
||||
|
||||
const isNonStandard =
|
||||
provider === "cerebras" ||
|
||||
baseUrl.includes("cerebras.ai") ||
|
||||
provider === "xai" ||
|
||||
baseUrl.includes("api.x.ai") ||
|
||||
baseUrl.includes("chutes.ai") ||
|
||||
baseUrl.includes("deepseek.com") ||
|
||||
isZai ||
|
||||
provider === "opencode" ||
|
||||
baseUrl.includes("opencode.ai");
|
||||
|
||||
const useMaxTokens = baseUrl.includes("chutes.ai");
|
||||
|
||||
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
||||
const isGroq = provider === "groq" || baseUrl.includes("groq.com");
|
||||
|
||||
const reasoningEffortMap =
|
||||
isGroq && model.id === "qwen/qwen3-32b"
|
||||
? {
|
||||
minimal: "default",
|
||||
low: "default",
|
||||
medium: "default",
|
||||
high: "default",
|
||||
xhigh: "default",
|
||||
}
|
||||
: {};
|
||||
return {
|
||||
supportsStore: !isNonStandard,
|
||||
supportsDeveloperRole: !isNonStandard,
|
||||
supportsReasoningEffort: !isGrok && !isZai,
|
||||
reasoningEffortMap,
|
||||
supportsUsageInStreaming: true,
|
||||
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
||||
requiresToolResultName: false,
|
||||
requiresAssistantAfterToolResult: false,
|
||||
requiresThinkingAsText: false,
|
||||
thinkingFormat: isZai ? "zai" : "openai",
|
||||
openRouterRouting: {},
|
||||
vercelGatewayRouting: {},
|
||||
supportsStrictMode: true,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get resolved compatibility settings for a model.
|
||||
* Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
|
||||
*/
|
||||
function getCompat(
|
||||
model: Model<"openai-completions">,
|
||||
): Required<OpenAICompletionsCompat> {
|
||||
const detected = detectCompat(model);
|
||||
if (!model.compat) return detected;
|
||||
|
||||
return {
|
||||
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
||||
supportsDeveloperRole:
|
||||
model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
||||
supportsReasoningEffort:
|
||||
model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
||||
reasoningEffortMap:
|
||||
model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
|
||||
supportsUsageInStreaming:
|
||||
model.compat.supportsUsageInStreaming ??
|
||||
detected.supportsUsageInStreaming,
|
||||
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
||||
requiresToolResultName:
|
||||
model.compat.requiresToolResultName ?? detected.requiresToolResultName,
|
||||
requiresAssistantAfterToolResult:
|
||||
model.compat.requiresAssistantAfterToolResult ??
|
||||
detected.requiresAssistantAfterToolResult,
|
||||
requiresThinkingAsText:
|
||||
model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
|
||||
thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
|
||||
openRouterRouting: model.compat.openRouterRouting ?? {},
|
||||
vercelGatewayRouting:
|
||||
model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
||||
supportsStrictMode:
|
||||
model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
||||
};
|
||||
}
|
||||
583
packages/ai/src/providers/openai-responses-shared.ts
Normal file
583
packages/ai/src/providers/openai-responses-shared.ts
Normal file
|
|
@ -0,0 +1,583 @@
|
|||
import type OpenAI from "openai";
|
||||
import type {
|
||||
Tool as OpenAITool,
|
||||
ResponseCreateParamsStreaming,
|
||||
ResponseFunctionToolCall,
|
||||
ResponseInput,
|
||||
ResponseInputContent,
|
||||
ResponseInputImage,
|
||||
ResponseInputText,
|
||||
ResponseOutputMessage,
|
||||
ResponseReasoningItem,
|
||||
ResponseStreamEvent,
|
||||
} from "openai/resources/responses/responses.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
ImageContent,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
TextSignatureV1,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
Usage,
|
||||
} from "../types.js";
|
||||
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
// =============================================================================
|
||||
// Utilities
|
||||
// =============================================================================
|
||||
|
||||
function encodeTextSignatureV1(
|
||||
id: string,
|
||||
phase?: TextSignatureV1["phase"],
|
||||
): string {
|
||||
const payload: TextSignatureV1 = { v: 1, id };
|
||||
if (phase) payload.phase = phase;
|
||||
return JSON.stringify(payload);
|
||||
}
|
||||
|
||||
function parseTextSignature(
|
||||
signature: string | undefined,
|
||||
): { id: string; phase?: TextSignatureV1["phase"] } | undefined {
|
||||
if (!signature) return undefined;
|
||||
if (signature.startsWith("{")) {
|
||||
try {
|
||||
const parsed = JSON.parse(signature) as Partial<TextSignatureV1>;
|
||||
if (parsed.v === 1 && typeof parsed.id === "string") {
|
||||
if (parsed.phase === "commentary" || parsed.phase === "final_answer") {
|
||||
return { id: parsed.id, phase: parsed.phase };
|
||||
}
|
||||
return { id: parsed.id };
|
||||
}
|
||||
} catch {
|
||||
// Fall through to legacy plain-string handling.
|
||||
}
|
||||
}
|
||||
return { id: signature };
|
||||
}
|
||||
|
||||
/**
 * Optional hooks for Responses API stream processing.
 */
export interface OpenAIResponsesStreamOptions {
  // Service tier requested from OpenAI; used as a fallback when the response
  // itself does not report the tier it ran under.
  serviceTier?: ResponseCreateParamsStreaming["service_tier"];
  /**
   * Callback invoked after usage is computed, letting callers adjust cost
   * figures based on the effective service tier (which may differ from the
   * one requested).
   */
  applyServiceTierPricing?: (
    usage: Usage,
    serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
  ) => void;
}
||||
|
||||
/** Options for convertResponsesMessages. */
export interface ConvertResponsesMessagesOptions {
  // When false, context.systemPrompt is not emitted as a system/developer
  // message. Defaults to true.
  includeSystemPrompt?: boolean;
}
|
||||
|
||||
/** Options for convertResponsesTools. */
export interface ConvertResponsesToolsOptions {
  // Value emitted as each tool's `strict` field. Omitted (undefined) defaults
  // to false; an explicit null is passed through unchanged.
  strict?: boolean | null;
}
|
||||
|
||||
// =============================================================================
|
||||
// Message conversion
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Convert the internal Context (system prompt + message history) into the
 * OpenAI Responses API `input` array.
 *
 * Key behaviors visible below:
 * - The system prompt becomes a "developer" message for reasoning models,
 *   otherwise a "system" message.
 * - Assistant thinking blocks are restored from their serialized
 *   ResponseReasoningItem stored in `thinkingSignature`.
 * - Tool-call ids use the internal "callId|itemId" encoding; for providers in
 *   `allowedToolCallProviders` they are sanitized to satisfy the API's id rules.
 * - Images inside tool results are re-sent as a follow-up user message, since
 *   `function_call_output` here carries text only.
 *
 * @param model Target model; gates image support, id normalization, and role choice.
 * @param context System prompt and message history to convert.
 * @param allowedToolCallProviders Providers whose tool-call ids must be normalized.
 * @param options Optional flags (currently only `includeSystemPrompt`).
 * @returns The Responses API input array.
 */
export function convertResponsesMessages<TApi extends Api>(
  model: Model<TApi>,
  context: Context,
  allowedToolCallProviders: ReadonlySet<string>,
  options?: ConvertResponsesMessagesOptions,
): ResponseInput {
  const messages: ResponseInput = [];

  // Sanitize "callId|itemId" tool-call ids for providers that enforce OpenAI's
  // id format; other providers' ids pass through untouched.
  const normalizeToolCallId = (id: string): string => {
    if (!allowedToolCallProviders.has(model.provider)) return id;
    if (!id.includes("|")) return id;
    const [callId, itemId] = id.split("|");
    // Replace any character outside [a-zA-Z0-9_-] with an underscore.
    const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
    let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
    // OpenAI Responses API requires item id to start with "fc"
    if (!sanitizedItemId.startsWith("fc")) {
      sanitizedItemId = `fc_${sanitizedItemId}`;
    }
    // Truncate to 64 chars and strip trailing underscores (OpenAI Codex rejects them)
    let normalizedCallId =
      sanitizedCallId.length > 64
        ? sanitizedCallId.slice(0, 64)
        : sanitizedCallId;
    let normalizedItemId =
      sanitizedItemId.length > 64
        ? sanitizedItemId.slice(0, 64)
        : sanitizedItemId;
    normalizedCallId = normalizedCallId.replace(/_+$/, "");
    normalizedItemId = normalizedItemId.replace(/_+$/, "");
    return `${normalizedCallId}|${normalizedItemId}`;
  };

  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  const includeSystemPrompt = options?.includeSystemPrompt ?? true;
  if (includeSystemPrompt && context.systemPrompt) {
    // Reasoning models take the system prompt under the "developer" role.
    const role = model.reasoning ? "developer" : "system";
    messages.push({
      role,
      content: sanitizeSurrogates(context.systemPrompt),
    });
  }

  // msgIndex feeds fallback ids for assistant text blocks.
  // NOTE(review): `continue` below skips the trailing msgIndex++, so skipped
  // messages do not advance the counter — confirm this is intentional.
  let msgIndex = 0;
  for (const msg of transformedMessages) {
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        messages.push({
          role: "user",
          content: [
            { type: "input_text", text: sanitizeSurrogates(msg.content) },
          ],
        });
      } else {
        // Structured user content: map text/image parts to API input parts.
        const content: ResponseInputContent[] = msg.content.map(
          (item): ResponseInputContent => {
            if (item.type === "text") {
              return {
                type: "input_text",
                text: sanitizeSurrogates(item.text),
              } satisfies ResponseInputText;
            }
            return {
              type: "input_image",
              detail: "auto",
              image_url: `data:${item.mimeType};base64,${item.data}`,
            } satisfies ResponseInputImage;
          },
        );
        // Drop images entirely for models without image input support.
        const filteredContent = !model.input.includes("image")
          ? content.filter((c) => c.type !== "input_image")
          : content;
        if (filteredContent.length === 0) continue;
        messages.push({
          role: "user",
          content: filteredContent,
        });
      }
    } else if (msg.role === "assistant") {
      const output: ResponseInput = [];
      const assistantMsg = msg as AssistantMessage;
      // True when this history message came from the same provider/api but a
      // different model id than the one we're calling now.
      const isDifferentModel =
        assistantMsg.model !== model.id &&
        assistantMsg.provider === model.provider &&
        assistantMsg.api === model.api;

      for (const block of msg.content) {
        if (block.type === "thinking") {
          if (block.thinking.trim().length === 0) continue;
          if (block.thinkingSignature) {
            // The signature holds the original reasoning item verbatim.
            const reasoningItem = JSON.parse(
              block.thinkingSignature,
            ) as ResponseReasoningItem;
            output.push(reasoningItem);
          }
        } else if (block.type === "text") {
          const textBlock = block as TextContent;
          const parsedSignature = parseTextSignature(textBlock.textSignature);
          // OpenAI requires id to be max 64 characters
          let msgId = parsedSignature?.id;
          if (!msgId) {
            msgId = `msg_${msgIndex}`;
          } else if (msgId.length > 64) {
            msgId = `msg_${shortHash(msgId)}`;
          }
          output.push({
            type: "message",
            role: "assistant",
            content: [
              {
                type: "output_text",
                text: sanitizeSurrogates(textBlock.text),
                annotations: [],
              },
            ],
            status: "completed",
            id: msgId,
            phase: parsedSignature?.phase,
          } satisfies ResponseOutputMessage);
        } else if (block.type === "toolCall") {
          const toolCall = block as ToolCall;
          const [callId, itemIdRaw] = toolCall.id.split("|");
          let itemId: string | undefined = itemIdRaw;

          // For different-model messages, set id to undefined to avoid pairing validation.
          // OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.
          // By omitting the id, we avoid triggering that validation (like cross-provider does).
          if (isDifferentModel && itemId?.startsWith("fc_")) {
            itemId = undefined;
          }

          output.push({
            type: "function_call",
            id: itemId,
            call_id: callId,
            name: toolCall.name,
            arguments: JSON.stringify(toolCall.arguments),
          });
        }
      }
      // Skip assistant messages that produced no items at all.
      if (output.length === 0) continue;
      messages.push(...output);
    } else if (msg.role === "toolResult") {
      // Extract text and image content
      const textResult = msg.content
        .filter((c): c is TextContent => c.type === "text")
        .map((c) => c.text)
        .join("\n");
      const hasImages = msg.content.some(
        (c): c is ImageContent => c.type === "image",
      );

      // Always send function_call_output with text (or placeholder if only images)
      const hasText = textResult.length > 0;
      // Only the call-id half of the internal "callId|itemId" id is sent back.
      const [callId] = msg.toolCallId.split("|");
      messages.push({
        type: "function_call_output",
        call_id: callId,
        output: sanitizeSurrogates(
          hasText ? textResult : "(see attached image)",
        ),
      });

      // If there are images and model supports them, send a follow-up user message with images
      if (hasImages && model.input.includes("image")) {
        const contentParts: ResponseInputContent[] = [];

        // Add text prefix
        contentParts.push({
          type: "input_text",
          text: "Attached image(s) from tool result:",
        } satisfies ResponseInputText);

        // Add images
        for (const block of msg.content) {
          if (block.type === "image") {
            contentParts.push({
              type: "input_image",
              detail: "auto",
              image_url: `data:${block.mimeType};base64,${block.data}`,
            } satisfies ResponseInputImage);
          }
        }

        messages.push({
          role: "user",
          content: contentParts,
        });
      }
    }
    msgIndex++;
  }

  return messages;
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool conversion
|
||||
// =============================================================================
|
||||
|
||||
export function convertResponsesTools(
|
||||
tools: Tool[],
|
||||
options?: ConvertResponsesToolsOptions,
|
||||
): OpenAITool[] {
|
||||
const strict = options?.strict === undefined ? false : options.strict;
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
|
||||
strict,
|
||||
}));
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Stream processing
|
||||
// =============================================================================
|
||||
|
||||
export async function processResponsesStream<TApi extends Api>(
|
||||
openaiStream: AsyncIterable<ResponseStreamEvent>,
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
model: Model<TApi>,
|
||||
options?: OpenAIResponsesStreamOptions,
|
||||
): Promise<void> {
|
||||
let currentItem:
|
||||
| ResponseReasoningItem
|
||||
| ResponseOutputMessage
|
||||
| ResponseFunctionToolCall
|
||||
| null = null;
|
||||
let currentBlock:
|
||||
| ThinkingContent
|
||||
| TextContent
|
||||
| (ToolCall & { partialJson: string })
|
||||
| null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
|
||||
for await (const event of openaiStream) {
|
||||
if (event.type === "response.output_item.added") {
|
||||
const item = event.item;
|
||||
if (item.type === "reasoning") {
|
||||
currentItem = item;
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "thinking_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
} else if (item.type === "message") {
|
||||
currentItem = item;
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
} else if (item.type === "function_call") {
|
||||
currentItem = item;
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: `${item.call_id}|${item.id}`,
|
||||
name: item.name,
|
||||
arguments: {},
|
||||
partialJson: item.arguments || "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_part.added") {
|
||||
if (currentItem && currentItem.type === "reasoning") {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
currentItem.summary.push(event.part);
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_text.delta") {
|
||||
if (
|
||||
currentItem?.type === "reasoning" &&
|
||||
currentBlock?.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_part.done") {
|
||||
if (
|
||||
currentItem?.type === "reasoning" &&
|
||||
currentBlock?.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += "\n\n";
|
||||
lastPart.text += "\n\n";
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: "\n\n",
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.content_part.added") {
|
||||
if (currentItem?.type === "message") {
|
||||
currentItem.content = currentItem.content || [];
|
||||
// Filter out ReasoningText, only accept output_text and refusal
|
||||
if (
|
||||
event.part.type === "output_text" ||
|
||||
event.part.type === "refusal"
|
||||
) {
|
||||
currentItem.content.push(event.part);
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.output_text.delta") {
|
||||
if (currentItem?.type === "message" && currentBlock?.type === "text") {
|
||||
if (!currentItem.content || currentItem.content.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart?.type === "output_text") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.refusal.delta") {
|
||||
if (currentItem?.type === "message" && currentBlock?.type === "text") {
|
||||
if (!currentItem.content || currentItem.content.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart?.type === "refusal") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.refusal += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.function_call_arguments.delta") {
|
||||
if (
|
||||
currentItem?.type === "function_call" &&
|
||||
currentBlock?.type === "toolCall"
|
||||
) {
|
||||
currentBlock.partialJson += event.delta;
|
||||
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.function_call_arguments.done") {
|
||||
if (
|
||||
currentItem?.type === "function_call" &&
|
||||
currentBlock?.type === "toolCall"
|
||||
) {
|
||||
currentBlock.partialJson = event.arguments;
|
||||
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
|
||||
}
|
||||
} else if (event.type === "response.output_item.done") {
|
||||
const item = event.item;
|
||||
|
||||
if (item.type === "reasoning" && currentBlock?.type === "thinking") {
|
||||
currentBlock.thinking =
|
||||
item.summary?.map((s) => s.text).join("\n\n") || "";
|
||||
currentBlock.thinkingSignature = JSON.stringify(item);
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "message" && currentBlock?.type === "text") {
|
||||
currentBlock.text = item.content
|
||||
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
|
||||
.join("");
|
||||
currentBlock.textSignature = encodeTextSignatureV1(
|
||||
item.id,
|
||||
item.phase ?? undefined,
|
||||
);
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "function_call") {
|
||||
const args =
|
||||
currentBlock?.type === "toolCall" && currentBlock.partialJson
|
||||
? parseStreamingJson(currentBlock.partialJson)
|
||||
: parseStreamingJson(item.arguments || "{}");
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: `${item.call_id}|${item.id}`,
|
||||
name: item.name,
|
||||
arguments: args,
|
||||
};
|
||||
|
||||
currentBlock = null;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: blockIndex(),
|
||||
toolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.completed") {
|
||||
const response = event.response;
|
||||
if (response?.usage) {
|
||||
const cachedTokens =
|
||||
response.usage.input_tokens_details?.cached_tokens || 0;
|
||||
output.usage = {
|
||||
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
|
||||
input: (response.usage.input_tokens || 0) - cachedTokens,
|
||||
output: response.usage.output_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
totalTokens: response.usage.total_tokens || 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
calculateCost(model, output.usage);
|
||||
if (options?.applyServiceTierPricing) {
|
||||
const serviceTier = response?.service_tier ?? options.serviceTier;
|
||||
options.applyServiceTierPricing(output.usage, serviceTier);
|
||||
}
|
||||
// Map status to stop reason
|
||||
output.stopReason = mapStopReason(response?.status);
|
||||
if (
|
||||
output.content.some((b) => b.type === "toolCall") &&
|
||||
output.stopReason === "stop"
|
||||
) {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
} else if (event.type === "error") {
|
||||
throw new Error(
|
||||
`Error Code ${event.code}: ${event.message}` || "Unknown error",
|
||||
);
|
||||
} else if (event.type === "response.failed") {
|
||||
throw new Error("Unknown error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
status: OpenAI.Responses.ResponseStatus | undefined,
|
||||
): StopReason {
|
||||
if (!status) return "stop";
|
||||
switch (status) {
|
||||
case "completed":
|
||||
return "stop";
|
||||
case "incomplete":
|
||||
return "length";
|
||||
case "failed":
|
||||
case "cancelled":
|
||||
return "error";
|
||||
// These two are wonky ...
|
||||
case "in_progress":
|
||||
case "queued":
|
||||
return "stop";
|
||||
default: {
|
||||
const _exhaustive: never = status;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
309
packages/ai/src/providers/openai-responses.ts
Normal file
309
packages/ai/src/providers/openai-responses.ts
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
import OpenAI from "openai";
|
||||
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
Usage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import {
|
||||
convertResponsesMessages,
|
||||
convertResponsesTools,
|
||||
processResponsesStream,
|
||||
} from "./openai-responses-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
// Providers whose tool-call ids follow the OpenAI "callId|itemId" convention
// and therefore need normalization before being replayed to the Responses API.
// NOTE(review): membership inferred from the constant's name and its role as a
// tool-call allow-list — confirm against convertResponsesMessages call sites.
const OPENAI_TOOL_CALL_PROVIDERS = new Set([
  "openai",
  "openai-codex",
  "opencode",
]);
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get prompt cache retention based on cacheRetention and base URL.
|
||||
* Only applies to direct OpenAI API calls (api.openai.com).
|
||||
*/
|
||||
function getPromptCacheRetention(
|
||||
baseUrl: string,
|
||||
cacheRetention: CacheRetention,
|
||||
): "24h" | undefined {
|
||||
if (cacheRetention !== "long") {
|
||||
return undefined;
|
||||
}
|
||||
if (baseUrl.includes("api.openai.com")) {
|
||||
return "24h";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Options specific to the OpenAI Responses API. */
export interface OpenAIResponsesOptions extends StreamOptions {
  /**
   * Reasoning effort. "xhigh" should only be passed for models where
   * supportsXhigh(model) is true (see streamSimpleOpenAIResponses).
   */
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
  /** Reasoning summary verbosity; buildParams falls back to "auto" when effort is set. */
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
  /** OpenAI service tier; affects cost multipliers (see applyServiceTierPricing). */
  serviceTier?: ResponseCreateParamsStreaming["service_tier"];
}
|
||||
|
||||
/**
 * Stream a completion from the OpenAI Responses API.
 *
 * Returns an AssistantMessageEventStream immediately; all network work runs
 * in a detached async task that builds the request, emits "start", forwards
 * incremental events via processResponsesStream, and finishes with either a
 * "done" or an "error" event before ending the stream.
 */
export const streamOpenAIResponses: StreamFunction<
  "openai-responses",
  OpenAIResponsesOptions
> = (
  model: Model<"openai-responses">,
  context: Context,
  options?: OpenAIResponsesOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Start async processing; the stream is returned to the caller right away.
  (async () => {
    // Accumulator for the final assistant message; mutated in place by
    // processResponsesStream as events arrive.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      // Create OpenAI client (explicit key > provider env key > empty).
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, context, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      options?.onPayload?.(params);
      const openaiStream = await client.responses.create(
        params,
        options?.signal ? { signal: options.signal } : undefined,
      );
      // "start" is pushed only after the request is accepted, so a failed
      // request produces a single "error" event with no preceding "start".
      stream.push({ type: "start", partial: output });

      await processResponsesStream(openaiStream, output, stream, model, {
        serviceTier: options?.serviceTier,
        applyServiceTierPricing,
      });

      // Abort may have fired mid-stream; route it through the error path.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip streaming-only bookkeeping before surfacing the partial message.
      for (const block of output.content)
        delete (block as { index?: number }).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleOpenAIResponses: StreamFunction<
|
||||
"openai-responses",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model)
|
||||
? options?.reasoning
|
||||
: clampReasoning(options?.reasoning);
|
||||
|
||||
return streamOpenAIResponses(model, context, {
|
||||
...base,
|
||||
reasoningEffort,
|
||||
} satisfies OpenAIResponsesOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
|
||||
const headers = { ...model.headers };
|
||||
if (model.provider === "github-copilot") {
|
||||
const hasImages = hasCopilotVisionInput(context.messages);
|
||||
const copilotHeaders = buildCopilotDynamicHeaders({
|
||||
messages: context.messages,
|
||||
hasImages,
|
||||
});
|
||||
Object.assign(headers, copilotHeaders);
|
||||
}
|
||||
|
||||
// Merge options headers last so they can override defaults
|
||||
if (optionsHeaders) {
|
||||
Object.assign(headers, optionsHeaders);
|
||||
}
|
||||
|
||||
return new OpenAI({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: headers,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assemble the streaming request payload for the Responses API: converted
 * messages, prompt-cache settings, sampling limits, tools, and reasoning
 * configuration.
 *
 * Note: may append a synthetic "developer" message to `messages` for
 * gpt-5 models when reasoning is disabled (see below).
 */
function buildParams(
  model: Model<"openai-responses">,
  context: Context,
  options?: OpenAIResponsesOptions,
) {
  const messages = convertResponsesMessages(
    model,
    context,
    OPENAI_TOOL_CALL_PROVIDERS,
  );

  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
  const params: ResponseCreateParamsStreaming = {
    model: model.id,
    input: messages,
    stream: true,
    // Omit the cache key entirely when caching is disabled ("none").
    prompt_cache_key:
      cacheRetention === "none" ? undefined : options?.sessionId,
    prompt_cache_retention: getPromptCacheRetention(
      model.baseUrl,
      cacheRetention,
    ),
    // Do not persist responses server-side.
    store: false,
  };

  if (options?.maxTokens) {
    params.max_output_tokens = options?.maxTokens;
  }

  if (options?.temperature !== undefined) {
    params.temperature = options?.temperature;
  }

  if (options?.serviceTier !== undefined) {
    params.service_tier = options.serviceTier;
  }

  if (context.tools) {
    params.tools = convertResponsesTools(context.tools);
  }

  if (model.reasoning) {
    if (options?.reasoningEffort || options?.reasoningSummary) {
      params.reasoning = {
        effort: options?.reasoningEffort || "medium",
        summary: options?.reasoningSummary || "auto",
      };
      // Encrypted reasoning is needed to replay thinking across turns
      // since store:false means the server keeps nothing.
      params.include = ["reasoning.encrypted_content"];
    } else {
      if (model.name.startsWith("gpt-5")) {
        // gpt-5 has no official "reasoning off" switch; this undocumented
        // "Juice: 0" developer message is the community workaround, see
        // https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
        messages.push({
          role: "developer",
          content: [
            {
              type: "input_text",
              text: "# Juice: 0 !important",
            },
          ],
        });
      }
    }
  }

  return params;
}
|
||||
|
||||
function getServiceTierCostMultiplier(
|
||||
serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
|
||||
): number {
|
||||
switch (serviceTier) {
|
||||
case "flex":
|
||||
return 0.5;
|
||||
case "priority":
|
||||
return 2;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
function applyServiceTierPricing(
|
||||
usage: Usage,
|
||||
serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
|
||||
) {
|
||||
const multiplier = getServiceTierCostMultiplier(serviceTier);
|
||||
if (multiplier === 1) return;
|
||||
|
||||
usage.cost.input *= multiplier;
|
||||
usage.cost.output *= multiplier;
|
||||
usage.cost.cacheRead *= multiplier;
|
||||
usage.cost.cacheWrite *= multiplier;
|
||||
usage.cost.total =
|
||||
usage.cost.input +
|
||||
usage.cost.output +
|
||||
usage.cost.cacheRead +
|
||||
usage.cost.cacheWrite;
|
||||
}
|
||||
216
packages/ai/src/providers/register-builtins.ts
Normal file
216
packages/ai/src/providers/register-builtins.ts
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
import { clearApiProviders, registerApiProvider } from "../api-registry.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
AssistantMessageEvent,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamOptions,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
|
||||
import {
|
||||
streamAzureOpenAIResponses,
|
||||
streamSimpleAzureOpenAIResponses,
|
||||
} from "./azure-openai-responses.js";
|
||||
import { streamGoogle, streamSimpleGoogle } from "./google.js";
|
||||
import {
|
||||
streamGoogleGeminiCli,
|
||||
streamSimpleGoogleGeminiCli,
|
||||
} from "./google-gemini-cli.js";
|
||||
import {
|
||||
streamGoogleVertex,
|
||||
streamSimpleGoogleVertex,
|
||||
} from "./google-vertex.js";
|
||||
import { streamMistral, streamSimpleMistral } from "./mistral.js";
|
||||
import {
|
||||
streamOpenAICodexResponses,
|
||||
streamSimpleOpenAICodexResponses,
|
||||
} from "./openai-codex-responses.js";
|
||||
import {
|
||||
streamOpenAICompletions,
|
||||
streamSimpleOpenAICompletions,
|
||||
} from "./openai-completions.js";
|
||||
import {
|
||||
streamOpenAIResponses,
|
||||
streamSimpleOpenAIResponses,
|
||||
} from "./openai-responses.js";
|
||||
|
||||
/**
 * Shape of the lazily imported Bedrock provider module (see the
 * BEDROCK_PROVIDER_SPECIFIER import below): the stream/streamSimple pair
 * that registerApiProvider expects.
 */
interface BedrockProviderModule {
  // Full-featured streaming entry point.
  streamBedrock: (
    model: Model<"bedrock-converse-stream">,
    context: Context,
    options?: StreamOptions,
  ) => AsyncIterable<AssistantMessageEvent>;
  // Simplified-options streaming entry point.
  streamSimpleBedrock: (
    model: Model<"bedrock-converse-stream">,
    context: Context,
    options?: SimpleStreamOptions,
  ) => AsyncIterable<AssistantMessageEvent>;
}
|
||||
|
||||
// Indirection for the lazy Bedrock import below.
type DynamicImport = (specifier: string) => Promise<unknown>;

const dynamicImport: DynamicImport = (specifier) => import(specifier);
// The specifier is deliberately split into two string pieces — presumably so
// bundlers/static analyzers do not eagerly resolve and bundle the AWS-SDK-heavy
// module. NOTE(review): intent inferred from the split; confirm before changing.
const BEDROCK_PROVIDER_SPECIFIER = "./amazon-" + "bedrock.js";

// Injection hook: when set via setBedrockProviderModule, this is returned
// instead of dynamically importing the real module.
let bedrockProviderModuleOverride: BedrockProviderModule | undefined;
|
||||
|
||||
/**
 * Inject a replacement Bedrock provider module (e.g. for tests); subsequent
 * lazy loads return it instead of importing the real module.
 */
export function setBedrockProviderModule(module: BedrockProviderModule): void {
  bedrockProviderModuleOverride = module;
}
|
||||
|
||||
async function loadBedrockProviderModule(): Promise<BedrockProviderModule> {
|
||||
if (bedrockProviderModuleOverride) {
|
||||
return bedrockProviderModuleOverride;
|
||||
}
|
||||
const module = await dynamicImport(BEDROCK_PROVIDER_SPECIFIER);
|
||||
return module as BedrockProviderModule;
|
||||
}
|
||||
|
||||
function forwardStream(
|
||||
target: AssistantMessageEventStream,
|
||||
source: AsyncIterable<AssistantMessageEvent>,
|
||||
): void {
|
||||
(async () => {
|
||||
for await (const event of source) {
|
||||
target.push(event);
|
||||
}
|
||||
target.end();
|
||||
})();
|
||||
}
|
||||
|
||||
function createLazyLoadErrorMessage(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
error: unknown,
|
||||
): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: "bedrock-converse-stream",
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
errorMessage: error instanceof Error ? error.message : String(error),
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function streamBedrockLazy(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: StreamOptions,
|
||||
): AssistantMessageEventStream {
|
||||
const outer = new AssistantMessageEventStream();
|
||||
|
||||
loadBedrockProviderModule()
|
||||
.then((module) => {
|
||||
const inner = module.streamBedrock(model, context, options);
|
||||
forwardStream(outer, inner);
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = createLazyLoadErrorMessage(model, error);
|
||||
outer.push({ type: "error", reason: "error", error: message });
|
||||
outer.end(message);
|
||||
});
|
||||
|
||||
return outer;
|
||||
}
|
||||
|
||||
function streamSimpleBedrockLazy(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream {
|
||||
const outer = new AssistantMessageEventStream();
|
||||
|
||||
loadBedrockProviderModule()
|
||||
.then((module) => {
|
||||
const inner = module.streamSimpleBedrock(model, context, options);
|
||||
forwardStream(outer, inner);
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = createLazyLoadErrorMessage(model, error);
|
||||
outer.push({ type: "error", reason: "error", error: message });
|
||||
outer.end(message);
|
||||
});
|
||||
|
||||
return outer;
|
||||
}
|
||||
|
||||
/**
 * Register the built-in stream implementations for every supported API
 * under their API identifiers.
 *
 * Bedrock is registered via the lazy wrappers above so the heavy AWS SDK
 * module is only imported when a Bedrock model is actually streamed.
 */
export function registerBuiltInApiProviders(): void {
  registerApiProvider({
    api: "anthropic-messages",
    stream: streamAnthropic,
    streamSimple: streamSimpleAnthropic,
  });

  registerApiProvider({
    api: "openai-completions",
    stream: streamOpenAICompletions,
    streamSimple: streamSimpleOpenAICompletions,
  });

  registerApiProvider({
    api: "mistral-conversations",
    stream: streamMistral,
    streamSimple: streamSimpleMistral,
  });

  registerApiProvider({
    api: "openai-responses",
    stream: streamOpenAIResponses,
    streamSimple: streamSimpleOpenAIResponses,
  });

  registerApiProvider({
    api: "azure-openai-responses",
    stream: streamAzureOpenAIResponses,
    streamSimple: streamSimpleAzureOpenAIResponses,
  });

  registerApiProvider({
    api: "openai-codex-responses",
    stream: streamOpenAICodexResponses,
    streamSimple: streamSimpleOpenAICodexResponses,
  });

  registerApiProvider({
    api: "google-generative-ai",
    stream: streamGoogle,
    streamSimple: streamSimpleGoogle,
  });

  registerApiProvider({
    api: "google-gemini-cli",
    stream: streamGoogleGeminiCli,
    streamSimple: streamSimpleGoogleGeminiCli,
  });

  registerApiProvider({
    api: "google-vertex",
    stream: streamGoogleVertex,
    streamSimple: streamSimpleGoogleVertex,
  });

  // Lazy wrappers defer the dynamic import of the AWS SDK-backed module.
  registerApiProvider({
    api: "bedrock-converse-stream",
    stream: streamBedrockLazy,
    streamSimple: streamSimpleBedrockLazy,
  });
}
|
||||
|
||||
/**
 * Reset the registry to exactly the built-in providers, discarding any
 * providers registered externally.
 */
export function resetApiProviders(): void {
  clearApiProviders();
  registerBuiltInApiProviders();
}

// Module side effect: built-in providers are registered on first import.
registerBuiltInApiProviders();
|
||||
59
packages/ai/src/providers/simple-options.ts
Normal file
59
packages/ai/src/providers/simple-options.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import type {
|
||||
Api,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamOptions,
|
||||
ThinkingBudgets,
|
||||
ThinkingLevel,
|
||||
} from "../types.js";
|
||||
|
||||
export function buildBaseOptions(
|
||||
model: Model<Api>,
|
||||
options?: SimpleStreamOptions,
|
||||
apiKey?: string,
|
||||
): StreamOptions {
|
||||
return {
|
||||
temperature: options?.temperature,
|
||||
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
|
||||
signal: options?.signal,
|
||||
apiKey: apiKey || options?.apiKey,
|
||||
cacheRetention: options?.cacheRetention,
|
||||
sessionId: options?.sessionId,
|
||||
headers: options?.headers,
|
||||
onPayload: options?.onPayload,
|
||||
maxRetryDelayMs: options?.maxRetryDelayMs,
|
||||
metadata: options?.metadata,
|
||||
};
|
||||
}
|
||||
|
||||
export function clampReasoning(
|
||||
effort: ThinkingLevel | undefined,
|
||||
): Exclude<ThinkingLevel, "xhigh"> | undefined {
|
||||
return effort === "xhigh" ? "high" : effort;
|
||||
}
|
||||
|
||||
export function adjustMaxTokensForThinking(
|
||||
baseMaxTokens: number,
|
||||
modelMaxTokens: number,
|
||||
reasoningLevel: ThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): { maxTokens: number; thinkingBudget: number } {
|
||||
const defaultBudgets: ThinkingBudgets = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
};
|
||||
const budgets = { ...defaultBudgets, ...customBudgets };
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
const level = clampReasoning(reasoningLevel)!;
|
||||
let thinkingBudget = budgets[level]!;
|
||||
const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);
|
||||
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return { maxTokens, thinkingBudget };
|
||||
}
|
||||
193
packages/ai/src/providers/transform-messages.ts
Normal file
193
packages/ai/src/providers/transform-messages.ts
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Message,
|
||||
Model,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
|
||||
/**
 * Prepare a conversation history for replay against `model`.
 *
 * Two passes:
 * 1. Per-message transform — thinking blocks produced by a *different*
 *    provider/api/model are dropped (when redacted or empty) or downgraded
 *    to plain text; cross-model tool calls lose their thought signatures
 *    and, when `normalizeToolCallId` is supplied, get normalized IDs.
 *    (OpenAI Responses IDs can exceed 450 chars and contain characters like
 *    `|`, while Anthropic requires ^[a-zA-Z0-9_-]+$, max 64 chars.)
 *    toolResult messages referencing a remapped ID are re-keyed to match.
 * 2. Structural repair — errored/aborted assistant turns are removed, and
 *    synthetic error tool results are inserted for tool calls that never
 *    received a result, so the replayed history satisfies the APIs'
 *    call/result pairing rules.
 *
 * The input array is not mutated; new objects are produced where changes
 * are needed.
 *
 * @param messages History to transform.
 * @param model Target model the history will be replayed against.
 * @param normalizeToolCallId Optional hook producing a provider-safe tool
 *   call ID; only invoked for messages that came from a different model.
 * @returns A new message list safe to send to `model`.
 */
export function transformMessages<TApi extends Api>(
  messages: Message[],
  model: Model<TApi>,
  normalizeToolCallId?: (
    id: string,
    model: Model<TApi>,
    source: AssistantMessage,
  ) => string,
): Message[] {
  // Map of original tool call IDs to normalized IDs, filled while walking
  // assistant messages and consulted for the toolResults that follow them.
  const toolCallIdMap = new Map<string, string>();

  // First pass: transform messages (thinking blocks, tool call ID normalization)
  const transformed = messages.map((msg) => {
    // User messages pass through unchanged
    if (msg.role === "user") {
      return msg;
    }

    // Handle toolResult messages - normalize toolCallId if we have a mapping
    // (relies on map ordering: the owning assistant message precedes its results)
    if (msg.role === "toolResult") {
      const normalizedId = toolCallIdMap.get(msg.toolCallId);
      if (normalizedId && normalizedId !== msg.toolCallId) {
        return { ...msg, toolCallId: normalizedId };
      }
      return msg;
    }

    // Assistant messages need transformation check
    if (msg.role === "assistant") {
      const assistantMsg = msg as AssistantMessage;
      // "Same model" means same provider AND api AND model id; anything else
      // is treated as cross-model replay.
      const isSameModel =
        assistantMsg.provider === model.provider &&
        assistantMsg.api === model.api &&
        assistantMsg.model === model.id;

      const transformedContent = assistantMsg.content.flatMap((block) => {
        if (block.type === "thinking") {
          // Redacted thinking is opaque encrypted content, only valid for the same model.
          // Drop it for cross-model to avoid API errors.
          if (block.redacted) {
            return isSameModel ? block : [];
          }
          // For same model: keep thinking blocks with signatures (needed for replay)
          // even if the thinking text is empty (OpenAI encrypted reasoning)
          if (isSameModel && block.thinkingSignature) return block;
          // Skip empty thinking blocks, convert others to plain text
          if (!block.thinking || block.thinking.trim() === "") return [];
          if (isSameModel) return block;
          // Cross-model: downgrade thinking to a plain text block.
          return {
            type: "text" as const,
            text: block.thinking,
          };
        }

        if (block.type === "text") {
          if (isSameModel) return block;
          // Cross-model: rebuild as a bare text block, dropping any
          // provider-specific extra fields carried on the original.
          return {
            type: "text" as const,
            text: block.text,
          };
        }

        if (block.type === "toolCall") {
          const toolCall = block as ToolCall;
          let normalizedToolCall: ToolCall = toolCall;

          // Thought signatures are model-specific; strip them cross-model.
          if (!isSameModel && toolCall.thoughtSignature) {
            normalizedToolCall = { ...toolCall };
            delete (normalizedToolCall as { thoughtSignature?: string })
              .thoughtSignature;
          }

          // Remap the ID cross-model and remember the mapping so the
          // corresponding toolResult (seen later) is re-keyed too.
          if (!isSameModel && normalizeToolCallId) {
            const normalizedId = normalizeToolCallId(
              toolCall.id,
              model,
              assistantMsg,
            );
            if (normalizedId !== toolCall.id) {
              toolCallIdMap.set(toolCall.id, normalizedId);
              normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
            }
          }

          return normalizedToolCall;
        }

        // Any other block type passes through untouched.
        return block;
      });

      return {
        ...assistantMsg,
        content: transformedContent,
      };
    }
    return msg;
  });

  // Second pass: insert synthetic empty tool results for orphaned tool calls
  // This preserves thinking signatures and satisfies API requirements
  const result: Message[] = [];
  // Tool calls from the most recent assistant message that may still be
  // awaiting results, and the result IDs seen since then.
  let pendingToolCalls: ToolCall[] = [];
  let existingToolResultIds = new Set<string>();

  for (let i = 0; i < transformed.length; i++) {
    const msg = transformed[i];

    if (msg.role === "assistant") {
      // If we have pending orphaned tool calls from a previous assistant, insert synthetic results now
      if (pendingToolCalls.length > 0) {
        for (const tc of pendingToolCalls) {
          if (!existingToolResultIds.has(tc.id)) {
            result.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            } as ToolResultMessage);
          }
        }
        pendingToolCalls = [];
        existingToolResultIds = new Set();
      }

      // Skip errored/aborted assistant messages entirely.
      // These are incomplete turns that shouldn't be replayed:
      // - May have partial content (reasoning without message, incomplete tool calls)
      // - Replaying them can cause API errors (e.g., OpenAI "reasoning without following item")
      // - The model should retry from the last valid state
      const assistantMsg = msg as AssistantMessage;
      if (
        assistantMsg.stopReason === "error" ||
        assistantMsg.stopReason === "aborted"
      ) {
        continue;
      }

      // Track tool calls from this assistant message
      const toolCalls = assistantMsg.content.filter(
        (b) => b.type === "toolCall",
      ) as ToolCall[];
      if (toolCalls.length > 0) {
        pendingToolCalls = toolCalls;
        existingToolResultIds = new Set();
      }

      result.push(msg);
    } else if (msg.role === "toolResult") {
      existingToolResultIds.add(msg.toolCallId);
      result.push(msg);
    } else if (msg.role === "user") {
      // User message interrupts tool flow - insert synthetic results for orphaned calls
      if (pendingToolCalls.length > 0) {
        for (const tc of pendingToolCalls) {
          if (!existingToolResultIds.has(tc.id)) {
            result.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            } as ToolResultMessage);
          }
        }
        pendingToolCalls = [];
        existingToolResultIds = new Set();
      }
      result.push(msg);
    } else {
      result.push(msg);
    }
  }

  // NOTE(review): tool calls still pending at the very end of the history are
  // intentionally left without synthetic results here — presumably the caller
  // is about to execute them; confirm against call sites.
  return result;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue