import Anthropic from "@anthropic-ai/sdk";
import type {
  ContentBlockParam,
  MessageCreateParamsStreaming,
  MessageParam,
} from "@anthropic-ai/sdk/resources/messages.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { calculateCost } from "../models.js";
import type {
  Api,
  AssistantMessage,
  CacheRetention,
  Context,
  ImageContent,
  Message,
  Model,
  SimpleStreamOptions,
  StopReason,
  StreamFunction,
  StreamOptions,
  TextContent,
  ThinkingContent,
  Tool,
  ToolCall,
  ToolResultMessage,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import {
  buildCopilotDynamicHeaders,
  hasCopilotVisionInput,
} from "./github-copilot-headers.js";
import {
  adjustMaxTokensForThinking,
  buildBaseOptions,
} from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";

/**
 * Resolve cache retention preference.
 *
 * An explicit (truthy) `cacheRetention` always wins. Otherwise the
 * COMPANION_CACHE_RETENTION environment variable is consulted for backward
 * compatibility (only the value "long" is recognised), and the fallback is
 * "short".
 *
 * @param cacheRetention Caller-supplied preference, if any.
 * @returns The effective retention setting.
 */
function resolveCacheRetention(
  cacheRetention?: CacheRetention,
): CacheRetention {
  if (cacheRetention) {
    return cacheRetention;
  }
  // `process` does not exist in browsers, so guard before touching env.
  if (
    typeof process !== "undefined" &&
    process.env.COMPANION_CACHE_RETENTION === "long"
  ) {
    return "long";
  }
  return "short";
}

/**
 * Build the `cache_control` marker matching the effective cache retention.
 *
 * @param baseUrl Base URL of the endpoint the request is sent to.
 * @param cacheRetention Caller-supplied retention preference, if any.
 * @returns The resolved retention plus the marker to attach to cacheable
 *   blocks. `cacheControl` is omitted when retention is "none".
 */
function getCacheControl(
  baseUrl: string,
  cacheRetention?: CacheRetention,
): {
  retention: CacheRetention;
  cacheControl?: { type: "ephemeral"; ttl?: "1h" };
} {
  const retention = resolveCacheRetention(cacheRetention);
  if (retention === "none") {
    return { retention };
  }
  // The 1h TTL is only requested when talking to api.anthropic.com directly;
  // every other base URL gets the plain ephemeral marker.
  const ttl =
    retention === "long" && baseUrl.includes("api.anthropic.com")
      ? "1h"
      : undefined;
  return {
    retention,
    cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
  };
}

// Stealth mode: Mimic Claude Code's tool naming exactly
const claudeCodeVersion = "2.1.62";

// Claude Code 2.x tool names (canonical casing)
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
// To update: https://github.com/badlogic/cchistory
const claudeCodeTools = [
  "Read",
  "Write",
  "Edit",
  "Bash",
  "Grep",
  "Glob",
  "AskUserQuestion",
  "EnterPlanMode",
  "ExitPlanMode",
  "KillShell",
  "NotebookEdit",
  "Skill",
  "Task",
  "TaskOutput",
  "TodoWrite",
  "WebFetch",
  "WebSearch",
];

// Lower-cased tool name -> canonical Claude Code casing.
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));

// Convert tool name to CC canonical casing if it matches (case-insensitive)
const toClaudeCodeName = (name: string) =>
  ccToolLookup.get(name.toLowerCase()) ?? name;

/**
 * Map a tool name returned by the API back to the caller's own spelling.
 *
 * The first entry of `tools` whose name matches case-insensitively wins; when
 * nothing matches (or no tools were supplied) the name is returned unchanged.
 */
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
  if (tools && tools.length > 0) {
    const lowerName = name.toLowerCase();
    const matchedTool = tools.find(
      (tool) => tool.name.toLowerCase() === lowerName,
    );
    if (matchedTool) return matchedTool.name;
  }
  return name;
};

/**
 * Convert content blocks to Anthropic API format.
 *
 * @param content Text and/or image blocks (used here for tool results).
 * @returns A single newline-joined string when there are no images, otherwise
 *   an array of text/image blocks that is guaranteed to contain at least one
 *   text block.
 */
function convertContentBlocks(content: (TextContent | ImageContent)[]):
  | string
  | Array<
      | { type: "text"; text: string }
      | {
          type: "image";
          source: {
            type: "base64";
            media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
            data: string;
          };
        }
    > {
  // If only text blocks, return as concatenated string for simplicity
  const hasImages = content.some((c) => c.type === "image");
  if (!hasImages) {
    return sanitizeSurrogates(
      content.map((c) => (c as TextContent).text).join("\n"),
    );
  }

  // If we have images, convert to content block array
  const blocks = content.map((block) => {
    if (block.type === "text") {
      return {
        type: "text" as const,
        text: sanitizeSurrogates(block.text),
      };
    }
    return {
      type: "image" as const,
      source: {
        type: "base64" as const,
        media_type: block.mimeType as
          | "image/jpeg"
          | "image/png"
          | "image/gif"
          | "image/webp",
        data: block.data,
      },
    };
  });

  // If only images (no text), add placeholder text block
  const hasText = blocks.some((b) => b.type === "text");
  if (!hasText) {
    blocks.unshift({
      type: "text" as const,
      text: "(see attached image)",
    });
  }

  return blocks;
}

/** Effort levels accepted for adaptive thinking. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";

/** Options specific to the Anthropic Messages provider. */
export interface AnthropicOptions extends StreamOptions {
  /**
   * Enable extended thinking.
   * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
   * For older models: uses budget-based thinking with thinkingBudgetTokens.
   */
  thinkingEnabled?: boolean;
  /**
   * Token budget for extended thinking (older models only).
   * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
   */
  thinkingBudgetTokens?: number;
  /**
   * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
   * Controls how much thinking Claude allocates:
   * - "max": Always thinks with no constraints (Opus 4.6 only)
   * - "high": Always thinks, deep reasoning (default)
   * - "medium": Moderate thinking, may skip for simple queries
   * - "low": Minimal thinking, skips for simple tasks
   * Ignored for older models.
   */
  effort?: AnthropicEffort;
  /**
   * Whether to request the interleaved-thinking beta. Treated as `true` when
   * omitted; the beta header is skipped for adaptive-thinking models.
   */
  interleavedThinking?: boolean;
  /** Tool selection policy forwarded as `tool_choice`. */
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}

/**
 * Shallow-merge header maps left to right; later sources override earlier
 * ones and `undefined` sources are skipped.
 */
function mergeHeaders(
  ...headerSources: (Record<string, string> | undefined)[]
): Record<string, string> {
  const merged: Record<string, string> = {};
  for (const headers of headerSources) {
    if (headers) {
      Object.assign(merged, headers);
    }
  }
  return merged;
}

/**
 * Stream a completion from the Anthropic Messages API.
 *
 * The returned event stream is populated asynchronously: a "start" event,
 * per-block start/delta/end events for text, thinking and tool calls, and
 * finally either "done" or "error". Failures (including aborts) never reject;
 * they are reported through the "error" event carrying the partial message.
 *
 * @param model Model descriptor (id, provider, base URL, headers, ...).
 * @param context Conversation: system prompt, messages and tools.
 * @param options Provider options (API key, thinking, abort signal, ...).
 * @returns The event stream that receives the assistant message events.
 */
export const streamAnthropic: StreamFunction<
  "anthropic-messages",
  AnthropicOptions
> = (
  model: Model<"anthropic-messages">,
  context: Context,
  options?: AnthropicOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    // Anthropic doesn't provide total_tokens, compute from components and
    // refresh the cost figures at the same time.
    const refreshUsageTotals = () => {
      output.usage.totalTokens =
        output.usage.input +
        output.usage.output +
        output.usage.cacheRead +
        output.usage.cacheWrite;
      calculateCost(model, output.usage);
    };

    try {
      const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";

      let copilotDynamicHeaders: Record<string, string> | undefined;
      if (model.provider === "github-copilot") {
        const hasImages = hasCopilotVisionInput(context.messages);
        copilotDynamicHeaders = buildCopilotDynamicHeaders({
          messages: context.messages,
          hasImages,
        });
      }

      const { client, isOAuthToken } = createClient(
        model,
        apiKey,
        options?.interleavedThinking ?? true,
        options?.headers,
        copilotDynamicHeaders,
      );
      const params = buildParams(model, context, isOAuthToken, options);
      options?.onPayload?.(params);
      const anthropicStream = client.messages.stream(
        { ...params, stream: true },
        { signal: options?.signal },
      );
      stream.push({ type: "start", partial: output });

      // While streaming, every block carries the API's block index (and tool
      // calls their raw JSON buffer); both are removed before the block is
      // handed out as finished.
      type Block = (
        | ThinkingContent
        | TextContent
        | (ToolCall & { partialJson: string })
      ) & { index: number };
      const blocks = output.content as Block[];

      for await (const event of anthropicStream) {
        if (event.type === "message_start") {
          // Capture initial token usage from message_start event
          // This ensures we have input token counts even if the stream is aborted early
          output.usage.input = event.message.usage.input_tokens || 0;
          output.usage.output = event.message.usage.output_tokens || 0;
          output.usage.cacheRead =
            event.message.usage.cache_read_input_tokens || 0;
          output.usage.cacheWrite =
            event.message.usage.cache_creation_input_tokens || 0;
          refreshUsageTotals();
        } else if (event.type === "content_block_start") {
          if (event.content_block.type === "text") {
            const block: Block = {
              type: "text",
              text: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "text_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "thinking") {
            const block: Block = {
              type: "thinking",
              thinking: "",
              thinkingSignature: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "redacted_thinking") {
            // The opaque payload is kept in thinkingSignature so it can be
            // sent back unchanged as a redacted_thinking block later.
            const block: Block = {
              type: "thinking",
              thinking: "[Reasoning redacted]",
              thinkingSignature: event.content_block.data,
              redacted: true,
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "tool_use") {
            const block: Block = {
              type: "toolCall",
              id: event.content_block.id,
              // OAuth requests send Claude Code tool names; translate back to
              // the caller's own tool name.
              name: isOAuthToken
                ? fromClaudeCodeName(event.content_block.name, context.tools)
                : event.content_block.name,
              arguments:
                (event.content_block.input as Record<string, any>) ?? {},
              partialJson: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "toolcall_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          }
        } else if (event.type === "content_block_delta") {
          // Deltas address blocks by the API's index, which may differ from
          // the position in output.content.
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (event.delta.type === "text_delta") {
            if (block && block.type === "text") {
              block.text += event.delta.text;
              stream.push({
                type: "text_delta",
                contentIndex: index,
                delta: event.delta.text,
                partial: output,
              });
            }
          } else if (event.delta.type === "thinking_delta") {
            if (block && block.type === "thinking") {
              block.thinking += event.delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex: index,
                delta: event.delta.thinking,
                partial: output,
              });
            }
          } else if (event.delta.type === "input_json_delta") {
            if (block && block.type === "toolCall") {
              block.partialJson += event.delta.partial_json;
              // Re-parse the accumulated buffer on every delta so that
              // `arguments` always reflects what has arrived so far.
              block.arguments = parseStreamingJson(block.partialJson);
              stream.push({
                type: "toolcall_delta",
                contentIndex: index,
                delta: event.delta.partial_json,
                partial: output,
              });
            }
          } else if (event.delta.type === "signature_delta") {
            if (block && block.type === "thinking") {
              block.thinkingSignature = block.thinkingSignature || "";
              block.thinkingSignature += event.delta.signature;
            }
          }
        } else if (event.type === "content_block_stop") {
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (block) {
            // Strip the streaming-only bookkeeping field.
            delete (block as any).index;
            if (block.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex: index,
                content: block.text,
                partial: output,
              });
            } else if (block.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex: index,
                content: block.thinking,
                partial: output,
              });
            } else if (block.type === "toolCall") {
              block.arguments = parseStreamingJson(block.partialJson);
              delete (block as any).partialJson;
              stream.push({
                type: "toolcall_end",
                contentIndex: index,
                toolCall: block,
                partial: output,
              });
            }
          }
        } else if (event.type === "message_delta") {
          if (event.delta.stop_reason) {
            output.stopReason = mapStopReason(event.delta.stop_reason);
          }
          // Only update usage fields if present (not null).
          // Preserves input_tokens from message_start when proxies omit it in message_delta.
          if (event.usage.input_tokens != null) {
            output.usage.input = event.usage.input_tokens;
          }
          if (event.usage.output_tokens != null) {
            output.usage.output = event.usage.output_tokens;
          }
          if (event.usage.cache_read_input_tokens != null) {
            output.usage.cacheRead = event.usage.cache_read_input_tokens;
          }
          if (event.usage.cache_creation_input_tokens != null) {
            output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
          }
          refreshUsageTotals();
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Blocks that never received content_block_stop still carry the
      // streaming-only fields; remove them so the partial message has the
      // same shape as a finished one.
      for (const block of output.content) {
        delete (block as any).index;
        delete (block as any).partialJson;
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};

/**
 * Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6)
 */
function supportsAdaptiveThinking(modelId: string): boolean {
  // Opus 4.6 and Sonnet 4.6 model IDs (with or without date suffix)
  return (
    modelId.includes("opus-4-6") ||
    modelId.includes("opus-4.6") ||
    modelId.includes("sonnet-4-6") ||
    modelId.includes("sonnet-4.6")
  );
}

/**
 * Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
 * Note: effort "max" is only valid on Opus 4.6, so "xhigh" degrades to "high"
 * on every other model. Unrecognised levels map to "high".
 */
function mapThinkingLevelToEffort(
  level: SimpleStreamOptions["reasoning"],
  modelId: string,
): AnthropicEffort {
  switch (level) {
    case "minimal":
      return "low";
    case "low":
      return "low";
    case "medium":
      return "medium";
    case "high":
      return "high";
    case "xhigh":
      return modelId.includes("opus-4-6") || modelId.includes("opus-4.6")
        ? "max"
        : "high";
    default:
      return "high";
  }
}

/**
 * Simplified entry point: translates the provider-agnostic `reasoning` level
 * into Anthropic thinking options and delegates to {@link streamAnthropic}.
 *
 * @throws Error synchronously when no API key is available for the provider.
 */
export const streamSimpleAnthropic: StreamFunction<
  "anthropic-messages",
  SimpleStreamOptions
> = (
  model: Model<"anthropic-messages">,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
  const apiKey = options?.apiKey || getEnvApiKey(model.provider);
  if (!apiKey) {
    throw new Error(`No API key for provider: ${model.provider}`);
  }

  const base = buildBaseOptions(model, options, apiKey);
  if (!options?.reasoning) {
    return streamAnthropic(model, context, {
      ...base,
      thinkingEnabled: false,
    } satisfies AnthropicOptions);
  }

  // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
  // For older models: use budget-based thinking
  if (supportsAdaptiveThinking(model.id)) {
    const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
    return streamAnthropic(model, context, {
      ...base,
      thinkingEnabled: true,
      effort,
    } satisfies AnthropicOptions);
  }

  const adjusted = adjustMaxTokensForThinking(
    base.maxTokens || 0,
    model.maxTokens,
    options.reasoning,
    options.thinkingBudgets,
  );

  return streamAnthropic(model, context, {
    ...base,
    maxTokens: adjusted.maxTokens,
    thinkingEnabled: true,
    thinkingBudgetTokens: adjusted.thinkingBudget,
  } satisfies AnthropicOptions);
};

/** True when the key contains the "sk-ant-oat" marker used to detect OAuth tokens. */
function isOAuthToken(apiKey: string): boolean {
  return apiKey.includes("sk-ant-oat");
}

/**
 * Create an SDK client configured for the provider/auth flavour in use.
 *
 * Three variants exist: GitHub Copilot (bearer auth, selective betas), OAuth
 * token (bearer auth plus Claude Code identity headers) and plain API key.
 * In every variant the built-in headers can be overridden by `model.headers`
 * and then by `optionsHeaders`; Copilot additionally applies `dynamicHeaders`
 * between those two.
 *
 * @returns The client and whether the OAuth variant was selected.
 */
function createClient(
  model: Model<"anthropic-messages">,
  apiKey: string,
  interleavedThinking: boolean,
  optionsHeaders?: Record<string, string>,
  dynamicHeaders?: Record<string, string>,
): { client: Anthropic; isOAuthToken: boolean } {
  // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
  // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
  const needsInterleavedBeta =
    interleavedThinking && !supportsAdaptiveThinking(model.id);

  // Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)
  if (model.provider === "github-copilot") {
    const betaFeatures: string[] = [];
    if (needsInterleavedBeta) {
      betaFeatures.push("interleaved-thinking-2025-05-14");
    }

    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          ...(betaFeatures.length > 0
            ? { "anthropic-beta": betaFeatures.join(",") }
            : {}),
        },
        model.headers,
        dynamicHeaders,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: false };
  }

  const betaFeatures = ["fine-grained-tool-streaming-2025-05-14"];
  if (needsInterleavedBeta) {
    betaFeatures.push("interleaved-thinking-2025-05-14");
  }

  // OAuth: Bearer auth, Claude Code identity headers
  if (isOAuthToken(apiKey)) {
    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}`,
          "user-agent": `claude-cli/${claudeCodeVersion}`,
          "x-app": "cli",
        },
        model.headers,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: true };
  }

  // API key auth
  const client = new Anthropic({
    apiKey,
    baseURL: model.baseUrl,
    dangerouslyAllowBrowser: true,
    defaultHeaders: mergeHeaders(
      {
        accept: "application/json",
        "anthropic-dangerous-direct-browser-access": "true",
        "anthropic-beta": betaFeatures.join(","),
      },
      model.headers,
      optionsHeaders,
    ),
  });

  return { client, isOAuthToken: false };
}

/**
 * Assemble the request body for a streaming Messages call.
 *
 * @param model Target model.
 * @param context System prompt, messages and tools.
 * @param isOAuthToken Whether the request authenticates with an OAuth token
 *   (forces the Claude Code identity prompt and tool naming).
 * @param options Provider options.
 * @returns The streaming request parameters.
 */
function buildParams(
  model: Model<"anthropic-messages">,
  context: Context,
  isOAuthToken: boolean,
  options?: AnthropicOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(
    model.baseUrl,
    options?.cacheRetention,
  );
  const params: MessageCreateParamsStreaming = {
    model: model.id,
    messages: convertMessages(
      context.messages,
      model,
      isOAuthToken,
      cacheControl,
    ),
    // Default to a third of the model's limit, truncated to an integer.
    // (`|` binds tighter than `||`; the parentheses only make that explicit.)
    max_tokens: options?.maxTokens || ((model.maxTokens / 3) | 0),
    stream: true,
  };

  // For OAuth tokens, we MUST include Claude Code identity
  if (isOAuthToken) {
    params.system = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
    if (context.systemPrompt) {
      params.system.push({
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      });
    }
  } else if (context.systemPrompt) {
    // Add cache control to system prompt for non-OAuth tokens
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }

  // Temperature is incompatible with extended thinking (adaptive or budget-based).
  if (options?.temperature !== undefined && !options?.thinkingEnabled) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools, isOAuthToken);
  }

  // Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      // Adaptive thinking: Claude decides when and how much to think
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      // Budget-based thinking for older models
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens || 1024,
      };
    }
  }

  // Only a string user_id is forwarded; any other metadata is dropped.
  if (options?.metadata) {
    const userId = options.metadata.user_id;
    if (typeof userId === "string") {
      params.metadata = { user_id: userId };
    }
  }

  if (options?.toolChoice) {
    if (typeof options.toolChoice === "string") {
      params.tool_choice = { type: options.toolChoice };
    } else {
      params.tool_choice = options.toolChoice;
    }
  }

  return params;
}

// Normalize tool call IDs to match Anthropic's required pattern and length:
// every character outside [a-zA-Z0-9_-] becomes "_" and the result is cut to
// 64 characters.
function normalizeToolCallId(id: string): string {
  return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
}

/**
 * Convert the provider-agnostic message history into Anthropic message params.
 *
 * Empty/whitespace-only text is dropped, images are removed for models without
 * image input, consecutive tool results are merged into a single user message,
 * and (when caching is enabled) the final user message gets a cache marker.
 *
 * @param messages Conversation history.
 * @param model Target model (used for capability checks and transformation).
 * @param isOAuthToken Whether tool names must use Claude Code casing.
 * @param cacheControl Cache marker to attach, if caching is enabled.
 * @returns Messages in Anthropic API format.
 */
function convertMessages(
  messages: Message[],
  model: Model<"anthropic-messages">,
  isOAuthToken: boolean,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const params: MessageParam[] = [];

  // Transform messages for cross-provider compatibility
  const transformedMessages = transformMessages(
    messages,
    model,
    normalizeToolCallId,
  );

  for (let i = 0; i < transformedMessages.length; i++) {
    const msg = transformedMessages[i];

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        if (msg.content.trim().length > 0) {
          params.push({
            role: "user",
            content: sanitizeSurrogates(msg.content),
          });
        }
      } else {
        const blocks: ContentBlockParam[] = msg.content.map((item) => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            };
          } else {
            return {
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as
                  | "image/jpeg"
                  | "image/png"
                  | "image/gif"
                  | "image/webp",
                data: item.data,
              },
            };
          }
        });
        // Drop images for models that do not accept image input.
        let filteredBlocks = !model?.input.includes("image")
          ? blocks.filter((b) => b.type !== "image")
          : blocks;
        // Drop empty / whitespace-only text blocks.
        filteredBlocks = filteredBlocks.filter((b) => {
          if (b.type === "text") {
            return b.text.trim().length > 0;
          }
          return true;
        });
        if (filteredBlocks.length === 0) continue;
        params.push({
          role: "user",
          content: filteredBlocks,
        });
      }
    } else if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];

      for (const block of msg.content) {
        if (block.type === "text") {
          if (block.text.trim().length === 0) continue;
          blocks.push({
            type: "text",
            text: sanitizeSurrogates(block.text),
          });
        } else if (block.type === "thinking") {
          // Redacted thinking: pass the opaque payload back as redacted_thinking
          if (block.redacted) {
            blocks.push({
              type: "redacted_thinking",
              data: block.thinkingSignature!,
            });
            continue;
          }
          if (block.thinking.trim().length === 0) continue;
          // If thinking signature is missing/empty (e.g., from aborted stream),
          // convert to plain text block without `thinking` tags to avoid API
          // rejection and prevent Claude from mimicking the tags in responses
          if (
            !block.thinkingSignature ||
            block.thinkingSignature.trim().length === 0
          ) {
            blocks.push({
              type: "text",
              text: sanitizeSurrogates(block.thinking),
            });
          } else {
            blocks.push({
              type: "thinking",
              thinking: sanitizeSurrogates(block.thinking),
              signature: block.thinkingSignature,
            });
          }
        } else if (block.type === "toolCall") {
          blocks.push({
            type: "tool_use",
            id: block.id,
            name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
            input: block.arguments ?? {},
          });
        }
      }
      if (blocks.length === 0) continue;
      params.push({
        role: "assistant",
        content: blocks,
      });
    } else if (msg.role === "toolResult") {
      // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
      const toolResults: ContentBlockParam[] = [];

      // Add the current tool result
      toolResults.push({
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: convertContentBlocks(msg.content),
        is_error: msg.isError,
      });

      // Look ahead for consecutive toolResult messages
      let j = i + 1;
      while (
        j < transformedMessages.length &&
        transformedMessages[j].role === "toolResult"
      ) {
        const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
        toolResults.push({
          type: "tool_result",
          tool_use_id: nextMsg.toolCallId,
          content: convertContentBlocks(nextMsg.content),
          is_error: nextMsg.isError,
        });
        j++;
      }

      // Skip the messages we've already processed
      i = j - 1;

      // Add a single user message with all tool results
      params.push({
        role: "user",
        content: toolResults,
      });
    }
  }

  // Add cache_control to the last user message to cache conversation history
  if (cacheControl && params.length > 0) {
    const lastMessage = params[params.length - 1];
    if (lastMessage.role === "user") {
      if (Array.isArray(lastMessage.content)) {
        const lastBlock = lastMessage.content[lastMessage.content.length - 1];
        if (
          lastBlock &&
          (lastBlock.type === "text" ||
            lastBlock.type === "image" ||
            lastBlock.type === "tool_result")
        ) {
          (lastBlock as any).cache_control = cacheControl;
        }
      } else if (typeof lastMessage.content === "string") {
        // A bare string cannot carry cache_control, so wrap it in a text block.
        lastMessage.content = [
          {
            type: "text",
            text: lastMessage.content,
            cache_control: cacheControl,
          },
        ] as any;
      }
    }
  }

  return params;
}

/**
 * Convert tool definitions into Anthropic tool declarations.
 *
 * Only `properties` and `required` are taken from each tool's parameter
 * schema; the schema type is always emitted as "object".
 *
 * @param tools Tool definitions from the context.
 * @param isOAuthToken Whether tool names must use Claude Code casing.
 */
function convertTools(
  tools: Tool[],
  isOAuthToken: boolean,
): Anthropic.Messages.Tool[] {
  if (!tools) return [];

  return tools.map((tool) => {
    const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema

    return {
      name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
      description: tool.description,
      input_schema: {
        type: "object" as const,
        properties: jsonSchema.properties || {},
        required: jsonSchema.required || [],
      },
    };
  });
}

/**
 * Translate an Anthropic stop reason into the provider-agnostic StopReason.
 *
 * @throws Error for stop reasons not listed below. Inside streamAnthropic the
 *   throw is caught and reported as an "error" event.
 */
function mapStopReason(
  reason: Anthropic.Messages.StopReason | string,
): StopReason {
  switch (reason) {
    case "end_turn":
      return "stop";
    case "max_tokens":
      return "length";
    case "tool_use":
      return "toolUse";
    case "refusal":
      return "error";
    case "pause_turn": // Stop is good enough -> resubmit
      return "stop";
    case "stop_sequence":
      return "stop"; // We don't supply stop sequences, so this should never happen
    case "sensitive": // Content flagged by safety filters (not yet in SDK types)
      return "error";
    default:
      // Unknown stop reasons (the API may add new values) are rejected
      // explicitly rather than silently mapped to an existing reason.
      throw new Error(`Unhandled stop reason: ${reason}`);
  }
}