mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-20 20:01:10 +00:00
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/ - Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases - Update deploy-staging.yml to build pi from source (bun compile) before Docker build - Add apps/companion-os/** to path triggers - No more cross-repo dispatch needed Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
0250f72976
579 changed files with 206942 additions and 0 deletions
894
packages/ai/src/providers/amazon-bedrock.ts
Normal file
894
packages/ai/src/providers/amazon-bedrock.ts
Normal file
|
|
@ -0,0 +1,894 @@
|
|||
import {
|
||||
BedrockRuntimeClient,
|
||||
type BedrockRuntimeClientConfig,
|
||||
StopReason as BedrockStopReason,
|
||||
type Tool as BedrockTool,
|
||||
CachePointType,
|
||||
CacheTTL,
|
||||
type ContentBlock,
|
||||
type ContentBlockDeltaEvent,
|
||||
type ContentBlockStartEvent,
|
||||
type ContentBlockStopEvent,
|
||||
ConversationRole,
|
||||
ConverseStreamCommand,
|
||||
type ConverseStreamMetadataEvent,
|
||||
ImageFormat,
|
||||
type Message,
|
||||
type SystemContentBlock,
|
||||
type ToolChoice,
|
||||
type ToolConfiguration,
|
||||
ToolResultStatus,
|
||||
} from "@aws-sdk/client-bedrock-runtime";
|
||||
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ThinkingLevel,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import {
|
||||
adjustMaxTokensForThinking,
|
||||
buildBaseOptions,
|
||||
clampReasoning,
|
||||
} from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/** Options accepted by {@link streamBedrock} on top of the common StreamOptions. */
export interface BedrockOptions extends StreamOptions {
  /* AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION, then "us-east-1"
     (unless AWS_PROFILE is set, in which case the SDK resolves it). */
  region?: string;
  /* Named AWS credentials profile forwarded to the Bedrock runtime client. */
  profile?: string;
  /* Tool selection policy: "any" forces some tool, { type: "tool" } forces a
     specific tool, "none" disables tools entirely. */
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
  /* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
  reasoning?: ThinkingLevel;
  /* Custom token budgets per thinking level. Overrides default budgets. */
  thinkingBudgets?: ThinkingBudgets;
  /* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
  interleavedThinking?: boolean;
}

/* Streaming-internal content block: `index` ties the block to Bedrock's
   contentBlockIndex and `partialJson` accumulates tool-call input JSON.
   Both fields are deleted before the message is surfaced to callers
   (see handleContentBlockStop and the catch block in streamBedrock). */
type Block = (TextContent | ThinkingContent | ToolCall) & {
  index?: number;
  partialJson?: string;
};
|
||||
|
||||
/**
 * Stream an assistant response from AWS Bedrock via the ConverseStream API.
 *
 * Returns an AssistantMessageEventStream immediately; the request itself runs
 * in a detached async IIFE that pushes start/delta/end events as stream items
 * arrive and finishes with a "done" or "error" event. Cancellation is driven
 * by `options.signal`.
 */
export const streamBedrock: StreamFunction<
  "bedrock-converse-stream",
  BedrockOptions
> = (
  model: Model<"bedrock-converse-stream">,
  context: Context,
  options: BedrockOptions = {},
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the full message; also used as the "partial" payload on
    // every intermediate stream event.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "bedrock-converse-stream" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    // Same array as output.content, viewed with the streaming-only fields
    // (index / partialJson) visible.
    const blocks = output.content as Block[];

    const config: BedrockRuntimeClientConfig = {
      profile: options.profile,
    };

    // in Node.js/Bun environment only
    if (
      typeof process !== "undefined" &&
      (process.versions?.node || process.versions?.bun)
    ) {
      // Region resolution: explicit option > env vars > SDK default chain.
      // When AWS_PROFILE is set, we leave region undefined so the SDK can
      // resolve it from aws profile configs. Otherwise fall back to us-east-1.
      const explicitRegion =
        options.region ||
        process.env.AWS_REGION ||
        process.env.AWS_DEFAULT_REGION;
      if (explicitRegion) {
        config.region = explicitRegion;
      } else if (!process.env.AWS_PROFILE) {
        config.region = "us-east-1";
      }

      // Support proxies that don't need authentication
      if (process.env.AWS_BEDROCK_SKIP_AUTH === "1") {
        config.credentials = {
          accessKeyId: "dummy-access-key",
          secretAccessKey: "dummy-secret-key",
        };
      }

      if (
        process.env.HTTP_PROXY ||
        process.env.HTTPS_PROXY ||
        process.env.NO_PROXY ||
        process.env.http_proxy ||
        process.env.https_proxy ||
        process.env.no_proxy
      ) {
        // Lazy imports keep these optional deps out of non-proxy code paths.
        const nodeHttpHandler = await import("@smithy/node-http-handler");
        const proxyAgent = await import("proxy-agent");

        const agent = new proxyAgent.ProxyAgent();

        // Bedrock runtime uses NodeHttp2Handler by default since v3.798.0, which is based
        // on `http2` module and has no support for http agent.
        // Use NodeHttpHandler to support http agent.
        config.requestHandler = new nodeHttpHandler.NodeHttpHandler({
          httpAgent: agent,
          httpsAgent: agent,
        });
      } else if (process.env.AWS_BEDROCK_FORCE_HTTP1 === "1") {
        // Some custom endpoints require HTTP/1.1 instead of HTTP/2
        const nodeHttpHandler = await import("@smithy/node-http-handler");
        config.requestHandler = new nodeHttpHandler.NodeHttpHandler();
      }
    } else {
      // Non-Node environment (browser): fall back to us-east-1 since
      // there's no config file resolution available.
      config.region = options.region || "us-east-1";
    }

    try {
      const client = new BedrockRuntimeClient(config);

      const cacheRetention = resolveCacheRetention(options.cacheRetention);
      const commandInput = {
        modelId: model.id,
        messages: convertMessages(context, model, cacheRetention),
        system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
        inferenceConfig: {
          maxTokens: options.maxTokens,
          temperature: options.temperature,
        },
        toolConfig: convertToolConfig(context.tools, options.toolChoice),
        additionalModelRequestFields: buildAdditionalModelRequestFields(
          model,
          options,
        ),
      };
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(commandInput);
      const command = new ConverseStreamCommand(commandInput);

      const response = await client.send(command, {
        abortSignal: options.signal,
      });

      // Dispatch each union member of the Converse stream to its handler.
      for await (const item of response.stream!) {
        if (item.messageStart) {
          if (item.messageStart.role !== ConversationRole.ASSISTANT) {
            throw new Error(
              "Unexpected assistant message start but got user message start instead",
            );
          }
          stream.push({ type: "start", partial: output });
        } else if (item.contentBlockStart) {
          handleContentBlockStart(
            item.contentBlockStart,
            blocks,
            output,
            stream,
          );
        } else if (item.contentBlockDelta) {
          handleContentBlockDelta(
            item.contentBlockDelta,
            blocks,
            output,
            stream,
          );
        } else if (item.contentBlockStop) {
          handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
        } else if (item.messageStop) {
          output.stopReason = mapStopReason(item.messageStop.stopReason);
        } else if (item.metadata) {
          handleMetadata(item.metadata, model, output);
        } else if (item.internalServerException) {
          throw new Error(
            `Internal server error: ${item.internalServerException.message}`,
          );
        } else if (item.modelStreamErrorException) {
          throw new Error(
            `Model stream error: ${item.modelStreamErrorException.message}`,
          );
        } else if (item.validationException) {
          throw new Error(
            `Validation error: ${item.validationException.message}`,
          );
        } else if (item.throttlingException) {
          throw new Error(
            `Throttling error: ${item.throttlingException.message}`,
          );
        } else if (item.serviceUnavailableException) {
          throw new Error(
            `Service unavailable: ${item.serviceUnavailableException.message}`,
          );
        }
      }

      // The SDK may end the stream quietly on abort; surface it as an error.
      if (options.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "error" || output.stopReason === "aborted") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip streaming-only bookkeeping so the partial message is clean.
      for (const block of output.content) {
        delete (block as Block).index;
        delete (block as Block).partialJson;
      }
      output.stopReason = options.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleBedrock: StreamFunction<
|
||||
"bedrock-converse-stream",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const base = buildBaseOptions(model, options, undefined);
|
||||
if (!options?.reasoning) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: undefined,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
if (
|
||||
model.id.includes("anthropic.claude") ||
|
||||
model.id.includes("anthropic/claude")
|
||||
) {
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: {
|
||||
...(options.thinkingBudgets || {}),
|
||||
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
|
||||
},
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
};
|
||||
|
||||
function handleContentBlockStart(
|
||||
event: ContentBlockStartEvent,
|
||||
blocks: Block[],
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
): void {
|
||||
const index = event.contentBlockIndex!;
|
||||
const start = event.start;
|
||||
|
||||
if (start?.toolUse) {
|
||||
const block: Block = {
|
||||
type: "toolCall",
|
||||
id: start.toolUse.toolUseId || "",
|
||||
name: start.toolUse.name || "",
|
||||
arguments: {},
|
||||
partialJson: "",
|
||||
index,
|
||||
};
|
||||
output.content.push(block);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: blocks.length - 1,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Apply a streaming content delta (text, tool-call JSON, or reasoning) to the
 * in-progress message and emit the matching stream events. Text and thinking
 * blocks are created lazily here, since contentBlockStart is only observed
 * for tool-use blocks (see comment below).
 */
function handleContentBlockDelta(
  event: ContentBlockDeltaEvent,
  blocks: Block[],
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void {
  const contentBlockIndex = event.contentBlockIndex!;
  const delta = event.delta;
  // Locate the block previously created for this Bedrock content index.
  let index = blocks.findIndex((b) => b.index === contentBlockIndex);
  let block = blocks[index];

  if (delta?.text !== undefined) {
    // If no text block exists yet, create one, as `handleContentBlockStart` is not sent for text blocks
    if (!block) {
      const newBlock: Block = {
        type: "text",
        text: "",
        index: contentBlockIndex,
      };
      output.content.push(newBlock);
      index = blocks.length - 1;
      block = blocks[index];
      stream.push({ type: "text_start", contentIndex: index, partial: output });
    }
    if (block.type === "text") {
      block.text += delta.text;
      stream.push({
        type: "text_delta",
        contentIndex: index,
        delta: delta.text,
        partial: output,
      });
    }
  } else if (delta?.toolUse && block?.type === "toolCall") {
    // Accumulate raw JSON and keep a best-effort parse of the partial input.
    block.partialJson = (block.partialJson || "") + (delta.toolUse.input || "");
    block.arguments = parseStreamingJson(block.partialJson);
    stream.push({
      type: "toolcall_delta",
      contentIndex: index,
      delta: delta.toolUse.input || "",
      partial: output,
    });
  } else if (delta?.reasoningContent) {
    let thinkingBlock = block;
    let thinkingIndex = index;

    // Thinking blocks are also created lazily on their first delta.
    if (!thinkingBlock) {
      const newBlock: Block = {
        type: "thinking",
        thinking: "",
        thinkingSignature: "",
        index: contentBlockIndex,
      };
      output.content.push(newBlock);
      thinkingIndex = blocks.length - 1;
      thinkingBlock = blocks[thinkingIndex];
      stream.push({
        type: "thinking_start",
        contentIndex: thinkingIndex,
        partial: output,
      });
    }

    if (thinkingBlock?.type === "thinking") {
      if (delta.reasoningContent.text) {
        thinkingBlock.thinking += delta.reasoningContent.text;
        stream.push({
          type: "thinking_delta",
          contentIndex: thinkingIndex,
          delta: delta.reasoningContent.text,
          partial: output,
        });
      }
      // Signature chunks are accumulated silently (no dedicated event).
      if (delta.reasoningContent.signature) {
        thinkingBlock.thinkingSignature =
          (thinkingBlock.thinkingSignature || "") +
          delta.reasoningContent.signature;
      }
    }
  }
}
|
||||
|
||||
function handleMetadata(
|
||||
event: ConverseStreamMetadataEvent,
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
output: AssistantMessage,
|
||||
): void {
|
||||
if (event.usage) {
|
||||
output.usage.input = event.usage.inputTokens || 0;
|
||||
output.usage.output = event.usage.outputTokens || 0;
|
||||
output.usage.cacheRead = event.usage.cacheReadInputTokens || 0;
|
||||
output.usage.cacheWrite = event.usage.cacheWriteInputTokens || 0;
|
||||
output.usage.totalTokens =
|
||||
event.usage.totalTokens || output.usage.input + output.usage.output;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
||||
function handleContentBlockStop(
|
||||
event: ContentBlockStopEvent,
|
||||
blocks: Block[],
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
): void {
|
||||
const index = blocks.findIndex((b) => b.index === event.contentBlockIndex);
|
||||
const block = blocks[index];
|
||||
if (!block) return;
|
||||
delete (block as Block).index;
|
||||
|
||||
switch (block.type) {
|
||||
case "text":
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: index,
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
case "thinking":
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: index,
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
case "toolCall":
|
||||
block.arguments = parseStreamingJson(block.partialJson);
|
||||
delete (block as Block).partialJson;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: index,
|
||||
toolCall: block,
|
||||
partial: output,
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports adaptive thinking (Opus 4.6 and Sonnet 4.6).
|
||||
*/
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
function mapThinkingLevelToEffort(
|
||||
level: SimpleStreamOptions["reasoning"],
|
||||
modelId: string,
|
||||
): "low" | "medium" | "high" | "max" {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6")
|
||||
? "max"
|
||||
: "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports prompt caching.
|
||||
* Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
|
||||
*/
|
||||
function supportsPromptCaching(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
): boolean {
|
||||
if (model.cost.cacheRead || model.cost.cacheWrite) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const id = model.id.toLowerCase();
|
||||
// Claude 4.x models (opus-4, sonnet-4, haiku-4)
|
||||
if (id.includes("claude") && (id.includes("-4-") || id.includes("-4.")))
|
||||
return true;
|
||||
// Claude 3.7 Sonnet
|
||||
if (id.includes("claude-3-7-sonnet")) return true;
|
||||
// Claude 3.5 Haiku
|
||||
if (id.includes("claude-3-5-haiku")) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports thinking signatures in reasoningContent.
|
||||
* Only Anthropic Claude models support the signature field.
|
||||
* Other models (OpenAI, Qwen, Minimax, Moonshot, etc.) reject it with:
|
||||
* "This model doesn't support the reasoningContent.reasoningText.signature field"
|
||||
*/
|
||||
function supportsThinkingSignature(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
): boolean {
|
||||
const id = model.id.toLowerCase();
|
||||
return id.includes("anthropic.claude") || id.includes("anthropic/claude");
|
||||
}
|
||||
|
||||
function buildSystemPrompt(
|
||||
systemPrompt: string | undefined,
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
cacheRetention: CacheRetention,
|
||||
): SystemContentBlock[] | undefined {
|
||||
if (!systemPrompt) return undefined;
|
||||
|
||||
const blocks: SystemContentBlock[] = [
|
||||
{ text: sanitizeSurrogates(systemPrompt) },
|
||||
];
|
||||
|
||||
// Add cache point for supported Claude models when caching is enabled
|
||||
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
||||
blocks.push({
|
||||
cachePoint: {
|
||||
type: CachePointType.DEFAULT,
|
||||
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
function normalizeToolCallId(id: string): string {
|
||||
const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
|
||||
return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
|
||||
}
|
||||
|
||||
/**
 * Convert the provider-agnostic context messages into Bedrock Converse
 * `Message`s. Tool-call ids are normalized, consecutive toolResult messages
 * are merged into one user message (Bedrock requires this), empty assistant
 * content is dropped, and a cache point is appended to a trailing user
 * message when prompt caching applies.
 */
function convertMessages(
  context: Context,
  model: Model<"bedrock-converse-stream">,
  cacheRetention: CacheRetention,
): Message[] {
  const result: Message[] = [];
  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  // Indexed loop (not for-of) because the toolResult branch advances `i`
  // past the look-ahead it consumes.
  for (let i = 0; i < transformedMessages.length; i++) {
    const m = transformedMessages[i];

    switch (m.role) {
      case "user":
        result.push({
          role: ConversationRole.USER,
          content:
            typeof m.content === "string"
              ? [{ text: sanitizeSurrogates(m.content) }]
              : m.content.map((c) => {
                  switch (c.type) {
                    case "text":
                      return { text: sanitizeSurrogates(c.text) };
                    case "image":
                      return { image: createImageBlock(c.mimeType, c.data) };
                    default:
                      throw new Error("Unknown user content type");
                  }
                }),
        });
        break;
      case "assistant": {
        // Skip assistant messages with empty content (e.g., from aborted requests)
        // Bedrock rejects messages with empty content arrays
        if (m.content.length === 0) {
          continue;
        }
        const contentBlocks: ContentBlock[] = [];
        for (const c of m.content) {
          switch (c.type) {
            case "text":
              // Skip empty text blocks (continue targets the inner for loop)
              if (c.text.trim().length === 0) continue;
              contentBlocks.push({ text: sanitizeSurrogates(c.text) });
              break;
            case "toolCall":
              contentBlocks.push({
                toolUse: { toolUseId: c.id, name: c.name, input: c.arguments },
              });
              break;
            case "thinking":
              // Skip empty thinking blocks
              if (c.thinking.trim().length === 0) continue;
              // Only Anthropic models support the signature field in reasoningText.
              // For other models, we omit the signature to avoid errors like:
              // "This model doesn't support the reasoningContent.reasoningText.signature field"
              if (supportsThinkingSignature(model)) {
                contentBlocks.push({
                  reasoningContent: {
                    reasoningText: {
                      text: sanitizeSurrogates(c.thinking),
                      signature: c.thinkingSignature,
                    },
                  },
                });
              } else {
                contentBlocks.push({
                  reasoningContent: {
                    reasoningText: { text: sanitizeSurrogates(c.thinking) },
                  },
                });
              }
              break;
            default:
              throw new Error("Unknown assistant content type");
          }
        }
        // Skip if all content blocks were filtered out
        if (contentBlocks.length === 0) {
          continue;
        }
        result.push({
          role: ConversationRole.ASSISTANT,
          content: contentBlocks,
        });
        break;
      }
      case "toolResult": {
        // Collect all consecutive toolResult messages into a single user message
        // Bedrock requires all tool results to be in one message
        const toolResults: ContentBlock.ToolResultMember[] = [];

        // Add current tool result with all content blocks combined
        toolResults.push({
          toolResult: {
            toolUseId: m.toolCallId,
            content: m.content.map((c) =>
              c.type === "image"
                ? { image: createImageBlock(c.mimeType, c.data) }
                : { text: sanitizeSurrogates(c.text) },
            ),
            status: m.isError
              ? ToolResultStatus.ERROR
              : ToolResultStatus.SUCCESS,
          },
        });

        // Look ahead for consecutive toolResult messages
        let j = i + 1;
        while (
          j < transformedMessages.length &&
          transformedMessages[j].role === "toolResult"
        ) {
          const nextMsg = transformedMessages[j] as ToolResultMessage;
          toolResults.push({
            toolResult: {
              toolUseId: nextMsg.toolCallId,
              content: nextMsg.content.map((c) =>
                c.type === "image"
                  ? { image: createImageBlock(c.mimeType, c.data) }
                  : { text: sanitizeSurrogates(c.text) },
              ),
              status: nextMsg.isError
                ? ToolResultStatus.ERROR
                : ToolResultStatus.SUCCESS,
            },
          });
          j++;
        }

        // Skip the messages we've already processed
        i = j - 1;

        result.push({
          role: ConversationRole.USER,
          content: toolResults,
        });
        break;
      }
      default:
        throw new Error("Unknown message role");
    }
  }

  // Add cache point to the last user message for supported Claude models when caching is enabled
  if (
    cacheRetention !== "none" &&
    supportsPromptCaching(model) &&
    result.length > 0
  ) {
    const lastMessage = result[result.length - 1];
    if (lastMessage.role === ConversationRole.USER && lastMessage.content) {
      (lastMessage.content as ContentBlock[]).push({
        cachePoint: {
          type: CachePointType.DEFAULT,
          ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
        },
      });
    }
  }

  return result;
}
|
||||
|
||||
function convertToolConfig(
|
||||
tools: Tool[] | undefined,
|
||||
toolChoice: BedrockOptions["toolChoice"],
|
||||
): ToolConfiguration | undefined {
|
||||
if (!tools?.length || toolChoice === "none") return undefined;
|
||||
|
||||
const bedrockTools: BedrockTool[] = tools.map((tool) => ({
|
||||
toolSpec: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
inputSchema: { json: tool.parameters },
|
||||
},
|
||||
}));
|
||||
|
||||
let bedrockToolChoice: ToolChoice | undefined;
|
||||
switch (toolChoice) {
|
||||
case "auto":
|
||||
bedrockToolChoice = { auto: {} };
|
||||
break;
|
||||
case "any":
|
||||
bedrockToolChoice = { any: {} };
|
||||
break;
|
||||
default:
|
||||
if (toolChoice?.type === "tool") {
|
||||
bedrockToolChoice = { tool: { name: toolChoice.name } };
|
||||
}
|
||||
}
|
||||
|
||||
return { tools: bedrockTools, toolChoice: bedrockToolChoice };
|
||||
}
|
||||
|
||||
function mapStopReason(reason: string | undefined): StopReason {
|
||||
switch (reason) {
|
||||
case BedrockStopReason.END_TURN:
|
||||
case BedrockStopReason.STOP_SEQUENCE:
|
||||
return "stop";
|
||||
case BedrockStopReason.MAX_TOKENS:
|
||||
case BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED:
|
||||
return "length";
|
||||
case BedrockStopReason.TOOL_USE:
|
||||
return "toolUse";
|
||||
default:
|
||||
return "error";
|
||||
}
|
||||
}
|
||||
|
||||
function buildAdditionalModelRequestFields(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
options: BedrockOptions,
|
||||
): Record<string, any> | undefined {
|
||||
if (!options.reasoning || !model.reasoning) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (
|
||||
model.id.includes("anthropic.claude") ||
|
||||
model.id.includes("anthropic/claude")
|
||||
) {
|
||||
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
||||
? {
|
||||
thinking: { type: "adaptive" },
|
||||
output_config: {
|
||||
effort: mapThinkingLevelToEffort(options.reasoning, model.id),
|
||||
},
|
||||
}
|
||||
: (() => {
|
||||
const defaultBudgets: Record<ThinkingLevel, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
|
||||
};
|
||||
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
const level =
|
||||
options.reasoning === "xhigh" ? "high" : options.reasoning;
|
||||
const budget =
|
||||
options.thinkingBudgets?.[level] ??
|
||||
defaultBudgets[options.reasoning];
|
||||
|
||||
return {
|
||||
thinking: {
|
||||
type: "enabled",
|
||||
budget_tokens: budget,
|
||||
},
|
||||
};
|
||||
})();
|
||||
|
||||
if (
|
||||
!supportsAdaptiveThinking(model.id) &&
|
||||
(options.interleavedThinking ?? true)
|
||||
) {
|
||||
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function createImageBlock(mimeType: string, data: string) {
|
||||
let format: ImageFormat;
|
||||
switch (mimeType) {
|
||||
case "image/jpeg":
|
||||
case "image/jpg":
|
||||
format = ImageFormat.JPEG;
|
||||
break;
|
||||
case "image/png":
|
||||
format = ImageFormat.PNG;
|
||||
break;
|
||||
case "image/gif":
|
||||
format = ImageFormat.GIF;
|
||||
break;
|
||||
case "image/webp":
|
||||
format = ImageFormat.WEBP;
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unknown image type: ${mimeType}`);
|
||||
}
|
||||
|
||||
const binaryString = atob(data);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
|
||||
return { source: { bytes }, format };
|
||||
}
|
||||
989
packages/ai/src/providers/anthropic.ts
Normal file
989
packages/ai/src/providers/anthropic.ts
Normal file
|
|
@ -0,0 +1,989 @@
|
|||
import Anthropic from "@anthropic-ai/sdk";
|
||||
import type {
|
||||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
ImageContent,
|
||||
Message,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import {
|
||||
adjustMaxTokensForThinking,
|
||||
buildBaseOptions,
|
||||
} from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
function getCacheControl(
|
||||
baseUrl: string,
|
||||
cacheRetention?: CacheRetention,
|
||||
): {
|
||||
retention: CacheRetention;
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" };
|
||||
} {
|
||||
const retention = resolveCacheRetention(cacheRetention);
|
||||
if (retention === "none") {
|
||||
return { retention };
|
||||
}
|
||||
const ttl =
|
||||
retention === "long" && baseUrl.includes("api.anthropic.com")
|
||||
? "1h"
|
||||
: undefined;
|
||||
return {
|
||||
retention,
|
||||
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
||||
};
|
||||
}
|
||||
|
||||
// Stealth mode: Mimic Claude Code's tool naming exactly
// (used when authenticating with an OAuth token, which requires the
// request to look like it came from the Claude Code CLI).
const claudeCodeVersion = "2.1.62";

// Claude Code 2.x tool names (canonical casing)
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
// To update: https://github.com/badlogic/cchistory
const claudeCodeTools = [
  "Read",
  "Write",
  "Edit",
  "Bash",
  "Grep",
  "Glob",
  "AskUserQuestion",
  "EnterPlanMode",
  "ExitPlanMode",
  "KillShell",
  "NotebookEdit",
  "Skill",
  "Task",
  "TaskOutput",
  "TodoWrite",
  "WebFetch",
  "WebSearch",
];

// Case-insensitive lookup: lowercased tool name -> canonical Claude Code casing.
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));

// Convert tool name to CC canonical casing if it matches (case-insensitive);
// unknown names pass through unchanged.
const toClaudeCodeName = (name: string) =>
  ccToolLookup.get(name.toLowerCase()) ?? name;
|
||||
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
|
||||
if (tools && tools.length > 0) {
|
||||
const lowerName = name.toLowerCase();
|
||||
const matchedTool = tools.find(
|
||||
(tool) => tool.name.toLowerCase() === lowerName,
|
||||
);
|
||||
if (matchedTool) return matchedTool.name;
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert content blocks to Anthropic API format
|
||||
*/
|
||||
function convertContentBlocks(content: (TextContent | ImageContent)[]):
|
||||
| string
|
||||
| Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: {
|
||||
type: "base64";
|
||||
media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
|
||||
data: string;
|
||||
};
|
||||
}
|
||||
> {
|
||||
// If only text blocks, return as concatenated string for simplicity
|
||||
const hasImages = content.some((c) => c.type === "image");
|
||||
if (!hasImages) {
|
||||
return sanitizeSurrogates(
|
||||
content.map((c) => (c as TextContent).text).join("\n"),
|
||||
);
|
||||
}
|
||||
|
||||
// If we have images, convert to content block array
|
||||
const blocks = content.map((block) => {
|
||||
if (block.type === "text") {
|
||||
return {
|
||||
type: "text" as const,
|
||||
text: sanitizeSurrogates(block.text),
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: "image" as const,
|
||||
source: {
|
||||
type: "base64" as const,
|
||||
media_type: block.mimeType as
|
||||
| "image/jpeg"
|
||||
| "image/png"
|
||||
| "image/gif"
|
||||
| "image/webp",
|
||||
data: block.data,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// If only images (no text), add placeholder text block
|
||||
const hasText = blocks.some((b) => b.type === "text");
|
||||
if (!hasText) {
|
||||
blocks.unshift({
|
||||
type: "text" as const,
|
||||
text: "(see attached image)",
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/** Effort levels accepted by Anthropic's adaptive-thinking output config. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";

/** Options specific to the Anthropic Messages streaming API. */
export interface AnthropicOptions extends StreamOptions {
  /**
   * Enable extended thinking.
   * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
   * For older models: uses budget-based thinking with thinkingBudgetTokens.
   */
  thinkingEnabled?: boolean;
  /**
   * Token budget for extended thinking (older models only).
   * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
   */
  thinkingBudgetTokens?: number;
  /**
   * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
   * Controls how much thinking Claude allocates:
   * - "max": Always thinks with no constraints (Opus 4.6 only)
   * - "high": Always thinks, deep reasoning (default)
   * - "medium": Moderate thinking, may skip for simple queries
   * - "low": Minimal thinking, skips for simple tasks
   * Ignored for older models.
   */
  effort?: AnthropicEffort;
  /**
   * Request the interleaved-thinking beta on older models (defaults to true
   * at the call site). Adaptive-thinking models have this built in, so the
   * beta header is skipped for them.
   */
  interleavedThinking?: boolean;
  /**
   * Tool selection strategy passed through to the API: "auto"/"any"/"none",
   * or force a specific tool by name.
   */
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
|
||||
|
||||
function mergeHeaders(
|
||||
...headerSources: (Record<string, string> | undefined)[]
|
||||
): Record<string, string> {
|
||||
const merged: Record<string, string> = {};
|
||||
for (const headers of headerSources) {
|
||||
if (headers) {
|
||||
Object.assign(merged, headers);
|
||||
}
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
 * Stream a completion from the Anthropic Messages API.
 *
 * Starts an async worker immediately and returns an event stream; the worker
 * pushes start/delta/end events as SSE events arrive and finishes with either
 * a "done" or an "error" event. Content blocks are accumulated into the
 * returned partial AssistantMessage as they stream in.
 */
export const streamAnthropic: StreamFunction<
  "anthropic-messages",
  AnthropicOptions
> = (
  model: Model<"anthropic-messages">,
  context: Context,
  options?: AnthropicOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Fire-and-forget worker; all results are delivered through `stream`.
  (async () => {
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";

      // GitHub Copilot requires per-request headers derived from the payload.
      let copilotDynamicHeaders: Record<string, string> | undefined;
      if (model.provider === "github-copilot") {
        const hasImages = hasCopilotVisionInput(context.messages);
        copilotDynamicHeaders = buildCopilotDynamicHeaders({
          messages: context.messages,
          hasImages,
        });
      }

      const { client, isOAuthToken } = createClient(
        model,
        apiKey,
        options?.interleavedThinking ?? true,
        options?.headers,
        copilotDynamicHeaders,
      );
      const params = buildParams(model, context, isOAuthToken, options);
      options?.onPayload?.(params);
      const anthropicStream = client.messages.stream(
        { ...params, stream: true },
        { signal: options?.signal },
      );
      stream.push({ type: "start", partial: output });

      // In-flight blocks carry the server's block `index` (and, for tool
      // calls, the partial JSON buffer); both are stripped on block stop.
      type Block = (
        | ThinkingContent
        | TextContent
        | (ToolCall & { partialJson: string })
      ) & { index: number };
      const blocks = output.content as Block[];

      for await (const event of anthropicStream) {
        if (event.type === "message_start") {
          // Capture initial token usage from message_start event
          // This ensures we have input token counts even if the stream is aborted early
          output.usage.input = event.message.usage.input_tokens || 0;
          output.usage.output = event.message.usage.output_tokens || 0;
          output.usage.cacheRead =
            event.message.usage.cache_read_input_tokens || 0;
          output.usage.cacheWrite =
            event.message.usage.cache_creation_input_tokens || 0;
          // Anthropic doesn't provide total_tokens, compute from components
          output.usage.totalTokens =
            output.usage.input +
            output.usage.output +
            output.usage.cacheRead +
            output.usage.cacheWrite;
          calculateCost(model, output.usage);
        } else if (event.type === "content_block_start") {
          if (event.content_block.type === "text") {
            const block: Block = {
              type: "text",
              text: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "text_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "thinking") {
            const block: Block = {
              type: "thinking",
              thinking: "",
              thinkingSignature: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "redacted_thinking") {
            // Redacted thinking arrives complete: keep the opaque payload in
            // thinkingSignature so it can be round-tripped on a later turn.
            const block: Block = {
              type: "thinking",
              thinking: "[Reasoning redacted]",
              thinkingSignature: event.content_block.data,
              redacted: true,
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "thinking_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          } else if (event.content_block.type === "tool_use") {
            const block: Block = {
              type: "toolCall",
              id: event.content_block.id,
              name: isOAuthToken
                ? fromClaudeCodeName(event.content_block.name, context.tools)
                : event.content_block.name,
              arguments:
                (event.content_block.input as Record<string, any>) ?? {},
              partialJson: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({
              type: "toolcall_start",
              contentIndex: output.content.length - 1,
              partial: output,
            });
          }
        } else if (event.type === "content_block_delta") {
          if (event.delta.type === "text_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "text") {
              block.text += event.delta.text;
              stream.push({
                type: "text_delta",
                contentIndex: index,
                delta: event.delta.text,
                partial: output,
              });
            }
          } else if (event.delta.type === "thinking_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinking += event.delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex: index,
                delta: event.delta.thinking,
                partial: output,
              });
            }
          } else if (event.delta.type === "input_json_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "toolCall") {
              // Re-parse the accumulated partial JSON on every delta so
              // `arguments` always reflects the best-effort current state.
              block.partialJson += event.delta.partial_json;
              block.arguments = parseStreamingJson(block.partialJson);
              stream.push({
                type: "toolcall_delta",
                contentIndex: index,
                delta: event.delta.partial_json,
                partial: output,
              });
            }
          } else if (event.delta.type === "signature_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinkingSignature = block.thinkingSignature || "";
              block.thinkingSignature += event.delta.signature;
            }
          }
        } else if (event.type === "content_block_stop") {
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (block) {
            // Strip the transient stream-bookkeeping fields before emitting.
            delete (block as any).index;
            if (block.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex: index,
                content: block.text,
                partial: output,
              });
            } else if (block.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex: index,
                content: block.thinking,
                partial: output,
              });
            } else if (block.type === "toolCall") {
              block.arguments = parseStreamingJson(block.partialJson);
              delete (block as any).partialJson;
              stream.push({
                type: "toolcall_end",
                contentIndex: index,
                toolCall: block,
                partial: output,
              });
            }
          }
        } else if (event.type === "message_delta") {
          if (event.delta.stop_reason) {
            output.stopReason = mapStopReason(event.delta.stop_reason);
          }
          // Only update usage fields if present (not null).
          // Preserves input_tokens from message_start when proxies omit it in message_delta.
          if (event.usage.input_tokens != null) {
            output.usage.input = event.usage.input_tokens;
          }
          if (event.usage.output_tokens != null) {
            output.usage.output = event.usage.output_tokens;
          }
          if (event.usage.cache_read_input_tokens != null) {
            output.usage.cacheRead = event.usage.cache_read_input_tokens;
          }
          if (event.usage.cache_creation_input_tokens != null) {
            output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
          }
          // Anthropic doesn't provide total_tokens, compute from components
          output.usage.totalTokens =
            output.usage.input +
            output.usage.output +
            output.usage.cacheRead +
            output.usage.cacheWrite;
          calculateCost(model, output.usage);
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Clean up any transient `index` fields left on partial blocks.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
|
||||
* Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6)
|
||||
*/
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
// Opus 4.6 and Sonnet 4.6 model IDs (with or without date suffix)
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
|
||||
* Note: effort "max" is only valid on Opus 4.6.
|
||||
*/
|
||||
function mapThinkingLevelToEffort(
|
||||
level: SimpleStreamOptions["reasoning"],
|
||||
modelId: string,
|
||||
): AnthropicEffort {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
return "low";
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6")
|
||||
? "max"
|
||||
: "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
export const streamSimpleAnthropic: StreamFunction<
|
||||
"anthropic-messages",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: false,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||
// For older models: use budget-based thinking
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
effort,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||
} satisfies AnthropicOptions);
|
||||
};
|
||||
|
||||
function isOAuthToken(apiKey: string): boolean {
|
||||
return apiKey.includes("sk-ant-oat");
|
||||
}
|
||||
|
||||
/**
 * Build an Anthropic SDK client configured for one of three auth modes:
 * GitHub Copilot (Bearer token, reduced beta set), Anthropic OAuth (Bearer
 * token plus Claude Code identity headers), or plain API key. Also reports
 * whether the key is an OAuth token so callers can apply stealth tool naming.
 * Header precedence (last wins): base headers < model.headers <
 * dynamicHeaders < optionsHeaders.
 */
function createClient(
  model: Model<"anthropic-messages">,
  apiKey: string,
  interleavedThinking: boolean,
  optionsHeaders?: Record<string, string>,
  dynamicHeaders?: Record<string, string>,
): { client: Anthropic; isOAuthToken: boolean } {
  // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
  // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
  const needsInterleavedBeta =
    interleavedThinking && !supportsAdaptiveThinking(model.id);

  // Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)
  if (model.provider === "github-copilot") {
    const betaFeatures: string[] = [];
    if (needsInterleavedBeta) {
      betaFeatures.push("interleaved-thinking-2025-05-14");
    }

    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          ...(betaFeatures.length > 0
            ? { "anthropic-beta": betaFeatures.join(",") }
            : {}),
        },
        model.headers,
        dynamicHeaders,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: false };
  }

  // Non-Copilot requests always get fine-grained tool streaming.
  const betaFeatures = ["fine-grained-tool-streaming-2025-05-14"];
  if (needsInterleavedBeta) {
    betaFeatures.push("interleaved-thinking-2025-05-14");
  }

  // OAuth: Bearer auth, Claude Code identity headers
  if (isOAuthToken(apiKey)) {
    const client = new Anthropic({
      apiKey: null,
      authToken: apiKey,
      baseURL: model.baseUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: mergeHeaders(
        {
          accept: "application/json",
          "anthropic-dangerous-direct-browser-access": "true",
          "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}`,
          "user-agent": `claude-cli/${claudeCodeVersion}`,
          "x-app": "cli",
        },
        model.headers,
        optionsHeaders,
      ),
    });

    return { client, isOAuthToken: true };
  }

  // API key auth
  const client = new Anthropic({
    apiKey,
    baseURL: model.baseUrl,
    dangerouslyAllowBrowser: true,
    defaultHeaders: mergeHeaders(
      {
        accept: "application/json",
        "anthropic-dangerous-direct-browser-access": "true",
        "anthropic-beta": betaFeatures.join(","),
      },
      model.headers,
      optionsHeaders,
    ),
  });

  return { client, isOAuthToken: false };
}
|
||||
|
||||
/**
 * Assemble the Messages API request payload: converted messages, system
 * prompt (with Claude Code identity prepended for OAuth tokens), cache
 * control, tools, thinking configuration, metadata, and tool choice.
 */
function buildParams(
  model: Model<"anthropic-messages">,
  context: Context,
  isOAuthToken: boolean,
  options?: AnthropicOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(
    model.baseUrl,
    options?.cacheRetention,
  );
  const params: MessageCreateParamsStreaming = {
    model: model.id,
    messages: convertMessages(
      context.messages,
      model,
      isOAuthToken,
      cacheControl,
    ),
    // Default budget: a third of the model's max output, truncated to int
    // via the `| 0` bitwise coercion.
    max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
    stream: true,
  };

  // For OAuth tokens, we MUST include Claude Code identity
  if (isOAuthToken) {
    params.system = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
    if (context.systemPrompt) {
      params.system.push({
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      });
    }
  } else if (context.systemPrompt) {
    // Add cache control to system prompt for non-OAuth tokens
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }

  // Temperature is incompatible with extended thinking (adaptive or budget-based).
  if (options?.temperature !== undefined && !options?.thinkingEnabled) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools, isOAuthToken);
  }

  // Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      // Adaptive thinking: Claude decides when and how much to think
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      // Budget-based thinking for older models
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens || 1024,
      };
    }
  }

  // Only forward metadata.user_id, and only when it is a string.
  if (options?.metadata) {
    const userId = options.metadata.user_id;
    if (typeof userId === "string") {
      params.metadata = { user_id: userId };
    }
  }

  // String shorthand ("auto"/"any"/"none") is wrapped into the object form.
  if (options?.toolChoice) {
    if (typeof options.toolChoice === "string") {
      params.tool_choice = { type: options.toolChoice };
    } else {
      params.tool_choice = options.toolChoice;
    }
  }

  return params;
}
|
||||
|
||||
// Normalize tool call IDs to match Anthropic's required pattern and length
|
||||
function normalizeToolCallId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
|
||||
}
|
||||
|
||||
/**
 * Convert internal messages to Anthropic MessageParam format.
 *
 * Drops empty text content, filters images on models without image input,
 * round-trips thinking blocks (including redacted ones), merges consecutive
 * toolResult messages into a single user message, and attaches cache_control
 * to the last user message so the conversation prefix is cached.
 */
function convertMessages(
  messages: Message[],
  model: Model<"anthropic-messages">,
  isOAuthToken: boolean,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const params: MessageParam[] = [];

  // Transform messages for cross-provider compatibility
  const transformedMessages = transformMessages(
    messages,
    model,
    normalizeToolCallId,
  );

  for (let i = 0; i < transformedMessages.length; i++) {
    const msg = transformedMessages[i];

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        // Skip whitespace-only user messages entirely.
        if (msg.content.trim().length > 0) {
          params.push({
            role: "user",
            content: sanitizeSurrogates(msg.content),
          });
        }
      } else {
        const blocks: ContentBlockParam[] = msg.content.map((item) => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            };
          } else {
            return {
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as
                  | "image/jpeg"
                  | "image/png"
                  | "image/gif"
                  | "image/webp",
                data: item.data,
              },
            };
          }
        });
        // Drop images for models that don't accept image input, then drop
        // whitespace-only text blocks.
        let filteredBlocks = !model?.input.includes("image")
          ? blocks.filter((b) => b.type !== "image")
          : blocks;
        filteredBlocks = filteredBlocks.filter((b) => {
          if (b.type === "text") {
            return b.text.trim().length > 0;
          }
          return true;
        });
        if (filteredBlocks.length === 0) continue;
        params.push({
          role: "user",
          content: filteredBlocks,
        });
      }
    } else if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];

      for (const block of msg.content) {
        if (block.type === "text") {
          if (block.text.trim().length === 0) continue;
          blocks.push({
            type: "text",
            text: sanitizeSurrogates(block.text),
          });
        } else if (block.type === "thinking") {
          // Redacted thinking: pass the opaque payload back as redacted_thinking
          if (block.redacted) {
            blocks.push({
              type: "redacted_thinking",
              data: block.thinkingSignature!,
            });
            continue;
          }
          if (block.thinking.trim().length === 0) continue;
          // If thinking signature is missing/empty (e.g., from aborted stream),
          // convert to plain text block without <thinking> tags to avoid API rejection
          // and prevent Claude from mimicking the tags in responses
          if (
            !block.thinkingSignature ||
            block.thinkingSignature.trim().length === 0
          ) {
            blocks.push({
              type: "text",
              text: sanitizeSurrogates(block.thinking),
            });
          } else {
            blocks.push({
              type: "thinking",
              thinking: sanitizeSurrogates(block.thinking),
              signature: block.thinkingSignature,
            });
          }
        } else if (block.type === "toolCall") {
          blocks.push({
            type: "tool_use",
            id: block.id,
            // OAuth (stealth) mode: present tool names in Claude Code casing.
            name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
            input: block.arguments ?? {},
          });
        }
      }
      if (blocks.length === 0) continue;
      params.push({
        role: "assistant",
        content: blocks,
      });
    } else if (msg.role === "toolResult") {
      // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
      const toolResults: ContentBlockParam[] = [];

      // Add the current tool result
      toolResults.push({
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: convertContentBlocks(msg.content),
        is_error: msg.isError,
      });

      // Look ahead for consecutive toolResult messages
      let j = i + 1;
      while (
        j < transformedMessages.length &&
        transformedMessages[j].role === "toolResult"
      ) {
        const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
        toolResults.push({
          type: "tool_result",
          tool_use_id: nextMsg.toolCallId,
          content: convertContentBlocks(nextMsg.content),
          is_error: nextMsg.isError,
        });
        j++;
      }

      // Skip the messages we've already processed
      i = j - 1;

      // Add a single user message with all tool results
      params.push({
        role: "user",
        content: toolResults,
      });
    }
  }

  // Add cache_control to the last user message to cache conversation history
  if (cacheControl && params.length > 0) {
    const lastMessage = params[params.length - 1];
    if (lastMessage.role === "user") {
      if (Array.isArray(lastMessage.content)) {
        const lastBlock = lastMessage.content[lastMessage.content.length - 1];
        if (
          lastBlock &&
          (lastBlock.type === "text" ||
            lastBlock.type === "image" ||
            lastBlock.type === "tool_result")
        ) {
          (lastBlock as any).cache_control = cacheControl;
        }
      } else if (typeof lastMessage.content === "string") {
        // String content can't carry cache_control; wrap in a text block.
        lastMessage.content = [
          {
            type: "text",
            text: lastMessage.content,
            cache_control: cacheControl,
          },
        ] as any;
      }
    }
  }

  return params;
}
|
||||
|
||||
function convertTools(
|
||||
tools: Tool[],
|
||||
isOAuthToken: boolean,
|
||||
): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema
|
||||
|
||||
return {
|
||||
name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: jsonSchema.properties || {},
|
||||
required: jsonSchema.required || [],
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
reason: Anthropic.Messages.StopReason | string,
|
||||
): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "error";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
|
||||
return "error";
|
||||
default:
|
||||
// Handle unknown stop reasons gracefully (API may add new values)
|
||||
throw new Error(`Unhandled stop reason: ${reason}`);
|
||||
}
|
||||
}
|
||||
297
packages/ai/src/providers/azure-openai-responses.ts
Normal file
297
packages/ai/src/providers/azure-openai-responses.ts
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
import { AzureOpenAI } from "openai";
|
||||
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import {
|
||||
convertResponsesMessages,
|
||||
convertResponsesTools,
|
||||
processResponsesStream,
|
||||
} from "./openai-responses-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
// Azure OpenAI API version used when the caller does not supply one.
const DEFAULT_AZURE_API_VERSION = "v1";
// NOTE(review): usage of this set is not visible in this chunk — presumably
// the providers whose tool-call conventions are accepted by the Azure
// Responses path; confirm against the rest of this file.
const AZURE_TOOL_CALL_PROVIDERS = new Set([
  "openai",
  "openai-codex",
  "opencode",
  "azure-openai-responses",
]);
|
||||
|
||||
function parseDeploymentNameMap(
|
||||
value: string | undefined,
|
||||
): Map<string, string> {
|
||||
const map = new Map<string, string>();
|
||||
if (!value) return map;
|
||||
for (const entry of value.split(",")) {
|
||||
const trimmed = entry.trim();
|
||||
if (!trimmed) continue;
|
||||
const [modelId, deploymentName] = trimmed.split("=", 2);
|
||||
if (!modelId || !deploymentName) continue;
|
||||
map.set(modelId.trim(), deploymentName.trim());
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
function resolveDeploymentName(
|
||||
model: Model<"azure-openai-responses">,
|
||||
options?: AzureOpenAIResponsesOptions,
|
||||
): string {
|
||||
if (options?.azureDeploymentName) {
|
||||
return options.azureDeploymentName;
|
||||
}
|
||||
const mappedDeployment = parseDeploymentNameMap(
|
||||
process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP,
|
||||
).get(model.id);
|
||||
return mappedDeployment || model.id;
|
||||
}
|
||||
|
||||
// Azure OpenAI Responses-specific options
export interface AzureOpenAIResponsesOptions extends StreamOptions {
  /** Reasoning effort forwarded to the Responses API. */
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
  /** Reasoning summary verbosity; null disables summaries. */
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
  /** Override for the Azure API version (defaults to DEFAULT_AZURE_API_VERSION). */
  azureApiVersion?: string;
  /** Azure resource name used to derive the endpoint when no base URL is given. */
  azureResourceName?: string;
  /** Fully-specified Azure endpoint base URL; takes precedence over the resource name. */
  azureBaseUrl?: string;
  /** Explicit deployment name; overrides the env mapping and the model id. */
  azureDeploymentName?: string;
}
|
||||
|
||||
/**
 * Generate function for Azure OpenAI Responses API.
 *
 * Resolves the deployment name, starts an async worker immediately, and
 * returns an event stream; the worker delegates SSE processing to
 * processResponsesStream and finishes with a "done" or "error" event.
 */
export const streamAzureOpenAIResponses: StreamFunction<
  "azure-openai-responses",
  AzureOpenAIResponsesOptions
> = (
  model: Model<"azure-openai-responses">,
  context: Context,
  options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Start async processing
  (async () => {
    const deploymentName = resolveDeploymentName(model, options);

    // Accumulator for the streamed assistant message; usage/cost are filled
    // in by processResponsesStream as events arrive.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "azure-openai-responses" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      // Create Azure OpenAI client
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, apiKey, options);
      const params = buildParams(model, context, options, deploymentName);
      options?.onPayload?.(params);
      const openaiStream = await client.responses.create(
        params,
        options?.signal ? { signal: options.signal } : undefined,
      );
      stream.push({ type: "start", partial: output });

      await processResponsesStream(openaiStream, output, stream, model);

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip transient stream-bookkeeping fields from partial blocks.
      for (const block of output.content)
        delete (block as { index?: number }).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
 * Simplified entry point: maps generic SimpleStreamOptions onto the Azure
 * Responses options, clamping the reasoning effort for models that do not
 * support the "xhigh" level.
 *
 * Throws if no API key is available from options or the environment.
 */
export const streamSimpleAzureOpenAIResponses: StreamFunction<
  "azure-openai-responses",
  SimpleStreamOptions
> = (
  model: Model<"azure-openai-responses">,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
  const apiKey = options?.apiKey || getEnvApiKey(model.provider);
  if (!apiKey) {
    throw new Error(`No API key for provider: ${model.provider}`);
  }

  const base = buildBaseOptions(model, options, apiKey);
  // "xhigh" is only forwarded verbatim when the model supports it; otherwise clamp.
  const reasoningEffort = supportsXhigh(model)
    ? options?.reasoning
    : clampReasoning(options?.reasoning);

  return streamAzureOpenAIResponses(model, context, {
    ...base,
    reasoningEffort,
  } satisfies AzureOpenAIResponsesOptions);
};
|
||||
|
||||
function normalizeAzureBaseUrl(baseUrl: string): string {
|
||||
return baseUrl.replace(/\/+$/, "");
|
||||
}
|
||||
|
||||
function buildDefaultBaseUrl(resourceName: string): string {
|
||||
return `https://${resourceName}.openai.azure.com/openai/v1`;
|
||||
}
|
||||
|
||||
/**
 * Resolve the Azure endpoint configuration for a request.
 *
 * API version precedence: option > AZURE_OPENAI_API_VERSION > default.
 * Base URL precedence: explicit base URL (option/env) > resource-name-derived
 * default URL > model.baseUrl. Throws if none of these yields a base URL.
 */
function resolveAzureConfig(
  model: Model<"azure-openai-responses">,
  options?: AzureOpenAIResponsesOptions,
): { baseUrl: string; apiVersion: string } {
  const apiVersion =
    options?.azureApiVersion ||
    process.env.AZURE_OPENAI_API_VERSION ||
    DEFAULT_AZURE_API_VERSION;

  // trim() guards against whitespace-only env values collapsing to falsy later.
  const baseUrl =
    options?.azureBaseUrl?.trim() ||
    process.env.AZURE_OPENAI_BASE_URL?.trim() ||
    undefined;
  const resourceName =
    options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;

  let resolvedBaseUrl = baseUrl;

  // Derive the conventional endpoint from the resource name when no URL given.
  if (!resolvedBaseUrl && resourceName) {
    resolvedBaseUrl = buildDefaultBaseUrl(resourceName);
  }

  // Last resort: a base URL configured on the model itself.
  if (!resolvedBaseUrl && model.baseUrl) {
    resolvedBaseUrl = model.baseUrl;
  }

  if (!resolvedBaseUrl) {
    throw new Error(
      "Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.",
    );
  }

  return {
    baseUrl: normalizeAzureBaseUrl(resolvedBaseUrl),
    apiVersion,
  };
}
|
||||
|
||||
/**
 * Construct an AzureOpenAI client for the resolved endpoint.
 *
 * Falls back to AZURE_OPENAI_API_KEY when no key is passed; option headers
 * override model headers on key collisions.
 *
 * @throws When neither an explicit key nor AZURE_OPENAI_API_KEY is set.
 */
function createClient(
  model: Model<"azure-openai-responses">,
  apiKey: string,
  options?: AzureOpenAIResponsesOptions,
) {
  if (!apiKey) {
    if (!process.env.AZURE_OPENAI_API_KEY) {
      throw new Error(
        "Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
      );
    }
    apiKey = process.env.AZURE_OPENAI_API_KEY;
  }

  // Merge headers: per-request options win over model-level defaults.
  const headers = { ...model.headers };

  if (options?.headers) {
    Object.assign(headers, options.headers);
  }

  const { baseUrl, apiVersion } = resolveAzureConfig(model, options);

  return new AzureOpenAI({
    apiKey,
    apiVersion,
    // Allow use from browser-like runtimes; the SDK blocks this by default.
    dangerouslyAllowBrowser: true,
    defaultHeaders: headers,
    baseURL: baseUrl,
  });
}
|
||||
|
||||
/**
 * Build the streaming request payload for the Azure Responses API.
 *
 * Converts messages/tools to Responses format, applies optional sampling
 * limits, and wires up reasoning: when an effort or summary is requested,
 * it is sent along with `include: ["reasoning.encrypted_content"]`; when the
 * model supports reasoning but none is requested, GPT-5 models get a special
 * developer message that suppresses reasoning.
 */
function buildParams(
  model: Model<"azure-openai-responses">,
  context: Context,
  options: AzureOpenAIResponsesOptions | undefined,
  deploymentName: string,
) {
  const messages = convertResponsesMessages(
    model,
    context,
    AZURE_TOOL_CALL_PROVIDERS,
  );

  const params: ResponseCreateParamsStreaming = {
    // Azure routes by deployment name rather than the raw model id.
    model: deploymentName,
    input: messages,
    stream: true,
    prompt_cache_key: options?.sessionId,
  };

  if (options?.maxTokens) {
    params.max_output_tokens = options?.maxTokens;
  }

  if (options?.temperature !== undefined) {
    params.temperature = options?.temperature;
  }

  if (context.tools) {
    params.tools = convertResponsesTools(context.tools);
  }

  if (model.reasoning) {
    if (options?.reasoningEffort || options?.reasoningSummary) {
      params.reasoning = {
        effort: options?.reasoningEffort || "medium",
        summary: options?.reasoningSummary || "auto",
      };
      // Needed so encrypted reasoning can be replayed in later turns.
      params.include = ["reasoning.encrypted_content"];
    } else {
      // NOTE(review): this checks model.name while other checks here use
      // model.id — confirm the display name reliably starts with "gpt-5".
      if (model.name.toLowerCase().startsWith("gpt-5")) {
        // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
        messages.push({
          role: "developer",
          content: [
            {
              type: "input_text",
              text: "# Juice: 0 !important",
            },
          ],
        });
      }
    }
  }

  return params;
}
|
||||
37
packages/ai/src/providers/github-copilot-headers.ts
Normal file
37
packages/ai/src/providers/github-copilot-headers.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import type { Message } from "../types.js";
|
||||
|
||||
// Copilot expects X-Initiator to indicate whether the request is user-initiated
|
||||
// or agent-initiated (e.g. follow-up after assistant/tool messages).
|
||||
export function inferCopilotInitiator(messages: Message[]): "user" | "agent" {
|
||||
const last = messages[messages.length - 1];
|
||||
return last && last.role !== "user" ? "agent" : "user";
|
||||
}
|
||||
|
||||
// Copilot requires Copilot-Vision-Request header when sending images
|
||||
export function hasCopilotVisionInput(messages: Message[]): boolean {
|
||||
return messages.some((msg) => {
|
||||
if (msg.role === "user" && Array.isArray(msg.content)) {
|
||||
return msg.content.some((c) => c.type === "image");
|
||||
}
|
||||
if (msg.role === "toolResult" && Array.isArray(msg.content)) {
|
||||
return msg.content.some((c) => c.type === "image");
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
export function buildCopilotDynamicHeaders(params: {
|
||||
messages: Message[];
|
||||
hasImages: boolean;
|
||||
}): Record<string, string> {
|
||||
const headers: Record<string, string> = {
|
||||
"X-Initiator": inferCopilotInitiator(params.messages),
|
||||
"Openai-Intent": "conversation-edits",
|
||||
};
|
||||
|
||||
if (params.hasImages) {
|
||||
headers["Copilot-Vision-Request"] = "true";
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
1074
packages/ai/src/providers/google-gemini-cli.ts
Normal file
1074
packages/ai/src/providers/google-gemini-cli.ts
Normal file
File diff suppressed because it is too large
Load diff
373
packages/ai/src/providers/google-shared.ts
Normal file
373
packages/ai/src/providers/google-shared.ts
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
/**
|
||||
* Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
|
||||
*/
|
||||
|
||||
import {
|
||||
type Content,
|
||||
FinishReason,
|
||||
FunctionCallingConfigMode,
|
||||
type Part,
|
||||
} from "@google/genai";
|
||||
import type {
|
||||
Context,
|
||||
ImageContent,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
Tool,
|
||||
} from "../types.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
type GoogleApiType =
|
||||
| "google-generative-ai"
|
||||
| "google-gemini-cli"
|
||||
| "google-vertex";
|
||||
|
||||
/**
|
||||
* Determines whether a streamed Gemini `Part` should be treated as "thinking".
|
||||
*
|
||||
* Protocol note (Gemini / Vertex AI thought signatures):
|
||||
* - `thought: true` is the definitive marker for thinking content (thought summaries).
|
||||
* - `thoughtSignature` is an encrypted representation of the model's internal thought process
|
||||
* used to preserve reasoning context across multi-turn interactions.
|
||||
* - `thoughtSignature` can appear on ANY part type (text, functionCall, etc.) - it does NOT
|
||||
* indicate the part itself is thinking content.
|
||||
* - For non-functionCall responses, the signature appears on the last part for context replay.
|
||||
* - When persisting/replaying model outputs, signature-bearing parts must be preserved as-is;
|
||||
* do not merge/move signatures across parts.
|
||||
*
|
||||
* See: https://ai.google.dev/gemini-api/docs/thought-signatures
|
||||
*/
|
||||
export function isThinkingPart(
|
||||
part: Pick<Part, "thought" | "thoughtSignature">,
|
||||
): boolean {
|
||||
return part.thought === true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retain thought signatures during streaming.
|
||||
*
|
||||
* Some backends only send `thoughtSignature` on the first delta for a given part/block; later deltas may omit it.
|
||||
* This helper preserves the last non-empty signature for the current block.
|
||||
*
|
||||
* Note: this does NOT merge or move signatures across distinct response parts. It only prevents
|
||||
* a signature from being overwritten with `undefined` within the same streamed block.
|
||||
*/
|
||||
export function retainThoughtSignature(
|
||||
existing: string | undefined,
|
||||
incoming: string | undefined,
|
||||
): string | undefined {
|
||||
if (typeof incoming === "string" && incoming.length > 0) return incoming;
|
||||
return existing;
|
||||
}
|
||||
|
||||
// Thought signatures must be base64 for Google APIs (TYPE_BYTES).
|
||||
const base64SignaturePattern = /^[A-Za-z0-9+/]+={0,2}$/;
|
||||
|
||||
// Sentinel value that tells the Gemini API to skip thought signature validation.
|
||||
// Used for unsigned function call parts (e.g. replayed from providers without thought signatures).
|
||||
// See: https://ai.google.dev/gemini-api/docs/thought-signatures
|
||||
const SKIP_THOUGHT_SIGNATURE = "skip_thought_signature_validator";
|
||||
|
||||
function isValidThoughtSignature(signature: string | undefined): boolean {
|
||||
if (!signature) return false;
|
||||
if (signature.length % 4 !== 0) return false;
|
||||
return base64SignaturePattern.test(signature);
|
||||
}
|
||||
|
||||
/**
|
||||
* Only keep signatures from the same provider/model and with valid base64.
|
||||
*/
|
||||
function resolveThoughtSignature(
|
||||
isSameProviderAndModel: boolean,
|
||||
signature: string | undefined,
|
||||
): string | undefined {
|
||||
return isSameProviderAndModel && isValidThoughtSignature(signature)
|
||||
? signature
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Models via Google APIs that require explicit tool call IDs in function calls/responses.
|
||||
*/
|
||||
export function requiresToolCallId(modelId: string): boolean {
|
||||
return modelId.startsWith("claude-") || modelId.startsWith("gpt-oss-");
|
||||
}
|
||||
|
||||
/**
 * Convert internal messages to Gemini Content[] format.
 *
 * - user messages become "user" contents (images dropped for non-vision models);
 * - assistant messages become "model" contents, keeping thinking blocks and
 *   thought signatures only when they came from the same provider AND model;
 * - toolResult messages become "user" contents with functionResponse parts,
 *   merged into the previous user turn when it already holds function responses.
 * Empty messages are skipped entirely.
 */
export function convertMessages<T extends GoogleApiType>(
  model: Model<T>,
  context: Context,
): Content[] {
  const contents: Content[] = [];
  // Some models (see requiresToolCallId) need IDs restricted to [a-zA-Z0-9_-], max 64 chars.
  const normalizeToolCallId = (id: string): string => {
    if (!requiresToolCallId(model.id)) return id;
    return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
  };

  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  for (const msg of transformedMessages) {
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        contents.push({
          role: "user",
          parts: [{ text: sanitizeSurrogates(msg.content) }],
        });
      } else {
        // Block array: map text blocks to text parts, everything else to inlineData.
        const parts: Part[] = msg.content.map((item) => {
          if (item.type === "text") {
            return { text: sanitizeSurrogates(item.text) };
          } else {
            return {
              inlineData: {
                mimeType: item.mimeType,
                data: item.data,
              },
            };
          }
        });
        // Drop non-text parts for models without image input support.
        const filteredParts = !model.input.includes("image")
          ? parts.filter((p) => p.text !== undefined)
          : parts;
        if (filteredParts.length === 0) continue;
        contents.push({
          role: "user",
          parts: filteredParts,
        });
      }
    } else if (msg.role === "assistant") {
      const parts: Part[] = [];
      // Check if message is from same provider and model - only then keep thinking blocks
      const isSameProviderAndModel =
        msg.provider === model.provider && msg.model === model.id;

      for (const block of msg.content) {
        if (block.type === "text") {
          // Skip empty text blocks - they can cause issues with some models (e.g. Claude via Antigravity)
          if (!block.text || block.text.trim() === "") continue;
          const thoughtSignature = resolveThoughtSignature(
            isSameProviderAndModel,
            block.textSignature,
          );
          parts.push({
            text: sanitizeSurrogates(block.text),
            ...(thoughtSignature && { thoughtSignature }),
          });
        } else if (block.type === "thinking") {
          // Skip empty thinking blocks
          if (!block.thinking || block.thinking.trim() === "") continue;
          // Only keep as thinking block if same provider AND same model
          // Otherwise convert to plain text (no tags to avoid model mimicking them)
          if (isSameProviderAndModel) {
            const thoughtSignature = resolveThoughtSignature(
              isSameProviderAndModel,
              block.thinkingSignature,
            );
            parts.push({
              thought: true,
              text: sanitizeSurrogates(block.thinking),
              ...(thoughtSignature && { thoughtSignature }),
            });
          } else {
            parts.push({
              text: sanitizeSurrogates(block.thinking),
            });
          }
        } else if (block.type === "toolCall") {
          const thoughtSignature = resolveThoughtSignature(
            isSameProviderAndModel,
            block.thoughtSignature,
          );
          // Gemini 3 requires thoughtSignature on all function calls when thinking mode is enabled.
          // Use the skip_thought_signature_validator sentinel for unsigned function calls
          // (e.g. replayed from providers without thought signatures like Claude via Antigravity).
          const isGemini3 = model.id.toLowerCase().includes("gemini-3");
          const effectiveSignature =
            thoughtSignature ||
            (isGemini3 ? SKIP_THOUGHT_SIGNATURE : undefined);
          const part: Part = {
            functionCall: {
              name: block.name,
              args: block.arguments ?? {},
              ...(requiresToolCallId(model.id) ? { id: block.id } : {}),
            },
            ...(effectiveSignature && { thoughtSignature: effectiveSignature }),
          };
          parts.push(part);
        }
      }

      if (parts.length === 0) continue;
      contents.push({
        role: "model",
        parts,
      });
    } else if (msg.role === "toolResult") {
      // Extract text and image content
      const textContent = msg.content.filter(
        (c): c is TextContent => c.type === "text",
      );
      const textResult = textContent.map((c) => c.text).join("\n");
      const imageContent = model.input.includes("image")
        ? msg.content.filter((c): c is ImageContent => c.type === "image")
        : [];

      const hasText = textResult.length > 0;
      const hasImages = imageContent.length > 0;

      // Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
      // See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
      // Older models don't support this, so we put images in a separate user message.
      const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");

      // Use "output" key for success, "error" key for errors as per SDK documentation
      const responseValue = hasText
        ? sanitizeSurrogates(textResult)
        : hasImages
          ? "(see attached image)"
          : "";

      const imageParts: Part[] = imageContent.map((imageBlock) => ({
        inlineData: {
          mimeType: imageBlock.mimeType,
          data: imageBlock.data,
        },
      }));

      const includeId = requiresToolCallId(model.id);
      const functionResponsePart: Part = {
        functionResponse: {
          name: msg.toolName,
          response: msg.isError
            ? { error: responseValue }
            : { output: responseValue },
          // Nest images inside functionResponse.parts for Gemini 3
          ...(hasImages &&
            supportsMultimodalFunctionResponse && { parts: imageParts }),
          ...(includeId ? { id: msg.toolCallId } : {}),
        },
      };

      // Cloud Code Assist API requires all function responses to be in a single user turn.
      // Check if the last content is already a user turn with function responses and merge.
      const lastContent = contents[contents.length - 1];
      if (
        lastContent?.role === "user" &&
        lastContent.parts?.some((p) => p.functionResponse)
      ) {
        lastContent.parts.push(functionResponsePart);
      } else {
        contents.push({
          role: "user",
          parts: [functionResponsePart],
        });
      }

      // For older models, add images in a separate user message
      if (hasImages && !supportsMultimodalFunctionResponse) {
        contents.push({
          role: "user",
          parts: [{ text: "Tool result image:" }, ...imageParts],
        });
      }
    }
  }

  return contents;
}
|
||||
|
||||
/**
|
||||
* Convert tools to Gemini function declarations format.
|
||||
*
|
||||
* By default uses `parametersJsonSchema` which supports full JSON Schema (including
|
||||
* anyOf, oneOf, const, etc.). Set `useParameters` to true to use the legacy `parameters`
|
||||
* field instead (OpenAPI 3.03 Schema). This is needed for Cloud Code Assist with Claude
|
||||
* models, where the API translates `parameters` into Anthropic's `input_schema`.
|
||||
*/
|
||||
export function convertTools(
|
||||
tools: Tool[],
|
||||
useParameters = false,
|
||||
): { functionDeclarations: Record<string, unknown>[] }[] | undefined {
|
||||
if (tools.length === 0) return undefined;
|
||||
return [
|
||||
{
|
||||
functionDeclarations: tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
...(useParameters
|
||||
? { parameters: tool.parameters }
|
||||
: { parametersJsonSchema: tool.parameters }),
|
||||
})),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Map tool choice string to Gemini FunctionCallingConfigMode.
|
||||
*/
|
||||
export function mapToolChoice(choice: string): FunctionCallingConfigMode {
|
||||
switch (choice) {
|
||||
case "auto":
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
case "none":
|
||||
return FunctionCallingConfigMode.NONE;
|
||||
case "any":
|
||||
return FunctionCallingConfigMode.ANY;
|
||||
default:
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Map Gemini FinishReason to our StopReason.
 *
 * STOP → "stop", MAX_TOKENS → "length"; every other known finish reason
 * (safety blocks, recitation, malformed calls, etc.) maps to "error".
 * The default branch is an exhaustiveness check: adding a new FinishReason
 * enum member without handling it fails to compile and throws at runtime.
 */
export function mapStopReason(reason: FinishReason): StopReason {
  switch (reason) {
    case FinishReason.STOP:
      return "stop";
    case FinishReason.MAX_TOKENS:
      return "length";
    case FinishReason.BLOCKLIST:
    case FinishReason.PROHIBITED_CONTENT:
    case FinishReason.SPII:
    case FinishReason.SAFETY:
    case FinishReason.IMAGE_SAFETY:
    case FinishReason.IMAGE_PROHIBITED_CONTENT:
    case FinishReason.IMAGE_RECITATION:
    case FinishReason.IMAGE_OTHER:
    case FinishReason.RECITATION:
    case FinishReason.FINISH_REASON_UNSPECIFIED:
    case FinishReason.OTHER:
    case FinishReason.LANGUAGE:
    case FinishReason.MALFORMED_FUNCTION_CALL:
    case FinishReason.UNEXPECTED_TOOL_CALL:
    case FinishReason.NO_IMAGE:
      return "error";
    default: {
      const _exhaustive: never = reason;
      throw new Error(`Unhandled stop reason: ${_exhaustive}`);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Map string finish reason to our StopReason (for raw API responses).
|
||||
*/
|
||||
export function mapStopReasonString(reason: string): StopReason {
|
||||
switch (reason) {
|
||||
case "STOP":
|
||||
return "stop";
|
||||
case "MAX_TOKENS":
|
||||
return "length";
|
||||
default:
|
||||
return "error";
|
||||
}
|
||||
}
|
||||
529
packages/ai/src/providers/google-vertex.ts
Normal file
529
packages/ai/src/providers/google-vertex.ts
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
import {
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type ThinkingConfig,
|
||||
ThinkingLevel,
|
||||
} from "@google/genai";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
ThinkingLevel as PiThinkingLevel,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import type { GoogleThinkingLevel } from "./google-gemini-cli.js";
|
||||
import {
|
||||
convertMessages,
|
||||
convertTools,
|
||||
isThinkingPart,
|
||||
mapStopReason,
|
||||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
/** Options specific to the Google Vertex AI provider. */
export interface GoogleVertexOptions extends StreamOptions {
  // Function-calling mode; mapped onto FunctionCallingConfigMode.
  toolChoice?: "auto" | "none" | "any";
  // Thinking/reasoning configuration for models that support it.
  thinking?: {
    enabled: boolean;
    budgetTokens?: number; // -1 for dynamic, 0 to disable
    // Discrete thinking level (Gemini 3-style models).
    level?: GoogleThinkingLevel;
  };
  // GCP project ID; falls back to GOOGLE_CLOUD_PROJECT / GCLOUD_PROJECT (see resolveProject).
  project?: string;
  // Vertex AI region; falls back to GOOGLE_CLOUD_LOCATION (see resolveLocation).
  location?: string;
}
|
||||
|
||||
// Vertex AI API version passed to the GoogleGenAI client.
const API_VERSION = "v1";

// Maps our GoogleThinkingLevel strings onto the @google/genai ThinkingLevel enum.
const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
  THINKING_LEVEL_UNSPECIFIED: ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
  MINIMAL: ThinkingLevel.MINIMAL,
  LOW: ThinkingLevel.LOW,
  MEDIUM: ThinkingLevel.MEDIUM,
  HIGH: ThinkingLevel.HIGH,
};

// Counter for generating unique tool call IDs
// (module-level so IDs stay unique across concurrent streams in one process).
let toolCallCounter = 0;
|
||||
|
||||
/**
 * Stream a completion from Google Vertex AI.
 *
 * Returns an event stream immediately and fills it from an async IIFE.
 * Text and thinking parts are accumulated into contiguous blocks (a block is
 * closed and a new one opened whenever the part kind flips); function calls
 * close the current block and emit start/delta/end in one burst. The partial
 * AssistantMessage `output` is mutated in place as chunks arrive.
 */
export const streamGoogleVertex: StreamFunction<
  "google-vertex",
  GoogleVertexOptions
> = (
  model: Model<"google-vertex">,
  context: Context,
  options?: GoogleVertexOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the final assistant message; starts empty with zeroed usage.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "google-vertex" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const project = resolveProject(options);
      const location = resolveLocation(options);
      const client = createClient(model, project, location, options?.headers);
      const params = buildParams(model, context, options);
      options?.onPayload?.(params);
      const googleStream = await client.models.generateContentStream(params);

      stream.push({ type: "start", partial: output });
      // The text/thinking block currently being appended to, if any.
      let currentBlock: TextContent | ThinkingContent | null = null;
      const blocks = output.content;
      // Index of the most recently pushed content block.
      const blockIndex = () => blocks.length - 1;
      for await (const chunk of googleStream) {
        const candidate = chunk.candidates?.[0];
        if (candidate?.content?.parts) {
          for (const part of candidate.content.parts) {
            if (part.text !== undefined) {
              const isThinking = isThinkingPart(part);
              // Open a new block when there is none or the part kind changed.
              if (
                !currentBlock ||
                (isThinking && currentBlock.type !== "thinking") ||
                (!isThinking && currentBlock.type !== "text")
              ) {
                // Close the previous block first (end event carries its full content).
                if (currentBlock) {
                  if (currentBlock.type === "text") {
                    stream.push({
                      type: "text_end",
                      contentIndex: blocks.length - 1,
                      content: currentBlock.text,
                      partial: output,
                    });
                  } else {
                    stream.push({
                      type: "thinking_end",
                      contentIndex: blockIndex(),
                      content: currentBlock.thinking,
                      partial: output,
                    });
                  }
                }
                if (isThinking) {
                  currentBlock = {
                    type: "thinking",
                    thinking: "",
                    thinkingSignature: undefined,
                  };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "thinking_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                } else {
                  currentBlock = { type: "text", text: "" };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "text_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                }
              }
              if (currentBlock.type === "thinking") {
                currentBlock.thinking += part.text;
                // Keep the last non-empty signature; later deltas may omit it.
                currentBlock.thinkingSignature = retainThoughtSignature(
                  currentBlock.thinkingSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "thinking_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              } else {
                currentBlock.text += part.text;
                currentBlock.textSignature = retainThoughtSignature(
                  currentBlock.textSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "text_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              }
            }

            if (part.functionCall) {
              // A function call terminates any open text/thinking block.
              if (currentBlock) {
                if (currentBlock.type === "text") {
                  stream.push({
                    type: "text_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.text,
                    partial: output,
                  });
                } else {
                  stream.push({
                    type: "thinking_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.thinking,
                    partial: output,
                  });
                }
                currentBlock = null;
              }

              // Generate an ID when the API gave none or the given one collides
              // with an earlier tool call in this message.
              const providedId = part.functionCall.id;
              const needsNewId =
                !providedId ||
                output.content.some(
                  (b) => b.type === "toolCall" && b.id === providedId,
                );
              const toolCallId = needsNewId
                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
                : providedId;

              const toolCall: ToolCall = {
                type: "toolCall",
                id: toolCallId,
                name: part.functionCall.name || "",
                arguments:
                  (part.functionCall.args as Record<string, any>) ?? {},
                ...(part.thoughtSignature && {
                  thoughtSignature: part.thoughtSignature,
                }),
              };

              // Function call args arrive whole, so emit start/delta/end together.
              output.content.push(toolCall);
              stream.push({
                type: "toolcall_start",
                contentIndex: blockIndex(),
                partial: output,
              });
              stream.push({
                type: "toolcall_delta",
                contentIndex: blockIndex(),
                delta: JSON.stringify(toolCall.arguments),
                partial: output,
              });
              stream.push({
                type: "toolcall_end",
                contentIndex: blockIndex(),
                toolCall,
                partial: output,
              });
            }
          }
        }

        if (candidate?.finishReason) {
          output.stopReason = mapStopReason(candidate.finishReason);
          // Any tool call in the message overrides the reported finish reason.
          if (output.content.some((b) => b.type === "toolCall")) {
            output.stopReason = "toolUse";
          }
        }

        // Usage metadata is cumulative; keep overwriting with the latest chunk.
        if (chunk.usageMetadata) {
          output.usage = {
            input: chunk.usageMetadata.promptTokenCount || 0,
            output:
              (chunk.usageMetadata.candidatesTokenCount || 0) +
              (chunk.usageMetadata.thoughtsTokenCount || 0),
            cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
            cacheWrite: 0,
            totalTokens: chunk.usageMetadata.totalTokenCount || 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }
      }

      // Close the trailing block once the stream is exhausted.
      if (currentBlock) {
        if (currentBlock.type === "text") {
          stream.push({
            type: "text_end",
            contentIndex: blockIndex(),
            content: currentBlock.text,
            partial: output,
          });
        } else {
          stream.push({
            type: "thinking_end",
            contentIndex: blockIndex(),
            content: currentBlock.thinking,
            partial: output,
          });
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Remove internal index property used during streaming
      for (const block of output.content) {
        if ("index" in block) {
          delete (block as { index?: number }).index;
        }
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
 * Simplified entry point: maps generic SimpleStreamOptions onto Vertex
 * thinking configuration.
 *
 * - no reasoning requested → thinking disabled;
 * - Gemini 3 Pro/Flash → discrete thinking level;
 * - other reasoning models → a token budget derived from the effort.
 */
export const streamSimpleGoogleVertex: StreamFunction<
  "google-vertex",
  SimpleStreamOptions
> = (
  model: Model<"google-vertex">,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
  const base = buildBaseOptions(model, options, undefined);
  if (!options?.reasoning) {
    return streamGoogleVertex(model, context, {
      ...base,
      thinking: { enabled: false },
    } satisfies GoogleVertexOptions);
  }

  const effort = clampReasoning(options.reasoning)!;
  // The Gemini helpers below are typed for the generative-ai API; the cast
  // reuses them for the Vertex-flavored model descriptor.
  const geminiModel = model as unknown as Model<"google-generative-ai">;

  if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
    return streamGoogleVertex(model, context, {
      ...base,
      thinking: {
        enabled: true,
        level: getGemini3ThinkingLevel(effort, geminiModel),
      },
    } satisfies GoogleVertexOptions);
  }

  return streamGoogleVertex(model, context, {
    ...base,
    thinking: {
      enabled: true,
      budgetTokens: getGoogleBudget(
        geminiModel,
        effort,
        options.thinkingBudgets,
      ),
    },
  } satisfies GoogleVertexOptions);
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"google-vertex">,
|
||||
project: string,
|
||||
location: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
): GoogleGenAI {
|
||||
const httpOptions: { headers?: Record<string, string> } = {};
|
||||
|
||||
if (model.headers || optionsHeaders) {
|
||||
httpOptions.headers = { ...model.headers, ...optionsHeaders };
|
||||
}
|
||||
|
||||
const hasHttpOptions = Object.values(httpOptions).some(Boolean);
|
||||
|
||||
return new GoogleGenAI({
|
||||
vertexai: true,
|
||||
project,
|
||||
location,
|
||||
apiVersion: API_VERSION,
|
||||
httpOptions: hasHttpOptions ? httpOptions : undefined,
|
||||
});
|
||||
}
|
||||
|
||||
function resolveProject(options?: GoogleVertexOptions): string {
|
||||
const project =
|
||||
options?.project ||
|
||||
process.env.GOOGLE_CLOUD_PROJECT ||
|
||||
process.env.GCLOUD_PROJECT;
|
||||
if (!project) {
|
||||
throw new Error(
|
||||
"Vertex AI requires a project ID. Set GOOGLE_CLOUD_PROJECT/GCLOUD_PROJECT or pass project in options.",
|
||||
);
|
||||
}
|
||||
return project;
|
||||
}
|
||||
|
||||
function resolveLocation(options?: GoogleVertexOptions): string {
|
||||
const location = options?.location || process.env.GOOGLE_CLOUD_LOCATION;
|
||||
if (!location) {
|
||||
throw new Error(
|
||||
"Vertex AI requires a location. Set GOOGLE_CLOUD_LOCATION or pass location in options.",
|
||||
);
|
||||
}
|
||||
return location;
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"google-vertex">,
|
||||
context: Context,
|
||||
options: GoogleVertexOptions = {},
|
||||
): GenerateContentParameters {
|
||||
const contents = convertMessages(model, context);
|
||||
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && {
|
||||
systemInstruction: sanitizeSurrogates(context.systemPrompt),
|
||||
}),
|
||||
...(context.tools &&
|
||||
context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
if (context.tools && context.tools.length > 0 && options.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
} else {
|
||||
config.toolConfig = undefined;
|
||||
}
|
||||
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
const thinkingConfig: ThinkingConfig = { includeThoughts: true };
|
||||
if (options.thinking.level !== undefined) {
|
||||
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level];
|
||||
} else if (options.thinking.budgetTokens !== undefined) {
|
||||
thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
|
||||
}
|
||||
config.thinkingConfig = thinkingConfig;
|
||||
}
|
||||
|
||||
if (options.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
const params: GenerateContentParameters = {
|
||||
model: model.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Effort levels handled by the level/budget mappers below; "xhigh" is excluded
// because these Gemini mappings only cover minimal/low/medium/high.
type ClampedThinkingLevel = Exclude<PiThinkingLevel, "xhigh">;
|
||||
|
||||
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-flash/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function getGemini3ThinkingLevel(
|
||||
effort: ClampedThinkingLevel,
|
||||
model: Model<"google-generative-ai">,
|
||||
): GoogleThinkingLevel {
|
||||
if (isGemini3ProModel(model)) {
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
return "MINIMAL";
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
return "MEDIUM";
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
|
||||
function getGoogleBudget(
|
||||
model: Model<"google-generative-ai">,
|
||||
effort: ClampedThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): number {
|
||||
if (customBudgets?.[effort] !== undefined) {
|
||||
return customBudgets[effort]!;
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-pro")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 32768,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-flash")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 24576,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
501
packages/ai/src/providers/google.ts
Normal file
501
packages/ai/src/providers/google.ts
Normal file
|
|
@ -0,0 +1,501 @@
|
|||
import {
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type ThinkingConfig,
|
||||
} from "@google/genai";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingBudgets,
|
||||
ThinkingContent,
|
||||
ThinkingLevel,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import type { GoogleThinkingLevel } from "./google-gemini-cli.js";
|
||||
import {
|
||||
convertMessages,
|
||||
convertTools,
|
||||
isThinkingPart,
|
||||
mapStopReason,
|
||||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
/**
 * Provider-specific options for the Google Generative AI (Gemini) API.
 */
export interface GoogleOptions extends StreamOptions {
  /** Function-calling mode: let the model decide, forbid tools, or force a call. */
  toolChoice?: "auto" | "none" | "any";
  /** Reasoning ("thinking") configuration for models that support it. */
  thinking?: {
    enabled: boolean;
    budgetTokens?: number; // -1 for dynamic, 0 to disable
    /** Gemini 3 named thinking level; takes precedence over budgetTokens when set. */
    level?: GoogleThinkingLevel;
  };
}
|
||||
|
||||
// Counter for generating unique tool call IDs when the API omits one or
// returns a duplicate; combined with the call name and Date.now() to build
// a fresh ID. Module-level so IDs stay unique across concurrent streams.
let toolCallCounter = 0;
|
||||
|
||||
/**
 * Streams a completion from the Google Generative AI (Gemini) API.
 *
 * Returns immediately with an AssistantMessageEventStream; the request runs
 * in a detached async task that pushes start/delta/end events and finishes
 * with either a `done` event or an `error` event (on failure or abort).
 * Text, thinking, and tool-call blocks are accumulated into `output.content`
 * as chunks arrive.
 */
export const streamGoogle: StreamFunction<
  "google-generative-ai",
  GoogleOptions
> = (
  model: Model<"google-generative-ai">,
  context: Context,
  options?: GoogleOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Skeleton assistant message, filled in incrementally while streaming.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "google-generative-ai" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(params);
      const googleStream = await client.models.generateContentStream(params);

      stream.push({ type: "start", partial: output });
      // The block currently being appended to (text or thinking); tool calls
      // are emitted atomically and never become the current block.
      let currentBlock: TextContent | ThinkingContent | null = null;
      const blocks = output.content;
      const blockIndex = () => blocks.length - 1;
      for await (const chunk of googleStream) {
        const candidate = chunk.candidates?.[0];
        if (candidate?.content?.parts) {
          for (const part of candidate.content.parts) {
            if (part.text !== undefined) {
              const isThinking = isThinkingPart(part);
              // Open a new block when there is none, or when the part kind
              // (text vs thinking) differs from the open block — closing the
              // previous block first.
              if (
                !currentBlock ||
                (isThinking && currentBlock.type !== "thinking") ||
                (!isThinking && currentBlock.type !== "text")
              ) {
                if (currentBlock) {
                  if (currentBlock.type === "text") {
                    stream.push({
                      type: "text_end",
                      contentIndex: blocks.length - 1,
                      content: currentBlock.text,
                      partial: output,
                    });
                  } else {
                    stream.push({
                      type: "thinking_end",
                      contentIndex: blockIndex(),
                      content: currentBlock.thinking,
                      partial: output,
                    });
                  }
                }
                if (isThinking) {
                  currentBlock = {
                    type: "thinking",
                    thinking: "",
                    thinkingSignature: undefined,
                  };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "thinking_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                } else {
                  currentBlock = { type: "text", text: "" };
                  output.content.push(currentBlock);
                  stream.push({
                    type: "text_start",
                    contentIndex: blockIndex(),
                    partial: output,
                  });
                }
              }
              // Append the delta and carry over any thought signature
              // attached to this part via retainThoughtSignature.
              if (currentBlock.type === "thinking") {
                currentBlock.thinking += part.text;
                currentBlock.thinkingSignature = retainThoughtSignature(
                  currentBlock.thinkingSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "thinking_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              } else {
                currentBlock.text += part.text;
                currentBlock.textSignature = retainThoughtSignature(
                  currentBlock.textSignature,
                  part.thoughtSignature,
                );
                stream.push({
                  type: "text_delta",
                  contentIndex: blockIndex(),
                  delta: part.text,
                  partial: output,
                });
              }
            }

            if (part.functionCall) {
              // A function call closes any open text/thinking block first.
              if (currentBlock) {
                if (currentBlock.type === "text") {
                  stream.push({
                    type: "text_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.text,
                    partial: output,
                  });
                } else {
                  stream.push({
                    type: "thinking_end",
                    contentIndex: blockIndex(),
                    content: currentBlock.thinking,
                    partial: output,
                  });
                }
                currentBlock = null;
              }

              // Generate unique ID if not provided or if it's a duplicate
              const providedId = part.functionCall.id;
              const needsNewId =
                !providedId ||
                output.content.some(
                  (b) => b.type === "toolCall" && b.id === providedId,
                );
              const toolCallId = needsNewId
                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
                : providedId;

              const toolCall: ToolCall = {
                type: "toolCall",
                id: toolCallId,
                name: part.functionCall.name || "",
                arguments:
                  (part.functionCall.args as Record<string, any>) ?? {},
                ...(part.thoughtSignature && {
                  thoughtSignature: part.thoughtSignature,
                }),
              };

              // Tool calls arrive whole, so start/delta/end are emitted
              // back-to-back for the same content index.
              output.content.push(toolCall);
              stream.push({
                type: "toolcall_start",
                contentIndex: blockIndex(),
                partial: output,
              });
              stream.push({
                type: "toolcall_delta",
                contentIndex: blockIndex(),
                delta: JSON.stringify(toolCall.arguments),
                partial: output,
              });
              stream.push({
                type: "toolcall_end",
                contentIndex: blockIndex(),
                toolCall,
                partial: output,
              });
            }
          }
        }

        // The presence of any tool call overrides the API's finish reason.
        if (candidate?.finishReason) {
          output.stopReason = mapStopReason(candidate.finishReason);
          if (output.content.some((b) => b.type === "toolCall")) {
            output.stopReason = "toolUse";
          }
        }

        // Usage metadata may arrive on multiple chunks; the latest wins.
        // Output tokens include thinking tokens.
        if (chunk.usageMetadata) {
          output.usage = {
            input: chunk.usageMetadata.promptTokenCount || 0,
            output:
              (chunk.usageMetadata.candidatesTokenCount || 0) +
              (chunk.usageMetadata.thoughtsTokenCount || 0),
            cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
            cacheWrite: 0,
            totalTokens: chunk.usageMetadata.totalTokenCount || 0,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }
      }

      // Close whatever block was still open when the stream ended.
      if (currentBlock) {
        if (currentBlock.type === "text") {
          stream.push({
            type: "text_end",
            contentIndex: blockIndex(),
            content: currentBlock.text,
            partial: output,
          });
        } else {
          stream.push({
            type: "thinking_end",
            contentIndex: blockIndex(),
            content: currentBlock.thinking,
            partial: output,
          });
        }
      }

      // Abort may have fired while draining without raising an exception.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // A terminal aborted/error stop reason without a thrown exception is
      // still treated as a failure.
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Remove internal index property used during streaming
      for (const block of output.content) {
        if ("index" in block) {
          delete (block as { index?: number }).index;
        }
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleGoogle: StreamFunction<
|
||||
"google-generative-ai",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: { enabled: false },
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(options.reasoning)!;
|
||||
const googleModel = model as Model<"google-generative-ai">;
|
||||
|
||||
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
level: getGemini3ThinkingLevel(effort, googleModel),
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: getGoogleBudget(
|
||||
googleModel,
|
||||
effort,
|
||||
options.thinkingBudgets,
|
||||
),
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"google-generative-ai">,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
): GoogleGenAI {
|
||||
const httpOptions: {
|
||||
baseUrl?: string;
|
||||
apiVersion?: string;
|
||||
headers?: Record<string, string>;
|
||||
} = {};
|
||||
if (model.baseUrl) {
|
||||
httpOptions.baseUrl = model.baseUrl;
|
||||
httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
|
||||
}
|
||||
if (model.headers || optionsHeaders) {
|
||||
httpOptions.headers = { ...model.headers, ...optionsHeaders };
|
||||
}
|
||||
|
||||
return new GoogleGenAI({
|
||||
apiKey,
|
||||
httpOptions: Object.keys(httpOptions).length > 0 ? httpOptions : undefined,
|
||||
});
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options: GoogleOptions = {},
|
||||
): GenerateContentParameters {
|
||||
const contents = convertMessages(model, context);
|
||||
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && {
|
||||
systemInstruction: sanitizeSurrogates(context.systemPrompt),
|
||||
}),
|
||||
...(context.tools &&
|
||||
context.tools.length > 0 && { tools: convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
if (context.tools && context.tools.length > 0 && options.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
} else {
|
||||
config.toolConfig = undefined;
|
||||
}
|
||||
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
const thinkingConfig: ThinkingConfig = { includeThoughts: true };
|
||||
if (options.thinking.level !== undefined) {
|
||||
// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
|
||||
thinkingConfig.thinkingLevel = options.thinking.level as any;
|
||||
} else if (options.thinking.budgetTokens !== undefined) {
|
||||
thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
|
||||
}
|
||||
config.thinkingConfig = thinkingConfig;
|
||||
}
|
||||
|
||||
if (options.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
const params: GenerateContentParameters = {
|
||||
model: model.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Effort levels handled by the level/budget mappers below; "xhigh" is excluded
// because these Gemini mappings only cover minimal/low/medium/high.
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
||||
|
||||
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-flash/.test(model.id.toLowerCase());
|
||||
}
|
||||
|
||||
function getGemini3ThinkingLevel(
|
||||
effort: ClampedThinkingLevel,
|
||||
model: Model<"google-generative-ai">,
|
||||
): GoogleThinkingLevel {
|
||||
if (isGemini3ProModel(model)) {
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
switch (effort) {
|
||||
case "minimal":
|
||||
return "MINIMAL";
|
||||
case "low":
|
||||
return "LOW";
|
||||
case "medium":
|
||||
return "MEDIUM";
|
||||
case "high":
|
||||
return "HIGH";
|
||||
}
|
||||
}
|
||||
|
||||
function getGoogleBudget(
|
||||
model: Model<"google-generative-ai">,
|
||||
effort: ClampedThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): number {
|
||||
if (customBudgets?.[effort] !== undefined) {
|
||||
return customBudgets[effort]!;
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-pro")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 32768,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
if (model.id.includes("2.5-flash")) {
|
||||
const budgets: Record<ClampedThinkingLevel, number> = {
|
||||
minimal: 128,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 24576,
|
||||
};
|
||||
return budgets[effort];
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
688
packages/ai/src/providers/mistral.ts
Normal file
688
packages/ai/src/providers/mistral.ts
Normal file
|
|
@ -0,0 +1,688 @@
|
|||
import { Mistral } from "@mistralai/mistralai";
|
||||
import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js";
|
||||
import type {
|
||||
ChatCompletionStreamRequest,
|
||||
ChatCompletionStreamRequestMessages,
|
||||
CompletionEvent,
|
||||
ContentChunk,
|
||||
FunctionTool,
|
||||
} from "@mistralai/mistralai/models/components/index.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Message,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
// Mistral accepts tool-call IDs of exactly this many alphanumeric characters;
// IDs from other providers are normalized to this length before being sent.
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
// Cap on how much of an error response body is embedded in error messages.
const MAX_MISTRAL_ERROR_BODY_CHARS = 4000;
|
||||
|
||||
/**
 * Provider-specific options for the Mistral API.
 */
export interface MistralOptions extends StreamOptions {
  /** Function-calling mode, or a specific function the model must call. */
  toolChoice?:
    | "auto"
    | "none"
    | "any"
    | "required"
    | { type: "function"; function: { name: string } };
  /** Set to "reasoning" to request Mistral's reasoning prompt mode. */
  promptMode?: "reasoning";
}
|
||||
|
||||
/**
 * Stream responses from Mistral using `chat.stream`.
 *
 * Returns immediately with an event stream; the request runs in a detached
 * async task that pushes a `start` event, delegates chunk handling to
 * `consumeChatStream`, and finishes with `done` (or `error` on failure or
 * abort). Tool-call IDs in the outgoing history are normalized to Mistral's
 * required format before the payload is built.
 */
export const streamMistral: StreamFunction<
  "mistral-conversations",
  MistralOptions
> = (
  model: Model<"mistral-conversations">,
  context: Context,
  options?: MistralOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Skeleton assistant message, filled in as stream events arrive.
    const output = createOutput(model);

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider);
      if (!apiKey) {
        throw new Error(`No API key for provider: ${model.provider}`);
      }

      // Intentionally per-request: avoids shared SDK mutable state across concurrent consumers.
      const mistral = new Mistral({
        apiKey,
        serverURL: model.baseUrl,
      });

      // Rewrite tool-call IDs to Mistral's alphanumeric format with a mapping
      // that stays stable for the duration of this request.
      const normalizeMistralToolCallId = createMistralToolCallIdNormalizer();
      const transformedMessages = transformMessages(
        context.messages,
        model,
        (id) => normalizeMistralToolCallId(id),
      );

      const payload = buildChatPayload(
        model,
        context,
        transformedMessages,
        options,
      );
      // Let callers observe the exact request payload (e.g. for logging).
      options?.onPayload?.(payload);
      const mistralStream = await mistral.chat.stream(
        payload,
        buildRequestOptions(model, options),
      );
      stream.push({ type: "start", partial: output });
      await consumeChatStream(model, output, stream, mistralStream);

      // Abort may have fired while the stream drained without raising.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // A terminal aborted/error stop reason without a thrown exception is
      // still treated as a failure.
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = formatMistralError(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
/**
|
||||
* Maps provider-agnostic `SimpleStreamOptions` to Mistral options.
|
||||
*/
|
||||
export const streamSimpleMistral: StreamFunction<
|
||||
"mistral-conversations",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"mistral-conversations">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoning = clampReasoning(options?.reasoning);
|
||||
|
||||
return streamMistral(model, context, {
|
||||
...base,
|
||||
promptMode: model.reasoning && reasoning ? "reasoning" : undefined,
|
||||
} satisfies MistralOptions);
|
||||
};
|
||||
|
||||
function createOutput(model: Model<"mistral-conversations">): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function createMistralToolCallIdNormalizer(): (id: string) => string {
|
||||
const idMap = new Map<string, string>();
|
||||
const reverseMap = new Map<string, string>();
|
||||
|
||||
return (id: string): string => {
|
||||
const existing = idMap.get(id);
|
||||
if (existing) return existing;
|
||||
|
||||
let attempt = 0;
|
||||
while (true) {
|
||||
const candidate = deriveMistralToolCallId(id, attempt);
|
||||
const owner = reverseMap.get(candidate);
|
||||
if (!owner || owner === id) {
|
||||
idMap.set(id, candidate);
|
||||
reverseMap.set(candidate, id);
|
||||
return candidate;
|
||||
}
|
||||
attempt++;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function deriveMistralToolCallId(id: string, attempt: number): string {
|
||||
const normalized = id.replace(/[^a-zA-Z0-9]/g, "");
|
||||
if (attempt === 0 && normalized.length === MISTRAL_TOOL_CALL_ID_LENGTH)
|
||||
return normalized;
|
||||
const seedBase = normalized || id;
|
||||
const seed = attempt === 0 ? seedBase : `${seedBase}:${attempt}`;
|
||||
return shortHash(seed)
|
||||
.replace(/[^a-zA-Z0-9]/g, "")
|
||||
.slice(0, MISTRAL_TOOL_CALL_ID_LENGTH);
|
||||
}
|
||||
|
||||
function formatMistralError(error: unknown): string {
|
||||
if (error instanceof Error) {
|
||||
const sdkError = error as Error & { statusCode?: unknown; body?: unknown };
|
||||
const statusCode =
|
||||
typeof sdkError.statusCode === "number" ? sdkError.statusCode : undefined;
|
||||
const bodyText =
|
||||
typeof sdkError.body === "string" ? sdkError.body.trim() : undefined;
|
||||
if (statusCode !== undefined && bodyText) {
|
||||
return `Mistral API error (${statusCode}): ${truncateErrorText(bodyText, MAX_MISTRAL_ERROR_BODY_CHARS)}`;
|
||||
}
|
||||
if (statusCode !== undefined)
|
||||
return `Mistral API error (${statusCode}): ${error.message}`;
|
||||
return error.message;
|
||||
}
|
||||
return safeJsonStringify(error);
|
||||
}
|
||||
|
||||
function truncateErrorText(text: string, maxChars: number): string {
|
||||
if (text.length <= maxChars) return text;
|
||||
return `${text.slice(0, maxChars)}... [truncated ${text.length - maxChars} chars]`;
|
||||
}
|
||||
|
||||
function safeJsonStringify(value: unknown): string {
|
||||
try {
|
||||
const serialized = JSON.stringify(value);
|
||||
return serialized === undefined ? String(value) : serialized;
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
}
|
||||
|
||||
function buildRequestOptions(
|
||||
model: Model<"mistral-conversations">,
|
||||
options?: MistralOptions,
|
||||
): RequestOptions {
|
||||
const requestOptions: RequestOptions = {};
|
||||
if (options?.signal) requestOptions.signal = options.signal;
|
||||
requestOptions.retries = { strategy: "none" };
|
||||
|
||||
const headers: Record<string, string> = {};
|
||||
if (model.headers) Object.assign(headers, model.headers);
|
||||
if (options?.headers) Object.assign(headers, options.headers);
|
||||
|
||||
// Mistral infrastructure uses `x-affinity` for KV-cache reuse (prefix caching).
|
||||
// Respect explicit caller-provided header values.
|
||||
if (options?.sessionId && !headers["x-affinity"]) {
|
||||
headers["x-affinity"] = options.sessionId;
|
||||
}
|
||||
|
||||
if (Object.keys(headers).length > 0) {
|
||||
requestOptions.headers = headers;
|
||||
}
|
||||
|
||||
return requestOptions;
|
||||
}
|
||||
|
||||
function buildChatPayload(
|
||||
model: Model<"mistral-conversations">,
|
||||
context: Context,
|
||||
messages: Message[],
|
||||
options?: MistralOptions,
|
||||
): ChatCompletionStreamRequest {
|
||||
const payload: ChatCompletionStreamRequest = {
|
||||
model: model.id,
|
||||
stream: true,
|
||||
messages: toChatMessages(messages, model.input.includes("image")),
|
||||
};
|
||||
|
||||
if (context.tools?.length) payload.tools = toFunctionTools(context.tools);
|
||||
if (options?.temperature !== undefined)
|
||||
payload.temperature = options.temperature;
|
||||
if (options?.maxTokens !== undefined) payload.maxTokens = options.maxTokens;
|
||||
if (options?.toolChoice)
|
||||
payload.toolChoice = mapToolChoice(options.toolChoice);
|
||||
if (options?.promptMode) payload.promptMode = options.promptMode as any;
|
||||
|
||||
if (context.systemPrompt) {
|
||||
payload.messages.unshift({
|
||||
role: "system",
|
||||
content: sanitizeSurrogates(context.systemPrompt),
|
||||
});
|
||||
}
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
async function consumeChatStream(
|
||||
model: Model<"mistral-conversations">,
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
mistralStream: AsyncIterable<CompletionEvent>,
|
||||
): Promise<void> {
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
const toolBlocksByKey = new Map<string, number>();
|
||||
|
||||
const finishCurrentBlock = (block?: typeof currentBlock) => {
|
||||
if (!block) return;
|
||||
if (block.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (block.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
for await (const event of mistralStream) {
|
||||
const chunk = event.data;
|
||||
|
||||
if (chunk.usage) {
|
||||
output.usage.input = chunk.usage.promptTokens || 0;
|
||||
output.usage.output = chunk.usage.completionTokens || 0;
|
||||
output.usage.cacheRead = 0;
|
||||
output.usage.cacheWrite = 0;
|
||||
output.usage.totalTokens =
|
||||
chunk.usage.totalTokens || output.usage.input + output.usage.output;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
if (choice.finishReason) {
|
||||
output.stopReason = mapChatStopReason(choice.finishReason);
|
||||
}
|
||||
|
||||
const delta = choice.delta;
|
||||
if (delta.content !== null && delta.content !== undefined) {
|
||||
const contentItems =
|
||||
typeof delta.content === "string" ? [delta.content] : delta.content;
|
||||
for (const item of contentItems) {
|
||||
if (typeof item === "string") {
|
||||
const textDelta = sanitizeSurrogates(item);
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.text += textDelta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: textDelta,
|
||||
partial: output,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item.type === "thinking") {
|
||||
const deltaText = item.thinking
|
||||
.map((part) => ("text" in part ? part.text : ""))
|
||||
.filter((text) => text.length > 0)
|
||||
.join("");
|
||||
const thinkingDelta = sanitizeSurrogates(deltaText);
|
||||
if (!thinkingDelta) continue;
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "thinking_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.thinking += thinkingDelta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: thinkingDelta,
|
||||
partial: output,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item.type === "text") {
|
||||
const textDelta = sanitizeSurrogates(item.text);
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock.text += textDelta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: textDelta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = delta.toolCalls || [];
|
||||
for (const toolCall of toolCalls) {
|
||||
if (currentBlock) {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = null;
|
||||
}
|
||||
const callId =
|
||||
toolCall.id && toolCall.id !== "null"
|
||||
? toolCall.id
|
||||
: deriveMistralToolCallId(`toolcall:${toolCall.index ?? 0}`, 0);
|
||||
const key = `${callId}:${toolCall.index || 0}`;
|
||||
const existingIndex = toolBlocksByKey.get(key);
|
||||
let block: (ToolCall & { partialArgs?: string }) | undefined;
|
||||
|
||||
if (existingIndex !== undefined) {
|
||||
const existing = output.content[existingIndex];
|
||||
if (existing?.type === "toolCall") {
|
||||
block = existing as ToolCall & { partialArgs?: string };
|
||||
}
|
||||
}
|
||||
|
||||
if (!block) {
|
||||
block = {
|
||||
type: "toolCall",
|
||||
id: callId,
|
||||
name: toolCall.function.name,
|
||||
arguments: {},
|
||||
partialArgs: "",
|
||||
};
|
||||
output.content.push(block);
|
||||
toolBlocksByKey.set(key, output.content.length - 1);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: output.content.length - 1,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
|
||||
const argsDelta =
|
||||
typeof toolCall.function.arguments === "string"
|
||||
? toolCall.function.arguments
|
||||
: JSON.stringify(toolCall.function.arguments || {});
|
||||
block.partialArgs = (block.partialArgs || "") + argsDelta;
|
||||
block.arguments = parseStreamingJson<Record<string, unknown>>(
|
||||
block.partialArgs,
|
||||
);
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: toolBlocksByKey.get(key)!,
|
||||
delta: argsDelta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
finishCurrentBlock(currentBlock);
|
||||
for (const index of toolBlocksByKey.values()) {
|
||||
const block = output.content[index];
|
||||
if (block.type !== "toolCall") continue;
|
||||
const toolBlock = block as ToolCall & { partialArgs?: string };
|
||||
toolBlock.arguments = parseStreamingJson<Record<string, unknown>>(
|
||||
toolBlock.partialArgs,
|
||||
);
|
||||
delete toolBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: index,
|
||||
toolCall: toolBlock,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function toFunctionTools(
|
||||
tools: Tool[],
|
||||
): Array<FunctionTool & { type: "function" }> {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as unknown as Record<string, unknown>,
|
||||
strict: false,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Converts provider-agnostic conversation messages into Mistral
 * chat-completion request messages.
 *
 * - User messages keep text; images are kept only when `supportsImages`,
 *   otherwise a placeholder string is substituted (and empty messages are
 *   dropped entirely).
 * - Assistant messages carry text/thinking parts plus tool calls; messages
 *   with neither are dropped.
 * - Any other role is treated as a tool result and emitted as a `tool`
 *   message with a text summary (see buildToolResultText) plus any images.
 *
 * @param messages       Conversation history to convert.
 * @param supportsImages Whether the target model accepts image input.
 * @returns Messages in Mistral wire format, in the original order.
 */
function toChatMessages(
  messages: Message[],
  supportsImages: boolean,
): ChatCompletionStreamRequestMessages[] {
  const result: ChatCompletionStreamRequestMessages[] = [];

  for (const msg of messages) {
    if (msg.role === "user") {
      // Plain string content maps directly to a string message.
      if (typeof msg.content === "string") {
        result.push({ role: "user", content: sanitizeSurrogates(msg.content) });
        continue;
      }
      // Remember whether images were present before filtering, so a message
      // that consisted only of unsupported images still leaves a trace.
      const hadImages = msg.content.some((item) => item.type === "image");
      const content: ContentChunk[] = msg.content
        .filter((item) => item.type === "text" || supportsImages)
        .map((item) => {
          if (item.type === "text")
            return { type: "text", text: sanitizeSurrogates(item.text) };
          // Images are inlined as data URLs.
          return {
            type: "image_url",
            imageUrl: `data:${item.mimeType};base64,${item.data}`,
          };
        });
      if (content.length > 0) {
        result.push({ role: "user", content });
        continue;
      }
      // Nothing survived filtering: substitute a placeholder if images were
      // dropped; otherwise drop the (empty) message entirely.
      if (hadImages && !supportsImages) {
        result.push({
          role: "user",
          content: "(image omitted: model does not support images)",
        });
      }
      continue;
    }

    if (msg.role === "assistant") {
      const contentParts: ContentChunk[] = [];
      const toolCalls: Array<{
        id: string;
        type: "function";
        function: { name: string; arguments: string };
      }> = [];

      for (const block of msg.content) {
        if (block.type === "text") {
          // Skip whitespace-only text to avoid API validation errors.
          if (block.text.trim().length > 0) {
            contentParts.push({
              type: "text",
              text: sanitizeSurrogates(block.text),
            });
          }
          continue;
        }
        if (block.type === "thinking") {
          // Thinking is preserved as a dedicated chunk wrapping text parts.
          if (block.thinking.trim().length > 0) {
            contentParts.push({
              type: "thinking",
              thinking: [
                { type: "text", text: sanitizeSurrogates(block.thinking) },
              ],
            });
          }
          continue;
        }
        // Remaining block type is a tool call; arguments are serialized JSON.
        toolCalls.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            arguments: JSON.stringify(block.arguments || {}),
          },
        });
      }

      const assistantMessage: ChatCompletionStreamRequestMessages = {
        role: "assistant",
      };
      if (contentParts.length > 0) assistantMessage.content = contentParts;
      if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls;
      // Drop assistant messages that ended up completely empty.
      if (contentParts.length > 0 || toolCalls.length > 0)
        result.push(assistantMessage);
      continue;
    }

    // Tool-result message: summarize text (and image availability) into a
    // single text chunk, then append supported images as data URLs.
    const toolContent: ContentChunk[] = [];
    const textResult = msg.content
      .filter((part) => part.type === "text")
      .map((part) =>
        part.type === "text" ? sanitizeSurrogates(part.text) : "",
      )
      .join("\n");
    const hasImages = msg.content.some((part) => part.type === "image");
    const toolText = buildToolResultText(
      textResult,
      hasImages,
      supportsImages,
      msg.isError,
    );
    toolContent.push({ type: "text", text: toolText });
    for (const part of msg.content) {
      if (!supportsImages) continue;
      if (part.type !== "image") continue;
      toolContent.push({
        type: "image_url",
        imageUrl: `data:${part.mimeType};base64,${part.data}`,
      });
    }
    result.push({
      role: "tool",
      toolCallId: msg.toolCallId,
      name: msg.toolName,
      content: toolContent,
    });
  }

  return result;
}
|
||||
|
||||
function buildToolResultText(
|
||||
text: string,
|
||||
hasImages: boolean,
|
||||
supportsImages: boolean,
|
||||
isError: boolean,
|
||||
): string {
|
||||
const trimmed = text.trim();
|
||||
const errorPrefix = isError ? "[tool error] " : "";
|
||||
|
||||
if (trimmed.length > 0) {
|
||||
const imageSuffix =
|
||||
hasImages && !supportsImages
|
||||
? "\n[tool image omitted: model does not support images]"
|
||||
: "";
|
||||
return `${errorPrefix}${trimmed}${imageSuffix}`;
|
||||
}
|
||||
|
||||
if (hasImages) {
|
||||
if (supportsImages) {
|
||||
return isError
|
||||
? "[tool error] (see attached image)"
|
||||
: "(see attached image)";
|
||||
}
|
||||
return isError
|
||||
? "[tool error] (image omitted: model does not support images)"
|
||||
: "(image omitted: model does not support images)";
|
||||
}
|
||||
|
||||
return isError ? "[tool error] (no tool output)" : "(no tool output)";
|
||||
}
|
||||
|
||||
function mapToolChoice(
|
||||
choice: MistralOptions["toolChoice"],
|
||||
):
|
||||
| "auto"
|
||||
| "none"
|
||||
| "any"
|
||||
| "required"
|
||||
| { type: "function"; function: { name: string } }
|
||||
| undefined {
|
||||
if (!choice) return undefined;
|
||||
if (
|
||||
choice === "auto" ||
|
||||
choice === "none" ||
|
||||
choice === "any" ||
|
||||
choice === "required"
|
||||
) {
|
||||
return choice as any;
|
||||
}
|
||||
return {
|
||||
type: "function",
|
||||
function: { name: choice.function.name },
|
||||
};
|
||||
}
|
||||
|
||||
function mapChatStopReason(reason: string | null): StopReason {
|
||||
if (reason === null) return "stop";
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
case "model_length":
|
||||
return "length";
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "error":
|
||||
return "error";
|
||||
default:
|
||||
return "stop";
|
||||
}
|
||||
}
|
||||
1016
packages/ai/src/providers/openai-codex-responses.ts
Normal file
1016
packages/ai/src/providers/openai-codex-responses.ts
Normal file
File diff suppressed because it is too large
Load diff
949
packages/ai/src/providers/openai-completions.ts
Normal file
949
packages/ai/src/providers/openai-completions.ts
Normal file
|
|
@ -0,0 +1,949 @@
|
|||
import OpenAI from "openai";
|
||||
import type {
|
||||
ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionContentPart,
|
||||
ChatCompletionContentPartImage,
|
||||
ChatCompletionContentPartText,
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionToolMessageParam,
|
||||
} from "openai/resources/chat/completions.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost, supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Message,
|
||||
Model,
|
||||
OpenAICompletionsCompat,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/**
|
||||
* Check if conversation messages contain tool calls or tool results.
|
||||
* This is needed because Anthropic (via proxy) requires the tools param
|
||||
* to be present when messages include tool_calls or tool role messages.
|
||||
*/
|
||||
function hasToolHistory(messages: Message[]): boolean {
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "toolResult") {
|
||||
return true;
|
||||
}
|
||||
if (msg.role === "assistant") {
|
||||
if (msg.content.some((block) => block.type === "toolCall")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Options accepted by the OpenAI chat-completions stream functions,
 * extending the generic StreamOptions.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
  // Forwarded as the request's `tool_choice`: one of the string modes, or a
  // specific function selection.
  toolChoice?:
    | "auto"
    | "none"
    | "required"
    | { type: "function"; function: { name: string } };
  // Forwarded as `reasoning_effort` for reasoning-capable models; "xhigh" is
  // only valid for models where supportsXhigh() is true.
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
}
|
||||
|
||||
/**
 * Streams a chat-completions request and translates raw SDK chunks into the
 * shared assistant-message event model (start / text / thinking / toolcall
 * deltas / done / error).
 *
 * The returned stream is hot: the request is started immediately in a
 * detached async IIFE, and all outcomes — including failures — are reported
 * as events on the stream rather than thrown to the caller.
 */
export const streamOpenAICompletions: StreamFunction<
  "openai-completions",
  OpenAICompletionsOptions
> = (
  model: Model<"openai-completions">,
  context: Context,
  options?: OpenAICompletionsOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    // Accumulator for the final assistant message; mutated as chunks arrive
    // and referenced by every event as `partial`.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, context, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      // Give the caller a chance to observe the outgoing payload.
      options?.onPayload?.(params);
      const openaiStream = await client.chat.completions.create(params, {
        signal: options?.signal,
      });
      stream.push({ type: "start", partial: output });

      // The content block currently being accumulated; tool calls carry a
      // transient partialArgs buffer of not-yet-complete JSON.
      let currentBlock:
        | TextContent
        | ThinkingContent
        | (ToolCall & { partialArgs?: string })
        | null = null;
      const blocks = output.content;
      const blockIndex = () => blocks.length - 1;
      // Emits the matching *_end event for the given block; for tool calls,
      // parses the final argument JSON and drops the scratch buffer.
      const finishCurrentBlock = (block?: typeof currentBlock) => {
        if (block) {
          if (block.type === "text") {
            stream.push({
              type: "text_end",
              contentIndex: blockIndex(),
              content: block.text,
              partial: output,
            });
          } else if (block.type === "thinking") {
            stream.push({
              type: "thinking_end",
              contentIndex: blockIndex(),
              content: block.thinking,
              partial: output,
            });
          } else if (block.type === "toolCall") {
            block.arguments = parseStreamingJson(block.partialArgs);
            delete block.partialArgs;
            stream.push({
              type: "toolcall_end",
              contentIndex: blockIndex(),
              toolCall: block,
              partial: output,
            });
          }
        }
      };

      for await (const chunk of openaiStream) {
        // Usage typically arrives on the final chunk; overwrite wholesale.
        if (chunk.usage) {
          const cachedTokens =
            chunk.usage.prompt_tokens_details?.cached_tokens || 0;
          const reasoningTokens =
            chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
          const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
          const outputTokens =
            (chunk.usage.completion_tokens || 0) + reasoningTokens;
          output.usage = {
            // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
            input,
            output: outputTokens,
            cacheRead: cachedTokens,
            cacheWrite: 0,
            // Compute totalTokens ourselves since we add reasoning_tokens to output
            // and some providers (e.g., Groq) don't include them in total_tokens
            totalTokens: input + outputTokens + cachedTokens,
            cost: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              total: 0,
            },
          };
          calculateCost(model, output.usage);
        }

        const choice = chunk.choices?.[0];
        if (!choice) continue;

        if (choice.finish_reason) {
          output.stopReason = mapStopReason(choice.finish_reason);
        }

        if (choice.delta) {
          // Plain text delta: open a text block if needed, then append.
          if (
            choice.delta.content !== null &&
            choice.delta.content !== undefined &&
            choice.delta.content.length > 0
          ) {
            if (!currentBlock || currentBlock.type !== "text") {
              finishCurrentBlock(currentBlock);
              currentBlock = { type: "text", text: "" };
              output.content.push(currentBlock);
              stream.push({
                type: "text_start",
                contentIndex: blockIndex(),
                partial: output,
              });
            }

            if (currentBlock.type === "text") {
              currentBlock.text += choice.delta.content;
              stream.push({
                type: "text_delta",
                contentIndex: blockIndex(),
                delta: choice.delta.content,
                partial: output,
              });
            }
          }

          // Some endpoints return reasoning in reasoning_content (llama.cpp),
          // or reasoning (other openai compatible endpoints)
          // Use the first non-empty reasoning field to avoid duplication
          // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
          const reasoningFields = [
            "reasoning_content",
            "reasoning",
            "reasoning_text",
          ];
          let foundReasoningField: string | null = null;
          for (const field of reasoningFields) {
            if (
              (choice.delta as any)[field] !== null &&
              (choice.delta as any)[field] !== undefined &&
              (choice.delta as any)[field].length > 0
            ) {
              if (!foundReasoningField) {
                foundReasoningField = field;
                break;
              }
            }
          }

          if (foundReasoningField) {
            if (!currentBlock || currentBlock.type !== "thinking") {
              finishCurrentBlock(currentBlock);
              // Record which field carried the reasoning so the round-trip
              // conversion can write it back to the same field.
              currentBlock = {
                type: "thinking",
                thinking: "",
                thinkingSignature: foundReasoningField,
              };
              output.content.push(currentBlock);
              stream.push({
                type: "thinking_start",
                contentIndex: blockIndex(),
                partial: output,
              });
            }

            if (currentBlock.type === "thinking") {
              const delta = (choice.delta as any)[foundReasoningField];
              currentBlock.thinking += delta;
              stream.push({
                type: "thinking_delta",
                contentIndex: blockIndex(),
                delta,
                partial: output,
              });
            }
          }

          if (choice?.delta?.tool_calls) {
            for (const toolCall of choice.delta.tool_calls) {
              // A new tool call starts when no tool block is open, or when a
              // chunk carries a different id than the block in progress.
              if (
                !currentBlock ||
                currentBlock.type !== "toolCall" ||
                (toolCall.id && currentBlock.id !== toolCall.id)
              ) {
                finishCurrentBlock(currentBlock);
                currentBlock = {
                  type: "toolCall",
                  id: toolCall.id || "",
                  name: toolCall.function?.name || "",
                  arguments: {},
                  partialArgs: "",
                };
                output.content.push(currentBlock);
                stream.push({
                  type: "toolcall_start",
                  contentIndex: blockIndex(),
                  partial: output,
                });
              }

              if (currentBlock.type === "toolCall") {
                // id/name may trickle in on later chunks; keep the latest.
                if (toolCall.id) currentBlock.id = toolCall.id;
                if (toolCall.function?.name)
                  currentBlock.name = toolCall.function.name;
                let delta = "";
                if (toolCall.function?.arguments) {
                  delta = toolCall.function.arguments;
                  currentBlock.partialArgs += toolCall.function.arguments;
                  // Best-effort parse of the JSON accumulated so far.
                  currentBlock.arguments = parseStreamingJson(
                    currentBlock.partialArgs,
                  );
                }
                stream.push({
                  type: "toolcall_delta",
                  contentIndex: blockIndex(),
                  delta,
                  partial: output,
                });
              }
            }
          }

          // OpenRouter-style encrypted reasoning blobs: attach the detail to
          // the tool call with a matching id as a thought signature.
          const reasoningDetails = (choice.delta as any).reasoning_details;
          if (reasoningDetails && Array.isArray(reasoningDetails)) {
            for (const detail of reasoningDetails) {
              if (
                detail.type === "reasoning.encrypted" &&
                detail.id &&
                detail.data
              ) {
                const matchingToolCall = output.content.find(
                  (b) => b.type === "toolCall" && b.id === detail.id,
                ) as ToolCall | undefined;
                if (matchingToolCall) {
                  matchingToolCall.thoughtSignature = JSON.stringify(detail);
                }
              }
            }
          }
        }
      }

      finishCurrentBlock(currentBlock);
      // Surface aborts/errors through the catch path so the error event fires.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip any transient index bookkeeping before reporting.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      // Some providers via OpenRouter give additional information in this field.
      const rawMetadata = (error as any)?.error?.metadata?.raw;
      if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleOpenAICompletions: StreamFunction<
|
||||
"openai-completions",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model)
|
||||
? options?.reasoning
|
||||
: clampReasoning(options?.reasoning);
|
||||
const toolChoice = (options as OpenAICompletionsOptions | undefined)
|
||||
?.toolChoice;
|
||||
|
||||
return streamOpenAICompletions(model, context, {
|
||||
...base,
|
||||
reasoningEffort,
|
||||
toolChoice,
|
||||
} satisfies OpenAICompletionsOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
|
||||
const headers = { ...model.headers };
|
||||
if (model.provider === "github-copilot") {
|
||||
const hasImages = hasCopilotVisionInput(context.messages);
|
||||
const copilotHeaders = buildCopilotDynamicHeaders({
|
||||
messages: context.messages,
|
||||
hasImages,
|
||||
});
|
||||
Object.assign(headers, copilotHeaders);
|
||||
}
|
||||
|
||||
// Merge options headers last so they can override defaults
|
||||
if (optionsHeaders) {
|
||||
Object.assign(headers, optionsHeaders);
|
||||
}
|
||||
|
||||
return new OpenAI({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: headers,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assembles the streaming chat-completions request payload for `model`,
 * applying per-provider compatibility quirks from getCompat(): max-tokens
 * field naming, thinking/reasoning flags, and router-specific routing
 * options for OpenRouter and the Vercel AI Gateway.
 */
function buildParams(
  model: Model<"openai-completions">,
  context: Context,
  options?: OpenAICompletionsOptions,
) {
  const compat = getCompat(model);
  const messages = convertMessages(model, context, compat);
  // OpenRouter + Anthropic models need an explicit cache_control breakpoint.
  maybeAddOpenRouterAnthropicCacheControl(model, messages);

  const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
    model: model.id,
    messages,
    stream: true,
  };

  // Request usage stats in the final stream chunk unless the provider
  // rejects the stream_options field.
  if (compat.supportsUsageInStreaming !== false) {
    (params as any).stream_options = { include_usage: true };
  }

  // Opt out of server-side response storage where the API supports `store`.
  if (compat.supportsStore) {
    params.store = false;
  }

  if (options?.maxTokens) {
    // Legacy providers expect max_tokens; OpenAI uses max_completion_tokens.
    if (compat.maxTokensField === "max_tokens") {
      (params as any).max_tokens = options.maxTokens;
    } else {
      params.max_completion_tokens = options.maxTokens;
    }
  }

  if (options?.temperature !== undefined) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools, compat);
  } else if (hasToolHistory(context.messages)) {
    // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
    params.tools = [];
  }

  if (options?.toolChoice) {
    params.tool_choice = options.toolChoice;
  }

  if (
    (compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") &&
    model.reasoning
  ) {
    // Both Z.ai and Qwen use enable_thinking: boolean
    (params as any).enable_thinking = !!options?.reasoningEffort;
  } else if (
    options?.reasoningEffort &&
    model.reasoning &&
    compat.supportsReasoningEffort
  ) {
    // OpenAI-style reasoning_effort
    (params as any).reasoning_effort = mapReasoningEffort(
      options.reasoningEffort,
      compat.reasoningEffortMap,
    );
  }

  // OpenRouter provider routing preferences
  if (
    model.baseUrl.includes("openrouter.ai") &&
    model.compat?.openRouterRouting
  ) {
    (params as any).provider = model.compat.openRouterRouting;
  }

  // Vercel AI Gateway provider routing preferences
  if (
    model.baseUrl.includes("ai-gateway.vercel.sh") &&
    model.compat?.vercelGatewayRouting
  ) {
    const routing = model.compat.vercelGatewayRouting;
    if (routing.only || routing.order) {
      const gatewayOptions: Record<string, string[]> = {};
      if (routing.only) gatewayOptions.only = routing.only;
      if (routing.order) gatewayOptions.order = routing.order;
      (params as any).providerOptions = { gateway: gatewayOptions };
    }
  }

  return params;
}
|
||||
|
||||
function mapReasoningEffort(
|
||||
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
|
||||
reasoningEffortMap: Partial<
|
||||
Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>
|
||||
>,
|
||||
): string {
|
||||
return reasoningEffortMap[effort] ?? effort;
|
||||
}
|
||||
|
||||
function maybeAddOpenRouterAnthropicCacheControl(
|
||||
model: Model<"openai-completions">,
|
||||
messages: ChatCompletionMessageParam[],
|
||||
): void {
|
||||
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
|
||||
return;
|
||||
|
||||
// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
|
||||
// on the last user/assistant message (walking backwards until we find text content).
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (msg.role !== "user" && msg.role !== "assistant") continue;
|
||||
|
||||
const content = msg.content;
|
||||
if (typeof content === "string") {
|
||||
msg.content = [
|
||||
Object.assign(
|
||||
{ type: "text" as const, text: content },
|
||||
{ cache_control: { type: "ephemeral" } },
|
||||
),
|
||||
];
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) continue;
|
||||
|
||||
// Find last text part and add cache_control
|
||||
for (let j = content.length - 1; j >= 0; j--) {
|
||||
const part = content[j];
|
||||
if (part?.type === "text") {
|
||||
Object.assign(part, { cache_control: { type: "ephemeral" } });
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function convertMessages(
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
compat: Required<OpenAICompletionsCompat>,
|
||||
): ChatCompletionMessageParam[] {
|
||||
const params: ChatCompletionMessageParam[] = [];
|
||||
|
||||
const normalizeToolCallId = (id: string): string => {
|
||||
// Handle pipe-separated IDs from OpenAI Responses API
|
||||
// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
|
||||
// These come from providers like github-copilot, openai-codex, opencode
|
||||
// Extract just the call_id part and normalize it
|
||||
if (id.includes("|")) {
|
||||
const [callId] = id.split("|");
|
||||
// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
|
||||
return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
|
||||
}
|
||||
|
||||
if (model.provider === "openai")
|
||||
return id.length > 40 ? id.slice(0, 40) : id;
|
||||
return id;
|
||||
};
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model, (id) =>
|
||||
normalizeToolCallId(id),
|
||||
);
|
||||
|
||||
if (context.systemPrompt) {
|
||||
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
||||
const role = useDeveloperRole ? "developer" : "system";
|
||||
params.push({
|
||||
role: role,
|
||||
content: sanitizeSurrogates(context.systemPrompt),
|
||||
});
|
||||
}
|
||||
|
||||
let lastRole: string | null = null;
|
||||
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
// Some providers don't allow user messages directly after tool results
|
||||
// Insert a synthetic assistant message to bridge the gap
|
||||
if (
|
||||
compat.requiresAssistantAfterToolResult &&
|
||||
lastRole === "toolResult" &&
|
||||
msg.role === "user"
|
||||
) {
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: "I have processed the tool results.",
|
||||
});
|
||||
}
|
||||
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: sanitizeSurrogates(msg.content),
|
||||
});
|
||||
} else {
|
||||
const content: ChatCompletionContentPart[] = msg.content.map(
|
||||
(item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(item.text),
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
} else {
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${item.mimeType};base64,${item.data}`,
|
||||
},
|
||||
} satisfies ChatCompletionContentPartImage;
|
||||
}
|
||||
},
|
||||
);
|
||||
const filteredContent = !model.input.includes("image")
|
||||
? content.filter((c) => c.type !== "image_url")
|
||||
: content;
|
||||
if (filteredContent.length === 0) continue;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
// Some providers don't accept null content, use empty string instead
|
||||
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
||||
role: "assistant",
|
||||
content: compat.requiresAssistantAfterToolResult ? "" : null,
|
||||
};
|
||||
|
||||
const textBlocks = msg.content.filter(
|
||||
(b) => b.type === "text",
|
||||
) as TextContent[];
|
||||
// Filter out empty text blocks to avoid API validation errors
|
||||
const nonEmptyTextBlocks = textBlocks.filter(
|
||||
(b) => b.text && b.text.trim().length > 0,
|
||||
);
|
||||
if (nonEmptyTextBlocks.length > 0) {
|
||||
// GitHub Copilot requires assistant content as a string, not an array.
|
||||
// Sending as array causes Claude models to re-answer all previous prompts.
|
||||
if (model.provider === "github-copilot") {
|
||||
assistantMsg.content = nonEmptyTextBlocks
|
||||
.map((b) => sanitizeSurrogates(b.text))
|
||||
.join("");
|
||||
} else {
|
||||
assistantMsg.content = nonEmptyTextBlocks.map((b) => {
|
||||
return { type: "text", text: sanitizeSurrogates(b.text) };
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle thinking blocks
|
||||
const thinkingBlocks = msg.content.filter(
|
||||
(b) => b.type === "thinking",
|
||||
) as ThinkingContent[];
|
||||
// Filter out empty thinking blocks to avoid API validation errors
|
||||
const nonEmptyThinkingBlocks = thinkingBlocks.filter(
|
||||
(b) => b.thinking && b.thinking.trim().length > 0,
|
||||
);
|
||||
if (nonEmptyThinkingBlocks.length > 0) {
|
||||
if (compat.requiresThinkingAsText) {
|
||||
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
||||
const thinkingText = nonEmptyThinkingBlocks
|
||||
.map((b) => b.thinking)
|
||||
.join("\n\n");
|
||||
const textContent = assistantMsg.content as Array<{
|
||||
type: "text";
|
||||
text: string;
|
||||
}> | null;
|
||||
if (textContent) {
|
||||
textContent.unshift({ type: "text", text: thinkingText });
|
||||
} else {
|
||||
assistantMsg.content = [{ type: "text", text: thinkingText }];
|
||||
}
|
||||
} else {
|
||||
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
|
||||
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
||||
if (signature && signature.length > 0) {
|
||||
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks
|
||||
.map((b) => b.thinking)
|
||||
.join("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = msg.content.filter(
|
||||
(b) => b.type === "toolCall",
|
||||
) as ToolCall[];
|
||||
if (toolCalls.length > 0) {
|
||||
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
||||
id: tc.id,
|
||||
type: "function" as const,
|
||||
function: {
|
||||
name: tc.name,
|
||||
arguments: JSON.stringify(tc.arguments),
|
||||
},
|
||||
}));
|
||||
const reasoningDetails = toolCalls
|
||||
.filter((tc) => tc.thoughtSignature)
|
||||
.map((tc) => {
|
||||
try {
|
||||
return JSON.parse(tc.thoughtSignature!);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter(Boolean);
|
||||
if (reasoningDetails.length > 0) {
|
||||
(assistantMsg as any).reasoning_details = reasoningDetails;
|
||||
}
|
||||
}
|
||||
// Skip assistant messages that have no content and no tool calls.
|
||||
// Some providers require "either content or tool_calls, but not none".
|
||||
// Other providers also don't accept empty assistant messages.
|
||||
// This handles aborted assistant responses that got no content.
|
||||
const content = assistantMsg.content;
|
||||
const hasContent =
|
||||
content !== null &&
|
||||
content !== undefined &&
|
||||
(typeof content === "string" ? content.length > 0 : content.length > 0);
|
||||
if (!hasContent && !assistantMsg.tool_calls) {
|
||||
continue;
|
||||
}
|
||||
params.push(assistantMsg);
|
||||
} else if (msg.role === "toolResult") {
|
||||
const imageBlocks: Array<{
|
||||
type: "image_url";
|
||||
image_url: { url: string };
|
||||
}> = [];
|
||||
let j = i;
|
||||
|
||||
for (
|
||||
;
|
||||
j < transformedMessages.length &&
|
||||
transformedMessages[j].role === "toolResult";
|
||||
j++
|
||||
) {
|
||||
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
||||
|
||||
// Extract text and image content
|
||||
const textResult = toolMsg.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => (c as any).text)
|
||||
.join("\n");
|
||||
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
||||
|
||||
// Always send tool result with text (or placeholder if only images)
|
||||
const hasText = textResult.length > 0;
|
||||
// Some providers require the 'name' field in tool results
|
||||
const toolResultMsg: ChatCompletionToolMessageParam = {
|
||||
role: "tool",
|
||||
content: sanitizeSurrogates(
|
||||
hasText ? textResult : "(see attached image)",
|
||||
),
|
||||
tool_call_id: toolMsg.toolCallId,
|
||||
};
|
||||
if (compat.requiresToolResultName && toolMsg.toolName) {
|
||||
(toolResultMsg as any).name = toolMsg.toolName;
|
||||
}
|
||||
params.push(toolResultMsg);
|
||||
|
||||
if (hasImages && model.input.includes("image")) {
|
||||
for (const block of toolMsg.content) {
|
||||
if (block.type === "image") {
|
||||
imageBlocks.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i = j - 1;
|
||||
|
||||
if (imageBlocks.length > 0) {
|
||||
if (compat.requiresAssistantAfterToolResult) {
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: "I have processed the tool results.",
|
||||
});
|
||||
}
|
||||
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "Attached image(s) from tool result:",
|
||||
},
|
||||
...imageBlocks,
|
||||
],
|
||||
});
|
||||
lastRole = "user";
|
||||
} else {
|
||||
lastRole = "toolResult";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
lastRole = msg.role;
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
function convertTools(
|
||||
tools: Tool[],
|
||||
compat: Required<OpenAICompletionsCompat>,
|
||||
): OpenAI.Chat.Completions.ChatCompletionTool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
|
||||
// Only include strict if provider supports it. Some reject unknown fields.
|
||||
...(compat.supportsStrictMode !== false && { strict: false }),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
reason: ChatCompletionChunk.Choice["finish_reason"],
|
||||
): StopReason {
|
||||
if (reason === null) return "stop";
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
return "length";
|
||||
case "function_call":
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "content_filter":
|
||||
return "error";
|
||||
default: {
|
||||
const _exhaustive: never = reason;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect compatibility settings from provider and baseUrl for known providers.
|
||||
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
||||
* Returns a fully resolved OpenAICompletionsCompat object with all fields set.
|
||||
*/
|
||||
function detectCompat(
|
||||
model: Model<"openai-completions">,
|
||||
): Required<OpenAICompletionsCompat> {
|
||||
const provider = model.provider;
|
||||
const baseUrl = model.baseUrl;
|
||||
|
||||
const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
|
||||
|
||||
const isNonStandard =
|
||||
provider === "cerebras" ||
|
||||
baseUrl.includes("cerebras.ai") ||
|
||||
provider === "xai" ||
|
||||
baseUrl.includes("api.x.ai") ||
|
||||
baseUrl.includes("chutes.ai") ||
|
||||
baseUrl.includes("deepseek.com") ||
|
||||
isZai ||
|
||||
provider === "opencode" ||
|
||||
baseUrl.includes("opencode.ai");
|
||||
|
||||
const useMaxTokens = baseUrl.includes("chutes.ai");
|
||||
|
||||
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
||||
const isGroq = provider === "groq" || baseUrl.includes("groq.com");
|
||||
|
||||
const reasoningEffortMap =
|
||||
isGroq && model.id === "qwen/qwen3-32b"
|
||||
? {
|
||||
minimal: "default",
|
||||
low: "default",
|
||||
medium: "default",
|
||||
high: "default",
|
||||
xhigh: "default",
|
||||
}
|
||||
: {};
|
||||
return {
|
||||
supportsStore: !isNonStandard,
|
||||
supportsDeveloperRole: !isNonStandard,
|
||||
supportsReasoningEffort: !isGrok && !isZai,
|
||||
reasoningEffortMap,
|
||||
supportsUsageInStreaming: true,
|
||||
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
||||
requiresToolResultName: false,
|
||||
requiresAssistantAfterToolResult: false,
|
||||
requiresThinkingAsText: false,
|
||||
thinkingFormat: isZai ? "zai" : "openai",
|
||||
openRouterRouting: {},
|
||||
vercelGatewayRouting: {},
|
||||
supportsStrictMode: true,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get resolved compatibility settings for a model.
|
||||
* Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
|
||||
*/
|
||||
function getCompat(
|
||||
model: Model<"openai-completions">,
|
||||
): Required<OpenAICompletionsCompat> {
|
||||
const detected = detectCompat(model);
|
||||
if (!model.compat) return detected;
|
||||
|
||||
return {
|
||||
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
||||
supportsDeveloperRole:
|
||||
model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
||||
supportsReasoningEffort:
|
||||
model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
||||
reasoningEffortMap:
|
||||
model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
|
||||
supportsUsageInStreaming:
|
||||
model.compat.supportsUsageInStreaming ??
|
||||
detected.supportsUsageInStreaming,
|
||||
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
||||
requiresToolResultName:
|
||||
model.compat.requiresToolResultName ?? detected.requiresToolResultName,
|
||||
requiresAssistantAfterToolResult:
|
||||
model.compat.requiresAssistantAfterToolResult ??
|
||||
detected.requiresAssistantAfterToolResult,
|
||||
requiresThinkingAsText:
|
||||
model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
|
||||
thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
|
||||
openRouterRouting: model.compat.openRouterRouting ?? {},
|
||||
vercelGatewayRouting:
|
||||
model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
||||
supportsStrictMode:
|
||||
model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
||||
};
|
||||
}
|
||||
583
packages/ai/src/providers/openai-responses-shared.ts
Normal file
583
packages/ai/src/providers/openai-responses-shared.ts
Normal file
|
|
@ -0,0 +1,583 @@
|
|||
import type OpenAI from "openai";
|
||||
import type {
|
||||
Tool as OpenAITool,
|
||||
ResponseCreateParamsStreaming,
|
||||
ResponseFunctionToolCall,
|
||||
ResponseInput,
|
||||
ResponseInputContent,
|
||||
ResponseInputImage,
|
||||
ResponseInputText,
|
||||
ResponseOutputMessage,
|
||||
ResponseReasoningItem,
|
||||
ResponseStreamEvent,
|
||||
} from "openai/resources/responses/responses.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
ImageContent,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
TextSignatureV1,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
Usage,
|
||||
} from "../types.js";
|
||||
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
// =============================================================================
|
||||
// Utilities
|
||||
// =============================================================================
|
||||
|
||||
function encodeTextSignatureV1(
|
||||
id: string,
|
||||
phase?: TextSignatureV1["phase"],
|
||||
): string {
|
||||
const payload: TextSignatureV1 = { v: 1, id };
|
||||
if (phase) payload.phase = phase;
|
||||
return JSON.stringify(payload);
|
||||
}
|
||||
|
||||
function parseTextSignature(
|
||||
signature: string | undefined,
|
||||
): { id: string; phase?: TextSignatureV1["phase"] } | undefined {
|
||||
if (!signature) return undefined;
|
||||
if (signature.startsWith("{")) {
|
||||
try {
|
||||
const parsed = JSON.parse(signature) as Partial<TextSignatureV1>;
|
||||
if (parsed.v === 1 && typeof parsed.id === "string") {
|
||||
if (parsed.phase === "commentary" || parsed.phase === "final_answer") {
|
||||
return { id: parsed.id, phase: parsed.phase };
|
||||
}
|
||||
return { id: parsed.id };
|
||||
}
|
||||
} catch {
|
||||
// Fall through to legacy plain-string handling.
|
||||
}
|
||||
}
|
||||
return { id: signature };
|
||||
}
|
||||
|
||||
/**
 * Optional hooks for Responses API stream processing.
 */
export interface OpenAIResponsesStreamOptions {
  // Service tier requested from OpenAI; used as a fallback when the response
  // itself does not report the tier it ran under.
  serviceTier?: ResponseCreateParamsStreaming["service_tier"];
  /**
   * Callback invoked after usage is computed, letting callers adjust cost
   * figures based on the effective service tier (which may differ from the
   * one requested).
   */
  applyServiceTierPricing?: (
    usage: Usage,
    serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
  ) => void;
}
||||
|
||||
/** Options for convertResponsesMessages. */
export interface ConvertResponsesMessagesOptions {
  // When false, context.systemPrompt is not emitted as a system/developer
  // message. Defaults to true.
  includeSystemPrompt?: boolean;
}
|
||||
|
||||
/** Options for convertResponsesTools. */
export interface ConvertResponsesToolsOptions {
  // Value emitted as each tool's `strict` field. Omitted (undefined) defaults
  // to false; an explicit null is passed through unchanged.
  strict?: boolean | null;
}
|
||||
|
||||
// =============================================================================
|
||||
// Message conversion
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Convert the internal Context (system prompt + message history) into the
 * OpenAI Responses API `input` array.
 *
 * Key behaviors visible below:
 * - The system prompt becomes a "developer" message for reasoning models,
 *   otherwise a "system" message.
 * - Assistant thinking blocks are restored from their serialized
 *   ResponseReasoningItem stored in `thinkingSignature`.
 * - Tool-call ids use the internal "callId|itemId" encoding; for providers in
 *   `allowedToolCallProviders` they are sanitized to satisfy the API's id rules.
 * - Images inside tool results are re-sent as a follow-up user message, since
 *   `function_call_output` here carries text only.
 *
 * @param model Target model; gates image support, id normalization, and role choice.
 * @param context System prompt and message history to convert.
 * @param allowedToolCallProviders Providers whose tool-call ids must be normalized.
 * @param options Optional flags (currently only `includeSystemPrompt`).
 * @returns The Responses API input array.
 */
export function convertResponsesMessages<TApi extends Api>(
  model: Model<TApi>,
  context: Context,
  allowedToolCallProviders: ReadonlySet<string>,
  options?: ConvertResponsesMessagesOptions,
): ResponseInput {
  const messages: ResponseInput = [];

  // Sanitize "callId|itemId" tool-call ids for providers that enforce OpenAI's
  // id format; other providers' ids pass through untouched.
  const normalizeToolCallId = (id: string): string => {
    if (!allowedToolCallProviders.has(model.provider)) return id;
    if (!id.includes("|")) return id;
    const [callId, itemId] = id.split("|");
    // Replace any character outside [a-zA-Z0-9_-] with an underscore.
    const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
    let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
    // OpenAI Responses API requires item id to start with "fc"
    if (!sanitizedItemId.startsWith("fc")) {
      sanitizedItemId = `fc_${sanitizedItemId}`;
    }
    // Truncate to 64 chars and strip trailing underscores (OpenAI Codex rejects them)
    let normalizedCallId =
      sanitizedCallId.length > 64
        ? sanitizedCallId.slice(0, 64)
        : sanitizedCallId;
    let normalizedItemId =
      sanitizedItemId.length > 64
        ? sanitizedItemId.slice(0, 64)
        : sanitizedItemId;
    normalizedCallId = normalizedCallId.replace(/_+$/, "");
    normalizedItemId = normalizedItemId.replace(/_+$/, "");
    return `${normalizedCallId}|${normalizedItemId}`;
  };

  const transformedMessages = transformMessages(
    context.messages,
    model,
    normalizeToolCallId,
  );

  const includeSystemPrompt = options?.includeSystemPrompt ?? true;
  if (includeSystemPrompt && context.systemPrompt) {
    // Reasoning models take the system prompt under the "developer" role.
    const role = model.reasoning ? "developer" : "system";
    messages.push({
      role,
      content: sanitizeSurrogates(context.systemPrompt),
    });
  }

  // msgIndex feeds fallback ids for assistant text blocks.
  // NOTE(review): `continue` below skips the trailing msgIndex++, so skipped
  // messages do not advance the counter — confirm this is intentional.
  let msgIndex = 0;
  for (const msg of transformedMessages) {
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        messages.push({
          role: "user",
          content: [
            { type: "input_text", text: sanitizeSurrogates(msg.content) },
          ],
        });
      } else {
        // Structured user content: map text/image parts to API input parts.
        const content: ResponseInputContent[] = msg.content.map(
          (item): ResponseInputContent => {
            if (item.type === "text") {
              return {
                type: "input_text",
                text: sanitizeSurrogates(item.text),
              } satisfies ResponseInputText;
            }
            return {
              type: "input_image",
              detail: "auto",
              image_url: `data:${item.mimeType};base64,${item.data}`,
            } satisfies ResponseInputImage;
          },
        );
        // Drop images entirely for models without image input support.
        const filteredContent = !model.input.includes("image")
          ? content.filter((c) => c.type !== "input_image")
          : content;
        if (filteredContent.length === 0) continue;
        messages.push({
          role: "user",
          content: filteredContent,
        });
      }
    } else if (msg.role === "assistant") {
      const output: ResponseInput = [];
      const assistantMsg = msg as AssistantMessage;
      // True when this history message came from the same provider/api but a
      // different model id than the one we're calling now.
      const isDifferentModel =
        assistantMsg.model !== model.id &&
        assistantMsg.provider === model.provider &&
        assistantMsg.api === model.api;

      for (const block of msg.content) {
        if (block.type === "thinking") {
          if (block.thinking.trim().length === 0) continue;
          if (block.thinkingSignature) {
            // The signature holds the original reasoning item verbatim.
            const reasoningItem = JSON.parse(
              block.thinkingSignature,
            ) as ResponseReasoningItem;
            output.push(reasoningItem);
          }
        } else if (block.type === "text") {
          const textBlock = block as TextContent;
          const parsedSignature = parseTextSignature(textBlock.textSignature);
          // OpenAI requires id to be max 64 characters
          let msgId = parsedSignature?.id;
          if (!msgId) {
            msgId = `msg_${msgIndex}`;
          } else if (msgId.length > 64) {
            msgId = `msg_${shortHash(msgId)}`;
          }
          output.push({
            type: "message",
            role: "assistant",
            content: [
              {
                type: "output_text",
                text: sanitizeSurrogates(textBlock.text),
                annotations: [],
              },
            ],
            status: "completed",
            id: msgId,
            phase: parsedSignature?.phase,
          } satisfies ResponseOutputMessage);
        } else if (block.type === "toolCall") {
          const toolCall = block as ToolCall;
          const [callId, itemIdRaw] = toolCall.id.split("|");
          let itemId: string | undefined = itemIdRaw;

          // For different-model messages, set id to undefined to avoid pairing validation.
          // OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.
          // By omitting the id, we avoid triggering that validation (like cross-provider does).
          if (isDifferentModel && itemId?.startsWith("fc_")) {
            itemId = undefined;
          }

          output.push({
            type: "function_call",
            id: itemId,
            call_id: callId,
            name: toolCall.name,
            arguments: JSON.stringify(toolCall.arguments),
          });
        }
      }
      // Skip assistant messages that produced no items at all.
      if (output.length === 0) continue;
      messages.push(...output);
    } else if (msg.role === "toolResult") {
      // Extract text and image content
      const textResult = msg.content
        .filter((c): c is TextContent => c.type === "text")
        .map((c) => c.text)
        .join("\n");
      const hasImages = msg.content.some(
        (c): c is ImageContent => c.type === "image",
      );

      // Always send function_call_output with text (or placeholder if only images)
      const hasText = textResult.length > 0;
      // Only the call-id half of the internal "callId|itemId" id is sent back.
      const [callId] = msg.toolCallId.split("|");
      messages.push({
        type: "function_call_output",
        call_id: callId,
        output: sanitizeSurrogates(
          hasText ? textResult : "(see attached image)",
        ),
      });

      // If there are images and model supports them, send a follow-up user message with images
      if (hasImages && model.input.includes("image")) {
        const contentParts: ResponseInputContent[] = [];

        // Add text prefix
        contentParts.push({
          type: "input_text",
          text: "Attached image(s) from tool result:",
        } satisfies ResponseInputText);

        // Add images
        for (const block of msg.content) {
          if (block.type === "image") {
            contentParts.push({
              type: "input_image",
              detail: "auto",
              image_url: `data:${block.mimeType};base64,${block.data}`,
            } satisfies ResponseInputImage);
          }
        }

        messages.push({
          role: "user",
          content: contentParts,
        });
      }
    }
    msgIndex++;
  }

  return messages;
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool conversion
|
||||
// =============================================================================
|
||||
|
||||
export function convertResponsesTools(
|
||||
tools: Tool[],
|
||||
options?: ConvertResponsesToolsOptions,
|
||||
): OpenAITool[] {
|
||||
const strict = options?.strict === undefined ? false : options.strict;
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
|
||||
strict,
|
||||
}));
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Stream processing
|
||||
// =============================================================================
|
||||
|
||||
export async function processResponsesStream<TApi extends Api>(
|
||||
openaiStream: AsyncIterable<ResponseStreamEvent>,
|
||||
output: AssistantMessage,
|
||||
stream: AssistantMessageEventStream,
|
||||
model: Model<TApi>,
|
||||
options?: OpenAIResponsesStreamOptions,
|
||||
): Promise<void> {
|
||||
let currentItem:
|
||||
| ResponseReasoningItem
|
||||
| ResponseOutputMessage
|
||||
| ResponseFunctionToolCall
|
||||
| null = null;
|
||||
let currentBlock:
|
||||
| ThinkingContent
|
||||
| TextContent
|
||||
| (ToolCall & { partialJson: string })
|
||||
| null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
|
||||
for await (const event of openaiStream) {
|
||||
if (event.type === "response.output_item.added") {
|
||||
const item = event.item;
|
||||
if (item.type === "reasoning") {
|
||||
currentItem = item;
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "thinking_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
} else if (item.type === "message") {
|
||||
currentItem = item;
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "text_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
} else if (item.type === "function_call") {
|
||||
currentItem = item;
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: `${item.call_id}|${item.id}`,
|
||||
name: item.name,
|
||||
arguments: {},
|
||||
partialJson: item.arguments || "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({
|
||||
type: "toolcall_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_part.added") {
|
||||
if (currentItem && currentItem.type === "reasoning") {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
currentItem.summary.push(event.part);
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_text.delta") {
|
||||
if (
|
||||
currentItem?.type === "reasoning" &&
|
||||
currentBlock?.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_part.done") {
|
||||
if (
|
||||
currentItem?.type === "reasoning" &&
|
||||
currentBlock?.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += "\n\n";
|
||||
lastPart.text += "\n\n";
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: "\n\n",
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.content_part.added") {
|
||||
if (currentItem?.type === "message") {
|
||||
currentItem.content = currentItem.content || [];
|
||||
// Filter out ReasoningText, only accept output_text and refusal
|
||||
if (
|
||||
event.part.type === "output_text" ||
|
||||
event.part.type === "refusal"
|
||||
) {
|
||||
currentItem.content.push(event.part);
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.output_text.delta") {
|
||||
if (currentItem?.type === "message" && currentBlock?.type === "text") {
|
||||
if (!currentItem.content || currentItem.content.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart?.type === "output_text") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.refusal.delta") {
|
||||
if (currentItem?.type === "message" && currentBlock?.type === "text") {
|
||||
if (!currentItem.content || currentItem.content.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart?.type === "refusal") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.refusal += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.function_call_arguments.delta") {
|
||||
if (
|
||||
currentItem?.type === "function_call" &&
|
||||
currentBlock?.type === "toolCall"
|
||||
) {
|
||||
currentBlock.partialJson += event.delta;
|
||||
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.function_call_arguments.done") {
|
||||
if (
|
||||
currentItem?.type === "function_call" &&
|
||||
currentBlock?.type === "toolCall"
|
||||
) {
|
||||
currentBlock.partialJson = event.arguments;
|
||||
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
|
||||
}
|
||||
} else if (event.type === "response.output_item.done") {
|
||||
const item = event.item;
|
||||
|
||||
if (item.type === "reasoning" && currentBlock?.type === "thinking") {
|
||||
currentBlock.thinking =
|
||||
item.summary?.map((s) => s.text).join("\n\n") || "";
|
||||
currentBlock.thinkingSignature = JSON.stringify(item);
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "message" && currentBlock?.type === "text") {
|
||||
currentBlock.text = item.content
|
||||
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
|
||||
.join("");
|
||||
currentBlock.textSignature = encodeTextSignatureV1(
|
||||
item.id,
|
||||
item.phase ?? undefined,
|
||||
);
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "function_call") {
|
||||
const args =
|
||||
currentBlock?.type === "toolCall" && currentBlock.partialJson
|
||||
? parseStreamingJson(currentBlock.partialJson)
|
||||
: parseStreamingJson(item.arguments || "{}");
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: `${item.call_id}|${item.id}`,
|
||||
name: item.name,
|
||||
arguments: args,
|
||||
};
|
||||
|
||||
currentBlock = null;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: blockIndex(),
|
||||
toolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.type === "response.completed") {
|
||||
const response = event.response;
|
||||
if (response?.usage) {
|
||||
const cachedTokens =
|
||||
response.usage.input_tokens_details?.cached_tokens || 0;
|
||||
output.usage = {
|
||||
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
|
||||
input: (response.usage.input_tokens || 0) - cachedTokens,
|
||||
output: response.usage.output_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
totalTokens: response.usage.total_tokens || 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
calculateCost(model, output.usage);
|
||||
if (options?.applyServiceTierPricing) {
|
||||
const serviceTier = response?.service_tier ?? options.serviceTier;
|
||||
options.applyServiceTierPricing(output.usage, serviceTier);
|
||||
}
|
||||
// Map status to stop reason
|
||||
output.stopReason = mapStopReason(response?.status);
|
||||
if (
|
||||
output.content.some((b) => b.type === "toolCall") &&
|
||||
output.stopReason === "stop"
|
||||
) {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
} else if (event.type === "error") {
|
||||
throw new Error(
|
||||
`Error Code ${event.code}: ${event.message}` || "Unknown error",
|
||||
);
|
||||
} else if (event.type === "response.failed") {
|
||||
throw new Error("Unknown error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function mapStopReason(
|
||||
status: OpenAI.Responses.ResponseStatus | undefined,
|
||||
): StopReason {
|
||||
if (!status) return "stop";
|
||||
switch (status) {
|
||||
case "completed":
|
||||
return "stop";
|
||||
case "incomplete":
|
||||
return "length";
|
||||
case "failed":
|
||||
case "cancelled":
|
||||
return "error";
|
||||
// These two are wonky ...
|
||||
case "in_progress":
|
||||
case "queued":
|
||||
return "stop";
|
||||
default: {
|
||||
const _exhaustive: never = status;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
309
packages/ai/src/providers/openai-responses.ts
Normal file
309
packages/ai/src/providers/openai-responses.ts
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
import OpenAI from "openai";
|
||||
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { supportsXhigh } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
Usage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import {
|
||||
buildCopilotDynamicHeaders,
|
||||
hasCopilotVisionInput,
|
||||
} from "./github-copilot-headers.js";
|
||||
import {
|
||||
convertResponsesMessages,
|
||||
convertResponsesTools,
|
||||
processResponsesStream,
|
||||
} from "./openai-responses-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
|
||||
// Providers whose tool-call ids follow the OpenAI "callId|itemId" convention
// and therefore need normalization before being replayed to the Responses API.
// NOTE(review): membership inferred from the constant's name and its role as a
// tool-call allow-list — confirm against convertResponsesMessages call sites.
const OPENAI_TOOL_CALL_PROVIDERS = new Set([
  "openai",
  "openai-codex",
  "opencode",
]);
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(
|
||||
cacheRetention?: CacheRetention,
|
||||
): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (
|
||||
typeof process !== "undefined" &&
|
||||
process.env.PI_CACHE_RETENTION === "long"
|
||||
) {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get prompt cache retention based on cacheRetention and base URL.
|
||||
* Only applies to direct OpenAI API calls (api.openai.com).
|
||||
*/
|
||||
function getPromptCacheRetention(
|
||||
baseUrl: string,
|
||||
cacheRetention: CacheRetention,
|
||||
): "24h" | undefined {
|
||||
if (cacheRetention !== "long") {
|
||||
return undefined;
|
||||
}
|
||||
if (baseUrl.includes("api.openai.com")) {
|
||||
return "24h";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Options specific to the OpenAI Responses API. */
export interface OpenAIResponsesOptions extends StreamOptions {
  /**
   * Reasoning effort. "xhigh" should only be passed for models where
   * supportsXhigh(model) is true (see streamSimpleOpenAIResponses).
   */
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
  /** Reasoning summary verbosity; buildParams falls back to "auto" when effort is set. */
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
  /** OpenAI service tier; affects cost multipliers (see applyServiceTierPricing). */
  serviceTier?: ResponseCreateParamsStreaming["service_tier"];
}
|
||||
|
||||
/**
 * Stream a completion from the OpenAI Responses API.
 *
 * Returns an AssistantMessageEventStream immediately; all network work runs
 * in a detached async task that builds the request, emits "start", forwards
 * incremental events via processResponsesStream, and finishes with either a
 * "done" or an "error" event before ending the stream.
 */
export const streamOpenAIResponses: StreamFunction<
  "openai-responses",
  OpenAIResponsesOptions
> = (
  model: Model<"openai-responses">,
  context: Context,
  options?: OpenAIResponsesOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  // Start async processing; the stream is returned to the caller right away.
  (async () => {
    // Accumulator for the final assistant message; mutated in place by
    // processResponsesStream as events arrive.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    try {
      // Create OpenAI client (explicit key > provider env key > empty).
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      const client = createClient(model, context, apiKey, options?.headers);
      const params = buildParams(model, context, options);
      options?.onPayload?.(params);
      const openaiStream = await client.responses.create(
        params,
        options?.signal ? { signal: options.signal } : undefined,
      );
      // "start" is pushed only after the request is accepted, so a failed
      // request produces a single "error" event with no preceding "start".
      stream.push({ type: "start", partial: output });

      await processResponsesStream(openaiStream, output, stream, model, {
        serviceTier: options?.serviceTier,
        applyServiceTierPricing,
      });

      // Abort may have fired mid-stream; route it through the error path.
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip streaming-only bookkeeping before surfacing the partial message.
      for (const block of output.content)
        delete (block as { index?: number }).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage =
        error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
|
||||
|
||||
export const streamSimpleOpenAIResponses: StreamFunction<
|
||||
"openai-responses",
|
||||
SimpleStreamOptions
|
||||
> = (
|
||||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model)
|
||||
? options?.reasoning
|
||||
: clampReasoning(options?.reasoning);
|
||||
|
||||
return streamOpenAIResponses(model, context, {
|
||||
...base,
|
||||
reasoningEffort,
|
||||
} satisfies OpenAIResponsesOptions);
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
apiKey?: string,
|
||||
optionsHeaders?: Record<string, string>,
|
||||
) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
|
||||
const headers = { ...model.headers };
|
||||
if (model.provider === "github-copilot") {
|
||||
const hasImages = hasCopilotVisionInput(context.messages);
|
||||
const copilotHeaders = buildCopilotDynamicHeaders({
|
||||
messages: context.messages,
|
||||
hasImages,
|
||||
});
|
||||
Object.assign(headers, copilotHeaders);
|
||||
}
|
||||
|
||||
// Merge options headers last so they can override defaults
|
||||
if (optionsHeaders) {
|
||||
Object.assign(headers, optionsHeaders);
|
||||
}
|
||||
|
||||
return new OpenAI({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: headers,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assemble the streaming request payload for the Responses API: converted
 * messages, prompt-cache settings, sampling limits, tools, and reasoning
 * configuration.
 *
 * Note: may append a synthetic "developer" message to `messages` for
 * gpt-5 models when reasoning is disabled (see below).
 */
function buildParams(
  model: Model<"openai-responses">,
  context: Context,
  options?: OpenAIResponsesOptions,
) {
  const messages = convertResponsesMessages(
    model,
    context,
    OPENAI_TOOL_CALL_PROVIDERS,
  );

  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
  const params: ResponseCreateParamsStreaming = {
    model: model.id,
    input: messages,
    stream: true,
    // Omit the cache key entirely when caching is disabled ("none").
    prompt_cache_key:
      cacheRetention === "none" ? undefined : options?.sessionId,
    prompt_cache_retention: getPromptCacheRetention(
      model.baseUrl,
      cacheRetention,
    ),
    // Do not persist responses server-side.
    store: false,
  };

  if (options?.maxTokens) {
    params.max_output_tokens = options?.maxTokens;
  }

  if (options?.temperature !== undefined) {
    params.temperature = options?.temperature;
  }

  if (options?.serviceTier !== undefined) {
    params.service_tier = options.serviceTier;
  }

  if (context.tools) {
    params.tools = convertResponsesTools(context.tools);
  }

  if (model.reasoning) {
    if (options?.reasoningEffort || options?.reasoningSummary) {
      params.reasoning = {
        effort: options?.reasoningEffort || "medium",
        summary: options?.reasoningSummary || "auto",
      };
      // Encrypted reasoning is needed to replay thinking across turns
      // since store:false means the server keeps nothing.
      params.include = ["reasoning.encrypted_content"];
    } else {
      if (model.name.startsWith("gpt-5")) {
        // gpt-5 has no official "reasoning off" switch; this undocumented
        // "Juice: 0" developer message is the community workaround, see
        // https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
        messages.push({
          role: "developer",
          content: [
            {
              type: "input_text",
              text: "# Juice: 0 !important",
            },
          ],
        });
      }
    }
  }

  return params;
}
|
||||
|
||||
function getServiceTierCostMultiplier(
|
||||
serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
|
||||
): number {
|
||||
switch (serviceTier) {
|
||||
case "flex":
|
||||
return 0.5;
|
||||
case "priority":
|
||||
return 2;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
function applyServiceTierPricing(
|
||||
usage: Usage,
|
||||
serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
|
||||
) {
|
||||
const multiplier = getServiceTierCostMultiplier(serviceTier);
|
||||
if (multiplier === 1) return;
|
||||
|
||||
usage.cost.input *= multiplier;
|
||||
usage.cost.output *= multiplier;
|
||||
usage.cost.cacheRead *= multiplier;
|
||||
usage.cost.cacheWrite *= multiplier;
|
||||
usage.cost.total =
|
||||
usage.cost.input +
|
||||
usage.cost.output +
|
||||
usage.cost.cacheRead +
|
||||
usage.cost.cacheWrite;
|
||||
}
|
||||
216
packages/ai/src/providers/register-builtins.ts
Normal file
216
packages/ai/src/providers/register-builtins.ts
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
import { clearApiProviders, registerApiProvider } from "../api-registry.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
AssistantMessageEvent,
|
||||
Context,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamOptions,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
|
||||
import {
|
||||
streamAzureOpenAIResponses,
|
||||
streamSimpleAzureOpenAIResponses,
|
||||
} from "./azure-openai-responses.js";
|
||||
import { streamGoogle, streamSimpleGoogle } from "./google.js";
|
||||
import {
|
||||
streamGoogleGeminiCli,
|
||||
streamSimpleGoogleGeminiCli,
|
||||
} from "./google-gemini-cli.js";
|
||||
import {
|
||||
streamGoogleVertex,
|
||||
streamSimpleGoogleVertex,
|
||||
} from "./google-vertex.js";
|
||||
import { streamMistral, streamSimpleMistral } from "./mistral.js";
|
||||
import {
|
||||
streamOpenAICodexResponses,
|
||||
streamSimpleOpenAICodexResponses,
|
||||
} from "./openai-codex-responses.js";
|
||||
import {
|
||||
streamOpenAICompletions,
|
||||
streamSimpleOpenAICompletions,
|
||||
} from "./openai-completions.js";
|
||||
import {
|
||||
streamOpenAIResponses,
|
||||
streamSimpleOpenAIResponses,
|
||||
} from "./openai-responses.js";
|
||||
|
||||
/**
 * Shape of the lazily imported Bedrock provider module (see the
 * BEDROCK_PROVIDER_SPECIFIER import below): the stream/streamSimple pair
 * that registerApiProvider expects.
 */
interface BedrockProviderModule {
  // Full-featured streaming entry point.
  streamBedrock: (
    model: Model<"bedrock-converse-stream">,
    context: Context,
    options?: StreamOptions,
  ) => AsyncIterable<AssistantMessageEvent>;
  // Simplified-options streaming entry point.
  streamSimpleBedrock: (
    model: Model<"bedrock-converse-stream">,
    context: Context,
    options?: SimpleStreamOptions,
  ) => AsyncIterable<AssistantMessageEvent>;
}
|
||||
|
||||
// Indirection for the lazy Bedrock import below.
type DynamicImport = (specifier: string) => Promise<unknown>;

const dynamicImport: DynamicImport = (specifier) => import(specifier);
// The specifier is deliberately split into two string pieces — presumably so
// bundlers/static analyzers do not eagerly resolve and bundle the AWS-SDK-heavy
// module. NOTE(review): intent inferred from the split; confirm before changing.
const BEDROCK_PROVIDER_SPECIFIER = "./amazon-" + "bedrock.js";

// Injection hook: when set via setBedrockProviderModule, this is returned
// instead of dynamically importing the real module.
let bedrockProviderModuleOverride: BedrockProviderModule | undefined;
|
||||
|
||||
/**
 * Inject a replacement Bedrock provider module (e.g. for tests); subsequent
 * lazy loads return it instead of importing the real module.
 */
export function setBedrockProviderModule(module: BedrockProviderModule): void {
  bedrockProviderModuleOverride = module;
}
|
||||
|
||||
async function loadBedrockProviderModule(): Promise<BedrockProviderModule> {
|
||||
if (bedrockProviderModuleOverride) {
|
||||
return bedrockProviderModuleOverride;
|
||||
}
|
||||
const module = await dynamicImport(BEDROCK_PROVIDER_SPECIFIER);
|
||||
return module as BedrockProviderModule;
|
||||
}
|
||||
|
||||
function forwardStream(
|
||||
target: AssistantMessageEventStream,
|
||||
source: AsyncIterable<AssistantMessageEvent>,
|
||||
): void {
|
||||
(async () => {
|
||||
for await (const event of source) {
|
||||
target.push(event);
|
||||
}
|
||||
target.end();
|
||||
})();
|
||||
}
|
||||
|
||||
function createLazyLoadErrorMessage(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
error: unknown,
|
||||
): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: "bedrock-converse-stream",
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
errorMessage: error instanceof Error ? error.message : String(error),
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function streamBedrockLazy(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: StreamOptions,
|
||||
): AssistantMessageEventStream {
|
||||
const outer = new AssistantMessageEventStream();
|
||||
|
||||
loadBedrockProviderModule()
|
||||
.then((module) => {
|
||||
const inner = module.streamBedrock(model, context, options);
|
||||
forwardStream(outer, inner);
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = createLazyLoadErrorMessage(model, error);
|
||||
outer.push({ type: "error", reason: "error", error: message });
|
||||
outer.end(message);
|
||||
});
|
||||
|
||||
return outer;
|
||||
}
|
||||
|
||||
function streamSimpleBedrockLazy(
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream {
|
||||
const outer = new AssistantMessageEventStream();
|
||||
|
||||
loadBedrockProviderModule()
|
||||
.then((module) => {
|
||||
const inner = module.streamSimpleBedrock(model, context, options);
|
||||
forwardStream(outer, inner);
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = createLazyLoadErrorMessage(model, error);
|
||||
outer.push({ type: "error", reason: "error", error: message });
|
||||
outer.end(message);
|
||||
});
|
||||
|
||||
return outer;
|
||||
}
|
||||
|
||||
/**
 * Register the built-in stream implementations for every supported API
 * under their API identifiers.
 *
 * Bedrock is registered via the lazy wrappers above so the heavy AWS SDK
 * module is only imported when a Bedrock model is actually streamed.
 */
export function registerBuiltInApiProviders(): void {
  registerApiProvider({
    api: "anthropic-messages",
    stream: streamAnthropic,
    streamSimple: streamSimpleAnthropic,
  });

  registerApiProvider({
    api: "openai-completions",
    stream: streamOpenAICompletions,
    streamSimple: streamSimpleOpenAICompletions,
  });

  registerApiProvider({
    api: "mistral-conversations",
    stream: streamMistral,
    streamSimple: streamSimpleMistral,
  });

  registerApiProvider({
    api: "openai-responses",
    stream: streamOpenAIResponses,
    streamSimple: streamSimpleOpenAIResponses,
  });

  registerApiProvider({
    api: "azure-openai-responses",
    stream: streamAzureOpenAIResponses,
    streamSimple: streamSimpleAzureOpenAIResponses,
  });

  registerApiProvider({
    api: "openai-codex-responses",
    stream: streamOpenAICodexResponses,
    streamSimple: streamSimpleOpenAICodexResponses,
  });

  registerApiProvider({
    api: "google-generative-ai",
    stream: streamGoogle,
    streamSimple: streamSimpleGoogle,
  });

  registerApiProvider({
    api: "google-gemini-cli",
    stream: streamGoogleGeminiCli,
    streamSimple: streamSimpleGoogleGeminiCli,
  });

  registerApiProvider({
    api: "google-vertex",
    stream: streamGoogleVertex,
    streamSimple: streamSimpleGoogleVertex,
  });

  // Lazy wrappers defer the dynamic import of the AWS SDK-backed module.
  registerApiProvider({
    api: "bedrock-converse-stream",
    stream: streamBedrockLazy,
    streamSimple: streamSimpleBedrockLazy,
  });
}
|
||||
|
||||
/**
 * Reset the registry to exactly the built-in providers, discarding any
 * providers registered externally.
 */
export function resetApiProviders(): void {
  clearApiProviders();
  registerBuiltInApiProviders();
}

// Module side effect: built-in providers are registered on first import.
registerBuiltInApiProviders();
|
||||
59
packages/ai/src/providers/simple-options.ts
Normal file
59
packages/ai/src/providers/simple-options.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import type {
|
||||
Api,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
StreamOptions,
|
||||
ThinkingBudgets,
|
||||
ThinkingLevel,
|
||||
} from "../types.js";
|
||||
|
||||
export function buildBaseOptions(
|
||||
model: Model<Api>,
|
||||
options?: SimpleStreamOptions,
|
||||
apiKey?: string,
|
||||
): StreamOptions {
|
||||
return {
|
||||
temperature: options?.temperature,
|
||||
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
|
||||
signal: options?.signal,
|
||||
apiKey: apiKey || options?.apiKey,
|
||||
cacheRetention: options?.cacheRetention,
|
||||
sessionId: options?.sessionId,
|
||||
headers: options?.headers,
|
||||
onPayload: options?.onPayload,
|
||||
maxRetryDelayMs: options?.maxRetryDelayMs,
|
||||
metadata: options?.metadata,
|
||||
};
|
||||
}
|
||||
|
||||
export function clampReasoning(
|
||||
effort: ThinkingLevel | undefined,
|
||||
): Exclude<ThinkingLevel, "xhigh"> | undefined {
|
||||
return effort === "xhigh" ? "high" : effort;
|
||||
}
|
||||
|
||||
export function adjustMaxTokensForThinking(
|
||||
baseMaxTokens: number,
|
||||
modelMaxTokens: number,
|
||||
reasoningLevel: ThinkingLevel,
|
||||
customBudgets?: ThinkingBudgets,
|
||||
): { maxTokens: number; thinkingBudget: number } {
|
||||
const defaultBudgets: ThinkingBudgets = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
};
|
||||
const budgets = { ...defaultBudgets, ...customBudgets };
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
const level = clampReasoning(reasoningLevel)!;
|
||||
let thinkingBudget = budgets[level]!;
|
||||
const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);
|
||||
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return { maxTokens, thinkingBudget };
|
||||
}
|
||||
193
packages/ai/src/providers/transform-messages.ts
Normal file
193
packages/ai/src/providers/transform-messages.ts
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Message,
|
||||
Model,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
|
||||
/**
 * Prepare a conversation history for replay against `model`.
 *
 * Two passes:
 * 1. Per-message transform — thinking blocks produced by a *different*
 *    provider/api/model are dropped (when redacted or empty) or downgraded
 *    to plain text; cross-model tool calls lose their thought signatures
 *    and, when `normalizeToolCallId` is supplied, get normalized IDs.
 *    (OpenAI Responses IDs can exceed 450 chars and contain characters like
 *    `|`, while Anthropic requires ^[a-zA-Z0-9_-]+$, max 64 chars.)
 *    toolResult messages referencing a remapped ID are re-keyed to match.
 * 2. Structural repair — errored/aborted assistant turns are removed, and
 *    synthetic error tool results are inserted for tool calls that never
 *    received a result, so the replayed history satisfies the APIs'
 *    call/result pairing rules.
 *
 * The input array is not mutated; new objects are produced where changes
 * are needed.
 *
 * @param messages History to transform.
 * @param model Target model the history will be replayed against.
 * @param normalizeToolCallId Optional hook producing a provider-safe tool
 *   call ID; only invoked for messages that came from a different model.
 * @returns A new message list safe to send to `model`.
 */
export function transformMessages<TApi extends Api>(
  messages: Message[],
  model: Model<TApi>,
  normalizeToolCallId?: (
    id: string,
    model: Model<TApi>,
    source: AssistantMessage,
  ) => string,
): Message[] {
  // Map of original tool call IDs to normalized IDs, filled while walking
  // assistant messages and consulted for the toolResults that follow them.
  const toolCallIdMap = new Map<string, string>();

  // First pass: transform messages (thinking blocks, tool call ID normalization)
  const transformed = messages.map((msg) => {
    // User messages pass through unchanged
    if (msg.role === "user") {
      return msg;
    }

    // Handle toolResult messages - normalize toolCallId if we have a mapping
    // (relies on map ordering: the owning assistant message precedes its results)
    if (msg.role === "toolResult") {
      const normalizedId = toolCallIdMap.get(msg.toolCallId);
      if (normalizedId && normalizedId !== msg.toolCallId) {
        return { ...msg, toolCallId: normalizedId };
      }
      return msg;
    }

    // Assistant messages need transformation check
    if (msg.role === "assistant") {
      const assistantMsg = msg as AssistantMessage;
      // "Same model" means same provider AND api AND model id; anything else
      // is treated as cross-model replay.
      const isSameModel =
        assistantMsg.provider === model.provider &&
        assistantMsg.api === model.api &&
        assistantMsg.model === model.id;

      const transformedContent = assistantMsg.content.flatMap((block) => {
        if (block.type === "thinking") {
          // Redacted thinking is opaque encrypted content, only valid for the same model.
          // Drop it for cross-model to avoid API errors.
          if (block.redacted) {
            return isSameModel ? block : [];
          }
          // For same model: keep thinking blocks with signatures (needed for replay)
          // even if the thinking text is empty (OpenAI encrypted reasoning)
          if (isSameModel && block.thinkingSignature) return block;
          // Skip empty thinking blocks, convert others to plain text
          if (!block.thinking || block.thinking.trim() === "") return [];
          if (isSameModel) return block;
          // Cross-model: downgrade thinking to a plain text block.
          return {
            type: "text" as const,
            text: block.thinking,
          };
        }

        if (block.type === "text") {
          if (isSameModel) return block;
          // Cross-model: rebuild as a bare text block, dropping any
          // provider-specific extra fields carried on the original.
          return {
            type: "text" as const,
            text: block.text,
          };
        }

        if (block.type === "toolCall") {
          const toolCall = block as ToolCall;
          let normalizedToolCall: ToolCall = toolCall;

          // Thought signatures are model-specific; strip them cross-model.
          if (!isSameModel && toolCall.thoughtSignature) {
            normalizedToolCall = { ...toolCall };
            delete (normalizedToolCall as { thoughtSignature?: string })
              .thoughtSignature;
          }

          // Remap the ID cross-model and remember the mapping so the
          // corresponding toolResult (seen later) is re-keyed too.
          if (!isSameModel && normalizeToolCallId) {
            const normalizedId = normalizeToolCallId(
              toolCall.id,
              model,
              assistantMsg,
            );
            if (normalizedId !== toolCall.id) {
              toolCallIdMap.set(toolCall.id, normalizedId);
              normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
            }
          }

          return normalizedToolCall;
        }

        // Any other block type passes through untouched.
        return block;
      });

      return {
        ...assistantMsg,
        content: transformedContent,
      };
    }
    return msg;
  });

  // Second pass: insert synthetic empty tool results for orphaned tool calls
  // This preserves thinking signatures and satisfies API requirements
  const result: Message[] = [];
  // Tool calls from the most recent assistant message that may still be
  // awaiting results, and the result IDs seen since then.
  let pendingToolCalls: ToolCall[] = [];
  let existingToolResultIds = new Set<string>();

  for (let i = 0; i < transformed.length; i++) {
    const msg = transformed[i];

    if (msg.role === "assistant") {
      // If we have pending orphaned tool calls from a previous assistant, insert synthetic results now
      if (pendingToolCalls.length > 0) {
        for (const tc of pendingToolCalls) {
          if (!existingToolResultIds.has(tc.id)) {
            result.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            } as ToolResultMessage);
          }
        }
        pendingToolCalls = [];
        existingToolResultIds = new Set();
      }

      // Skip errored/aborted assistant messages entirely.
      // These are incomplete turns that shouldn't be replayed:
      // - May have partial content (reasoning without message, incomplete tool calls)
      // - Replaying them can cause API errors (e.g., OpenAI "reasoning without following item")
      // - The model should retry from the last valid state
      const assistantMsg = msg as AssistantMessage;
      if (
        assistantMsg.stopReason === "error" ||
        assistantMsg.stopReason === "aborted"
      ) {
        continue;
      }

      // Track tool calls from this assistant message
      const toolCalls = assistantMsg.content.filter(
        (b) => b.type === "toolCall",
      ) as ToolCall[];
      if (toolCalls.length > 0) {
        pendingToolCalls = toolCalls;
        existingToolResultIds = new Set();
      }

      result.push(msg);
    } else if (msg.role === "toolResult") {
      existingToolResultIds.add(msg.toolCallId);
      result.push(msg);
    } else if (msg.role === "user") {
      // User message interrupts tool flow - insert synthetic results for orphaned calls
      if (pendingToolCalls.length > 0) {
        for (const tc of pendingToolCalls) {
          if (!existingToolResultIds.has(tc.id)) {
            result.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            } as ToolResultMessage);
          }
        }
        pendingToolCalls = [];
        existingToolResultIds = new Set();
      }
      result.push(msg);
    } else {
      result.push(msg);
    }
  }

  // NOTE(review): tool calls still pending at the very end of the history are
  // intentionally left without synthetic results here — presumably the caller
  // is about to execute them; confirm against call sites.
  return result;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue