diff --git a/packages/ai/src/agent/agent.ts b/packages/ai/src/agent/agent.ts
index 540995a5..964df2a1 100644
--- a/packages/ai/src/agent/agent.ts
+++ b/packages/ai/src/agent/agent.ts
@@ -10,6 +10,7 @@ export function prompt(
 	context: AgentContext,
 	config: PromptConfig,
 	signal?: AbortSignal,
+	streamFn?: typeof streamSimple,
 ): EventStream {
 	const stream = new EventStream(
 		(event) => event.type === "agent_end",
@@ -45,7 +46,7 @@ export function prompt(
 			firstTurn = false;
 		}
 		// Stream assistant response
-		const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream);
+		const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream, streamFn);
 		newMessages.push(assistantMessage);

 		// Check for tool calls
@@ -74,6 +75,7 @@ async function streamAssistantResponse(
 	config: PromptConfig,
 	signal: AbortSignal | undefined,
 	stream: EventStream,
+	streamFn?: typeof streamSimple,
 ): Promise<AssistantMessage> {
 	// Convert AgentContext to Context for streamSimple
 	// Use a copy of messages to avoid mutating the original context
@@ -93,7 +95,9 @@
 		tools: context.tools, // AgentTool extends Tool, so this works
 	};

-	const response = await streamSimple(config.model, processedContext, { ...config, signal });
+	// Use custom stream function if provided, otherwise use default streamSimple
+	const streamFunction = streamFn || streamSimple;
+	const response = await streamFunction(config.model, processedContext, { ...config, signal });

 	let partialMessage: AssistantMessage | null = null;
 	let addedPartial = false;
diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts
index 67694c6b..a4651020 100644
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@@ -1047,7 +1047,7 @@ export const MODELS = {
 				input: 0.6,
 				output: 4,
 				cacheRead: 0.15,
-				cacheWrite: 4,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1064,7 +1064,7 @@ export const MODELS = {
 				input: 0.3,
 				output: 0.5,
 				cacheRead: 0.075,
-				cacheWrite: 0.5,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1081,7 +1081,7 @@ export const MODELS = {
 				input: 5,
 				output: 15,
 				cacheRead: 5,
-				cacheWrite: 15,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 4096,
@@ -1098,7 +1098,7 @@ export const MODELS = {
 				input: 5,
 				output: 25,
 				cacheRead: 1.25,
-				cacheWrite: 25,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1115,7 +1115,7 @@ export const MODELS = {
 				input: 3,
 				output: 15,
 				cacheRead: 0.75,
-				cacheWrite: 15,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1132,7 +1132,7 @@ export const MODELS = {
 				input: 0.3,
 				output: 0.5,
 				cacheRead: 0.075,
-				cacheWrite: 0.5,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1149,7 +1149,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 8192,
 			maxTokens: 4096,
@@ -1166,7 +1166,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1183,7 +1183,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 8192,
 			maxTokens: 4096,
@@ -1200,7 +1200,7 @@ export const MODELS = {
 				input: 3,
 				output: 15,
 				cacheRead: 0.75,
-				cacheWrite: 15,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1217,7 +1217,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 8192,
 			maxTokens: 4096,
@@ -1234,7 +1234,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1251,7 +1251,7 @@ export const MODELS = {
 				input: 5,
 				output: 25,
 				cacheRead: 1.25,
-				cacheWrite: 25,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1268,7 +1268,7 @@ export const MODELS = {
 				input: 2,
 				output: 10,
 				cacheRead: 2,
-				cacheWrite: 10,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1285,7 +1285,7 @@ export const MODELS = {
 				input: 3,
 				output: 15,
 				cacheRead: 0.75,
-				cacheWrite: 15,
+				cacheWrite: 0,
 			},
 			contextWindow: 256000,
 			maxTokens: 64000,
@@ -1302,7 +1302,7 @@ export const MODELS = {
 				input: 5,
 				output: 15,
 				cacheRead: 5,
-				cacheWrite: 15,
+				cacheWrite: 0,
 			},
 			contextWindow: 8192,
 			maxTokens: 4096,
@@ -1319,7 +1319,7 @@ export const MODELS = {
 				input: 0.6,
 				output: 4,
 				cacheRead: 0.15,
-				cacheWrite: 4,
+				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 8192,
@@ -1413,9 +1413,94 @@ export const MODELS = {
 		} satisfies Model<"anthropic-messages">,
 	},
 	openrouter: {
-		"nvidia/nemotron-nano-9b-v2": {
-			id: "nvidia/nemotron-nano-9b-v2",
-			name: "NVIDIA: Nemotron Nano 9B V2",
+		"qwen/qwen3-next-80b-a3b-thinking": {
+			id: "qwen/qwen3-next-80b-a3b-thinking",
+			name: "Qwen: Qwen3 Next 80B A3B Thinking",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.09782604,
+				output: 0.391304304,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
+		"qwen/qwen3-next-80b-a3b-instruct": {
+			id: "qwen/qwen3-next-80b-a3b-instruct",
+			name: "Qwen: Qwen3 Next 80B A3B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.09782604,
+				output: 0.391304304,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
+		"meituan/longcat-flash-chat": {
+			id: "meituan/longcat-flash-chat",
+			name: "Meituan: LongCat Flash Chat",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.15,
+				output: 0.75,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131072,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
+		"qwen/qwen-plus-2025-07-28": {
+			id: "qwen/qwen-plus-2025-07-28",
+			name: "Qwen: Qwen Plus 0728",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.39999999999999997,
+				output: 1.2,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1000000,
+			maxTokens: 32768,
+		} satisfies Model<"openai-completions">,
+		"qwen/qwen-plus-2025-07-28:thinking": {
+			id: "qwen/qwen-plus-2025-07-28:thinking",
+			name: "Qwen: Qwen Plus 0728 (thinking)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.39999999999999997,
+				output: 4,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1000000,
+			maxTokens: 32768,
+		} satisfies Model<"openai-completions">,
+		"nvidia/nemotron-nano-9b-v2:free": {
+			id: "nvidia/nemotron-nano-9b-v2:free",
+			name: "NVIDIA: Nemotron Nano 9B V2 (free)",
 			api: "openai-completions",
 			provider: "openrouter",
"https://openrouter.ai/api/v1", @@ -1430,6 +1515,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "nvidia/nemotron-nano-9b-v2": { + id: "nvidia/nemotron-nano-9b-v2", + name: "NVIDIA: Nemotron Nano 9B V2", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.04, + output: 0.16, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "openrouter/sonoma-dusk-alpha": { id: "openrouter/sonoma-dusk-alpha", name: "Sonoma Dusk Alpha", @@ -1490,8 +1592,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.2962, - output: 1.1852999999999998, + input: 0.38043459999999996, + output: 1.52173896, cacheRead: 0, cacheWrite: 0, }, @@ -1541,8 +1643,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.0713, - output: 0.2852, + input: 0.08967387, + output: 0.358695612, cacheRead: 0, cacheWrite: 0, }, @@ -1558,8 +1660,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09329544, - output: 0.3733632, + input: 0.127173852, + output: 0.5086955952000001, cacheRead: 0, cacheWrite: 0, }, @@ -1575,8 +1677,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.1999188, - output: 0.800064, + input: 0.24999987999999998, + output: 0.999999888, cacheRead: 0, cacheWrite: 0, }, @@ -1609,8 +1711,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.19999999999999998, - output: 0.7999999999999999, + input: 0.24999987999999998, + output: 0.999999888, cacheRead: 0, cacheWrite: 0, }, @@ -1711,8 +1813,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.051830799999999996, - output: 0.207424, + input: 0.07065213999999999, + output: 0.282608664, cacheRead: 0, cacheWrite: 0, }, @@ -1728,8 +1830,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.051830799999999996, - output: 0.207424, + input: 0.07065213999999999, + output: 0.282608664, cacheRead: 0, cacheWrite: 0, }, @@ -1745,8 +1847,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.32986602, - output: 1.3201056, + input: 0.41249980199999997, + output: 1.6499998152000002, cacheRead: 0, cacheWrite: 0, }, @@ -1796,8 +1898,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.077968332, - output: 0.31202496, + input: 0.0974999532, + output: 0.38999995632, cacheRead: 0, cacheWrite: 0, }, @@ -1847,8 +1949,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.19999999999999998, - output: 0.7999999999999999, + input: 0.24999987999999998, + output: 0.999999888, cacheRead: 0, cacheWrite: 0, }, @@ -1864,8 +1966,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.077968332, - output: 0.31202496, + input: 0.0974999532, + output: 0.38999995632, cacheRead: 0, cacheWrite: 0, }, @@ -2068,8 +2170,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.1999188, - output: 0.800064, + input: 0.24999987999999998, + output: 0.999999888, cacheRead: 0, cacheWrite: 0, }, @@ -2102,8 +2204,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.01999188, - output: 0.0800064, + input: 0.035869548, + output: 0.14347824480000002, cacheRead: 0, cacheWrite: 0, }, @@ -2204,8 +2306,8 @@ export const MODELS = { 
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.01999188,
-				output: 0.0800064,
+				input: 0.035869548,
+				output: 0.14347824480000002,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2238,8 +2340,8 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.017992691999999998,
-				output: 0.07200576,
+				input: 0.0322825932,
+				output: 0.12913042032,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2374,8 +2476,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.1999188,
-				output: 0.800064,
+				input: 0.24999987999999998,
+				output: 0.999999888,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2408,8 +2510,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 0.01999188,
-				output: 0.0800064,
+				input: 0.03804346,
+				output: 0.152173896,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2510,8 +2612,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.01999188,
-				output: 0.0800064,
+				input: 0.03804346,
+				output: 0.152173896,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2527,8 +2629,8 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.025915399999999998,
-				output: 0.103712,
+				input: 0.03260868,
+				output: 0.130434768,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2578,8 +2680,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.1999188,
-				output: 0.800064,
+				input: 0.24999987999999998,
+				output: 0.999999888,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2765,8 +2867,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.12,
-				output: 0.3,
+				input: 0.6,
+				output: 0.6,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2816,8 +2918,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.051830799999999996,
-				output: 0.207424,
+				input: 0.06521736,
+				output: 0.260869536,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -2841,23 +2943,6 @@ export const MODELS = {
 			contextWindow: 32768,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"cohere/command-r-08-2024": {
-			id: "cohere/command-r-08-2024",
-			name: "Cohere: Command R (08-2024)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.15,
-				output: 0.6,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4000,
-		} satisfies Model<"openai-completions">,
 		"cohere/command-r-plus-08-2024": {
 			id: "cohere/command-r-plus-08-2024",
 			name: "Cohere: Command R+ (08-2024)",
@@ -2875,6 +2960,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4000,
 		} satisfies Model<"openai-completions">,
+		"cohere/command-r-08-2024": {
+			id: "cohere/command-r-08-2024",
+			name: "Cohere: Command R (08-2024)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.15,
+				output: 0.6,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 4000,
+		} satisfies Model<"openai-completions">,
 		"microsoft/phi-3.5-mini-128k-instruct": {
 			id: "microsoft/phi-3.5-mini-128k-instruct",
 			name: "Microsoft: Phi-3.5 Mini 128K Instruct",
@@ -2901,14 +3003,31 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.09999999999999999,
-				output: 0.28,
+				input: 0.12,
+				output: 0.3,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3.1-8b-instruct": {
+			id: "meta-llama/llama-3.1-8b-instruct",
Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.015, + output: 0.02, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-405b-instruct": { id: "meta-llama/llama-3.1-405b-instruct", name: "Meta: Llama 3.1 405B Instruct", @@ -2943,23 +3062,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-8b-instruct": { - id: "meta-llama/llama-3.1-8b-instruct", - name: "Meta: Llama 3.1 8B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.015, - output: 0.02, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", @@ -2969,14 +3071,31 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.01, - output: 0.0400032, + input: 0.017934774, + output: 0.07173912240000001, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 128000, } satisfies Model<"openai-completions">, + "mistralai/mistral-7b-instruct-v0.3": { + id: "mistralai/mistral-7b-instruct-v0.3", + name: "Mistral: Mistral 7B Instruct v0.3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.028, + output: 0.054, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mistral-7b-instruct:free": { id: "mistralai/mistral-7b-instruct:free", name: "Mistral: Mistral 7B Instruct (free)", @@ -3011,23 +3130,6 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, - "mistralai/mistral-7b-instruct-v0.3": { - id: "mistralai/mistral-7b-instruct-v0.3", - name: "Mistral: Mistral 7B Instruct v0.3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.028, - output: 0.054, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "microsoft/phi-3-mini-128k-instruct": { id: "microsoft/phi-3-mini-128k-instruct", name: "Microsoft: Phi-3 Mini 128K Instruct", @@ -3198,23 +3300,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-small": { - id: "mistralai/mistral-small", - name: "Mistral Small", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-tiny": { id: "mistralai/mistral-tiny", name: "Mistral Tiny", @@ -3232,6 +3317,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, + "mistralai/mistral-small": { + id: "mistralai/mistral-small", + name: "Mistral Small", + api: "openai-completions", + provider: "openrouter", + baseUrl: 
"https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x7b-instruct": { id: "mistralai/mixtral-8x7b-instruct", name: "Mistral: Mixtral 8x7B Instruct", @@ -3241,8 +3343,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.08, - output: 0.24, + input: 0.39999999999999997, + output: 0.39999999999999997, cacheRead: 0, cacheWrite: 0, },