Make stream function configurable in agent

This commit is contained in:
Mario Zechner 2025-09-15 20:31:53 +02:00
parent 1c9ab1ca24
commit 5f7a50deef
2 changed files with 246 additions and 140 deletions

View file

@ -10,6 +10,7 @@ export function prompt(
context: AgentContext, context: AgentContext,
config: PromptConfig, config: PromptConfig,
signal?: AbortSignal, signal?: AbortSignal,
streamFn?: typeof streamSimple,
): EventStream<AgentEvent, AgentContext["messages"]> { ): EventStream<AgentEvent, AgentContext["messages"]> {
const stream = new EventStream<AgentEvent, AgentContext["messages"]>( const stream = new EventStream<AgentEvent, AgentContext["messages"]>(
(event) => event.type === "agent_end", (event) => event.type === "agent_end",
@ -45,7 +46,7 @@ export function prompt(
firstTurn = false; firstTurn = false;
} }
// Stream assistant response // Stream assistant response
const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream); const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream, streamFn);
newMessages.push(assistantMessage); newMessages.push(assistantMessage);
// Check for tool calls // Check for tool calls
@ -74,6 +75,7 @@ async function streamAssistantResponse(
config: PromptConfig, config: PromptConfig,
signal: AbortSignal | undefined, signal: AbortSignal | undefined,
stream: EventStream<AgentEvent, AgentContext["messages"]>, stream: EventStream<AgentEvent, AgentContext["messages"]>,
streamFn?: typeof streamSimple,
): Promise<AssistantMessage> { ): Promise<AssistantMessage> {
// Convert AgentContext to Context for streamSimple // Convert AgentContext to Context for streamSimple
// Use a copy of messages to avoid mutating the original context // Use a copy of messages to avoid mutating the original context
@ -93,7 +95,9 @@ async function streamAssistantResponse(
tools: context.tools, // AgentTool extends Tool, so this works tools: context.tools, // AgentTool extends Tool, so this works
}; };
const response = await streamSimple(config.model, processedContext, { ...config, signal }); // Use custom stream function if provided, otherwise use default streamSimple
const streamFunction = streamFn || streamSimple;
const response = await streamFunction(config.model, processedContext, { ...config, signal });
let partialMessage: AssistantMessage | null = null; let partialMessage: AssistantMessage | null = null;
let addedPartial = false; let addedPartial = false;

View file

@ -1047,7 +1047,7 @@ export const MODELS = {
input: 0.6, input: 0.6,
output: 4, output: 4,
cacheRead: 0.15, cacheRead: 0.15,
cacheWrite: 4, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1064,7 +1064,7 @@ export const MODELS = {
input: 0.3, input: 0.3,
output: 0.5, output: 0.5,
cacheRead: 0.075, cacheRead: 0.075,
cacheWrite: 0.5, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1081,7 +1081,7 @@ export const MODELS = {
input: 5, input: 5,
output: 15, output: 15,
cacheRead: 5, cacheRead: 5,
cacheWrite: 15, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 4096, maxTokens: 4096,
@ -1098,7 +1098,7 @@ export const MODELS = {
input: 5, input: 5,
output: 25, output: 25,
cacheRead: 1.25, cacheRead: 1.25,
cacheWrite: 25, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1115,7 +1115,7 @@ export const MODELS = {
input: 3, input: 3,
output: 15, output: 15,
cacheRead: 0.75, cacheRead: 0.75,
cacheWrite: 15, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1132,7 +1132,7 @@ export const MODELS = {
input: 0.3, input: 0.3,
output: 0.5, output: 0.5,
cacheRead: 0.075, cacheRead: 0.075,
cacheWrite: 0.5, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1149,7 +1149,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 8192, contextWindow: 8192,
maxTokens: 4096, maxTokens: 4096,
@ -1166,7 +1166,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1183,7 +1183,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 8192, contextWindow: 8192,
maxTokens: 4096, maxTokens: 4096,
@ -1200,7 +1200,7 @@ export const MODELS = {
input: 3, input: 3,
output: 15, output: 15,
cacheRead: 0.75, cacheRead: 0.75,
cacheWrite: 15, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1217,7 +1217,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 8192, contextWindow: 8192,
maxTokens: 4096, maxTokens: 4096,
@ -1234,7 +1234,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1251,7 +1251,7 @@ export const MODELS = {
input: 5, input: 5,
output: 25, output: 25,
cacheRead: 1.25, cacheRead: 1.25,
cacheWrite: 25, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1268,7 +1268,7 @@ export const MODELS = {
input: 2, input: 2,
output: 10, output: 10,
cacheRead: 2, cacheRead: 2,
cacheWrite: 10, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1285,7 +1285,7 @@ export const MODELS = {
input: 3, input: 3,
output: 15, output: 15,
cacheRead: 0.75, cacheRead: 0.75,
cacheWrite: 15, cacheWrite: 0,
}, },
contextWindow: 256000, contextWindow: 256000,
maxTokens: 64000, maxTokens: 64000,
@ -1302,7 +1302,7 @@ export const MODELS = {
input: 5, input: 5,
output: 15, output: 15,
cacheRead: 5, cacheRead: 5,
cacheWrite: 15, cacheWrite: 0,
}, },
contextWindow: 8192, contextWindow: 8192,
maxTokens: 4096, maxTokens: 4096,
@ -1319,7 +1319,7 @@ export const MODELS = {
input: 0.6, input: 0.6,
output: 4, output: 4,
cacheRead: 0.15, cacheRead: 0.15,
cacheWrite: 4, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 8192, maxTokens: 8192,
@ -1413,9 +1413,94 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
}, },
openrouter: { openrouter: {
"nvidia/nemotron-nano-9b-v2": { "qwen/qwen3-next-80b-a3b-thinking": {
id: "nvidia/nemotron-nano-9b-v2", id: "qwen/qwen3-next-80b-a3b-thinking",
name: "NVIDIA: Nemotron Nano 9B V2", name: "Qwen: Qwen3 Next 80B A3B Thinking",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.09782604,
output: 0.391304304,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen3-next-80b-a3b-instruct": {
id: "qwen/qwen3-next-80b-a3b-instruct",
name: "Qwen: Qwen3 Next 80B A3B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09782604,
output: 0.391304304,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meituan/longcat-flash-chat": {
id: "meituan/longcat-flash-chat",
name: "Meituan: LongCat Flash Chat",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.75,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"qwen/qwen-plus-2025-07-28": {
id: "qwen/qwen-plus-2025-07-28",
name: "Qwen: Qwen Plus 0728",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 1.2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"qwen/qwen-plus-2025-07-28:thinking": {
id: "qwen/qwen-plus-2025-07-28:thinking",
name: "Qwen: Qwen Plus 0728 (thinking)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 4,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"nvidia/nemotron-nano-9b-v2:free": {
id: "nvidia/nemotron-nano-9b-v2:free",
name: "NVIDIA: Nemotron Nano 9B V2 (free)",
api: "openai-completions", api: "openai-completions",
provider: "openrouter", provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1", baseUrl: "https://openrouter.ai/api/v1",
@ -1430,6 +1515,23 @@ export const MODELS = {
contextWindow: 128000, contextWindow: 128000,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"nvidia/nemotron-nano-9b-v2": {
id: "nvidia/nemotron-nano-9b-v2",
name: "NVIDIA: Nemotron Nano 9B V2",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.04,
output: 0.16,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openrouter/sonoma-dusk-alpha": { "openrouter/sonoma-dusk-alpha": {
id: "openrouter/sonoma-dusk-alpha", id: "openrouter/sonoma-dusk-alpha",
name: "Sonoma Dusk Alpha", name: "Sonoma Dusk Alpha",
@ -1490,8 +1592,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.2962, input: 0.38043459999999996,
output: 1.1852999999999998, output: 1.52173896,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1541,8 +1643,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.0713, input: 0.08967387,
output: 0.2852, output: 0.358695612,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1558,8 +1660,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.09329544, input: 0.127173852,
output: 0.3733632, output: 0.5086955952000001,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1575,8 +1677,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.1999188, input: 0.24999987999999998,
output: 0.800064, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1609,8 +1711,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.19999999999999998, input: 0.24999987999999998,
output: 0.7999999999999999, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1711,8 +1813,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.051830799999999996, input: 0.07065213999999999,
output: 0.207424, output: 0.282608664,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1728,8 +1830,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.051830799999999996, input: 0.07065213999999999,
output: 0.207424, output: 0.282608664,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1745,8 +1847,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.32986602, input: 0.41249980199999997,
output: 1.3201056, output: 1.6499998152000002,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1796,8 +1898,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.077968332, input: 0.0974999532,
output: 0.31202496, output: 0.38999995632,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1847,8 +1949,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.19999999999999998, input: 0.24999987999999998,
output: 0.7999999999999999, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -1864,8 +1966,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.077968332, input: 0.0974999532,
output: 0.31202496, output: 0.38999995632,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2068,8 +2170,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.1999188, input: 0.24999987999999998,
output: 0.800064, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2102,8 +2204,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.01999188, input: 0.035869548,
output: 0.0800064, output: 0.14347824480000002,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2204,8 +2306,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.01999188, input: 0.035869548,
output: 0.0800064, output: 0.14347824480000002,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2238,8 +2340,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.017992691999999998, input: 0.0322825932,
output: 0.07200576, output: 0.12913042032,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2374,8 +2476,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.1999188, input: 0.24999987999999998,
output: 0.800064, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2408,8 +2510,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0.01999188, input: 0.03804346,
output: 0.0800064, output: 0.152173896,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2510,8 +2612,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.01999188, input: 0.03804346,
output: 0.0800064, output: 0.152173896,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2527,8 +2629,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.025915399999999998, input: 0.03260868,
output: 0.103712, output: 0.130434768,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2578,8 +2680,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.1999188, input: 0.24999987999999998,
output: 0.800064, output: 0.999999888,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2765,8 +2867,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.12, input: 0.6,
output: 0.3, output: 0.6,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2816,8 +2918,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.051830799999999996, input: 0.06521736,
output: 0.207424, output: 0.260869536,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -2841,23 +2943,6 @@ export const MODELS = {
contextWindow: 32768, contextWindow: 32768,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"cohere/command-r-plus-08-2024": { "cohere/command-r-plus-08-2024": {
id: "cohere/command-r-plus-08-2024", id: "cohere/command-r-plus-08-2024",
name: "Cohere: Command R+ (08-2024)", name: "Cohere: Command R+ (08-2024)",
@ -2875,6 +2960,23 @@ export const MODELS = {
contextWindow: 128000, contextWindow: 128000,
maxTokens: 4000, maxTokens: 4000,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"microsoft/phi-3.5-mini-128k-instruct": { "microsoft/phi-3.5-mini-128k-instruct": {
id: "microsoft/phi-3.5-mini-128k-instruct", id: "microsoft/phi-3.5-mini-128k-instruct",
name: "Microsoft: Phi-3.5 Mini 128K Instruct", name: "Microsoft: Phi-3.5 Mini 128K Instruct",
@ -2901,14 +3003,31 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.09999999999999999, input: 0.12,
output: 0.28, output: 0.3,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.015,
output: 0.02,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": { "meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct", id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct", name: "Meta: Llama 3.1 405B Instruct",
@ -2943,23 +3062,6 @@ export const MODELS = {
contextWindow: 131072, contextWindow: 131072,
maxTokens: 16384, maxTokens: 16384,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.015,
output: 0.02,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": { "mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo", id: "mistralai/mistral-nemo",
name: "Mistral: Mistral Nemo", name: "Mistral: Mistral Nemo",
@ -2969,14 +3071,31 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.01, input: 0.017934774,
output: 0.0400032, output: 0.07173912240000001,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 128000, maxTokens: 128000,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.3": {
id: "mistralai/mistral-7b-instruct-v0.3",
name: "Mistral: Mistral 7B Instruct v0.3",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.028,
output: 0.054,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct:free": { "mistralai/mistral-7b-instruct:free": {
id: "mistralai/mistral-7b-instruct:free", id: "mistralai/mistral-7b-instruct:free",
name: "Mistral: Mistral 7B Instruct (free)", name: "Mistral: Mistral 7B Instruct (free)",
@ -3011,23 +3130,6 @@ export const MODELS = {
contextWindow: 32768, contextWindow: 32768,
maxTokens: 16384, maxTokens: 16384,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.3": {
id: "mistralai/mistral-7b-instruct-v0.3",
name: "Mistral: Mistral 7B Instruct v0.3",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.028,
output: 0.054,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"microsoft/phi-3-mini-128k-instruct": { "microsoft/phi-3-mini-128k-instruct": {
id: "microsoft/phi-3-mini-128k-instruct", id: "microsoft/phi-3-mini-128k-instruct",
name: "Microsoft: Phi-3 Mini 128K Instruct", name: "Microsoft: Phi-3 Mini 128K Instruct",
@ -3198,23 +3300,6 @@ export const MODELS = {
contextWindow: 128000, contextWindow: 128000,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-tiny": { "mistralai/mistral-tiny": {
id: "mistralai/mistral-tiny", id: "mistralai/mistral-tiny",
name: "Mistral Tiny", name: "Mistral Tiny",
@ -3232,6 +3317,23 @@ export const MODELS = {
contextWindow: 32768, contextWindow: 32768,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mixtral-8x7b-instruct": { "mistralai/mixtral-8x7b-instruct": {
id: "mistralai/mixtral-8x7b-instruct", id: "mistralai/mixtral-8x7b-instruct",
name: "Mistral: Mixtral 8x7B Instruct", name: "Mistral: Mixtral 8x7B Instruct",
@ -3241,8 +3343,8 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.08, input: 0.39999999999999997,
output: 0.24, output: 0.39999999999999997,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },