Make stream function configurable in agent

This commit is contained in:
Mario Zechner 2025-09-15 20:31:53 +02:00
parent 1c9ab1ca24
commit 5f7a50deef
2 changed files with 246 additions and 140 deletions

View file

@ -10,6 +10,7 @@ export function prompt(
context: AgentContext,
config: PromptConfig,
signal?: AbortSignal,
streamFn?: typeof streamSimple,
): EventStream<AgentEvent, AgentContext["messages"]> {
const stream = new EventStream<AgentEvent, AgentContext["messages"]>(
(event) => event.type === "agent_end",
@ -45,7 +46,7 @@ export function prompt(
firstTurn = false;
}
// Stream assistant response
const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream);
const assistantMessage = await streamAssistantResponse(currentContext, config, signal, stream, streamFn);
newMessages.push(assistantMessage);
// Check for tool calls
@ -74,6 +75,7 @@ async function streamAssistantResponse(
config: PromptConfig,
signal: AbortSignal | undefined,
stream: EventStream<AgentEvent, AgentContext["messages"]>,
streamFn?: typeof streamSimple,
): Promise<AssistantMessage> {
// Convert AgentContext to Context for streamSimple
// Use a copy of messages to avoid mutating the original context
@ -93,7 +95,9 @@ async function streamAssistantResponse(
tools: context.tools, // AgentTool extends Tool, so this works
};
const response = await streamSimple(config.model, processedContext, { ...config, signal });
// Use custom stream function if provided, otherwise use default streamSimple
const streamFunction = streamFn || streamSimple;
const response = await streamFunction(config.model, processedContext, { ...config, signal });
let partialMessage: AssistantMessage | null = null;
let addedPartial = false;

View file

@ -1047,7 +1047,7 @@ export const MODELS = {
input: 0.6,
output: 4,
cacheRead: 0.15,
cacheWrite: 4,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1064,7 +1064,7 @@ export const MODELS = {
input: 0.3,
output: 0.5,
cacheRead: 0.075,
cacheWrite: 0.5,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1081,7 +1081,7 @@ export const MODELS = {
input: 5,
output: 15,
cacheRead: 5,
cacheWrite: 15,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
@ -1098,7 +1098,7 @@ export const MODELS = {
input: 5,
output: 25,
cacheRead: 1.25,
cacheWrite: 25,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1115,7 +1115,7 @@ export const MODELS = {
input: 3,
output: 15,
cacheRead: 0.75,
cacheWrite: 15,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1132,7 +1132,7 @@ export const MODELS = {
input: 0.3,
output: 0.5,
cacheRead: 0.075,
cacheWrite: 0.5,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1149,7 +1149,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 4096,
@ -1166,7 +1166,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1183,7 +1183,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 4096,
@ -1200,7 +1200,7 @@ export const MODELS = {
input: 3,
output: 15,
cacheRead: 0.75,
cacheWrite: 15,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1217,7 +1217,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 4096,
@ -1234,7 +1234,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1251,7 +1251,7 @@ export const MODELS = {
input: 5,
output: 25,
cacheRead: 1.25,
cacheWrite: 25,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1268,7 +1268,7 @@ export const MODELS = {
input: 2,
output: 10,
cacheRead: 2,
cacheWrite: 10,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1285,7 +1285,7 @@ export const MODELS = {
input: 3,
output: 15,
cacheRead: 0.75,
cacheWrite: 15,
cacheWrite: 0,
},
contextWindow: 256000,
maxTokens: 64000,
@ -1302,7 +1302,7 @@ export const MODELS = {
input: 5,
output: 15,
cacheRead: 5,
cacheWrite: 15,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 4096,
@ -1319,7 +1319,7 @@ export const MODELS = {
input: 0.6,
output: 4,
cacheRead: 0.15,
cacheWrite: 4,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
@ -1413,9 +1413,94 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
openrouter: {
"nvidia/nemotron-nano-9b-v2": {
id: "nvidia/nemotron-nano-9b-v2",
name: "NVIDIA: Nemotron Nano 9B V2",
"qwen/qwen3-next-80b-a3b-thinking": {
id: "qwen/qwen3-next-80b-a3b-thinking",
name: "Qwen: Qwen3 Next 80B A3B Thinking",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.09782604,
output: 0.391304304,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen3-next-80b-a3b-instruct": {
id: "qwen/qwen3-next-80b-a3b-instruct",
name: "Qwen: Qwen3 Next 80B A3B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09782604,
output: 0.391304304,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meituan/longcat-flash-chat": {
id: "meituan/longcat-flash-chat",
name: "Meituan: LongCat Flash Chat",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.75,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"qwen/qwen-plus-2025-07-28": {
id: "qwen/qwen-plus-2025-07-28",
name: "Qwen: Qwen Plus 0728",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 1.2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"qwen/qwen-plus-2025-07-28:thinking": {
id: "qwen/qwen-plus-2025-07-28:thinking",
name: "Qwen: Qwen Plus 0728 (thinking)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 4,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"nvidia/nemotron-nano-9b-v2:free": {
id: "nvidia/nemotron-nano-9b-v2:free",
name: "NVIDIA: Nemotron Nano 9B V2 (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -1430,6 +1515,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"nvidia/nemotron-nano-9b-v2": {
id: "nvidia/nemotron-nano-9b-v2",
name: "NVIDIA: Nemotron Nano 9B V2",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.04,
output: 0.16,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openrouter/sonoma-dusk-alpha": {
id: "openrouter/sonoma-dusk-alpha",
name: "Sonoma Dusk Alpha",
@ -1490,8 +1592,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.2962,
output: 1.1852999999999998,
input: 0.38043459999999996,
output: 1.52173896,
cacheRead: 0,
cacheWrite: 0,
},
@ -1541,8 +1643,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.0713,
output: 0.2852,
input: 0.08967387,
output: 0.358695612,
cacheRead: 0,
cacheWrite: 0,
},
@ -1558,8 +1660,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.09329544,
output: 0.3733632,
input: 0.127173852,
output: 0.5086955952000001,
cacheRead: 0,
cacheWrite: 0,
},
@ -1575,8 +1677,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.1999188,
output: 0.800064,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -1609,8 +1711,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.7999999999999999,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -1711,8 +1813,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.051830799999999996,
output: 0.207424,
input: 0.07065213999999999,
output: 0.282608664,
cacheRead: 0,
cacheWrite: 0,
},
@ -1728,8 +1830,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.051830799999999996,
output: 0.207424,
input: 0.07065213999999999,
output: 0.282608664,
cacheRead: 0,
cacheWrite: 0,
},
@ -1745,8 +1847,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.32986602,
output: 1.3201056,
input: 0.41249980199999997,
output: 1.6499998152000002,
cacheRead: 0,
cacheWrite: 0,
},
@ -1796,8 +1898,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.077968332,
output: 0.31202496,
input: 0.0974999532,
output: 0.38999995632,
cacheRead: 0,
cacheWrite: 0,
},
@ -1847,8 +1949,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.7999999999999999,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -1864,8 +1966,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.077968332,
output: 0.31202496,
input: 0.0974999532,
output: 0.38999995632,
cacheRead: 0,
cacheWrite: 0,
},
@ -2068,8 +2170,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.1999188,
output: 0.800064,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -2102,8 +2204,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.01999188,
output: 0.0800064,
input: 0.035869548,
output: 0.14347824480000002,
cacheRead: 0,
cacheWrite: 0,
},
@ -2204,8 +2306,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.01999188,
output: 0.0800064,
input: 0.035869548,
output: 0.14347824480000002,
cacheRead: 0,
cacheWrite: 0,
},
@ -2238,8 +2340,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.017992691999999998,
output: 0.07200576,
input: 0.0322825932,
output: 0.12913042032,
cacheRead: 0,
cacheWrite: 0,
},
@ -2374,8 +2476,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.1999188,
output: 0.800064,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -2408,8 +2510,8 @@ export const MODELS = {
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.01999188,
output: 0.0800064,
input: 0.03804346,
output: 0.152173896,
cacheRead: 0,
cacheWrite: 0,
},
@ -2510,8 +2612,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.01999188,
output: 0.0800064,
input: 0.03804346,
output: 0.152173896,
cacheRead: 0,
cacheWrite: 0,
},
@ -2527,8 +2629,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.025915399999999998,
output: 0.103712,
input: 0.03260868,
output: 0.130434768,
cacheRead: 0,
cacheWrite: 0,
},
@ -2578,8 +2680,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.1999188,
output: 0.800064,
input: 0.24999987999999998,
output: 0.999999888,
cacheRead: 0,
cacheWrite: 0,
},
@ -2765,8 +2867,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.12,
output: 0.3,
input: 0.6,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
@ -2816,8 +2918,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.051830799999999996,
output: 0.207424,
input: 0.06521736,
output: 0.260869536,
cacheRead: 0,
cacheWrite: 0,
},
@ -2841,23 +2943,6 @@ export const MODELS = {
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"cohere/command-r-plus-08-2024": {
id: "cohere/command-r-plus-08-2024",
name: "Cohere: Command R+ (08-2024)",
@ -2875,6 +2960,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"microsoft/phi-3.5-mini-128k-instruct": {
id: "microsoft/phi-3.5-mini-128k-instruct",
name: "Microsoft: Phi-3.5 Mini 128K Instruct",
@ -2901,14 +3003,31 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.28,
input: 0.12,
output: 0.3,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.015,
output: 0.02,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
@ -2943,23 +3062,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.015,
output: 0.02,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
name: "Mistral: Mistral Nemo",
@ -2969,14 +3071,31 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.01,
output: 0.0400032,
input: 0.017934774,
output: 0.07173912240000001,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.3": {
id: "mistralai/mistral-7b-instruct-v0.3",
name: "Mistral: Mistral 7B Instruct v0.3",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.028,
output: 0.054,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct:free": {
id: "mistralai/mistral-7b-instruct:free",
name: "Mistral: Mistral 7B Instruct (free)",
@ -3011,23 +3130,6 @@ export const MODELS = {
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.3": {
id: "mistralai/mistral-7b-instruct-v0.3",
name: "Mistral: Mistral 7B Instruct v0.3",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.028,
output: 0.054,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"microsoft/phi-3-mini-128k-instruct": {
id: "microsoft/phi-3-mini-128k-instruct",
name: "Microsoft: Phi-3 Mini 128K Instruct",
@ -3198,23 +3300,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-tiny": {
id: "mistralai/mistral-tiny",
name: "Mistral Tiny",
@ -3232,6 +3317,23 @@ export const MODELS = {
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mixtral-8x7b-instruct": {
id: "mistralai/mixtral-8x7b-instruct",
name: "Mistral: Mixtral 8x7B Instruct",
@ -3241,8 +3343,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.08,
output: 0.24,
input: 0.39999999999999997,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},