Merge pull request #311 from getcompanion-ai/hari/migrate-chat-lossless-convex

hari/migrate chat lossless convex
This commit is contained in:
Hari 2026-03-12 01:59:09 -04:00 committed by GitHub
commit 9115d5647f
6 changed files with 446 additions and 117 deletions

View file

@ -3010,7 +3010,7 @@ export const MODELS = {
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 128000, contextWindow: 264000,
maxTokens: 64000, maxTokens: 64000,
} satisfies Model<"openai-responses">, } satisfies Model<"openai-responses">,
"gpt-5.2-codex": { "gpt-5.2-codex": {
@ -3033,7 +3033,7 @@ export const MODELS = {
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 272000, contextWindow: 400000,
maxTokens: 128000, maxTokens: 128000,
} satisfies Model<"openai-responses">, } satisfies Model<"openai-responses">,
"gpt-5.3-codex": { "gpt-5.3-codex": {
@ -6486,6 +6486,23 @@ export const MODELS = {
contextWindow: 204800, contextWindow: 204800,
maxTokens: 131072, maxTokens: 131072,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"nemotron-3-super-free": {
id: "nemotron-3-super-free",
name: "Nemotron 3 Super Free",
api: "openai-completions",
provider: "opencode",
baseUrl: "https://opencode.ai/zen/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
}, },
"opencode-go": { "opencode-go": {
"glm-5": { "glm-5": {
@ -7230,13 +7247,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.25, input: 0.26,
output: 0.39999999999999997, output: 0.38,
cacheRead: 0, cacheRead: 0.13,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 163840, contextWindow: 163840,
maxTokens: 65536, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"deepseek/deepseek-v3.2-exp": { "deepseek/deepseek-v3.2-exp": {
id: "deepseek/deepseek-v3.2-exp", id: "deepseek/deepseek-v3.2-exp",
@ -8360,6 +8377,23 @@ export const MODELS = {
contextWindow: 256000, contextWindow: 256000,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"nvidia/nemotron-3-super-120b-a12b:free": {
id: "nvidia/nemotron-3-super-120b-a12b:free",
name: "NVIDIA: Nemotron 3 Super (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"nvidia/nemotron-nano-12b-v2-vl:free": { "nvidia/nemotron-nano-12b-v2-vl:free": {
id: "nvidia/nemotron-nano-12b-v2-vl:free", id: "nvidia/nemotron-nano-12b-v2-vl:free",
name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", name: "NVIDIA: Nemotron Nano 12B 2 VL (free)",
@ -9346,6 +9380,40 @@ export const MODELS = {
contextWindow: 200000, contextWindow: 200000,
maxTokens: 4096, maxTokens: 4096,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"openrouter/healer-alpha": {
id: "openrouter/healer-alpha",
name: "Healer Alpha",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"openrouter/hunter-alpha": {
id: "openrouter/hunter-alpha",
name: "Hunter Alpha",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"prime-intellect/intellect-3": { "prime-intellect/intellect-3": {
id: "prime-intellect/intellect-3", id: "prime-intellect/intellect-3",
name: "Prime Intellect: INTELLECT-3", name: "Prime Intellect: INTELLECT-3",
@ -9848,13 +9916,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.15, input: 0.0975,
output: 1.2, output: 0.78,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 128000, contextWindow: 131072,
maxTokens: 4096, maxTokens: 32768,
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
"qwen/qwen3-vl-235b-a22b-instruct": { "qwen/qwen3-vl-235b-a22b-instruct": {
id: "qwen/qwen3-vl-235b-a22b-instruct", id: "qwen/qwen3-vl-235b-a22b-instruct",
@ -9882,8 +9950,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0, input: 0.26,
output: 0, output: 2.6,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -9916,8 +9984,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0, input: 0.13,
output: 0, output: 1.56,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -10649,13 +10717,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.09999999999999999, input: 0.29,
output: 0.3, output: 0.59,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 40960, contextWindow: 131072,
maxTokens: 16384, maxTokens: 40960,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"alibaba/qwen3-235b-a22b-thinking": { "alibaba/qwen3-235b-a22b-thinking": {
id: "alibaba/qwen3-235b-a22b-thinking", id: "alibaba/qwen3-235b-a22b-thinking",
@ -10700,13 +10768,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.07, input: 0.15,
output: 0.27, output: 0.6,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 160000, contextWindow: 262144,
maxTokens: 32768, maxTokens: 8192,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"alibaba/qwen3-coder-next": { "alibaba/qwen3-coder-next": {
id: "alibaba/qwen3-coder-next", id: "alibaba/qwen3-coder-next",
@ -10742,6 +10810,23 @@ export const MODELS = {
contextWindow: 1000000, contextWindow: 1000000,
maxTokens: 65536, maxTokens: 65536,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"alibaba/qwen3-max": {
id: "alibaba/qwen3-max",
name: "Qwen3 Max",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text"],
cost: {
input: 1.2,
output: 6,
cacheRead: 0.24,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"anthropic-messages">,
"alibaba/qwen3-max-preview": { "alibaba/qwen3-max-preview": {
id: "alibaba/qwen3-max-preview", id: "alibaba/qwen3-max-preview",
name: "Qwen3 Max Preview", name: "Qwen3 Max Preview",
@ -10889,8 +10974,8 @@ export const MODELS = {
cost: { cost: {
input: 3, input: 3,
output: 15, output: 15,
cacheRead: 0, cacheRead: 0.3,
cacheWrite: 0, cacheWrite: 3.75,
}, },
contextWindow: 200000, contextWindow: 200000,
maxTokens: 8192, maxTokens: 8192,
@ -11099,6 +11184,23 @@ export const MODELS = {
contextWindow: 256000, contextWindow: 256000,
maxTokens: 8000, maxTokens: 8000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"deepseek/deepseek-r1": {
id: "deepseek/deepseek-r1",
name: "DeepSeek-R1",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
cost: {
input: 1.35,
output: 5.4,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 8192,
} satisfies Model<"anthropic-messages">,
"deepseek/deepseek-v3": { "deepseek/deepseek-v3": {
id: "deepseek/deepseek-v3", id: "deepseek/deepseek-v3",
name: "DeepSeek V3 0324", name: "DeepSeek V3 0324",
@ -11125,13 +11227,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.21, input: 0.5,
output: 0.7899999999999999, output: 1.5,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 163840, contextWindow: 163840,
maxTokens: 128000, maxTokens: 16384,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"deepseek/deepseek-v3.1-terminus": { "deepseek/deepseek-v3.1-terminus": {
id: "deepseek/deepseek-v3.1-terminus", id: "deepseek/deepseek-v3.1-terminus",
@ -11184,6 +11286,40 @@ export const MODELS = {
contextWindow: 128000, contextWindow: 128000,
maxTokens: 64000, maxTokens: 64000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"google/gemini-2.0-flash": {
id: "google/gemini-2.0-flash",
name: "Gemini 2.0 Flash",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 8192,
} satisfies Model<"anthropic-messages">,
"google/gemini-2.0-flash-lite": {
id: "google/gemini-2.0-flash-lite",
name: "Gemini 2.0 Flash Lite",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.075,
output: 0.3,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 8192,
} satisfies Model<"anthropic-messages">,
"google/gemini-2.5-flash": { "google/gemini-2.5-flash": {
id: "google/gemini-2.5-flash", id: "google/gemini-2.5-flash",
name: "Gemini 2.5 Flash", name: "Gemini 2.5 Flash",
@ -11191,11 +11327,11 @@ export const MODELS = {
provider: "vercel-ai-gateway", provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh", baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true, reasoning: true,
input: ["text"], input: ["text", "image"],
cost: { cost: {
input: 0.3, input: 0.3,
output: 2.5, output: 2.5,
cacheRead: 0, cacheRead: 0.03,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 1000000, contextWindow: 1000000,
@ -11259,11 +11395,11 @@ export const MODELS = {
provider: "vercel-ai-gateway", provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh", baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true, reasoning: true,
input: ["text"], input: ["text", "image"],
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 1048576, contextWindow: 1048576,
@ -11284,7 +11420,7 @@ export const MODELS = {
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 1000000, contextWindow: 1000000,
maxTokens: 64000, maxTokens: 65000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"google/gemini-3-pro-preview": { "google/gemini-3-pro-preview": {
id: "google/gemini-3-pro-preview", id: "google/gemini-3-pro-preview",
@ -11386,7 +11522,7 @@ export const MODELS = {
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 128000, contextWindow: 128000,
maxTokens: 8192, maxTokens: 100000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"meituan/longcat-flash-thinking": { "meituan/longcat-flash-thinking": {
id: "meituan/longcat-flash-thinking", id: "meituan/longcat-flash-thinking",
@ -11414,13 +11550,13 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.39999999999999997, input: 0.72,
output: 0.39999999999999997, output: 0.72,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 128000,
maxTokens: 16384, maxTokens: 8192,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"meta/llama-3.1-8b": { "meta/llama-3.1-8b": {
id: "meta/llama-3.1-8b", id: "meta/llama-3.1-8b",
@ -11431,12 +11567,12 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.03, input: 0.09999999999999999,
output: 0.049999999999999996, output: 0.09999999999999999,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 128000,
maxTokens: 16384, maxTokens: 16384,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"meta/llama-3.2-11b": { "meta/llama-3.2-11b": {
@ -11499,12 +11635,12 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0.15, input: 0.24,
output: 0.6, output: 0.9700000000000001,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 128000,
maxTokens: 8192, maxTokens: 8192,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"meta/llama-4-scout": { "meta/llama-4-scout": {
@ -11516,12 +11652,12 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0.08, input: 0.16999999999999998,
output: 0.3, output: 0.66,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 128000,
maxTokens: 8192, maxTokens: 8192,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"minimax/minimax-m2": { "minimax/minimax-m2": {
@ -11552,8 +11688,8 @@ export const MODELS = {
cost: { cost: {
input: 0.3, input: 0.3,
output: 1.2, output: 1.2,
cacheRead: 0.15, cacheRead: 0.03,
cacheWrite: 0, cacheWrite: 0.375,
}, },
contextWindow: 204800, contextWindow: 204800,
maxTokens: 131072, maxTokens: 131072,
@ -11788,14 +11924,31 @@ export const MODELS = {
reasoning: false, reasoning: false,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.5, input: 0.6,
output: 2, output: 2.5,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 131072,
maxTokens: 16384, maxTokens: 16384,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"moonshotai/kimi-k2-0905": {
id: "moonshotai/kimi-k2-0905",
name: "Kimi K2 0905",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text"],
cost: {
input: 0.6,
output: 2.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 256000,
maxTokens: 16384,
} satisfies Model<"anthropic-messages">,
"moonshotai/kimi-k2-thinking": { "moonshotai/kimi-k2-thinking": {
id: "moonshotai/kimi-k2-thinking", id: "moonshotai/kimi-k2-thinking",
name: "Kimi K2 Thinking", name: "Kimi K2 Thinking",
@ -11805,13 +11958,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.47, input: 0.6,
output: 2, output: 2.5,
cacheRead: 0.14100000000000001, cacheRead: 0.15,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 216144, contextWindow: 262114,
maxTokens: 216144, maxTokens: 262114,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"moonshotai/kimi-k2-thinking-turbo": { "moonshotai/kimi-k2-thinking-turbo": {
id: "moonshotai/kimi-k2-thinking-turbo", id: "moonshotai/kimi-k2-thinking-turbo",
@ -11856,13 +12009,13 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0.5, input: 0.6,
output: 2.8, output: 3,
cacheRead: 0, cacheRead: 0.09999999999999999,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 256000, contextWindow: 262114,
maxTokens: 256000, maxTokens: 262114,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"nvidia/nemotron-nano-12b-v2-vl": { "nvidia/nemotron-nano-12b-v2-vl": {
id: "nvidia/nemotron-nano-12b-v2-vl", id: "nvidia/nemotron-nano-12b-v2-vl",
@ -11890,8 +12043,8 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.04, input: 0.06,
output: 0.16, output: 0.22999999999999998,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
@ -11960,7 +12113,7 @@ export const MODELS = {
cost: { cost: {
input: 0.09999999999999999, input: 0.09999999999999999,
output: 0.39999999999999997, output: 0.39999999999999997,
cacheRead: 0.03, cacheRead: 0.024999999999999998,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 1047576, contextWindow: 1047576,
@ -12011,7 +12164,7 @@ export const MODELS = {
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0.13, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12041,11 +12194,11 @@ export const MODELS = {
provider: "vercel-ai-gateway", provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh", baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text"],
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0.13, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12062,7 +12215,7 @@ export const MODELS = {
cost: { cost: {
input: 0.25, input: 0.25,
output: 2, output: 2,
cacheRead: 0.03, cacheRead: 0.024999999999999998,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12079,7 +12232,7 @@ export const MODELS = {
cost: { cost: {
input: 0.049999999999999996, input: 0.049999999999999996,
output: 0.39999999999999997, output: 0.39999999999999997,
cacheRead: 0.01, cacheRead: 0.005,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12113,7 +12266,7 @@ export const MODELS = {
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0.13, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12164,7 +12317,7 @@ export const MODELS = {
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0.13, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 128000, contextWindow: 128000,
@ -12181,7 +12334,7 @@ export const MODELS = {
cost: { cost: {
input: 1.25, input: 1.25,
output: 10, output: 10,
cacheRead: 0.13, cacheRead: 0.125,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12198,7 +12351,7 @@ export const MODELS = {
cost: { cost: {
input: 1.75, input: 1.75,
output: 14, output: 14,
cacheRead: 0.18, cacheRead: 0.175,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 400000, contextWindow: 400000,
@ -12303,7 +12456,7 @@ export const MODELS = {
cacheRead: 0.25, cacheRead: 0.25,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 200000, contextWindow: 1050000,
maxTokens: 128000, maxTokens: 128000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"openai/gpt-5.4-pro": { "openai/gpt-5.4-pro": {
@ -12320,26 +12473,9 @@ export const MODELS = {
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 200000, contextWindow: 1050000,
maxTokens: 128000, maxTokens: 128000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"openai/gpt-oss-120b": {
id: "openai/gpt-oss-120b",
name: "gpt-oss-120b",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"anthropic-messages">,
"openai/gpt-oss-20b": { "openai/gpt-oss-20b": {
id: "openai/gpt-oss-20b", id: "openai/gpt-oss-20b",
name: "gpt-oss-20b", name: "gpt-oss-20b",
@ -12757,9 +12893,9 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.09, input: 0.09999999999999999,
output: 0.29, output: 0.3,
cacheRead: 0, cacheRead: 0.02,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 262144, contextWindow: 262144,
@ -12776,11 +12912,11 @@ export const MODELS = {
cost: { cost: {
input: 0.6, input: 0.6,
output: 2.2, output: 2.2,
cacheRead: 0, cacheRead: 0.11,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 131072, contextWindow: 128000,
maxTokens: 131072, maxTokens: 96000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"zai/glm-4.5-air": { "zai/glm-4.5-air": {
id: "zai/glm-4.5-air", id: "zai/glm-4.5-air",
@ -12805,16 +12941,16 @@ export const MODELS = {
api: "anthropic-messages", api: "anthropic-messages",
provider: "vercel-ai-gateway", provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh", baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true, reasoning: false,
input: ["text", "image"], input: ["text", "image"],
cost: { cost: {
input: 0.6, input: 0.6,
output: 1.7999999999999998, output: 1.7999999999999998,
cacheRead: 0, cacheRead: 0.11,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 65536, contextWindow: 66000,
maxTokens: 16384, maxTokens: 16000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"zai/glm-4.6": { "zai/glm-4.6": {
id: "zai/glm-4.6", id: "zai/glm-4.6",
@ -12876,12 +13012,12 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.43, input: 0.6,
output: 1.75, output: 2.2,
cacheRead: 0.08, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 202752, contextWindow: 200000,
maxTokens: 120000, maxTokens: 120000,
} satisfies Model<"anthropic-messages">, } satisfies Model<"anthropic-messages">,
"zai/glm-4.7-flash": { "zai/glm-4.7-flash": {
@ -12927,9 +13063,9 @@ export const MODELS = {
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
cost: { cost: {
input: 0.7999999999999999, input: 1,
output: 2.56, output: 3.1999999999999997,
cacheRead: 0.16, cacheRead: 0.19999999999999998,
cacheWrite: 0, cacheWrite: 0,
}, },
contextWindow: 202800, contextWindow: 202800,

View file

@ -55,7 +55,29 @@ export type GatewayEvent =
} }
| { type: "message_complete"; sessionKey: string; text: string } | { type: "message_complete"; sessionKey: string; text: string }
| { type: "error"; sessionKey: string; error: string } | { type: "error"; sessionKey: string; error: string }
| { type: "aborted"; sessionKey: string }; | { type: "aborted"; sessionKey: string }
| {
type: "structured_part";
sessionKey: string;
partType: "teamActivity";
payload: {
teamId: string;
status: string;
members: Array<{ id: string; name: string; role?: string; status: string; message?: string }>;
};
}
| {
type: "structured_part";
sessionKey: string;
partType: "media";
payload: { url: string; mimeType?: string };
}
| {
type: "structured_part";
sessionKey: string;
partType: "error";
payload: { code: string; message: string };
};
export interface ManagedGatewaySession { export interface ManagedGatewaySession {
sessionKey: string; sessionKey: string;

View file

@ -7,6 +7,7 @@ import {
import { rm } from "node:fs/promises"; import { rm } from "node:fs/promises";
import { join } from "node:path"; import { join } from "node:path";
import { URL } from "node:url"; import { URL } from "node:url";
import type { AgentMessage } from "@mariozechner/companion-agent-core";
import type { AgentSession, AgentSessionEvent } from "../agent-session.js"; import type { AgentSession, AgentSessionEvent } from "../agent-session.js";
import type { Settings } from "../settings-manager.js"; import type { Settings } from "../settings-manager.js";
import { extractMessageText, getLastAssistantText } from "./helpers.js"; import { extractMessageText, getLastAssistantText } from "./helpers.js";
@ -29,6 +30,7 @@ import type {
ModelInfo, ModelInfo,
} from "./types.js"; } from "./types.js";
import { import {
createGatewayStructuredPartListener,
createVercelStreamListener, createVercelStreamListener,
errorVercelStream, errorVercelStream,
extractUserText, extractUserText,
@ -265,7 +267,7 @@ export class GatewayRuntime {
sessionKey: string, sessionKey: string,
listener: (event: GatewayEvent) => void, listener: (event: GatewayEvent) => void,
): Promise<() => void> { ): Promise<() => void> {
const managedSession = await this.requireExistingSession(sessionKey); const managedSession = await this.ensureSession(sessionKey);
managedSession.listeners.add(listener); managedSession.listeners.add(listener);
listener({ listener({
type: "hello", type: "hello",
@ -567,6 +569,7 @@ export class GatewayRuntime {
sessionKey: managedSession.sessionKey, sessionKey: managedSession.sessionKey,
text: extractMessageText(event.message), text: extractMessageText(event.message),
}); });
this.emitStructuredParts(managedSession, event.message);
return; return;
} }
if (event.message.role === "toolResult") { if (event.message.role === "toolResult") {
@ -654,6 +657,76 @@ export class GatewayRuntime {
}); });
} }
private emitStructuredParts(
managedSession: ManagedGatewaySession,
message: AgentMessage,
): void {
const content = message.content;
if (!Array.isArray(content)) return;
for (const part of content) {
if (typeof part !== "object" || part === null) continue;
const p = part as Record<string, unknown>;
if (p.type === "teamActivity") {
const teamId = typeof p.teamId === "string" ? p.teamId : "";
const status = typeof p.status === "string" ? p.status : "running";
if (!teamId) continue;
const rawMembers = Array.isArray(p.members) ? p.members : [];
const members = rawMembers
.filter(
(m): m is Record<string, unknown> =>
typeof m === "object" && m !== null,
)
.map((m) => ({
id: typeof m.id === "string" ? m.id : "",
name: typeof m.name === "string" ? m.name : "Teammate",
...(typeof m.role === "string" ? { role: m.role } : {}),
status: typeof m.status === "string" ? m.status : "running",
...(typeof m.message === "string" ? { message: m.message } : {}),
}))
.filter((m) => m.id.length > 0);
this.emit(managedSession, {
type: "structured_part",
sessionKey: managedSession.sessionKey,
partType: "teamActivity",
payload: { teamId, status, members },
});
continue;
}
if (p.type === "image") {
const url = typeof p.url === "string" ? p.url : "";
if (!url) continue;
this.emit(managedSession, {
type: "structured_part",
sessionKey: managedSession.sessionKey,
partType: "media",
payload: {
url,
...(typeof p.mimeType === "string" ? { mimeType: p.mimeType } : {}),
},
});
continue;
}
if (p.type === "error") {
const errorMessage = typeof p.message === "string" ? p.message : "";
if (!errorMessage) continue;
this.emit(managedSession, {
type: "structured_part",
sessionKey: managedSession.sessionKey,
partType: "error",
payload: {
code: typeof p.code === "string" ? p.code : "unknown",
message: errorMessage,
},
});
continue;
}
}
}
private createSessionState( private createSessionState(
managedSession: ManagedGatewaySession, managedSession: ManagedGatewaySession,
): GatewaySessionState { ): GatewaySessionState {
@ -740,7 +813,28 @@ export class GatewayRuntime {
request: IncomingMessage, request: IncomingMessage,
response: ServerResponse, response: ServerResponse,
): Promise<void> { ): Promise<void> {
const origin = request.headers.origin;
if (origin) {
response.setHeader("Access-Control-Allow-Origin", origin);
response.setHeader(
"Access-Control-Allow-Methods",
"GET, POST, PUT, PATCH, DELETE, OPTIONS",
);
response.setHeader(
"Access-Control-Allow-Headers",
"Content-Type, Authorization",
);
response.setHeader("Access-Control-Allow-Credentials", "true");
}
const method = request.method ?? "GET"; const method = request.method ?? "GET";
if (method === "OPTIONS") {
response.writeHead(204);
response.end();
return;
}
const url = new URL( const url = new URL(
request.url ?? "/", request.url ?? "/",
`http://${request.headers.host ?? `${this.config.bind}:${this.config.port}`}`, `http://${request.headers.host ?? `${this.config.bind}:${this.config.port}`}`,
@ -919,7 +1013,7 @@ export class GatewayRuntime {
const action = sessionMatch[2]; const action = sessionMatch[2];
if (!action && method === "GET") { if (!action && method === "GET") {
const session = await this.requireExistingSession(sessionKey); const session = await this.ensureSession(sessionKey);
this.writeJson(response, 200, { session: this.createSnapshot(session) }); this.writeJson(response, 200, { session: this.createSnapshot(session) });
return; return;
} }
@ -1106,7 +1200,10 @@ export class GatewayRuntime {
response.write("\n"); response.write("\n");
const listener = createVercelStreamListener(response); const listener = createVercelStreamListener(response);
const structuredPartListener =
createGatewayStructuredPartListener(response);
let unsubscribe: (() => void) | undefined; let unsubscribe: (() => void) | undefined;
let unsubscribeStructured: (() => void) | undefined;
let streamingActive = false; let streamingActive = false;
const stopStreaming = () => { const stopStreaming = () => {
@ -1114,6 +1211,8 @@ export class GatewayRuntime {
streamingActive = false; streamingActive = false;
unsubscribe?.(); unsubscribe?.();
unsubscribe = undefined; unsubscribe = undefined;
unsubscribeStructured?.();
unsubscribeStructured = undefined;
}; };
// Clean up on client disconnect // Clean up on client disconnect
@ -1135,6 +1234,10 @@ export class GatewayRuntime {
onStart: () => { onStart: () => {
if (clientDisconnected || streamingActive) return; if (clientDisconnected || streamingActive) return;
unsubscribe = managedSession.session.subscribe(listener); unsubscribe = managedSession.session.subscribe(listener);
managedSession.listeners.add(structuredPartListener);
unsubscribeStructured = () => {
managedSession.listeners.delete(structuredPartListener);
};
streamingActive = true; streamingActive = true;
}, },
onFinish: () => { onFinish: () => {
@ -1283,7 +1386,7 @@ export class GatewayRuntime {
provider: string, provider: string,
modelId: string, modelId: string,
): Promise<{ ok: true; model: { provider: string; modelId: string } }> { ): Promise<{ ok: true; model: { provider: string; modelId: string } }> {
const managed = await this.requireExistingSession(sessionKey); const managed = await this.ensureSession(sessionKey);
const found = managed.session.modelRegistry.find(provider, modelId); const found = managed.session.modelRegistry.find(provider, modelId);
if (!found) { if (!found) {
throw new HttpError(404, `Model not found: ${provider}/${modelId}`); throw new HttpError(404, `Model not found: ${provider}/${modelId}`);
@ -1389,7 +1492,8 @@ export class GatewayRuntime {
} }
private getCompanionChannelsSettings(): CompanionChannelsSettings { private getCompanionChannelsSettings(): CompanionChannelsSettings {
const globalSettings = this.primarySession.settingsManager.getGlobalSettings(); const globalSettings =
this.primarySession.settingsManager.getGlobalSettings();
const projectSettings = const projectSettings =
this.primarySession.settingsManager.getProjectSettings(); this.primarySession.settingsManager.getProjectSettings();
const mergedSettings = mergeRecords( const mergedSettings = mergeRecords(
@ -1397,7 +1501,9 @@ export class GatewayRuntime {
isRecord(projectSettings) ? projectSettings : {}, isRecord(projectSettings) ? projectSettings : {},
); );
const piChannels = mergedSettings["companion-channels"]; const piChannels = mergedSettings["companion-channels"];
return isRecord(piChannels) ? (piChannels as CompanionChannelsSettings) : {}; return isRecord(piChannels)
? (piChannels as CompanionChannelsSettings)
: {};
} }
private buildSlackChannelStatus( private buildSlackChannelStatus(
@ -1419,7 +1525,8 @@ export class GatewayRuntime {
if (hasConfig) { if (hasConfig) {
if (!adapter) { if (!adapter) {
error = 'Slack requires `companion-channels.adapters.slack = { "type": "slack" }`.'; error =
'Slack requires `companion-channels.adapters.slack = { "type": "slack" }`.';
} else if (adapterType !== "slack") { } else if (adapterType !== "slack") {
error = 'Slack adapter type must be "slack".'; error = 'Slack adapter type must be "slack".';
} else if (!appToken) { } else if (!appToken) {
@ -1468,7 +1575,8 @@ export class GatewayRuntime {
} else if (adapterType !== "telegram") { } else if (adapterType !== "telegram") {
error = 'Telegram adapter type must be "telegram".'; error = 'Telegram adapter type must be "telegram".';
} else if (!botToken) { } else if (!botToken) {
error = "Telegram requires companion-channels.adapters.telegram.botToken."; error =
"Telegram requires companion-channels.adapters.telegram.botToken.";
} else if (!pollingEnabled) { } else if (!pollingEnabled) {
error = error =
"Telegram requires companion-channels.adapters.telegram.polling = true."; "Telegram requires companion-channels.adapters.telegram.polling = true.";

View file

@ -85,6 +85,41 @@ export function messageContentToHistoryParts(msg: AgentMessage): HistoryPart[] {
args: toolCall.arguments, args: toolCall.arguments,
state: "call", state: "call",
}); });
} else if (contentPart.type === "teamActivity") {
const activity = contentPart as {
type: "teamActivity";
teamId: string;
status: string;
members?: Array<{ id: string; name: string; role?: string; status: string; message?: string }>;
};
parts.push({
type: "teamActivity",
teamId: activity.teamId,
status: activity.status,
members: Array.isArray(activity.members) ? activity.members : [],
});
} else if (contentPart.type === "image") {
const image = contentPart as {
type: "image";
url: string;
mimeType?: string;
};
parts.push({
type: "media",
url: image.url,
mimeType: image.mimeType,
});
} else if (contentPart.type === "error") {
const error = contentPart as {
type: "error";
code?: string;
message: string;
};
parts.push({
type: "error",
code: typeof error.code === "string" ? error.code : "unknown",
message: error.message,
});
} }
} }
return parts; return parts;

View file

@ -77,7 +77,15 @@ export type HistoryPart =
args: unknown; args: unknown;
state: string; state: string;
result?: unknown; result?: unknown;
}; }
| {
type: "teamActivity";
teamId: string;
status: string;
members: Array<{ id: string; name: string; role?: string; status: string; message?: string }>;
}
| { type: "media"; url: string; mimeType?: string }
| { type: "error"; code: string; message: string };
export interface ChannelStatus { export interface ChannelStatus {
id: string; id: string;

View file

@ -1,6 +1,7 @@
import { randomUUID } from "node:crypto"; import { randomUUID } from "node:crypto";
import type { ServerResponse } from "node:http"; import type { ServerResponse } from "node:http";
import type { AgentSessionEvent } from "../agent-session.js"; import type { AgentSessionEvent } from "../agent-session.js";
import type { GatewayEvent } from "./internal-types.js";
type TextStreamState = { type TextStreamState = {
started: boolean; started: boolean;
@ -324,3 +325,22 @@ export function errorVercelStream(
writeChunk(response, "[DONE]"); writeChunk(response, "[DONE]");
response.end(); response.end();
} }
/**
 * Build a GatewayEvent listener that relays `structured_part` events onto the
 * HTTP response as custom SSE chunks. The returned function is suitable for
 * adding to managedSession.listeners; the caller is responsible for removing
 * it again during stream teardown. Events of any other type, or events
 * arriving after the response has ended, are ignored.
 */
export function createGatewayStructuredPartListener(
  response: ServerResponse,
): (event: GatewayEvent) => void {
  return (event) => {
    // Drop anything once the stream is closed or when the event is not a
    // structured part — both are normal, not errors.
    if (response.writableEnded || event.type !== "structured_part") {
      return;
    }
    const chunk = {
      type: "structured-part",
      partType: event.partType,
      payload: event.payload,
    };
    writeChunk(response, chunk);
  };
}