mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 00:03:00 +00:00
Merge branch 'feat/use-mistral-sdk'
This commit is contained in:
commit
a31065166d
17 changed files with 728 additions and 171 deletions
|
|
@ -20,7 +20,7 @@ import type { AssistantMessage } from "../types.js";
|
|||
* - MiniMax: "invalid params, context window exceeds limit"
|
||||
* - Kimi For Coding: "Your request exceeded model token limit: X (requested: Y)"
|
||||
* - Cerebras: Returns "400/413 status code (no body)" - handled separately below
|
||||
* - Mistral: Returns "400/413 status code (no body)" - handled separately below
|
||||
* - Mistral: "Prompt contains X tokens ... too large for model with Y maximum context length"
|
||||
* - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
|
||||
* - Ollama: Silently truncates input - not detectable via error message
|
||||
*/
|
||||
|
|
@ -37,6 +37,7 @@ const OVERFLOW_PATTERNS = [
|
|||
/greater than the context length/i, // LM Studio
|
||||
/context window exceeds limit/i, // MiniMax
|
||||
/exceeded model token limit/i, // Kimi For Coding
|
||||
/too large for model with \d+ maximum context length/i, // Mistral
|
||||
/context[_ ]length[_ ]exceeded/i, // Generic fallback
|
||||
/too many tokens/i, // Generic fallback
|
||||
/token limit exceeded/i, // Generic fallback
|
||||
|
|
@ -60,7 +61,7 @@ const OVERFLOW_PATTERNS = [
|
|||
* - xAI (Grok): "maximum prompt length is X but request contains Y"
|
||||
* - Groq: "reduce the length of the messages"
|
||||
* - Cerebras: 400/413 status code (no body)
|
||||
* - Mistral: 400/413 status code (no body)
|
||||
* - Mistral: "Prompt contains X tokens ... too large for model with Y maximum context length"
|
||||
* - OpenRouter (all backends): "maximum context length is X tokens"
|
||||
* - llama.cpp: "exceeds the available context size"
|
||||
* - LM Studio: "greater than the context length"
|
||||
|
|
@ -95,7 +96,7 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num
|
|||
return true;
|
||||
}
|
||||
|
||||
// Cerebras and Mistral return 400/413 with no body for context overflow
|
||||
// Cerebras returns 400/413 with no body for context overflow
|
||||
// Note: 429 is rate limiting (requests/tokens per time), NOT context overflow
|
||||
if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) {
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue