diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 1e11168f..db28b39f 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -4,6 +4,7 @@ ### Fixed +- Fixed 429 rate limit errors incorrectly triggering auto-compaction instead of retry with backoff ([#1038](https://github.com/badlogic/pi-mono/issues/1038)) - Fixed Anthropic provider to handle `sensitive` stop_reason returned by API ([#978](https://github.com/badlogic/pi-mono/issues/978)) - Fixed DeepSeek API compatibility by detecting `deepseek.com` URLs and disabling unsupported `developer` role ([#1048](https://github.com/badlogic/pi-mono/issues/1048)) - Fixed Anthropic provider to preserve input token counts when proxies omit them in `message_delta` events ([#1045](https://github.com/badlogic/pi-mono/issues/1045)) diff --git a/packages/ai/src/utils/overflow.ts b/packages/ai/src/utils/overflow.ts index 07c7400b..03d888b1 100644 --- a/packages/ai/src/utils/overflow.ts +++ b/packages/ai/src/utils/overflow.ts @@ -18,8 +18,8 @@ import type { AssistantMessage } from "../types.js"; * - LM Studio: "tokens to keep from the initial prompt is greater than the context length" * - GitHub Copilot: "prompt token count of X exceeds the limit of Y" * - MiniMax: "invalid params, context window exceeds limit" - * - Cerebras: Returns "400 status code (no body)" - handled separately below - * - Mistral: Returns "400 status code (no body)" - handled separately below + * - Cerebras: Returns "400/413 status code (no body)" - handled separately below + * - Mistral: Returns "400/413 status code (no body)" - handled separately below * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow * - Ollama: Silently truncates input - not detectable via error message */ @@ -57,8 +57,8 @@ const OVERFLOW_PATTERNS = [ * - Google Gemini: "input token count exceeds the maximum" * - xAI (Grok): "maximum prompt length is X but request contains Y" * - Groq: "reduce the length of the 
messages" - * - Cerebras: 400/413/429 status code (no body) - * - Mistral: 400/413/429 status code (no body) + * - Cerebras: 400/413 status code (no body) + * - Mistral: 400/413 status code (no body) * - OpenRouter (all backends): "maximum context length is X tokens" * - llama.cpp: "exceeds the available context size" * - LM Studio: "greater than the context length" @@ -92,9 +92,9 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num return true; } - // Cerebras and Mistral return 400/413/429 with no body - check for status code pattern - // 429 can indicate token-based rate limiting which correlates with context overflow - if (/^4(00|13|29)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) { + // Cerebras and Mistral return 400/413 with no body for context overflow + // Note: 429 is rate limiting (requests/tokens per unit of time), NOT context overflow + if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) { return true; } }