diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index eaca341c..2aae1195 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -1656,7 +1656,7 @@ export class AgentSession { apiKey, signal: this._branchSummaryAbortController.signal, customInstructions: options.customInstructions, - maxTokens: branchSummarySettings.maxTokens, + reserveTokens: branchSummarySettings.reserveTokens, }); this._branchSummaryAbortController = undefined; if (result.aborted) { diff --git a/packages/coding-agent/src/core/compaction/branch-summarization.ts b/packages/coding-agent/src/core/compaction/branch-summarization.ts index c303ca08..a5897455 100644 --- a/packages/coding-agent/src/core/compaction/branch-summarization.ts +++ b/packages/coding-agent/src/core/compaction/branch-summarization.ts @@ -53,8 +53,8 @@ export interface GenerateBranchSummaryOptions { signal: AbortSignal; /** Optional custom instructions for summarization */ customInstructions?: string; - /** Maximum tokens to include in summary context (default 100000) */ - maxTokens?: number; + /** Tokens reserved for prompt + LLM response (default 16384) */ + reserveTokens?: number; } // ============================================================================ @@ -321,9 +321,13 @@ export async function generateBranchSummary( entries: SessionEntry[], options: GenerateBranchSummaryOptions, ): Promise { - const { model, apiKey, signal, customInstructions, maxTokens = 100000 } = options; + const { model, apiKey, signal, customInstructions, reserveTokens = 16384 } = options; - const { messages, fileOps } = prepareBranchEntries(entries, maxTokens); + // Token budget = context window minus reserved space for prompt + response + const contextWindow = model.contextWindow || 128000; + const tokenBudget = contextWindow - reserveTokens; + + const { messages, fileOps } = prepareBranchEntries(entries, tokenBudget); if (messages.length === 0) { return { summary: "No content to summarize" }; diff --git a/packages/coding-agent/src/core/settings-manager.ts b/packages/coding-agent/src/core/settings-manager.ts index 737a812e..4d58a5c1 100644 --- a/packages/coding-agent/src/core/settings-manager.ts +++ b/packages/coding-agent/src/core/settings-manager.ts @@ -9,7 +9,7 @@ export interface CompactionSettings { } export interface BranchSummarySettings { - maxTokens?: number; // default: 100000 (max tokens to include in branch summary context) + reserveTokens?: number; // default: 16384 (tokens reserved for prompt + LLM response) } export interface RetrySettings { @@ -260,9 +260,9 @@ export class SettingsManager { }; } - getBranchSummarySettings(): { maxTokens: number } { + getBranchSummarySettings(): { reserveTokens: number } { return { - maxTokens: this.settings.branchSummary?.maxTokens ?? 100000, + reserveTokens: this.settings.branchSummary?.reserveTokens ?? 16384, }; }