Use reserveTokens for branch summary (tokens left for prompt + response)

- tokenBudget = contextWindow - reserveTokens
- Default 16384, same as compaction
- Consistent naming with CompactionSettings.reserveTokens
This commit is contained in:
Mario Zechner 2025-12-29 21:36:23 +01:00
parent f5f39f08f1
commit dc5fc4fc40
3 changed files with 12 additions and 8 deletions

View file

@@ -1656,7 +1656,7 @@ export class AgentSession {
 				apiKey,
 				signal: this._branchSummaryAbortController.signal,
 				customInstructions: options.customInstructions,
-				maxTokens: branchSummarySettings.maxTokens,
+				reserveTokens: branchSummarySettings.reserveTokens,
 			});
 			this._branchSummaryAbortController = undefined;
 			if (result.aborted) {

View file

@@ -53,8 +53,8 @@ export interface GenerateBranchSummaryOptions {
 	signal: AbortSignal;
 	/** Optional custom instructions for summarization */
 	customInstructions?: string;
-	/** Maximum tokens to include in summary context (default 100000) */
-	maxTokens?: number;
+	/** Tokens reserved for prompt + LLM response (default 16384) */
+	reserveTokens?: number;
 }

 // ============================================================================
@@ -321,9 +321,13 @@ export async function generateBranchSummary(
 	entries: SessionEntry[],
 	options: GenerateBranchSummaryOptions,
 ): Promise<BranchSummaryResult> {
-	const { model, apiKey, signal, customInstructions, maxTokens = 100000 } = options;
-	const { messages, fileOps } = prepareBranchEntries(entries, maxTokens);
+	const { model, apiKey, signal, customInstructions, reserveTokens = 16384 } = options;
+	// Token budget = context window minus reserved space for prompt + response
+	const contextWindow = model.contextWindow || 128000;
+	const tokenBudget = contextWindow - reserveTokens;
+	const { messages, fileOps } = prepareBranchEntries(entries, tokenBudget);

 	if (messages.length === 0) {
 		return { summary: "No content to summarize" };

View file

@@ -9,7 +9,7 @@ export interface CompactionSettings {
 }

 export interface BranchSummarySettings {
-	maxTokens?: number; // default: 100000 (max tokens to include in branch summary context)
+	reserveTokens?: number; // default: 16384 (tokens reserved for prompt + LLM response)
 }
export interface RetrySettings { export interface RetrySettings {
@@ -260,9 +260,9 @@ export class SettingsManager {
 		};
 	}

-	getBranchSummarySettings(): { maxTokens: number } {
+	getBranchSummarySettings(): { reserveTokens: number } {
 		return {
-			maxTokens: this.settings.branchSummary?.maxTokens ?? 100000,
+			reserveTokens: this.settings.branchSummary?.reserveTokens ?? 16384,
 		};
 	}