diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 62584252..048c43ac 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -6359,6 +6359,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-70b-instruct": { + id: "meta-llama/llama-3.1-70b-instruct", + name: "Meta: Llama 3.1 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-8b-instruct": { id: "meta-llama/llama-3.1-8b-instruct", name: "Meta: Llama 3.1 8B Instruct", @@ -6393,23 +6410,6 @@ export const MODELS = { contextWindow: 10000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-70b-instruct": { - id: "meta-llama/llama-3.1-70b-instruct", - name: "Meta: Llama 3.1 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", @@ -6546,23 +6546,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-4o-2024-05-13": { - id: "openai/gpt-4o-2024-05-13", - name: "OpenAI: GPT-4o (2024-05-13)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 5, - output: 15, - 
cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-4o": { id: "openai/gpt-4o", name: "OpenAI: GPT-4o", @@ -6597,6 +6580,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 64000, } satisfies Model<"openai-completions">, + "openai/gpt-4o-2024-05-13": { + id: "openai/gpt-4o-2024-05-13", + name: "OpenAI: GPT-4o (2024-05-13)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 5, + output: 15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "meta-llama/llama-3-70b-instruct": { id: "meta-llama/llama-3-70b-instruct", name: "Meta: Llama 3 70B Instruct", @@ -6716,23 +6716,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-3.5-turbo-0613": { - id: "openai/gpt-3.5-turbo-0613", - name: "OpenAI: GPT-3.5 Turbo (older v0613)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 4095, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-4-turbo-preview": { id: "openai/gpt-4-turbo-preview", name: "OpenAI: GPT-4 Turbo Preview", @@ -6750,6 +6733,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "openai/gpt-3.5-turbo-0613": { + id: "openai/gpt-3.5-turbo-0613", + name: "OpenAI: GPT-3.5 Turbo (older v0613)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 4095, + maxTokens: 4096, + } satisfies 
Model<"openai-completions">, "mistralai/mistral-tiny": { id: "mistralai/mistral-tiny", name: "Mistral Tiny", diff --git a/packages/coding-agent/examples/hooks/README.md b/packages/coding-agent/examples/hooks/README.md index 79e2a6a7..37701083 100644 --- a/packages/coding-agent/examples/hooks/README.md +++ b/packages/coding-agent/examples/hooks/README.md @@ -25,6 +25,9 @@ Prevents session changes when there are uncommitted git changes. Blocks clear/sw ### auto-commit-on-exit.ts Automatically commits changes when the agent exits (shutdown event). Uses the last assistant message to generate a commit message. +### full-compaction.ts +Custom context compaction that summarizes the entire conversation instead of keeping recent turns. Uses the `before_compact` hook event to intercept compaction and generate a comprehensive summary using `complete()` from the AI package. Useful when you want maximum context window space at the cost of losing exact conversation history. + ## Usage ```bash diff --git a/packages/coding-agent/examples/hooks/full-compaction.ts b/packages/coding-agent/examples/hooks/full-compaction.ts new file mode 100644 index 00000000..52e2f970 --- /dev/null +++ b/packages/coding-agent/examples/hooks/full-compaction.ts @@ -0,0 +1,89 @@ +/** + * Full Context Compaction Hook + * + * Replaces the default compaction behavior with a full summary of the entire context. + * Instead of keeping the last 20k tokens of conversation turns, this hook: + * 1. Summarizes ALL messages being compacted into a single comprehensive summary + * 2. Discards all old turns completely + * + * This is useful when you want maximum context window space for new work + * at the cost of losing exact conversation history. 
+ *
+ * Usage:
+ *   pi --hook examples/hooks/full-compaction.ts
+ */
+
+import { complete } from "@mariozechner/pi-ai";
+import { messageTransformer } from "@mariozechner/pi-coding-agent";
+import type { HookAPI } from "@mariozechner/pi-coding-agent/hooks";
+
+/** Instruction sent as the final user turn, after the transformed history, to request the summary. */
+const SUMMARY_PROMPT = `You are a conversation summarizer. Create a comprehensive summary of this conversation that captures:
+
+1. The main goals and objectives discussed
+2. Key decisions made and their rationale
+3. Important code changes, file modifications, or technical details
+4. Current state of any ongoing work
+5. Any blockers, issues, or open questions
+6. Next steps that were planned or suggested
+
+Be thorough but concise. The summary will replace the entire conversation history, so include all information needed to continue the work effectively.
+
+Format the summary as structured markdown with clear sections.`;
+
+/**
+ * Subscribes to "session" events and handles only those whose reason is "before_compact".
+ * The handler sends the transformed messages plus SUMMARY_PROMPT to the event's model and
+ * returns the reply text wrapped in a compaction entry. On an empty summary or a thrown
+ * error it notifies the UI and returns nothing (presumably the host then runs its default
+ * compaction; confirm against the hook runner).
+ *
+ * @param pi Hook API on which the session listener is registered.
+ */
+export default function (pi: HookAPI) {
+	pi.on("session", async (event, ctx) => {
+		if (event.reason !== "before_compact") {
+			return;
+		}
+
+		const { messagesToSummarize: history, tokensBefore: tokenCount, model, apiKey, cutPoint } = event;
+		ctx.ui.notify(`Compacting ${tokenCount.toLocaleString()} tokens with full summary...`, "info");
+
+		// Convert app messages to the LLM format first, then timestamp the summary request.
+		const llmHistory = messageTransformer(history);
+		const summaryRequest = {
+			role: "user" as const,
+			content: [{ type: "text" as const, text: SUMMARY_PROMPT }],
+			timestamp: Date.now(),
+		};
+		const conversation = [...llmHistory, summaryRequest];
+
+		try {
+			// Reuse the model and API key supplied with the compaction event.
+			const reply = await complete(model, { messages: conversation }, { apiKey, maxTokens: 8192 });
+
+			// Keep only the text parts of the reply, one per line.
+			const summary = reply.content.flatMap((part) => (part.type === "text" ? [part.text] : [])).join("\n");
+			if (summary.trim().length === 0) {
+				ctx.ui.notify("Compaction summary was empty, using default compaction", "warning");
+				return;
+			}
+
+			// NOTE(review): firstKeptEntryIndex is forwarded unchanged from cutPoint; confirm this
+			// really drops every older turn, as the file header promises.
+			return {
+				compactionEntry: {
+					type: "compaction" as const,
+					timestamp: new Date().toISOString(),
+					summary,
+					firstKeptEntryIndex: cutPoint.firstKeptEntryIndex,
+					tokensBefore: tokenCount,
+				},
+			};
+		} catch (error) {
+			const reason = error instanceof Error ? error.message : String(error);
+			ctx.ui.notify(`Compaction failed: ${reason}`, "error");
+			return;
+		}
+	});
+}