diff --git a/packages/agent/CHANGELOG.md b/packages/agent/CHANGELOG.md index ba554804..b7869af2 100644 --- a/packages/agent/CHANGELOG.md +++ b/packages/agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `sessionId` option on `Agent` to forward session identifiers to LLM providers for session-based caching. + ## [0.37.2] - 2026-01-05 ## [0.37.1] - 2026-01-05 diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index 4ec7910c..2db01d81 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -60,6 +60,12 @@ export interface AgentOptions { */ streamFn?: StreamFn; + /** + * Optional session identifier forwarded to LLM providers. + * Used by providers that support session-based caching (e.g., OpenAI Codex). + */ + sessionId?: string; + /** * Resolves an API key dynamically for each LLM call. * Useful for expiring tokens (e.g., GitHub Copilot OAuth). @@ -89,6 +95,7 @@ export class Agent { private steeringMode: "all" | "one-at-a-time"; private followUpMode: "all" | "one-at-a-time"; public streamFn: StreamFn; + private _sessionId?: string; public getApiKey?: (provider: string) => Promise | string | undefined; private runningPrompt?: Promise; private resolveRunningPrompt?: () => void; @@ -100,9 +107,25 @@ export class Agent { this.steeringMode = opts.steeringMode || "one-at-a-time"; this.followUpMode = opts.followUpMode || "one-at-a-time"; this.streamFn = opts.streamFn || streamSimple; + this._sessionId = opts.sessionId; this.getApiKey = opts.getApiKey; } + /** + * Get the current session ID used for provider caching. + */ + get sessionId(): string | undefined { + return this._sessionId; + } + + /** + * Set the session ID for provider caching. + * Call this when switching sessions (new session, branch, resume). 
+ */ + set sessionId(value: string | undefined) { + this._sessionId = value; + } + get state(): AgentState { return this._state; } @@ -286,6 +309,7 @@ export class Agent { const config: AgentLoopConfig = { model, reasoning, + sessionId: this._sessionId, convertToLlm: this.convertToLlm, transformContext: this.transformContext, getApiKey: this.getApiKey, diff --git a/packages/agent/test/agent.test.ts b/packages/agent/test/agent.test.ts index 3332d16e..44df9117 100644 --- a/packages/agent/test/agent.test.ts +++ b/packages/agent/test/agent.test.ts @@ -229,4 +229,30 @@ describe("Agent", () => { agent.abort(); await firstPrompt.catch(() => {}); }); + + it("forwards sessionId to streamFn options", async () => { + let receivedSessionId: string | undefined; + const agent = new Agent({ + sessionId: "session-abc", + streamFn: (_model, _context, options) => { + receivedSessionId = options?.sessionId; + const stream = new MockAssistantStream(); + queueMicrotask(() => { + const message = createAssistantMessage("ok"); + stream.push({ type: "done", reason: "stop", message }); + }); + return stream; + }, + }); + + await agent.prompt("hello"); + expect(receivedSessionId).toBe("session-abc"); + + // Test setter + agent.sessionId = "session-def"; + expect(agent.sessionId).toBe("session-def"); + + await agent.prompt("hello again"); + expect(receivedSessionId).toBe("session-def"); + }); }); diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index ba8e1e36..8aff03f7 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers. 
+ ## [0.37.2] - 2026-01-05 ### Fixed diff --git a/packages/ai/README.md b/packages/ai/README.md index 1c3821e2..2ce2158c 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -4,9 +4,50 @@ Unified LLM API with automatic model discovery, provider configuration, token an **Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows. +## Table of Contents + +- [Supported Providers](#supported-providers) +- [Installation](#installation) +- [Quick Start](#quick-start) +- [Tools](#tools) + - [Defining Tools](#defining-tools) + - [Handling Tool Calls](#handling-tool-calls) + - [Streaming Tool Calls with Partial JSON](#streaming-tool-calls-with-partial-json) + - [Validating Tool Arguments](#validating-tool-arguments) + - [Complete Event Reference](#complete-event-reference) +- [Image Input](#image-input) +- [Thinking/Reasoning](#thinkingreasoning) + - [Unified Interface](#unified-interface-streamsimplecompletesimple) + - [Provider-Specific Options](#provider-specific-options-streamcomplete) + - [Streaming Thinking Content](#streaming-thinking-content) +- [Stop Reasons](#stop-reasons) +- [Error Handling](#error-handling) + - [Aborting Requests](#aborting-requests) + - [Continuing After Abort](#continuing-after-abort) +- [APIs, Models, and Providers](#apis-models-and-providers) + - [Providers and Models](#providers-and-models) + - [Querying Providers and Models](#querying-providers-and-models) + - [Custom Models](#custom-models) + - [OpenAI Compatibility Settings](#openai-compatibility-settings) + - [Type Safety](#type-safety) +- [Cross-Provider Handoffs](#cross-provider-handoffs) +- [Context Serialization](#context-serialization) +- [Browser Usage](#browser-usage) + - [Environment Variables](#environment-variables-nodejs-only) + - [Checking Environment Variables](#checking-environment-variables) +- [OAuth Providers](#oauth-providers) + - [Vertex AI (ADC)](#vertex-ai-adc) + - [CLI 
Login](#cli-login) + - [Programmatic OAuth](#programmatic-oauth) + - [Login Flow Example](#login-flow-example) + - [Using OAuth Tokens](#using-oauth-tokens) + - [Provider Notes](#provider-notes) +- [License](#license) + ## Supported Providers - **OpenAI** +- **OpenAI Codex** (ChatGPT Plus/Pro subscription, requires OAuth, see below) - **Anthropic** - **Google** - **Vertex AI** (Gemini via Vertex AI) @@ -16,6 +57,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an - **xAI** - **OpenRouter** - **GitHub Copilot** (requires OAuth, see below) +- **Google Gemini CLI** (requires OAuth, see below) +- **Antigravity** (requires OAuth, see below) - **Any OpenAI-compatible API**: Ollama, vLLM, LM Studio, etc. ## Installation @@ -806,17 +849,19 @@ const response = await complete(model, { In Node.js environments, you can set environment variables to avoid passing API keys: -```bash -OPENAI_API_KEY=sk-... -ANTHROPIC_API_KEY=sk-ant-... -GEMINI_API_KEY=... -MISTRAL_API_KEY=... -GROQ_API_KEY=gsk_... -CEREBRAS_API_KEY=csk-... -XAI_API_KEY=xai-... -ZAI_API_KEY=... -OPENROUTER_API_KEY=sk-or-... 
-``` +| Provider | Environment Variable(s) | +|----------|------------------------| +| OpenAI | `OPENAI_API_KEY` | +| Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` | +| Google | `GEMINI_API_KEY` | +| Vertex AI | `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC | +| Mistral | `MISTRAL_API_KEY` | +| Groq | `GROQ_API_KEY` | +| Cerebras | `CEREBRAS_API_KEY` | +| xAI | `XAI_API_KEY` | +| OpenRouter | `OPENROUTER_API_KEY` | +| zAI | `ZAI_API_KEY` | +| GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` | When set, the library automatically uses these keys: @@ -845,6 +890,7 @@ const key = getEnvApiKey('openai'); // checks OPENAI_API_KEY Several providers require OAuth authentication instead of static API keys: - **Anthropic** (Claude Pro/Max subscription) +- **OpenAI Codex** (ChatGPT Plus/Pro subscription, access to GPT-5.x Codex models) - **GitHub Copilot** (Copilot subscription) - **Google Gemini CLI** (Free Gemini 2.0/2.5 via Google Cloud Code Assist) - **Antigravity** (Free Gemini 3, Claude, GPT-OSS via Google Cloud) @@ -873,6 +919,7 @@ The library provides login and token refresh functions. Credential storage is th import { // Login functions (return credentials, do not store) loginAnthropic, + loginOpenAICodex, loginGitHubCopilot, loginGeminiCli, loginAntigravity, @@ -882,7 +929,7 @@ import { getOAuthApiKey, // (provider, credentialsMap) => { newCredentials, apiKey } | null // Types - type OAuthProvider, // 'anthropic' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity' + type OAuthProvider, // 'anthropic' | 'openai-codex' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity' type OAuthCredentials, } from '@mariozechner/pi-ai'; ``` @@ -937,6 +984,8 @@ const response = await complete(model, { ### Provider Notes +**OpenAI Codex**: Requires a ChatGPT Plus or Pro subscription. Provides access to GPT-5.x Codex models with extended context windows and reasoning capabilities. 
The library automatically handles session-based prompt caching when `sessionId` is provided in stream options. + **GitHub Copilot**: If you get "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (warning icon), and click "Enable". **Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically. diff --git a/packages/ai/src/providers/openai-codex-responses.ts b/packages/ai/src/providers/openai-codex-responses.ts index 660ed5b2..0f74b635 100644 --- a/packages/ai/src/providers/openai-codex-responses.ts +++ b/packages/ai/src/providers/openai-codex-responses.ts @@ -96,6 +96,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses" model: model.id, input: messages, stream: true, + prompt_cache_key: options?.sessionId, }; if (options?.maxTokens) { @@ -132,7 +133,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses" const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt); const reasoningEffort = transformedBody.reasoning?.effort ?? 
null; - const headers = createCodexHeaders(model.headers, accountId, apiKey, transformedBody.prompt_cache_key); + const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId); logCodexDebug("codex request", { url, model: params.model, diff --git a/packages/ai/src/providers/openai-codex/prompts/codex.ts b/packages/ai/src/providers/openai-codex/prompts/codex.ts index b93899b9..db94e780 100644 --- a/packages/ai/src/providers/openai-codex/prompts/codex.ts +++ b/packages/ai/src/providers/openai-codex/prompts/codex.ts @@ -115,7 +115,7 @@ export async function getCodexInstructions(normalizedModel = "gpt-5.1-codex"): P cachedTimestamp = metadata.lastChecked; } - const CACHE_TTL_MS = 15 * 60 * 1000; + const CACHE_TTL_MS = 24 * 60 * 60 * 1000; if (cachedTimestamp && Date.now() - cachedTimestamp < CACHE_TTL_MS && existsSync(cacheFile)) { return readFileSync(cacheFile, "utf8"); } @@ -183,45 +183,3 @@ export async function getCodexInstructions(normalizedModel = "gpt-5.1-codex"): P throw new Error(`No cached Codex instructions available for ${modelFamily}`); } } - -export const TOOL_REMAP_MESSAGE = ` - -YOU ARE IN A DIFFERENT ENVIRONMENT. These instructions override ALL previous tool references. - - - - -❌ APPLY_PATCH DOES NOT EXIST → ✅ USE "edit" INSTEAD -- NEVER use: apply_patch, applyPatch -- ALWAYS use: edit tool for ALL file modifications - - - -❌ UPDATE_PLAN DOES NOT EXIST -- NEVER use: update_plan, updatePlan, read_plan, readPlan, todowrite, todoread -- There is no plan tool in this environment - - - - -File Operations: - • read - Read file contents - • edit - Modify files with exact find/replace - • write - Create or overwrite files - -Search/Discovery: - • grep - Search file contents for patterns (read-only) - • find - Find files by glob pattern (read-only) - • ls - List directory contents (read-only) - -Execution: - • bash - Run shell commands - - - -Before file modifications: -1. Am I using "edit" NOT "apply_patch"? -2. 
Am I avoiding plan tools entirely? -3. Am I using only the tools listed above? - -`; diff --git a/packages/ai/src/providers/openai-codex/request-transformer.ts b/packages/ai/src/providers/openai-codex/request-transformer.ts index 9aebcf1e..32b21e59 100644 --- a/packages/ai/src/providers/openai-codex/request-transformer.ts +++ b/packages/ai/src/providers/openai-codex/request-transformer.ts @@ -35,6 +35,7 @@ export interface RequestBody { }; include?: string[]; prompt_cache_key?: string; + prompt_cache_retention?: "in-memory" | "24h"; max_output_tokens?: number; max_completion_tokens?: number; [key: string]: unknown; diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts index 09453d5f..79bee984 100644 --- a/packages/ai/src/stream.ts +++ b/packages/ai/src/stream.ts @@ -177,6 +177,7 @@ function mapOptionsForApi( maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000), signal: options?.signal, apiKey: apiKey || options?.apiKey, + sessionId: options?.sessionId, }; // Helper to clamp xhigh to high for providers that don't support it diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index e0221c68..5c81bce9 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -64,6 +64,12 @@ export interface StreamOptions { maxTokens?: number; signal?: AbortSignal; apiKey?: string; + /** + * Optional session identifier for providers that support session-based caching. + * Providers can use this to enable prompt caching, request routing, or other + * session-aware features. Ignored by providers that don't support it. 
+ */ + sessionId?: string; } // Unified options with reasoning passed to streamSimple() and completeSimple() diff --git a/packages/ai/test/openai-codex-stream.test.ts b/packages/ai/test/openai-codex-stream.test.ts index abe44850..85459939 100644 --- a/packages/ai/test/openai-codex-stream.test.ts +++ b/packages/ai/test/openai-codex-stream.test.ts @@ -129,4 +129,201 @@ describe("openai-codex streaming", () => { expect(sawTextDelta).toBe(true); expect(sawDone).toBe(true); }); + + it("sets conversation_id/session_id headers and prompt_cache_key when sessionId is provided", async () => { + const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-")); + process.env.PI_CODING_AGENT_DIR = tempDir; + + const payload = Buffer.from( + JSON.stringify({ "https://api.openai.com/auth": { chatgpt_account_id: "acc_test" } }), + "utf8", + ).toString("base64"); + const token = `aaa.${payload}.bbb`; + + const sse = `${[ + `data: ${JSON.stringify({ + type: "response.output_item.added", + item: { type: "message", id: "msg_1", role: "assistant", status: "in_progress", content: [] }, + })}`, + `data: ${JSON.stringify({ type: "response.content_part.added", part: { type: "output_text", text: "" } })}`, + `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Hello" })}`, + `data: ${JSON.stringify({ + type: "response.output_item.done", + item: { + type: "message", + id: "msg_1", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: "Hello" }], + }, + })}`, + `data: ${JSON.stringify({ + type: "response.completed", + response: { + status: "completed", + usage: { + input_tokens: 5, + output_tokens: 3, + total_tokens: 8, + input_tokens_details: { cached_tokens: 0 }, + }, + }, + })}`, + ].join("\n\n")}\n\n`; + + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + + const sessionId = "test-session-123"; + const fetchMock = 
vi.fn(async (input: string | URL, init?: RequestInit) => { + const url = typeof input === "string" ? input : input.toString(); + if (url === "https://api.github.com/repos/openai/codex/releases/latest") { + return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 }); + } + if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) { + return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } }); + } + if (url === "https://chatgpt.com/backend-api/codex/responses") { + const headers = init?.headers instanceof Headers ? init.headers : undefined; + // Verify sessionId is set in headers + expect(headers?.get("conversation_id")).toBe(sessionId); + expect(headers?.get("session_id")).toBe(sessionId); + + // Verify sessionId is set in request body as prompt_cache_key + const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record) : null; + expect(body?.prompt_cache_key).toBe(sessionId); + expect(body?.prompt_cache_retention).toBe("in-memory"); + + return new Response(stream, { + status: 200, + headers: { "content-type": "text/event-stream" }, + }); + } + return new Response("not found", { status: 404 }); + }); + + global.fetch = fetchMock as typeof fetch; + + const model: Model<"openai-codex-responses"> = { + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 400000, + maxTokens: 128000, + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant.", + messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }], + }; + + const streamResult = streamOpenAICodexResponses(model, context, { apiKey: token, sessionId }); + await streamResult.result(); + }); + + it("does not set conversation_id/session_id headers when sessionId is not provided", async () => { + const 
tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-")); + process.env.PI_CODING_AGENT_DIR = tempDir; + + const payload = Buffer.from( + JSON.stringify({ "https://api.openai.com/auth": { chatgpt_account_id: "acc_test" } }), + "utf8", + ).toString("base64"); + const token = `aaa.${payload}.bbb`; + + const sse = `${[ + `data: ${JSON.stringify({ + type: "response.output_item.added", + item: { type: "message", id: "msg_1", role: "assistant", status: "in_progress", content: [] }, + })}`, + `data: ${JSON.stringify({ type: "response.content_part.added", part: { type: "output_text", text: "" } })}`, + `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Hello" })}`, + `data: ${JSON.stringify({ + type: "response.output_item.done", + item: { + type: "message", + id: "msg_1", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: "Hello" }], + }, + })}`, + `data: ${JSON.stringify({ + type: "response.completed", + response: { + status: "completed", + usage: { + input_tokens: 5, + output_tokens: 3, + total_tokens: 8, + input_tokens_details: { cached_tokens: 0 }, + }, + }, + })}`, + ].join("\n\n")}\n\n`; + + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + + const fetchMock = vi.fn(async (input: string | URL, init?: RequestInit) => { + const url = typeof input === "string" ? input : input.toString(); + if (url === "https://api.github.com/repos/openai/codex/releases/latest") { + return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 }); + } + if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) { + return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } }); + } + if (url === "https://chatgpt.com/backend-api/codex/responses") { + const headers = init?.headers instanceof Headers ? 
init.headers : undefined; + // Verify headers are not set when sessionId is not provided + expect(headers?.has("conversation_id")).toBe(false); + expect(headers?.has("session_id")).toBe(false); + + return new Response(stream, { + status: 200, + headers: { "content-type": "text/event-stream" }, + }); + } + return new Response("not found", { status: 404 }); + }); + + global.fetch = fetchMock as typeof fetch; + + const model: Model<"openai-codex-responses"> = { + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 400000, + maxTokens: 128000, + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant.", + messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }], + }; + + // No sessionId provided + const streamResult = streamOpenAICodexResponses(model, context, { apiKey: token }); + await streamResult.result(); + }); }); diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 91f35dd4..b86f3a76 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Session ID is now forwarded to LLM providers for session-based caching (used by OpenAI Codex for prompt caching). + ### Fixed - Add `minimatch` as a direct dependency for explicit imports. 
diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index 92194eaa..d87bf83e 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -856,6 +856,7 @@ export class AgentSession { await this.abort(); this.agent.reset(); this.sessionManager.newSession(options); + this.agent.sessionId = this.sessionManager.getSessionId(); this._steeringMessages = []; this._followUpMessages = []; this._pendingNextTurnMessages = []; @@ -1666,6 +1667,7 @@ export class AgentSession { // Set new session this.sessionManager.setSessionFile(sessionPath); + this.agent.sessionId = this.sessionManager.getSessionId(); // Reload messages const sessionContext = this.sessionManager.buildSessionContext(); @@ -1745,6 +1747,7 @@ export class AgentSession { } else { this.sessionManager.createBranchedSession(selectedEntry.parentId); } + this.agent.sessionId = this.sessionManager.getSessionId(); // Reload messages from entries (works for both file and in-memory mode) const sessionContext = this.sessionManager.buildSessionContext(); diff --git a/packages/coding-agent/src/core/sdk.ts b/packages/coding-agent/src/core/sdk.ts index e84901b0..ce11b71d 100644 --- a/packages/coding-agent/src/core/sdk.ts +++ b/packages/coding-agent/src/core/sdk.ts @@ -613,6 +613,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} tools: activeToolsArray, }, convertToLlm, + sessionId: sessionManager.getSessionId(), transformContext: extensionRunner ? async (messages) => { return extensionRunner.emitContext(messages);