feat(ai,agent,coding-agent): add sessionId for provider session-based caching

- Add sessionId to StreamOptions for providers that support session-based caching
- OpenAI Codex provider uses sessionId for prompt_cache_key and routing headers
- Agent class now accepts and forwards sessionId to stream functions
- coding-agent passes session ID from SessionManager and updates on session changes
- Update ai package README with table of contents, OpenAI Codex OAuth docs, and env vars table
- Increase Codex instructions cache TTL from 15 minutes to 24 hours
- Add tests for sessionId forwarding in ai and agent packages
This commit is contained in:
Mario Zechner 2026-01-06 11:08:42 +01:00
parent 858c6bae8a
commit edb0da9611
14 changed files with 335 additions and 56 deletions

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- `sessionId` option on `Agent` to forward session identifiers to LLM providers for session-based caching.
## [0.37.2] - 2026-01-05
## [0.37.1] - 2026-01-05

View file

@ -60,6 +60,12 @@ export interface AgentOptions {
*/
streamFn?: StreamFn;
/**
* Optional session identifier forwarded to LLM providers.
* Used by providers that support session-based caching (e.g., OpenAI Codex).
*/
sessionId?: string;
/**
* Resolves an API key dynamically for each LLM call.
* Useful for expiring tokens (e.g., GitHub Copilot OAuth).
@ -89,6 +95,7 @@ export class Agent {
private steeringMode: "all" | "one-at-a-time";
private followUpMode: "all" | "one-at-a-time";
public streamFn: StreamFn;
private _sessionId?: string;
public getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
private runningPrompt?: Promise<void>;
private resolveRunningPrompt?: () => void;
@ -100,9 +107,25 @@ export class Agent {
this.steeringMode = opts.steeringMode || "one-at-a-time";
this.followUpMode = opts.followUpMode || "one-at-a-time";
this.streamFn = opts.streamFn || streamSimple;
this._sessionId = opts.sessionId;
this.getApiKey = opts.getApiKey;
}
/**
 * The session identifier currently forwarded to LLM providers for
 * session-based caching (e.g., the OpenAI Codex provider sends it as
 * `prompt_cache_key` and in routing headers). `undefined` when no
 * session is associated with this agent.
 */
get sessionId(): string | undefined {
return this._sessionId;
}
/**
 * Set the session ID for provider caching.
 * Call this when switching sessions (new session, branch, resume) so the
 * next LLM call carries the new identifier — the value is read when the
 * agent builds its loop config for a prompt.
 */
set sessionId(value: string | undefined) {
this._sessionId = value;
}
get state(): AgentState {
return this._state;
}
@ -286,6 +309,7 @@ export class Agent {
const config: AgentLoopConfig = {
model,
reasoning,
sessionId: this._sessionId,
convertToLlm: this.convertToLlm,
transformContext: this.transformContext,
getApiKey: this.getApiKey,

View file

@ -229,4 +229,30 @@ describe("Agent", () => {
agent.abort();
await firstPrompt.catch(() => {});
});
// Verifies the Agent forwards its sessionId into the StreamOptions it hands
// to streamFn, both for the constructor-supplied value and after the setter
// is used mid-lifetime.
it("forwards sessionId to streamFn options", async () => {
// Captures whatever sessionId the Agent passed on the most recent call.
let receivedSessionId: string | undefined;
const agent = new Agent({
sessionId: "session-abc",
streamFn: (_model, _context, options) => {
receivedSessionId = options?.sessionId;
const stream = new MockAssistantStream();
// Complete the stream asynchronously so prompt() observes a full turn.
queueMicrotask(() => {
const message = createAssistantMessage("ok");
stream.push({ type: "done", reason: "stop", message });
});
return stream;
},
});
await agent.prompt("hello");
expect(receivedSessionId).toBe("session-abc");
// Test setter: an updated sessionId must reach subsequent stream calls.
agent.sessionId = "session-def";
expect(agent.sessionId).toBe("session-def");
await agent.prompt("hello again");
expect(receivedSessionId).toBe("session-def");
});
});

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers.
## [0.37.2] - 2026-01-05
### Fixed

View file

@ -4,9 +4,50 @@ Unified LLM API with automatic model discovery, provider configuration, token an
**Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows.
## Table of Contents
- [Supported Providers](#supported-providers)
- [Installation](#installation)
- [Quick Start](#quick-start)
- [Tools](#tools)
- [Defining Tools](#defining-tools)
- [Handling Tool Calls](#handling-tool-calls)
- [Streaming Tool Calls with Partial JSON](#streaming-tool-calls-with-partial-json)
- [Validating Tool Arguments](#validating-tool-arguments)
- [Complete Event Reference](#complete-event-reference)
- [Image Input](#image-input)
- [Thinking/Reasoning](#thinkingreasoning)
- [Unified Interface](#unified-interface-streamsimplecompletesimple)
- [Provider-Specific Options](#provider-specific-options-streamcomplete)
- [Streaming Thinking Content](#streaming-thinking-content)
- [Stop Reasons](#stop-reasons)
- [Error Handling](#error-handling)
- [Aborting Requests](#aborting-requests)
- [Continuing After Abort](#continuing-after-abort)
- [APIs, Models, and Providers](#apis-models-and-providers)
- [Providers and Models](#providers-and-models)
- [Querying Providers and Models](#querying-providers-and-models)
- [Custom Models](#custom-models)
- [OpenAI Compatibility Settings](#openai-compatibility-settings)
- [Type Safety](#type-safety)
- [Cross-Provider Handoffs](#cross-provider-handoffs)
- [Context Serialization](#context-serialization)
- [Browser Usage](#browser-usage)
- [Environment Variables](#environment-variables-nodejs-only)
- [Checking Environment Variables](#checking-environment-variables)
- [OAuth Providers](#oauth-providers)
- [Vertex AI (ADC)](#vertex-ai-adc)
- [CLI Login](#cli-login)
- [Programmatic OAuth](#programmatic-oauth)
- [Login Flow Example](#login-flow-example)
- [Using OAuth Tokens](#using-oauth-tokens)
- [Provider Notes](#provider-notes)
- [License](#license)
## Supported Providers
- **OpenAI**
- **OpenAI Codex** (ChatGPT Plus/Pro subscription, requires OAuth, see below)
- **Anthropic**
- **Google**
- **Vertex AI** (Gemini via Vertex AI)
@ -16,6 +57,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an
- **xAI**
- **OpenRouter**
- **GitHub Copilot** (requires OAuth, see below)
- **Google Gemini CLI** (requires OAuth, see below)
- **Antigravity** (requires OAuth, see below)
- **Any OpenAI-compatible API**: Ollama, vLLM, LM Studio, etc.
## Installation
@ -806,17 +849,19 @@ const response = await complete(model, {
In Node.js environments, you can set environment variables to avoid passing API keys:
```bash
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GEMINI_API_KEY=...
MISTRAL_API_KEY=...
GROQ_API_KEY=gsk_...
CEREBRAS_API_KEY=csk-...
XAI_API_KEY=xai-...
ZAI_API_KEY=...
OPENROUTER_API_KEY=sk-or-...
```
| Provider | Environment Variable(s) |
|----------|------------------------|
| OpenAI | `OPENAI_API_KEY` |
| Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` |
| Google | `GEMINI_API_KEY` |
| Vertex AI | `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC |
| Mistral | `MISTRAL_API_KEY` |
| Groq | `GROQ_API_KEY` |
| Cerebras | `CEREBRAS_API_KEY` |
| xAI | `XAI_API_KEY` |
| OpenRouter | `OPENROUTER_API_KEY` |
| zAI | `ZAI_API_KEY` |
| GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` |
When set, the library automatically uses these keys:
@ -845,6 +890,7 @@ const key = getEnvApiKey('openai'); // checks OPENAI_API_KEY
Several providers require OAuth authentication instead of static API keys:
- **Anthropic** (Claude Pro/Max subscription)
- **OpenAI Codex** (ChatGPT Plus/Pro subscription, access to GPT-5.x Codex models)
- **GitHub Copilot** (Copilot subscription)
- **Google Gemini CLI** (Free Gemini 2.0/2.5 via Google Cloud Code Assist)
- **Antigravity** (Free Gemini 3, Claude, GPT-OSS via Google Cloud)
@ -873,6 +919,7 @@ The library provides login and token refresh functions. Credential storage is th
import {
// Login functions (return credentials, do not store)
loginAnthropic,
loginOpenAICodex,
loginGitHubCopilot,
loginGeminiCli,
loginAntigravity,
@ -882,7 +929,7 @@ import {
getOAuthApiKey, // (provider, credentialsMap) => { newCredentials, apiKey } | null
// Types
type OAuthProvider, // 'anthropic' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity'
type OAuthProvider, // 'anthropic' | 'openai-codex' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity'
type OAuthCredentials,
} from '@mariozechner/pi-ai';
```
@ -937,6 +984,8 @@ const response = await complete(model, {
### Provider Notes
**OpenAI Codex**: Requires a ChatGPT Plus or Pro subscription. Provides access to GPT-5.x Codex models with extended context windows and reasoning capabilities. The library automatically handles session-based prompt caching when `sessionId` is provided in stream options.
**GitHub Copilot**: If you get "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (warning icon), and click "Enable".
**Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically.

View file

@ -96,6 +96,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
model: model.id,
input: messages,
stream: true,
prompt_cache_key: options?.sessionId,
};
if (options?.maxTokens) {
@ -132,7 +133,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
const reasoningEffort = transformedBody.reasoning?.effort ?? null;
const headers = createCodexHeaders(model.headers, accountId, apiKey, transformedBody.prompt_cache_key);
const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
logCodexDebug("codex request", {
url,
model: params.model,

View file

@ -115,7 +115,7 @@ export async function getCodexInstructions(normalizedModel = "gpt-5.1-codex"): P
cachedTimestamp = metadata.lastChecked;
}
const CACHE_TTL_MS = 15 * 60 * 1000;
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
if (cachedTimestamp && Date.now() - cachedTimestamp < CACHE_TTL_MS && existsSync(cacheFile)) {
return readFileSync(cacheFile, "utf8");
}
@ -183,45 +183,3 @@ export async function getCodexInstructions(normalizedModel = "gpt-5.1-codex"): P
throw new Error(`No cached Codex instructions available for ${modelFamily}`);
}
}
// Prompt text that remaps Codex CLI tool names (apply_patch, update_plan, …)
// onto this environment's tool set (edit, read, write, grep, find, ls, bash).
// The template literal below is a runtime string delivered to the model —
// its contents must not be reworded.
// NOTE(review): the enclosing hunk (-183,45 +183,3) indicates this constant
// is being removed by this commit — confirm no remaining callers reference it.
export const TOOL_REMAP_MESSAGE = `<user_instructions priority="0">
<environment_override priority="0">
YOU ARE IN A DIFFERENT ENVIRONMENT. These instructions override ALL previous tool references.
</environment_override>
<tool_replacements priority="0">
<critical_rule priority="0">
APPLY_PATCH DOES NOT EXIST USE "edit" INSTEAD
- NEVER use: apply_patch, applyPatch
- ALWAYS use: edit tool for ALL file modifications
</critical_rule>
<critical_rule priority="0">
UPDATE_PLAN DOES NOT EXIST
- NEVER use: update_plan, updatePlan, read_plan, readPlan, todowrite, todoread
- There is no plan tool in this environment
</critical_rule>
</tool_replacements>
<available_tools priority="0">
File Operations:
read - Read file contents
edit - Modify files with exact find/replace
write - Create or overwrite files
Search/Discovery:
grep - Search file contents for patterns (read-only)
find - Find files by glob pattern (read-only)
ls - List directory contents (read-only)
Execution:
bash - Run shell commands
</available_tools>
<verification_checklist priority="0">
Before file modifications:
1. Am I using "edit" NOT "apply_patch"?
2. Am I avoiding plan tools entirely?
3. Am I using only the tools listed above?
</verification_checklist>
</user_instructions>`;

View file

@ -35,6 +35,7 @@ export interface RequestBody {
};
include?: string[];
prompt_cache_key?: string;
prompt_cache_retention?: "in_memory" | "24h";
max_output_tokens?: number;
max_completion_tokens?: number;
[key: string]: unknown;

View file

@ -177,6 +177,7 @@ function mapOptionsForApi<TApi extends Api>(
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
signal: options?.signal,
apiKey: apiKey || options?.apiKey,
sessionId: options?.sessionId,
};
// Helper to clamp xhigh to high for providers that don't support it

View file

@ -64,6 +64,12 @@ export interface StreamOptions {
maxTokens?: number;
signal?: AbortSignal;
apiKey?: string;
/**
* Optional session identifier for providers that support session-based caching.
* Providers can use this to enable prompt caching, request routing, or other
* session-aware features. Ignored by providers that don't support it.
*/
sessionId?: string;
}
// Unified options with reasoning passed to streamSimple() and completeSimple()

View file

@ -129,4 +129,201 @@ describe("openai-codex streaming", () => {
expect(sawTextDelta).toBe(true);
expect(sawDone).toBe(true);
});
// Verifies that when a sessionId is supplied, the Codex provider sends it as
// the conversation_id/session_id request headers and as prompt_cache_key in
// the request body.
it("sets conversation_id/session_id headers and prompt_cache_key when sessionId is provided", async () => {
// Redirect the provider's on-disk cache/credential storage to a temp dir.
const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
process.env.PI_CODING_AGENT_DIR = tempDir;
// Forge a ChatGPT OAuth JWT whose payload carries the chatgpt_account_id
// claim the provider extracts for its account routing.
const payload = Buffer.from(
JSON.stringify({ "https://api.openai.com/auth": { chatgpt_account_id: "acc_test" } }),
"utf8",
).toString("base64");
const token = `aaa.${payload}.bbb`;
// Minimal SSE transcript: one assistant message ("Hello") plus a completion
// event with usage, enough for the stream parser to finish cleanly.
const sse = `${[
`data: ${JSON.stringify({
type: "response.output_item.added",
item: { type: "message", id: "msg_1", role: "assistant", status: "in_progress", content: [] },
})}`,
`data: ${JSON.stringify({ type: "response.content_part.added", part: { type: "output_text", text: "" } })}`,
`data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Hello" })}`,
`data: ${JSON.stringify({
type: "response.output_item.done",
item: {
type: "message",
id: "msg_1",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: "Hello" }],
},
})}`,
`data: ${JSON.stringify({
type: "response.completed",
response: {
status: "completed",
usage: {
input_tokens: 5,
output_tokens: 3,
total_tokens: 8,
input_tokens_details: { cached_tokens: 0 },
},
},
})}`,
].join("\n\n")}\n\n`;
const encoder = new TextEncoder();
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(sse));
controller.close();
},
});
const sessionId = "test-session-123";
// Mock fetch routes: Codex release lookup, raw instructions download, and
// the responses endpoint where the actual assertions live.
const fetchMock = vi.fn(async (input: string | URL, init?: RequestInit) => {
const url = typeof input === "string" ? input : input.toString();
if (url === "https://api.github.com/repos/openai/codex/releases/latest") {
return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 });
}
if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) {
return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } });
}
if (url === "https://chatgpt.com/backend-api/codex/responses") {
const headers = init?.headers instanceof Headers ? init.headers : undefined;
// Verify sessionId is set in headers
expect(headers?.get("conversation_id")).toBe(sessionId);
expect(headers?.get("session_id")).toBe(sessionId);
// Verify sessionId is set in request body as prompt_cache_key
const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record<string, unknown>) : null;
expect(body?.prompt_cache_key).toBe(sessionId);
// NOTE(review): the RequestBody type in this change declares
// prompt_cache_retention as "in_memory" | "24h" (underscore), while this
// expectation uses "in-memory" (hyphen — OpenAI's documented value).
// One of the two is inconsistent; confirm which value
// transformRequestBody actually emits and align the type union.
expect(body?.prompt_cache_retention).toBe("in-memory");
return new Response(stream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
}
return new Response("not found", { status: 404 });
});
// NOTE(review): global.fetch and PI_CODING_AGENT_DIR are overwritten without
// restore — confirm a shared afterEach/unstub exists, or add cleanup here.
global.fetch = fetchMock as typeof fetch;
const model: Model<"openai-codex-responses"> = {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: "https://chatgpt.com/backend-api",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 400000,
maxTokens: 128000,
};
const context: Context = {
systemPrompt: "You are a helpful assistant.",
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
};
const streamResult = streamOpenAICodexResponses(model, context, { apiKey: token, sessionId });
await streamResult.result();
});
// Mirror of the positive test with sessionId omitted: the provider must NOT
// emit conversation_id/session_id headers when no session is supplied.
it("does not set conversation_id/session_id headers when sessionId is not provided", async () => {
// Same temp-dir / forged-JWT / SSE scaffolding as the positive case.
const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
process.env.PI_CODING_AGENT_DIR = tempDir;
const payload = Buffer.from(
JSON.stringify({ "https://api.openai.com/auth": { chatgpt_account_id: "acc_test" } }),
"utf8",
).toString("base64");
const token = `aaa.${payload}.bbb`;
const sse = `${[
`data: ${JSON.stringify({
type: "response.output_item.added",
item: { type: "message", id: "msg_1", role: "assistant", status: "in_progress", content: [] },
})}`,
`data: ${JSON.stringify({ type: "response.content_part.added", part: { type: "output_text", text: "" } })}`,
`data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Hello" })}`,
`data: ${JSON.stringify({
type: "response.output_item.done",
item: {
type: "message",
id: "msg_1",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: "Hello" }],
},
})}`,
`data: ${JSON.stringify({
type: "response.completed",
response: {
status: "completed",
usage: {
input_tokens: 5,
output_tokens: 3,
total_tokens: 8,
input_tokens_details: { cached_tokens: 0 },
},
},
})}`,
].join("\n\n")}\n\n`;
const encoder = new TextEncoder();
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(sse));
controller.close();
},
});
const fetchMock = vi.fn(async (input: string | URL, init?: RequestInit) => {
const url = typeof input === "string" ? input : input.toString();
if (url === "https://api.github.com/repos/openai/codex/releases/latest") {
return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 });
}
if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) {
return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } });
}
if (url === "https://chatgpt.com/backend-api/codex/responses") {
const headers = init?.headers instanceof Headers ? init.headers : undefined;
// Verify headers are not set when sessionId is not provided
expect(headers?.has("conversation_id")).toBe(false);
expect(headers?.has("session_id")).toBe(false);
return new Response(stream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
}
return new Response("not found", { status: 404 });
});
global.fetch = fetchMock as typeof fetch;
const model: Model<"openai-codex-responses"> = {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: "https://chatgpt.com/backend-api",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 400000,
maxTokens: 128000,
};
const context: Context = {
systemPrompt: "You are a helpful assistant.",
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
};
// No sessionId provided
const streamResult = streamOpenAICodexResponses(model, context, { apiKey: token });
await streamResult.result();
});
});

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- Session ID is now forwarded to LLM providers for session-based caching (used by OpenAI Codex for prompt caching).
### Fixed
- Add `minimatch` as a direct dependency for explicit imports.

View file

@ -856,6 +856,7 @@ export class AgentSession {
await this.abort();
this.agent.reset();
this.sessionManager.newSession(options);
this.agent.sessionId = this.sessionManager.getSessionId();
this._steeringMessages = [];
this._followUpMessages = [];
this._pendingNextTurnMessages = [];
@ -1666,6 +1667,7 @@ export class AgentSession {
// Set new session
this.sessionManager.setSessionFile(sessionPath);
this.agent.sessionId = this.sessionManager.getSessionId();
// Reload messages
const sessionContext = this.sessionManager.buildSessionContext();
@ -1745,6 +1747,7 @@ export class AgentSession {
} else {
this.sessionManager.createBranchedSession(selectedEntry.parentId);
}
this.agent.sessionId = this.sessionManager.getSessionId();
// Reload messages from entries (works for both file and in-memory mode)
const sessionContext = this.sessionManager.buildSessionContext();

View file

@ -613,6 +613,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
tools: activeToolsArray,
},
convertToLlm,
sessionId: sessionManager.getSessionId(),
transformContext: extensionRunner
? async (messages) => {
return extensionRunner.emitContext(messages);