diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index 7883aa9e..378d6ea7 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### Fixed
 
+- **OpenAI Token Counting**: Fixed `usage.input` to exclude cached tokens for OpenAI providers. Previously, `input` included cached tokens, causing double-counting when calculating total context size via `input + cacheRead`. Now `input` represents non-cached input tokens across all providers, making `input + output + cacheRead + cacheWrite` the correct formula for total context size.
+
 - **Fixed Claude Opus 4.5 cache pricing** (was 3x too expensive)
   - Corrected cache_read: $1.50 → $0.50 per MTok
   - Corrected cache_write: $18.75 → $6.25 per MTok
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 5d4aaa9a..22f57503 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -105,12 +105,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 
 			for await (const chunk of openaiStream) {
 				if (chunk.usage) {
+					const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
 					output.usage = {
-						input: chunk.usage.prompt_tokens || 0,
+						// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
+						input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
 						output:
 							(chunk.usage.completion_tokens || 0) +
 							(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
-						cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
+						cacheRead: cachedTokens,
 						cacheWrite: 0,
 						cost: {
 							input: 0,
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index 59d8cba1..45569b38 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -253,10 +253,12 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 				else if (event.type === "response.completed") {
 					const response = event.response;
 					if (response?.usage) {
+						const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
 						output.usage = {
-							input: response.usage.input_tokens || 0,
+							// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
+							input: (response.usage.input_tokens || 0) - cachedTokens,
 							output: response.usage.output_tokens || 0,
-							cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
+							cacheRead: cachedTokens,
 							cacheWrite: 0,
 							cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 						};
diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md
index ba83c9e0..b8242fe0 100644
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Added
+
+- **Branch Source Tracking**: Branched sessions now store `branchedFrom` in the session header, containing the path to the original session file. Useful for tracing session lineage.
+
 ## [0.12.5] - 2025-12-03
 
 ### Added
diff --git a/packages/coding-agent/docs/compaction.md b/packages/coding-agent/docs/compaction.md
index de0431b2..0bece293 100644
--- a/packages/coding-agent/docs/compaction.md
+++ b/packages/coding-agent/docs/compaction.md
@@ -246,17 +246,18 @@ interface Settings {
 ```
 
 **Why these defaults:**
-- `reserveTokens: 16384` - Room for summary output (~8k) plus safety margin (~8k)
+- `reserveTokens: 16384` - Room for summary output (~13k) plus safety margin (~3k)
 - `keepRecentTokens: 20000` - Preserves recent context verbatim, summary focuses on older content
 
 ### Token Calculation
 
-Context tokens are calculated from assistant messages as:
+Context tokens are calculated from the **last non-aborted assistant message** using the same formula as the footer:
+
 ```
 contextTokens = usage.input + usage.output + usage.cacheRead + usage.cacheWrite
 ```
 
-The diff between consecutive (non-aborted, non-error) assistant messages gives the tokens added by that turn. Verified against actual session files.
+This gives the total context size across all providers. The `input` field holds only non-cached input tokens, so adding `cacheRead` and `cacheWrite` yields the true total input.
 
 **Trigger condition:**
 ```typescript
@@ -353,12 +354,12 @@ u5, a5, u6, a6, t6, a6, u7, a7
 
 Session loader finds COMPACTION 2 (latest), builds:
 ```
-[summary2_as_user_msg], u7, a7
+[summary2_as_user_msg], u6, a6, t6, a6, u7, a7
 ```
 
 Note: COMPACTION 2's summary incorporates COMPACTION 1's summary because the summarization model received the full current context (which included summary1 as first message).
 
-When calculating `keepLastMessages` for COMPACTION 2, we only count messages between COMPACTION 1 and COMPACTION 2 (cannot cross the boundary).
+**Boundary rule:** When calculating `keepLastMessages` for COMPACTION 2, we only count messages between COMPACTION 1 and COMPACTION 2. If `keepLastMessages` exceeds the available messages (e.g., keepLastMessages=10 but only 6 messages exist after COMPACTION 1), we take all available messages up to the boundary. We never cross a compaction boundary.
 
 ### Summarization
 
@@ -367,6 +368,7 @@ Use **pi-ai directly** (not the full agent loop) for summarization:
 - Set `maxTokens` to `0.8 * reserveTokens` (leaves 20% for prompt overhead and safety margin)
 - Pass abort signal for cancellation
 - Use the currently selected model
+- **Disable reasoning** (thinking level "off"), since we only need a summary, not extended reasoning
 
 With default `reserveTokens: 16384`, maxTokens = ~13107.
 
@@ -407,8 +409,20 @@ Works in all modes:
 
 The `/branch` command lets users create a new session from a previous user message. With compaction:
 
-- **Branch UI shows ALL user messages** in the session file, both before and after any compaction events
-- **Branching copies the session file** up to the selected user message, including all compaction events and messages
+- **Branch UI reads from session file directly** (not from `state.messages`) to show ALL user messages, including those before compaction events
+- **Branching copies the raw session file** line-by-line up to (but excluding) the selected user message, preserving all compaction events and intermediate entries
+
+#### Why read from session file instead of state.messages
+
+After compaction, `state.messages` only contains `[summary_user_msg, ...kept_messages, ...new_messages]`. The pre-compaction messages are not in state. To allow branching to any historical point, we must read the session file directly.
+
+#### Reworked createBranchedSession
+
+The current implementation iterates `state.messages` and writes fresh entries. The new implementation:
+1. Read session file line by line
+2. For each line, check if it's the target user message
+3. Copy all lines up to (but excluding) the target user message
+4. The target user message text goes into the editor
 
 #### Example: Branching After Compaction
 
@@ -423,24 +437,25 @@ User branches at u3. New session file:
 ```
 u1, a1, u2, a2
 [COMPACTION: summary="...", keepLastMessages=2]
-u3
 ```
 
 Session loader builds context for new session:
 ```
-[summary_as_user_msg], u2, a2, u3
+[summary_as_user_msg], u2, a2
 ```
 
+User's editor contains u3's text for editing/resubmission.
+
 #### Example: Branching Before Compaction
 
 Same session file, user branches at u2. New session file:
 ```
-u1, a1, u2
+u1, a1
 ```
 
 No compaction in new session. Session loader builds:
 ```
-u1, a1, u2
+u1, a1
 ```
 
 This effectively "undoes" the compaction, letting users recover if important context was lost.
@@ -449,6 +464,12 @@ This effectively "undoes" the compaction, letting users recover if important con
 
 Auto-compaction is checked in the agent subscription callback after each `message_end` event for assistant messages. If context tokens exceed the threshold, compaction runs.
 
+**Why abort mid-turn:** If auto-compaction triggers after an assistant message that contains tool calls, we abort immediately rather than waiting for tool results. Waiting would risk:
+1. Tool results filling remaining context, leaving no room for the summary
+2. Context overflow before the next checkpoint (`agent_end`)
+
+The abort causes some work loss, but the summary captures progress up to that point.
+
 **Trigger flow (similar to `/clear` command):**
 
 ```typescript
@@ -468,7 +489,7 @@ async handleAutoCompaction(): Promise<void> {
   this.statusContainer.clear();
   
   // 4. Perform compaction on current state:
-  //    - Generate summary using pi-ai directly
+  //    - Generate summary using pi-ai directly (no tools, reasoning off)
   //    - Write compaction event to session file
   //    - Rebuild agent messages (summary as user msg + kept messages)
   //    - Rebuild UI to reflect new state
@@ -486,12 +507,13 @@ This mirrors the `/clear` command pattern: unsubscribe first to prevent processi
 
 1. Add `compaction` field to `Settings` interface and `SettingsManager`
 2. Add `CompactionEvent` type to session manager
-3. Update session loader to handle compaction events
-4. Add `/compact` command handler
-5. Add `/autocompact` command with selector UI
-6. Add auto-compaction check in subscription callback after assistant `message_end`
-7. Implement `handleAutoCompaction()` following the unsubscribe/abort/wait/compact/resubscribe pattern
-8. Implement summarization function using pi-ai
-9. Add compaction event to RPC/JSON output types
-10. Update footer to show when auto-compact is disabled
-11. Ensure `/branch` UI shows all user messages (including pre-compaction)
+3. Update session loader to handle compaction events (find latest, apply keepLastMessages with boundary rule)
+4. Rework `createBranchedSession` to copy raw session file lines instead of re-serializing from state
+5. Update `/branch` UI to read user messages from session file directly
+6. Add `/compact` command handler
+7. Add `/autocompact` command with selector UI
+8. Add auto-compaction check in subscription callback after assistant `message_end`
+9. Implement `handleAutoCompaction()` following the unsubscribe/abort/wait/compact/resubscribe pattern
+10. Implement summarization function using pi-ai (no tools, reasoning off)
+11. Add compaction event to RPC/JSON output types
+12. Update footer to show when auto-compact is disabled
diff --git a/packages/coding-agent/docs/session.md b/packages/coding-agent/docs/session.md
index 7b949728..66499b77 100644
--- a/packages/coding-agent/docs/session.md
+++ b/packages/coding-agent/docs/session.md
@@ -26,6 +26,12 @@ First line of the file. Defines session metadata.
 {"type":"session","id":"uuid","timestamp":"2024-12-03T14:00:00.000Z","cwd":"/path/to/project","provider":"anthropic","modelId":"claude-sonnet-4-5","thinkingLevel":"off"}
 ```
 
+For branched sessions, the header also includes a `branchedFrom` field holding the path to the source session file:
+
+```json
+{"type":"session","id":"uuid","timestamp":"2024-12-03T14:00:00.000Z","cwd":"/path/to/project","provider":"anthropic","modelId":"claude-sonnet-4-5","thinkingLevel":"off","branchedFrom":"/path/to/original/session.jsonl"}
+```
+
 ### SessionMessageEntry
 
 A message in the conversation. The `message` field contains an `AppMessage` (see [rpc.md](./rpc.md#message-types)).
diff --git a/packages/coding-agent/src/session-manager.ts b/packages/coding-agent/src/session-manager.ts
index 6cd565b6..36b5dc8a 100644
--- a/packages/coding-agent/src/session-manager.ts
+++ b/packages/coding-agent/src/session-manager.ts
@@ -20,6 +20,7 @@ export interface SessionHeader {
 	provider: string;
 	modelId: string;
 	thinkingLevel: string;
+	branchedFrom?: string; // Path to the session file this was branched from
 }
 
 export interface SessionMessageEntry {
@@ -430,6 +431,7 @@ export class SessionManager {
 			provider: state.model.provider,
 			modelId: state.model.id,
 			thinkingLevel: state.thinkingLevel,
+			branchedFrom: this.sessionFile,
 		};
 		appendFileSync(newSessionFile, JSON.stringify(entry) + "\n");