From f82e82da9340f030b9d8658f7cab17aef19b45b0 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sun, 10 Aug 2025 02:13:13 +0200 Subject: [PATCH] docs: Improve reasoning support table clarity - Remove redundant 'Reasoning Tokens' column (all models count them) - Group by provider for better readability - Clarify model limitations vs API limitations - Simplify check marks to focus on thinking content availability --- packages/agent/README.md | 55 +++++++++++++++---------------------- packages/agent/src/agent.ts | 54 ++++++++++++++++-------------------- todos/todos.md | 10 +++++++ 3 files changed, 55 insertions(+), 64 deletions(-) diff --git a/packages/agent/README.md b/packages/agent/README.md index 7969f061..4b2f2d16 100644 --- a/packages/agent/README.md +++ b/packages/agent/README.md @@ -33,19 +33,16 @@ pi-agent # Continue most recently modified session in current directory pi-agent --continue "Follow up question" -# GPT-OSS via Groq (supports reasoning with both APIs) +# GPT-OSS via Groq pi-agent --base-url https://api.groq.com/openai/v1 --api-key $GROQ_API_KEY --model openai/gpt-oss-120b # GLM 4.5 via OpenRouter pi-agent --base-url https://openrouter.ai/api/v1 --api-key $OPENROUTER_API_KEY --model z-ai/glm-4.5 -# Claude via Anthropic's OpenAI compatibility layer -# Note: No prompt caching or thinking content support. For full features, use the native Anthropic API. -# See: https://docs.anthropic.com/en/api/openai-sdk +# Claude via Anthropic's OpenAI compatibility layer. 
See: https://docs.anthropic.com/en/api/openai-sdk pi-agent --base-url https://api.anthropic.com/v1 --api-key $ANTHROPIC_API_KEY --model claude-opus-4-1-20250805 -# Gemini via Google AI (set GEMINI_API_KEY environment variable) -# Note: Gemini 2.5 models support reasoning with thinking content automatically configured +# Gemini via Google AI pi-agent --base-url https://generativelanguage.googleapis.com/v1beta/openai/ --api-key $GEMINI_API_KEY --model gemini-2.5-flash ``` @@ -108,9 +105,6 @@ Commands you can send via stdin in interactive JSON mode: --help, -h Show help message ``` -### Environment Variables -- `OPENAI_API_KEY` - OpenAI API key (used if --api-key not provided) - ## Session Persistence Sessions are automatically saved to `~/.pi/sessions/` and include: @@ -143,18 +137,13 @@ When using `--json`, the agent outputs these event types: - `user_message` - User input - `assistant_start` - Assistant begins responding - `assistant_message` - Assistant's response -- `thinking` - Reasoning/thinking (for models that support it, requires `--api responses`) +- `reasoning` - Reasoning/thinking (for models that support it) - `tool_call` - Tool being called - `tool_result` - Result from tool - `token_usage` - Token usage statistics (includes `reasoningTokens` for models with reasoning) - `error` - Error occurred - `interrupted` - Processing was interrupted -**Note:** -- OpenAI's Chat Completions API (`--api completions`, the default) only returns reasoning token *counts* but not the actual thinking content. To see thinking events, use the Responses API with `--api responses` for supported models (o1, o3, gpt-5). -- Anthropic's OpenAI compatibility layer doesn't return thinking content. Use the native Anthropic API for full extended thinking features. -- Gemini 2.5 models automatically include thinking content when reasoning is detected - pi-agent handles the `extra_body` configuration for you. 
- The complete TypeScript type definition for `AgentEvent` can be found in [`src/agent.ts`](src/agent.ts#L6). ## Build an Interactive UI with JSON Mode @@ -295,24 +284,6 @@ agent.on('error', (err) => { console.log('Pi Agent Interactive Chat'); ``` -## Reasoning - -Pi-agent supports reasoning/thinking tokens for models that provide this capability: - -### Supported Providers - -| Provider | API | Reasoning Tokens | Thinking Content | Notes | -|----------|-----|------------------|------------------|-------| -| OpenAI (o1, o3) | Responses | ✅ | ✅ | Full support via `reasoning` events | -| OpenAI (o1, o3) | Chat Completions | ✅ | ❌ | Token counts only, no content | -| OpenAI (gpt-5) | Responses | ✅ | ⚠️ | Model returns empty summaries | -| OpenAI (gpt-5) | Chat Completions | ✅ | ❌ | Token counts only | -| Groq (gpt-oss) | Responses | ✅ | ❌ | No reasoning.summary support | -| Groq (gpt-oss) | Chat Completions | ✅ | ✅ | Via `reasoning_format: "parsed"` | -| Gemini 2.5 | Chat Completions | ✅ | ✅ | Via `extra_body.google.thinking_config` | -| Anthropic | OpenAI Compat | ❌ | ❌ | Not supported in compatibility layer | -| OpenRouter | Various | ✅ | ✅ | Model-dependent, see provider docs | - ### Usage Examples ```bash @@ -339,6 +310,24 @@ When reasoning is active, you'll see: - `token_usage` events include `reasoningTokens` field - Console/TUI show reasoning tokens with ⚡ symbol +## Reasoning + +Pi-agent supports reasoning/thinking tokens for models that provide this capability: + +### Supported Providers + +| Provider | Model | API | Thinking Content | Notes | +|----------|-------|-----|------------------|-------| +| **OpenAI** | o1, o3 | Responses | ✅ Full | Thinking events + token counts | +| | o1, o3 | Chat Completions | ❌ | Token counts only | +| | gpt-5 | Both APIs | ❌ | Model limitation (empty summaries) | +| **Groq** | gpt-oss | Chat Completions | ✅ Full | Via `reasoning_format: "parsed"` | +| | gpt-oss | Responses | ❌ | API doesn't support reasoning.summary | +| 
**Gemini** | 2.5 models | Chat Completions | ✅ Full | Auto-configured via extra_body | +| **Anthropic** | Claude | OpenAI Compat | ❌ | Use native API for thinking | +| **OpenRouter** | Various | Both APIs | Varies | Depends on underlying model | + + ### Technical Details The agent automatically: diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index ed1d3e50..8f6e613c 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -152,9 +152,14 @@ function adjustRequestForProvider( if (api === "completions" && supportsReasoning && requestOptions.reasoning_effort) { // Convert reasoning_effort to OpenRouter's reasoning format requestOptions.reasoning = { - effort: requestOptions.reasoning_effort === "low" ? "low" : - requestOptions.reasoning_effort === "minimal" ? "low" : - requestOptions.reasoning_effort === "medium" ? "medium" : "high" + effort: + requestOptions.reasoning_effort === "low" + ? "low" + : requestOptions.reasoning_effort === "minimal" + ? "low" + : requestOptions.reasoning_effort === "medium" + ? 
"medium" + : "high", }; delete requestOptions.reasoning_effort; } @@ -253,8 +258,6 @@ export async function callModelResponsesApi( supportsReasoning?: boolean, baseURL?: string, ): Promise { - await eventReceiver?.on({ type: "assistant_start" }); - let conversationDone = false; while (!conversationDone) { @@ -399,8 +402,6 @@ export async function callModelChatCompletionsApi( supportsReasoning?: boolean, baseURL?: string, ): Promise { - await eventReceiver?.on({ type: "assistant_start" }); - let assistantResponded = false; while (!assistantResponded) { @@ -510,8 +511,7 @@ export class Agent { private sessionManager?: SessionManager; private comboReceiver: AgentEventReceiver; private abortController: AbortController | null = null; - private supportsReasoningResponses: boolean | null = null; // Cache reasoning support for responses API - private supportsReasoningCompletions: boolean | null = null; // Cache reasoning support for completions API + private supportsReasoning: boolean | null = null; constructor(config: AgentConfig, renderer?: AgentEventReceiver, sessionManager?: SessionManager) { this.config = config; @@ -564,44 +564,36 @@ export class Agent { this.abortController = new AbortController(); try { - if (this.config.api === "responses") { - // Check reasoning support only once per agent instance - if (this.supportsReasoningResponses === null) { - this.supportsReasoningResponses = await checkReasoningSupport( - this.client, - this.config.model, - "responses", - this.config.baseURL, - ); - } + await this.comboReceiver.on({ type: "assistant_start" }); + // Check reasoning support only once per agent instance + if (this.supportsReasoning === null) { + this.supportsReasoning = await checkReasoningSupport( + this.client, + this.config.model, + this.config.api, + this.config.baseURL, + ); + } + + if (this.config.api === "responses") { await callModelResponsesApi( this.client, this.config.model, this.messages, this.abortController.signal, this.comboReceiver, - 
this.supportsReasoningResponses, + this.supportsReasoning, this.config.baseURL, ); } else { - // Check reasoning support for completions API - if (this.supportsReasoningCompletions === null) { - this.supportsReasoningCompletions = await checkReasoningSupport( - this.client, - this.config.model, - "completions", - this.config.baseURL, - ); - } - await callModelChatCompletionsApi( this.client, this.config.model, this.messages, this.abortController.signal, this.comboReceiver, - this.supportsReasoningCompletions, + this.supportsReasoning, this.config.baseURL, ); } diff --git a/todos/todos.md b/todos/todos.md index c14d0851..b05ae11e 100644 --- a/todos/todos.md +++ b/todos/todos.md @@ -1,3 +1,13 @@ +- agent: ultrathink to temporarily set reasoning_effort? + +- agent: need to figure out a model's max context length + +- agent: compaction & micro compaction + +- agent: token usage output sucks, make it better + - current: ↑1,706 ↓409 ⚒ 2 + - maybe: ↑ 1,706 - ↓ 409 - ⚒ 2 (or dot?) + - agent: test for basic functionality, including thinking, completions & responses API support for all the known providers and their endpoints. - agent: token usage gets overwritten with each message that has usage data. however, if the latest data doesn't have a specific usage field, we record undefined i think? also, {"type":"token_usage" "inputTokens":240,"outputTokens":35,"totalTokens":275,"cacheReadTokens":0,"cacheWriteTokens":0} doesn't contain reasoningToken? do we lack initialization? See case "token_usage": in renderers. probably need to check if lastXXX > current and use lastXXX.