From f82e82da9340f030b9d8658f7cab17aef19b45b0 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sun, 10 Aug 2025 02:13:13 +0200 Subject: [PATCH] docs: Improve reasoning support table clarity - Remove redundant 'Reasoning Tokens' column (all models count them) - Group by provider for better readability - Clarify model limitations vs API limitations - Simplify check marks to focus on thinking content availability --- packages/agent/README.md | 55 +++++++++++++++---------------------- packages/agent/src/agent.ts | 54 ++++++++++++++++-------------------- todos/todos.md | 10 +++++++ 3 files changed, 55 insertions(+), 64 deletions(-) diff --git a/packages/agent/README.md b/packages/agent/README.md index 7969f061..4b2f2d16 100644 --- a/packages/agent/README.md +++ b/packages/agent/README.md @@ -33,19 +33,16 @@ pi-agent # Continue most recently modified session in current directory pi-agent --continue "Follow up question" -# GPT-OSS via Groq (supports reasoning with both APIs) +# GPT-OSS via Groq pi-agent --base-url https://api.groq.com/openai/v1 --api-key $GROQ_API_KEY --model openai/gpt-oss-120b # GLM 4.5 via OpenRouter pi-agent --base-url https://openrouter.ai/api/v1 --api-key $OPENROUTER_API_KEY --model z-ai/glm-4.5 -# Claude via Anthropic's OpenAI compatibility layer -# Note: No prompt caching or thinking content support. For full features, use the native Anthropic API. -# See: https://docs.anthropic.com/en/api/openai-sdk +# Claude via Anthropic's OpenAI compatibility layer. 
See: https://docs.anthropic.com/en/api/openai-sdk pi-agent --base-url https://api.anthropic.com/v1 --api-key $ANTHROPIC_API_KEY --model claude-opus-4-1-20250805 -# Gemini via Google AI (set GEMINI_API_KEY environment variable) -# Note: Gemini 2.5 models support reasoning with thinking content automatically configured +# Gemini via Google AI pi-agent --base-url https://generativelanguage.googleapis.com/v1beta/openai/ --api-key $GEMINI_API_KEY --model gemini-2.5-flash ``` @@ -108,9 +105,6 @@ Commands you can send via stdin in interactive JSON mode: --help, -h Show help message ``` -### Environment Variables -- `OPENAI_API_KEY` - OpenAI API key (used if --api-key not provided) - ## Session Persistence Sessions are automatically saved to `~/.pi/sessions/` and include: @@ -143,18 +137,13 @@ When using `--json`, the agent outputs these event types: - `user_message` - User input - `assistant_start` - Assistant begins responding - `assistant_message` - Assistant's response -- `thinking` - Reasoning/thinking (for models that support it, requires `--api responses`) +- `reasoning` - Reasoning/thinking (for models that support it) - `tool_call` - Tool being called - `tool_result` - Result from tool - `token_usage` - Token usage statistics (includes `reasoningTokens` for models with reasoning) - `error` - Error occurred - `interrupted` - Processing was interrupted -**Note:** -- OpenAI's Chat Completions API (`--api completions`, the default) only returns reasoning token *counts* but not the actual thinking content. To see thinking events, use the Responses API with `--api responses` for supported models (o1, o3, gpt-5). -- Anthropic's OpenAI compatibility layer doesn't return thinking content. Use the native Anthropic API for full extended thinking features. -- Gemini 2.5 models automatically include thinking content when reasoning is detected - pi-agent handles the `extra_body` configuration for you. 
- The complete TypeScript type definition for `AgentEvent` can be found in [`src/agent.ts`](src/agent.ts#L6). ## Build an Interactive UI with JSON Mode @@ -295,24 +284,6 @@ agent.on('error', (err) => { console.log('Pi Agent Interactive Chat'); ``` -## Reasoning - -Pi-agent supports reasoning/thinking tokens for models that provide this capability: - -### Supported Providers - -| Provider | API | Reasoning Tokens | Thinking Content | Notes | -|----------|-----|------------------|------------------|-------| -| OpenAI (o1, o3) | Responses | ✅ | ✅ | Full support via `reasoning` events | -| OpenAI (o1, o3) | Chat Completions | ✅ | ❌ | Token counts only, no content | -| OpenAI (gpt-5) | Responses | ✅ | ⚠️ | Model returns empty summaries | -| OpenAI (gpt-5) | Chat Completions | ✅ | ❌ | Token counts only | -| Groq (gpt-oss) | Responses | ✅ | ❌ | No reasoning.summary support | -| Groq (gpt-oss) | Chat Completions | ✅ | ✅ | Via `reasoning_format: "parsed"` | -| Gemini 2.5 | Chat Completions | ✅ | ✅ | Via `extra_body.google.thinking_config` | -| Anthropic | OpenAI Compat | ❌ | ❌ | Not supported in compatibility layer | -| OpenRouter | Various | ✅ | ✅ | Model-dependent, see provider docs | - ### Usage Examples ```bash @@ -339,6 +310,24 @@ When reasoning is active, you'll see: - `token_usage` events include `reasoningTokens` field - Console/TUI show reasoning tokens with ⚡ symbol +## Reasoning + +Pi-agent supports reasoning/thinking tokens for models that provide this capability: + +### Supported Providers + +| Provider | Model | API | Thinking Content | Notes | +|----------|-------|-----|------------------|-------| +| **OpenAI** | o1, o3 | Responses | ✅ Full | Thinking events + token counts | +| | o1, o3 | Chat Completions | ❌ | Token counts only | +| | gpt-5 | Both APIs | ❌ | Model limitation (empty summaries) | +| **Groq** | gpt-oss | Chat Completions | ✅ Full | Via `reasoning_format: "parsed"` | +| | gpt-oss | Responses | ❌ | API doesn't support reasoning.summary | +| 
**Gemini** | 2.5 models | Chat Completions | ✅ Full | Auto-configured via extra_body | +| **Anthropic** | Claude | OpenAI Compat | ❌ | Use native API for thinking | +| **OpenRouter** | Various | Both APIs | Varies | Depends on underlying model | + + ### Technical Details The agent automatically: diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index ed1d3e50..8f6e613c 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -152,9 +152,14 @@ function adjustRequestForProvider( if (api === "completions" && supportsReasoning && requestOptions.reasoning_effort) { // Convert reasoning_effort to OpenRouter's reasoning format requestOptions.reasoning = { - effort: requestOptions.reasoning_effort === "low" ? "low" : - requestOptions.reasoning_effort === "minimal" ? "low" : - requestOptions.reasoning_effort === "medium" ? "medium" : "high" + effort: + requestOptions.reasoning_effort === "low" + ? "low" + : requestOptions.reasoning_effort === "minimal" + ? "low" + : requestOptions.reasoning_effort === "medium" + ? 
"medium" + : "high", }; delete requestOptions.reasoning_effort; } @@ -253,8 +258,6 @@ export async function callModelResponsesApi( supportsReasoning?: boolean, baseURL?: string, ): Promise { - await eventReceiver?.on({ type: "assistant_start" }); - let conversationDone = false; while (!conversationDone) { @@ -399,8 +402,6 @@ export async function callModelChatCompletionsApi( supportsReasoning?: boolean, baseURL?: string, ): Promise { - await eventReceiver?.on({ type: "assistant_start" }); - let assistantResponded = false; while (!assistantResponded) { @@ -510,8 +511,7 @@ export class Agent { private sessionManager?: SessionManager; private comboReceiver: AgentEventReceiver; private abortController: AbortController | null = null; - private supportsReasoningResponses: boolean | null = null; // Cache reasoning support for responses API - private supportsReasoningCompletions: boolean | null = null; // Cache reasoning support for completions API + private supportsReasoning: boolean | null = null; constructor(config: AgentConfig, renderer?: AgentEventReceiver, sessionManager?: SessionManager) { this.config = config; @@ -564,44 +564,36 @@ export class Agent { this.abortController = new AbortController(); try { - if (this.config.api === "responses") { - // Check reasoning support only once per agent instance - if (this.supportsReasoningResponses === null) { - this.supportsReasoningResponses = await checkReasoningSupport( - this.client, - this.config.model, - "responses", - this.config.baseURL, - ); - } + await this.comboReceiver.on({ type: "assistant_start" }); + // Check reasoning support only once per agent instance + if (this.supportsReasoning === null) { + this.supportsReasoning = await checkReasoningSupport( + this.client, + this.config.model, + this.config.api, + this.config.baseURL, + ); + } + + if (this.config.api === "responses") { await callModelResponsesApi( this.client, this.config.model, this.messages, this.abortController.signal, this.comboReceiver, - 
this.supportsReasoningResponses, + this.supportsReasoning, this.config.baseURL, ); } else { - // Check reasoning support for completions API - if (this.supportsReasoningCompletions === null) { - this.supportsReasoningCompletions = await checkReasoningSupport( - this.client, - this.config.model, - "completions", - this.config.baseURL, - ); - } - await callModelChatCompletionsApi( this.client, this.config.model, this.messages, this.abortController.signal, this.comboReceiver, - this.supportsReasoningCompletions, + this.supportsReasoning, this.config.baseURL, ); } diff --git a/todos/todos.md b/todos/todos.md index c14d0851..b05ae11e 100644 --- a/todos/todos.md +++ b/todos/todos.md @@ -1,3 +1,13 @@ +- agent: ultrathink to temporarily set reasoning_effort? + +- agent: need to figure out a model's max context length + +- agent: compaction & micro compaction + +- agent: token usage output sucks, make it better + - current: ↑1,706 ↓409 ⚒ 2 + - maybe: ↑ 1,706 - ↓ 409 - ⚒ 2 (or dot?) + - agent: test for basic functionality, including thinking, completions & responses API support for all the known providers and their endpoints. - agent: token usage gets overwritten with each message that has usage data. however, if the latest data doesn't have a specific usage field, we record undefined i think? also, {"type":"token_usage" "inputTokens":240,"outputTokens":35,"totalTokens":275,"cacheReadTokens":0,"cacheWriteTokens":0} doesn't contain reasoningToken? do we lack initialization? See case "token_usage": in renderers. probably need to check if lastXXX > current and use lastXXX.