agent: Add reasoning token support for OpenAI reasoning models

- Extract and display reasoning tokens from both Chat Completions and Responses APIs
- Add smart preflight detection to check reasoning support per model/API (cached per agent)
- Support both reasoning_text (o1/o3) and summary_text (gpt-5) formats
- Display reasoning tokens with a dedicated symbol in console and TUI renderers
- Only send reasoning parameters to models that support them
- Fix event type from "thinking" to "reasoning" for consistency

Note: Chat Completions API only returns reasoning token counts, not content (by design).
Only Responses API exposes actual thinking/reasoning events.
This commit is contained in:
Mario Zechner 2025-08-10 00:32:30 +02:00
parent 9157411034
commit 62d9eefc2a
8 changed files with 284 additions and 15 deletions

View file

@ -6,7 +6,7 @@ import { executeTool, toolsForChat, toolsForResponses } from "./tools/tools.js";
export type AgentEvent =
| { type: "session_start"; sessionId: string; model: string; api: string; baseURL: string; systemPrompt: string }
| { type: "assistant_start" }
| { type: "thinking"; text: string }
| { type: "reasoning"; text: string }
| { type: "tool_call"; toolCallId: string; name: string; args: string }
| { type: "tool_result"; toolCallId: string; result: string; isError: boolean }
| { type: "assistant_message"; text: string }
@ -20,6 +20,7 @@ export type AgentEvent =
totalTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
reasoningTokens: number;
};
export interface AgentEventReceiver {
@ -40,15 +41,76 @@ export interface ToolCall {
id: string;
}
// Cache for model reasoning support detection per API type.
// Keyed by model name; each entry memoizes the probe result per API surface.
const modelReasoningSupport = new Map<string, { completions?: boolean; responses?: boolean }>();

/**
 * Probes whether `model` accepts reasoning parameters on the given API surface.
 *
 * A minimal throwaway request is sent with the reasoning parameter set; if the
 * request succeeds the model is considered to support reasoning, otherwise not.
 * Results are memoized in `modelReasoningSupport`, so the (billable) probe is
 * issued at most once per model/API pair for the process lifetime.
 *
 * NOTE(review): any failure — including network or auth errors — is treated as
 * "unsupported", matching the original behavior; confirm this is intended.
 *
 * @param client - OpenAI client used for the probe request
 * @param model - Model identifier to probe
 * @param api - Which API surface to probe ("completions" or "responses")
 * @returns true when the model accepted the reasoning parameter
 */
async function checkReasoningSupport(
	client: OpenAI,
	model: string,
	api: "completions" | "responses",
): Promise<boolean> {
	// Serve from cache when this model/API pair was probed before.
	const cached = modelReasoningSupport.get(model);
	if (cached && cached[api] !== undefined) {
		return cached[api]!;
	}

	// Single probe path for both APIs; any rejection means "unsupported".
	let supportsReasoning: boolean;
	try {
		if (api === "responses") {
			// Minimal request with a reasoning parameter for the Responses API.
			await client.responses.create({
				model,
				input: "test",
				max_output_tokens: 1024,
				reasoning: {
					effort: "low", // Use low instead of minimal to ensure we get summaries
				},
			});
		} else {
			// For Chat Completions API, try with the reasoning_effort parameter.
			await client.chat.completions.create({
				model,
				messages: [{ role: "user", content: "test" }],
				max_completion_tokens: 1,
				reasoning_effort: "minimal",
			});
		}
		supportsReasoning = true;
	} catch {
		// Optional catch binding: the error value itself is never inspected.
		supportsReasoning = false;
	}

	// Memoize the result for this model/API pair without clobbering the
	// sibling API's entry.
	const entry = modelReasoningSupport.get(model) ?? {};
	entry[api] = supportsReasoning;
	modelReasoningSupport.set(model, entry);
	return supportsReasoning;
}
export async function callModelResponsesApi(
client: OpenAI,
model: string,
messages: any[],
signal?: AbortSignal,
eventReceiver?: AgentEventReceiver,
supportsReasoning?: boolean,
): Promise<void> {
await eventReceiver?.on({ type: "assistant_start" });
// Use provided reasoning support or detect it
if (supportsReasoning === undefined) {
supportsReasoning = await checkReasoningSupport(client, model, "responses");
}
let conversationDone = false;
while (!conversationDone) {
@ -65,11 +127,13 @@ export async function callModelResponsesApi(
tools: toolsForResponses as any,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: {
effort: "medium", // Use auto reasoning effort
summary: "auto",
},
max_output_tokens: 2000, // TODO make configurable
...(supportsReasoning && {
reasoning: {
effort: "medium", // Use auto reasoning effort
summary: "auto", // Request reasoning summaries
},
}),
},
{ signal },
);
@ -82,8 +146,9 @@ export async function callModelResponsesApi(
inputTokens: usage.input_tokens || 0,
outputTokens: usage.output_tokens || 0,
totalTokens: usage.total_tokens || 0,
cacheReadTokens: usage.input_tokens_details.cached_tokens || 0,
cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0,
cacheWriteTokens: 0, // Not available in API
reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0,
});
}
@ -101,9 +166,11 @@ export async function callModelResponsesApi(
switch (item.type) {
case "reasoning": {
for (const content of item.content || []) {
if (content.type === "reasoning_text") {
await eventReceiver?.on({ type: "thinking", text: content.text });
// Handle both content (o1/o3) and summary (gpt-5) formats
const reasoningItems = item.content || item.summary || [];
for (const content of reasoningItems) {
if (content.type === "reasoning_text" || content.type === "summary_text") {
await eventReceiver?.on({ type: "reasoning", text: content.text });
}
}
break;
@ -182,9 +249,15 @@ export async function callModelChatCompletionsApi(
messages: any[],
signal?: AbortSignal,
eventReceiver?: AgentEventReceiver,
supportsReasoning?: boolean,
): Promise<void> {
await eventReceiver?.on({ type: "assistant_start" });
// Use provided reasoning support or detect it
if (supportsReasoning === undefined) {
supportsReasoning = await checkReasoningSupport(client, model, "completions");
}
let assistantResponded = false;
while (!assistantResponded) {
@ -200,6 +273,9 @@ export async function callModelChatCompletionsApi(
tools: toolsForChat,
tool_choice: "auto",
max_completion_tokens: 2000, // TODO make configurable
...(supportsReasoning && {
reasoning_effort: "medium",
}),
},
{ signal },
);
@ -216,6 +292,7 @@ export async function callModelChatCompletionsApi(
totalTokens: usage.total_tokens || 0,
cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0,
cacheWriteTokens: 0, // Not available in API
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0,
});
}
@ -279,6 +356,8 @@ export class Agent {
private sessionManager?: SessionManager;
private comboReceiver: AgentEventReceiver;
private abortController: AbortController | null = null;
private supportsReasoningResponses: boolean | null = null; // Cache reasoning support for responses API
private supportsReasoningCompletions: boolean | null = null; // Cache reasoning support for completions API
constructor(config: AgentConfig, renderer?: AgentEventReceiver, sessionManager?: SessionManager) {
this.config = config;
@ -332,25 +411,46 @@ export class Agent {
try {
if (this.config.api === "responses") {
// Check reasoning support only once per agent instance
if (this.supportsReasoningResponses === null) {
this.supportsReasoningResponses = await checkReasoningSupport(
this.client,
this.config.model,
"responses",
);
}
await callModelResponsesApi(
this.client,
this.config.model,
this.messages,
this.abortController.signal,
this.comboReceiver,
this.supportsReasoningResponses,
);
} else {
// Check reasoning support for completions API
if (this.supportsReasoningCompletions === null) {
this.supportsReasoningCompletions = await checkReasoningSupport(
this.client,
this.config.model,
"completions",
);
}
await callModelChatCompletionsApi(
this.client,
this.config.model,
this.messages,
this.abortController.signal,
this.comboReceiver,
this.supportsReasoningCompletions,
);
}
} catch (e: any) {
} catch (e) {
// Check if this was an interruption
if (e.message === "Interrupted" || this.abortController.signal.aborted) {
const errorMessage = e instanceof Error ? e.message : String(e);
if (errorMessage === "Interrupted" || this.abortController.signal.aborted) {
return;
}
throw e;
@ -385,7 +485,7 @@ export class Agent {
});
break;
case "thinking":
case "reasoning":
// Add reasoning message
this.messages.push({
type: "reasoning",

View file

@ -13,6 +13,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
private lastOutputTokens = 0;
private lastCacheReadTokens = 0;
private lastCacheWriteTokens = 0;
private lastReasoningTokens = 0;
private startAnimation(text: string = "Thinking"): void {
if (this.isAnimating || !this.isTTY) return;
@ -54,6 +55,11 @@ export class ConsoleRenderer implements AgentEventReceiver {
`${this.lastInputTokens.toLocaleString()}${this.lastOutputTokens.toLocaleString()}`,
);
// Add reasoning tokens if present
if (this.lastReasoningTokens > 0) {
metricsText += chalk.dim(`${this.lastReasoningTokens.toLocaleString()}`);
}
// Add cache info if available
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
const cacheText: string[] = [];
@ -96,7 +102,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
this.startAnimation();
break;
case "thinking":
case "reasoning":
this.stopAnimation();
console.log(chalk.dim("[thinking]"));
console.log(chalk.dim(event.text));
@ -162,6 +168,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
this.lastOutputTokens = event.outputTokens;
this.lastCacheReadTokens = event.cacheReadTokens;
this.lastCacheWriteTokens = event.cacheWriteTokens;
this.lastReasoningTokens = event.reasoningTokens;
// Don't stop animation for this event
break;
}

View file

@ -61,6 +61,7 @@ export class TuiRenderer implements AgentEventReceiver {
private lastOutputTokens = 0;
private lastCacheReadTokens = 0;
private lastCacheWriteTokens = 0;
private lastReasoningTokens = 0;
private toolCallCount = 0;
private tokenStatusComponent: TextComponent | null = null;
@ -185,7 +186,7 @@ export class TuiRenderer implements AgentEventReceiver {
this.statusContainer.addChild(this.currentLoadingAnimation);
break;
case "thinking": {
case "reasoning": {
// Show thinking in dim text
const thinkingContainer = new Container();
thinkingContainer.addChild(new TextComponent(chalk.dim("[thinking]")));
@ -264,6 +265,7 @@ export class TuiRenderer implements AgentEventReceiver {
this.lastOutputTokens = event.outputTokens;
this.lastCacheReadTokens = event.cacheReadTokens;
this.lastCacheWriteTokens = event.cacheWriteTokens;
this.lastReasoningTokens = event.reasoningTokens;
this.updateTokenDisplay();
break;
@ -291,6 +293,11 @@ export class TuiRenderer implements AgentEventReceiver {
// Build token display text
let tokenText = chalk.dim(`${this.lastInputTokens.toLocaleString()}${this.lastOutputTokens.toLocaleString()}`);
// Add reasoning tokens if present
if (this.lastReasoningTokens > 0) {
tokenText += chalk.dim(`${this.lastReasoningTokens.toLocaleString()}`);
}
// Add cache info if available
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
const cacheText: string[] = [];

View file

@ -142,6 +142,7 @@ export class SessionManager implements AgentEventReceiver {
totalTokens: 0,
cacheReadTokens: 0,
cacheWriteTokens: 0,
reasoningTokens: 0,
};
const lines = readFileSync(this.sessionFile, "utf8").trim().split("\n");