mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 07:04:45 +00:00
agent: Add reasoning token support for OpenAI reasoning models
- Extract and display reasoning tokens from both Chat Completions and Responses APIs
- Add smart preflight detection to check reasoning support per model/API (cached per agent)
- Support both reasoning_text (o1/o3) and summary_text (gpt-5) formats
- Display reasoning tokens with ⚡ symbol in console and TUI renderers
- Only send reasoning parameters to models that support them
- Fix event type from "thinking" to "reasoning" for consistency
Note: Chat Completions API only returns reasoning token counts, not content (by design).
Only Responses API exposes actual thinking/reasoning events.
This commit is contained in:
parent
9157411034
commit
62d9eefc2a
8 changed files with 284 additions and 15 deletions
|
|
@ -6,7 +6,7 @@ import { executeTool, toolsForChat, toolsForResponses } from "./tools/tools.js";
|
|||
export type AgentEvent =
|
||||
| { type: "session_start"; sessionId: string; model: string; api: string; baseURL: string; systemPrompt: string }
|
||||
| { type: "assistant_start" }
|
||||
| { type: "thinking"; text: string }
|
||||
| { type: "reasoning"; text: string }
|
||||
| { type: "tool_call"; toolCallId: string; name: string; args: string }
|
||||
| { type: "tool_result"; toolCallId: string; result: string; isError: boolean }
|
||||
| { type: "assistant_message"; text: string }
|
||||
|
|
@ -20,6 +20,7 @@ export type AgentEvent =
|
|||
totalTokens: number;
|
||||
cacheReadTokens: number;
|
||||
cacheWriteTokens: number;
|
||||
reasoningTokens: number;
|
||||
};
|
||||
|
||||
export interface AgentEventReceiver {
|
||||
|
|
@ -40,15 +41,76 @@ export interface ToolCall {
|
|||
id: string;
|
||||
}
|
||||
|
||||
// Cache for model reasoning support detection per API type
|
||||
const modelReasoningSupport = new Map<string, { completions?: boolean; responses?: boolean }>();
|
||||
|
||||
/**
 * HTTP statuses that say nothing about whether a model accepts reasoning
 * parameters: bad credentials, timeouts, conflicts and rate limits.
 */
const INCONCLUSIVE_PROBE_STATUSES: ReadonlySet<number> = new Set([401, 403, 408, 409, 429]);

/**
 * Decides whether a failed probe should be ignored rather than recorded as
 * "reasoning not supported".
 *
 * An error is inconclusive when it carries no numeric `status` (nothing came
 * back over HTTP), when the status is 5xx, or when it is one of
 * {@link INCONCLUSIVE_PROBE_STATUSES}.
 *
 * @param error - The value thrown by the probe request.
 * @returns `true` when the failure must not be cached as a negative result.
 */
function isInconclusiveProbeError(error: unknown): boolean {
	const status =
		typeof error === "object" && error !== null && "status" in error
			? (error as { status?: unknown }).status
			: undefined;

	if (typeof status !== "number") {
		return true;
	}
	return status >= 500 || INCONCLUSIVE_PROBE_STATUSES.has(status);
}

/**
 * Detects whether `model` accepts reasoning parameters on the given API by
 * sending one small probe request that carries such a parameter.
 *
 * Results are memoised in the module-level `modelReasoningSupport` map, keyed
 * by model name with one slot per API. A probe that succeeds is cached as
 * `true`; a probe the server rejects is cached as `false`. A probe that fails
 * for an unrelated reason (see {@link isInconclusiveProbeError}) still returns
 * `false`, so the caller simply omits the reasoning parameters, but it is NOT
 * cached — a transient outage must not disable reasoning for the rest of the
 * process.
 *
 * This function never throws.
 *
 * @param client - OpenAI client used to send the probe.
 * @param model - Model identifier to probe; also the cache key.
 * @param api - Which API surface to probe.
 * @returns `true` if the probe request was accepted, otherwise `false`.
 */
async function checkReasoningSupport(
	client: OpenAI,
	model: string,
	api: "completions" | "responses",
): Promise<boolean> {
	const known = modelReasoningSupport.get(model)?.[api];
	if (known !== undefined) {
		return known;
	}

	let supportsReasoning: boolean;
	try {
		if (api === "responses") {
			await client.responses.create({
				model,
				input: "test",
				max_output_tokens: 1024,
				reasoning: {
					effort: "low",
				},
			});
		} else {
			// Probe with "low" rather than "minimal": "minimal" is the narrowest
			// effort value and is not what real requests send.
			// NOTE(review): o-series models reportedly reject "minimal" while
			// accepting "low" — confirm against the API reference.
			await client.chat.completions.create({
				model,
				messages: [{ role: "user", content: "test" }],
				max_completion_tokens: 1,
				reasoning_effort: "low",
			});
		}
		supportsReasoning = true;
	} catch (error: unknown) {
		if (isInconclusiveProbeError(error)) {
			// Unknown outcome: answer "no" for now, but leave the cache slot
			// empty so the next call probes again.
			return false;
		}
		supportsReasoning = false;
	}

	// Merge into any existing entry so the other API's slot is preserved.
	const entry = modelReasoningSupport.get(model) ?? {};
	entry[api] = supportsReasoning;
	modelReasoningSupport.set(model, entry);

	return supportsReasoning;
}
|
||||
|
||||
export async function callModelResponsesApi(
|
||||
client: OpenAI,
|
||||
model: string,
|
||||
messages: any[],
|
||||
signal?: AbortSignal,
|
||||
eventReceiver?: AgentEventReceiver,
|
||||
supportsReasoning?: boolean,
|
||||
): Promise<void> {
|
||||
await eventReceiver?.on({ type: "assistant_start" });
|
||||
|
||||
// Use provided reasoning support or detect it
|
||||
if (supportsReasoning === undefined) {
|
||||
supportsReasoning = await checkReasoningSupport(client, model, "responses");
|
||||
}
|
||||
|
||||
let conversationDone = false;
|
||||
|
||||
while (!conversationDone) {
|
||||
|
|
@ -65,11 +127,13 @@ export async function callModelResponsesApi(
|
|||
tools: toolsForResponses as any,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: {
|
||||
effort: "medium", // Use auto reasoning effort
|
||||
summary: "auto",
|
||||
},
|
||||
max_output_tokens: 2000, // TODO make configurable
|
||||
...(supportsReasoning && {
|
||||
reasoning: {
|
||||
effort: "medium", // Use auto reasoning effort
|
||||
summary: "auto", // Request reasoning summaries
|
||||
},
|
||||
}),
|
||||
},
|
||||
{ signal },
|
||||
);
|
||||
|
|
@ -82,8 +146,9 @@ export async function callModelResponsesApi(
|
|||
inputTokens: usage.input_tokens || 0,
|
||||
outputTokens: usage.output_tokens || 0,
|
||||
totalTokens: usage.total_tokens || 0,
|
||||
cacheReadTokens: usage.input_tokens_details.cached_tokens || 0,
|
||||
cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0,
|
||||
cacheWriteTokens: 0, // Not available in API
|
||||
reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -101,9 +166,11 @@ export async function callModelResponsesApi(
|
|||
|
||||
switch (item.type) {
|
||||
case "reasoning": {
|
||||
for (const content of item.content || []) {
|
||||
if (content.type === "reasoning_text") {
|
||||
await eventReceiver?.on({ type: "thinking", text: content.text });
|
||||
// Handle both content (o1/o3) and summary (gpt-5) formats
|
||||
const reasoningItems = item.content || item.summary || [];
|
||||
for (const content of reasoningItems) {
|
||||
if (content.type === "reasoning_text" || content.type === "summary_text") {
|
||||
await eventReceiver?.on({ type: "reasoning", text: content.text });
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -182,9 +249,15 @@ export async function callModelChatCompletionsApi(
|
|||
messages: any[],
|
||||
signal?: AbortSignal,
|
||||
eventReceiver?: AgentEventReceiver,
|
||||
supportsReasoning?: boolean,
|
||||
): Promise<void> {
|
||||
await eventReceiver?.on({ type: "assistant_start" });
|
||||
|
||||
// Use provided reasoning support or detect it
|
||||
if (supportsReasoning === undefined) {
|
||||
supportsReasoning = await checkReasoningSupport(client, model, "completions");
|
||||
}
|
||||
|
||||
let assistantResponded = false;
|
||||
|
||||
while (!assistantResponded) {
|
||||
|
|
@ -200,6 +273,9 @@ export async function callModelChatCompletionsApi(
|
|||
tools: toolsForChat,
|
||||
tool_choice: "auto",
|
||||
max_completion_tokens: 2000, // TODO make configurable
|
||||
...(supportsReasoning && {
|
||||
reasoning_effort: "medium",
|
||||
}),
|
||||
},
|
||||
{ signal },
|
||||
);
|
||||
|
|
@ -216,6 +292,7 @@ export async function callModelChatCompletionsApi(
|
|||
totalTokens: usage.total_tokens || 0,
|
||||
cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0,
|
||||
cacheWriteTokens: 0, // Not available in API
|
||||
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -279,6 +356,8 @@ export class Agent {
|
|||
private sessionManager?: SessionManager;
|
||||
private comboReceiver: AgentEventReceiver;
|
||||
private abortController: AbortController | null = null;
|
||||
private supportsReasoningResponses: boolean | null = null; // Cache reasoning support for responses API
|
||||
private supportsReasoningCompletions: boolean | null = null; // Cache reasoning support for completions API
|
||||
|
||||
constructor(config: AgentConfig, renderer?: AgentEventReceiver, sessionManager?: SessionManager) {
|
||||
this.config = config;
|
||||
|
|
@ -332,25 +411,46 @@ export class Agent {
|
|||
|
||||
try {
|
||||
if (this.config.api === "responses") {
|
||||
// Check reasoning support only once per agent instance
|
||||
if (this.supportsReasoningResponses === null) {
|
||||
this.supportsReasoningResponses = await checkReasoningSupport(
|
||||
this.client,
|
||||
this.config.model,
|
||||
"responses",
|
||||
);
|
||||
}
|
||||
|
||||
await callModelResponsesApi(
|
||||
this.client,
|
||||
this.config.model,
|
||||
this.messages,
|
||||
this.abortController.signal,
|
||||
this.comboReceiver,
|
||||
this.supportsReasoningResponses,
|
||||
);
|
||||
} else {
|
||||
// Check reasoning support for completions API
|
||||
if (this.supportsReasoningCompletions === null) {
|
||||
this.supportsReasoningCompletions = await checkReasoningSupport(
|
||||
this.client,
|
||||
this.config.model,
|
||||
"completions",
|
||||
);
|
||||
}
|
||||
|
||||
await callModelChatCompletionsApi(
|
||||
this.client,
|
||||
this.config.model,
|
||||
this.messages,
|
||||
this.abortController.signal,
|
||||
this.comboReceiver,
|
||||
this.supportsReasoningCompletions,
|
||||
);
|
||||
}
|
||||
} catch (e: any) {
|
||||
} catch (e) {
|
||||
// Check if this was an interruption
|
||||
if (e.message === "Interrupted" || this.abortController.signal.aborted) {
|
||||
const errorMessage = e instanceof Error ? e.message : String(e);
|
||||
if (errorMessage === "Interrupted" || this.abortController.signal.aborted) {
|
||||
return;
|
||||
}
|
||||
throw e;
|
||||
|
|
@ -385,7 +485,7 @@ export class Agent {
|
|||
});
|
||||
break;
|
||||
|
||||
case "thinking":
|
||||
case "reasoning":
|
||||
// Add reasoning message
|
||||
this.messages.push({
|
||||
type: "reasoning",
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
|
|||
private lastOutputTokens = 0;
|
||||
private lastCacheReadTokens = 0;
|
||||
private lastCacheWriteTokens = 0;
|
||||
private lastReasoningTokens = 0;
|
||||
|
||||
private startAnimation(text: string = "Thinking"): void {
|
||||
if (this.isAnimating || !this.isTTY) return;
|
||||
|
|
@ -54,6 +55,11 @@ export class ConsoleRenderer implements AgentEventReceiver {
|
|||
`↑${this.lastInputTokens.toLocaleString()} ↓${this.lastOutputTokens.toLocaleString()}`,
|
||||
);
|
||||
|
||||
// Add reasoning tokens if present
|
||||
if (this.lastReasoningTokens > 0) {
|
||||
metricsText += chalk.dim(` ⚡${this.lastReasoningTokens.toLocaleString()}`);
|
||||
}
|
||||
|
||||
// Add cache info if available
|
||||
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
|
||||
const cacheText: string[] = [];
|
||||
|
|
@ -96,7 +102,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
|
|||
this.startAnimation();
|
||||
break;
|
||||
|
||||
case "thinking":
|
||||
case "reasoning":
|
||||
this.stopAnimation();
|
||||
console.log(chalk.dim("[thinking]"));
|
||||
console.log(chalk.dim(event.text));
|
||||
|
|
@ -162,6 +168,7 @@ export class ConsoleRenderer implements AgentEventReceiver {
|
|||
this.lastOutputTokens = event.outputTokens;
|
||||
this.lastCacheReadTokens = event.cacheReadTokens;
|
||||
this.lastCacheWriteTokens = event.cacheWriteTokens;
|
||||
this.lastReasoningTokens = event.reasoningTokens;
|
||||
// Don't stop animation for this event
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ export class TuiRenderer implements AgentEventReceiver {
|
|||
private lastOutputTokens = 0;
|
||||
private lastCacheReadTokens = 0;
|
||||
private lastCacheWriteTokens = 0;
|
||||
private lastReasoningTokens = 0;
|
||||
private toolCallCount = 0;
|
||||
private tokenStatusComponent: TextComponent | null = null;
|
||||
|
||||
|
|
@ -185,7 +186,7 @@ export class TuiRenderer implements AgentEventReceiver {
|
|||
this.statusContainer.addChild(this.currentLoadingAnimation);
|
||||
break;
|
||||
|
||||
case "thinking": {
|
||||
case "reasoning": {
|
||||
// Show thinking in dim text
|
||||
const thinkingContainer = new Container();
|
||||
thinkingContainer.addChild(new TextComponent(chalk.dim("[thinking]")));
|
||||
|
|
@ -264,6 +265,7 @@ export class TuiRenderer implements AgentEventReceiver {
|
|||
this.lastOutputTokens = event.outputTokens;
|
||||
this.lastCacheReadTokens = event.cacheReadTokens;
|
||||
this.lastCacheWriteTokens = event.cacheWriteTokens;
|
||||
this.lastReasoningTokens = event.reasoningTokens;
|
||||
this.updateTokenDisplay();
|
||||
break;
|
||||
|
||||
|
|
@ -291,6 +293,11 @@ export class TuiRenderer implements AgentEventReceiver {
|
|||
// Build token display text
|
||||
let tokenText = chalk.dim(`↑${this.lastInputTokens.toLocaleString()} ↓${this.lastOutputTokens.toLocaleString()}`);
|
||||
|
||||
// Add reasoning tokens if present
|
||||
if (this.lastReasoningTokens > 0) {
|
||||
tokenText += chalk.dim(` ⚡${this.lastReasoningTokens.toLocaleString()}`);
|
||||
}
|
||||
|
||||
// Add cache info if available
|
||||
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
|
||||
const cacheText: string[] = [];
|
||||
|
|
|
|||
|
|
@ -142,6 +142,7 @@ export class SessionManager implements AgentEventReceiver {
|
|||
totalTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
cacheWriteTokens: 0,
|
||||
reasoningTokens: 0,
|
||||
};
|
||||
|
||||
const lines = readFileSync(this.sessionFile, "utf8").trim().split("\n");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue