mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 09:01:14 +00:00
When switching between OpenAI models (e.g., gpt-5-mini to gpt-5.2-codex), function_call IDs with fc_ prefix trigger pairing validation errors because OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items. The fix omits the id field for function_calls from different models, which avoids the pairing validation while keeping call_id for matching with function_call_output. Fixes #886
292 lines
9.2 KiB
TypeScript
292 lines
9.2 KiB
TypeScript
import { Type } from "@sinclair/typebox";
|
|
import { describe, expect, it } from "vitest";
|
|
import { getModel } from "../src/models.js";
|
|
import { complete, getEnvApiKey } from "../src/stream.js";
|
|
import type { AssistantMessage, Context, Message, Tool, ToolCall } from "../src/types.js";
|
|
|
|
const testToolSchema = Type.Object({
|
|
value: Type.Number({ description: "A number to double" }),
|
|
});
|
|
|
|
const testTool: Tool<typeof testToolSchema> = {
|
|
name: "double_number",
|
|
description: "Doubles a number and returns the result",
|
|
parameters: testToolSchema,
|
|
};
|
|
|
|
describe.skipIf(!process.env.OPENAI_API_KEY || !process.env.ANTHROPIC_API_KEY)(
|
|
"OpenAI Responses reasoning replay e2e",
|
|
() => {
|
|
it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => {
|
|
const model = getModel("openai", "gpt-5-mini");
|
|
|
|
const apiKey = getEnvApiKey("openai");
|
|
if (!apiKey) {
|
|
throw new Error("Missing OPENAI_API_KEY");
|
|
}
|
|
|
|
const userMessage: Message = {
|
|
role: "user",
|
|
content: "Use the double_number tool to double 21.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
const assistantResponse = await complete(
|
|
model,
|
|
{
|
|
systemPrompt: "You are a helpful assistant. Use the tool.",
|
|
messages: [userMessage],
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoningEffort: "high",
|
|
},
|
|
);
|
|
|
|
const thinkingBlock = assistantResponse.content.find(
|
|
(block) => block.type === "thinking" && block.thinkingSignature,
|
|
);
|
|
if (!thinkingBlock || thinkingBlock.type !== "thinking") {
|
|
throw new Error("Missing thinking signature from OpenAI Responses");
|
|
}
|
|
|
|
const corruptedAssistant: AssistantMessage = {
|
|
...assistantResponse,
|
|
content: [thinkingBlock],
|
|
stopReason: "aborted",
|
|
};
|
|
|
|
const followUp: Message = {
|
|
role: "user",
|
|
content: "Say hello to confirm you can continue.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
const context: Context = {
|
|
systemPrompt: "You are a helpful assistant.",
|
|
messages: [userMessage, corruptedAssistant, followUp],
|
|
tools: [testTool],
|
|
};
|
|
|
|
const response = await complete(model, context, {
|
|
apiKey,
|
|
reasoningEffort: "high",
|
|
});
|
|
|
|
// The key assertion: no 400 error from orphaned reasoning item
|
|
expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
|
|
expect(response.errorMessage).toBeFalsy();
|
|
// Model should respond (text or tool call)
|
|
expect(response.content.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it("handles same-provider different-model handoff with tool calls", { retry: 2 }, async () => {
|
|
// This tests the scenario where:
|
|
// 1. Model A (gpt-5-mini) generates reasoning + function_call
|
|
// 2. User switches to Model B (gpt-5.2-codex) - same provider, different model
|
|
// 3. transform-messages: isSameModel=false, thinking converted to text
|
|
// 4. But tool call ID still has OpenAI pairing history (fc_xxx paired with rs_xxx)
|
|
// 5. Without fix: OpenAI returns 400 "function_call without required reasoning item"
|
|
// 6. With fix: tool calls/results converted to text, conversation continues
|
|
|
|
const modelA = getModel("openai", "gpt-5-mini");
|
|
const modelB = getModel("openai", "gpt-5.2-codex");
|
|
|
|
const apiKey = getEnvApiKey("openai");
|
|
if (!apiKey) {
|
|
throw new Error("Missing OPENAI_API_KEY");
|
|
}
|
|
|
|
const userMessage: Message = {
|
|
role: "user",
|
|
content: "Use the double_number tool to double 21.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
// Get a real response from Model A with reasoning + tool call
|
|
const assistantResponse = await complete(
|
|
modelA,
|
|
{
|
|
systemPrompt: "You are a helpful assistant. Always use the tool when asked.",
|
|
messages: [userMessage],
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoningEffort: "high",
|
|
},
|
|
);
|
|
|
|
const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as
|
|
| ToolCall
|
|
| undefined;
|
|
|
|
if (!toolCallBlock) {
|
|
throw new Error("Missing tool call from OpenAI Responses - model did not use the tool");
|
|
}
|
|
|
|
// Provide a tool result
|
|
const toolResult: Message = {
|
|
role: "toolResult",
|
|
toolCallId: toolCallBlock.id,
|
|
toolName: toolCallBlock.name,
|
|
content: [{ type: "text", text: "42" }],
|
|
isError: false,
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
const followUp: Message = {
|
|
role: "user",
|
|
content: "What was the result? Answer with just the number.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
// Now continue with Model B (different model, same provider)
|
|
const context: Context = {
|
|
systemPrompt: "You are a helpful assistant. Answer concisely.",
|
|
messages: [userMessage, assistantResponse, toolResult, followUp],
|
|
tools: [testTool],
|
|
};
|
|
|
|
let capturedPayload: any = null;
|
|
const response = await complete(modelB, context, {
|
|
apiKey,
|
|
reasoningEffort: "high",
|
|
onPayload: (payload) => {
|
|
capturedPayload = payload;
|
|
},
|
|
});
|
|
|
|
// The key assertion: no 400 error from orphaned function_call
|
|
expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
|
|
expect(response.errorMessage).toBeFalsy();
|
|
expect(response.content.length).toBeGreaterThan(0);
|
|
|
|
// Log what was sent for debugging
|
|
const input = capturedPayload?.input as any[];
|
|
const functionCalls = input?.filter((item: any) => item.type === "function_call") || [];
|
|
const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || [];
|
|
|
|
console.log("Payload sent to API:");
|
|
console.log("- function_calls:", functionCalls.length);
|
|
console.log("- reasoning items:", reasoningItems.length);
|
|
console.log("- full input:", JSON.stringify(input, null, 2));
|
|
|
|
// Verify the model understood the context
|
|
const responseText = response.content
|
|
.filter((b) => b.type === "text")
|
|
.map((b) => (b as any).text)
|
|
.join("");
|
|
expect(responseText).toContain("42");
|
|
});
|
|
|
|
it("handles cross-provider handoff from Anthropic to OpenAI Codex", { retry: 2 }, async () => {
|
|
// This tests cross-provider handoff:
|
|
// 1. Anthropic model generates thinking + function_call (toolu_xxx ID)
|
|
// 2. User switches to OpenAI Codex
|
|
// 3. transform-messages: isSameModel=false, thinking converted to text
|
|
// 4. Tool call ID is Anthropic format (toolu_xxx), no OpenAI pairing history
|
|
// 5. Should work because foreign IDs have no pairing expectation
|
|
|
|
const anthropicModel = getModel("anthropic", "claude-sonnet-4-5");
|
|
const codexModel = getModel("openai", "gpt-5.2-codex");
|
|
|
|
const anthropicApiKey = getEnvApiKey("anthropic");
|
|
const openaiApiKey = getEnvApiKey("openai");
|
|
if (!anthropicApiKey || !openaiApiKey) {
|
|
throw new Error("Missing API keys");
|
|
}
|
|
|
|
const userMessage: Message = {
|
|
role: "user",
|
|
content: "Use the double_number tool to double 21.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
// Get a real response from Anthropic with thinking + tool call
|
|
const assistantResponse = await complete(
|
|
anthropicModel,
|
|
{
|
|
systemPrompt: "You are a helpful assistant. Always use the tool when asked.",
|
|
messages: [userMessage],
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey: anthropicApiKey,
|
|
thinkingEnabled: true,
|
|
thinkingBudgetTokens: 5000,
|
|
},
|
|
);
|
|
|
|
const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as
|
|
| ToolCall
|
|
| undefined;
|
|
|
|
if (!toolCallBlock) {
|
|
throw new Error("Missing tool call from Anthropic - model did not use the tool");
|
|
}
|
|
|
|
console.log("Anthropic tool call ID:", toolCallBlock.id);
|
|
|
|
// Provide a tool result
|
|
const toolResult: Message = {
|
|
role: "toolResult",
|
|
toolCallId: toolCallBlock.id,
|
|
toolName: toolCallBlock.name,
|
|
content: [{ type: "text", text: "42" }],
|
|
isError: false,
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
const followUp: Message = {
|
|
role: "user",
|
|
content: "What was the result? Answer with just the number.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
// Now continue with Codex (different provider)
|
|
const context: Context = {
|
|
systemPrompt: "You are a helpful assistant. Answer concisely.",
|
|
messages: [userMessage, assistantResponse, toolResult, followUp],
|
|
tools: [testTool],
|
|
};
|
|
|
|
let capturedPayload: any = null;
|
|
const response = await complete(codexModel, context, {
|
|
apiKey: openaiApiKey,
|
|
reasoningEffort: "high",
|
|
onPayload: (payload) => {
|
|
capturedPayload = payload;
|
|
},
|
|
});
|
|
|
|
// Log what was sent
|
|
const input = capturedPayload?.input as any[];
|
|
const functionCalls = input?.filter((item: any) => item.type === "function_call") || [];
|
|
const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || [];
|
|
|
|
console.log("Payload sent to Codex:");
|
|
console.log("- function_calls:", functionCalls.length);
|
|
console.log("- reasoning items:", reasoningItems.length);
|
|
if (functionCalls.length > 0) {
|
|
console.log(
|
|
"- function_call IDs:",
|
|
functionCalls.map((fc: any) => fc.id),
|
|
);
|
|
}
|
|
|
|
// The key assertion: no 400 error
|
|
expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
|
|
expect(response.errorMessage).toBeFalsy();
|
|
expect(response.content.length).toBeGreaterThan(0);
|
|
|
|
// Verify the model understood the context
|
|
const responseText = response.content
|
|
.filter((b) => b.type === "text")
|
|
.map((b) => (b as any).text)
|
|
.join("");
|
|
expect(responseText).toContain("42");
|
|
});
|
|
},
|
|
);
|