co-mono/packages/ai/test/tool-call-without-result.test.ts
Mario Zechner bb50738f7e fix(ai): append system prompt to codex bridge message instead of converting to input
Previously the system prompt was converted to an input message in convertMessages,
then stripped out by filterPiSystemPrompts. Now the system prompt is passed directly
to transformRequestBody and appended after CODEX_PI_BRIDGE in the bridge message.
2026-01-05 06:03:07 +01:00

259 lines
9.3 KiB
TypeScript

import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests).
// Destructured directly so each provider token is in scope for the
// skipIf guards below when the suites are registered.
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken, openaiCodexToken] =
	await Promise.all([
		resolveApiKey("anthropic"),
		resolveApiKey("github-copilot"),
		resolveApiKey("google-gemini-cli"),
		resolveApiKey("google-antigravity"),
		resolveApiKey("openai-codex"),
	]);
// Simple calculate tool
// Schema for the single "expression" argument the model must supply.
const calculateSchema = Type.Object({
expression: Type.String({ description: "The mathematical expression to evaluate" }),
});
// Tool advertised to the model in every test context below. The tests
// only verify that a toolCall block is produced; the tool itself is
// never executed and no tool result is ever sent back.
const calculateTool: Tool = {
name: "calculate",
description: "Evaluate mathematical expressions",
parameters: calculateSchema,
};
/**
 * Shared scenario: provoke a tool call, then continue the conversation
 * WITHOUT ever supplying a tool result (simulating an aborted/cancelled
 * tool call). The provider adapter is expected to filter the orphaned
 * tool call so the follow-up request succeeds instead of erroring.
 *
 * @param model   provider/model under test
 * @param options per-API options (e.g. an OAuth apiKey override)
 */
async function testToolCallWithoutResult<TApi extends Api>(
	model: Model<TApi>,
	options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
) {
	// Fresh context with only the calculate tool registered.
	const context: Context = {
		systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.",
		messages: [],
		tools: [calculateTool],
	};

	// Turn 1: explicitly ask the model to use the tool.
	context.messages.push({
		role: "user",
		content: "Please calculate 25 * 18 using the calculate tool.",
		timestamp: Date.now(),
	});

	// The assistant's reply should contain at least one toolCall block.
	const firstResponse = await complete(model, context, options);
	context.messages.push(firstResponse);
	console.log("First response:", JSON.stringify(firstResponse, null, 2));

	const madeToolCall = firstResponse.content.some((block) => block.type === "toolCall");
	expect(madeToolCall).toBe(true);
	// Throw as well so a retry doesn't continue with a broken transcript.
	if (!madeToolCall) {
		throw new Error("Expected assistant to make a tool call, but none was found");
	}

	// Turn 2: the user moves on without any tool result being provided.
	context.messages.push({
		role: "user",
		content: "Never mind, just tell me what is 2+2?",
		timestamp: Date.now(),
	});

	// The orphaned tool call should be filtered out; this must not fail.
	const secondResponse = await complete(model, context, options);
	console.log("Second response:", JSON.stringify(secondResponse, null, 2));

	// Main assertion: the request succeeded and produced some content.
	expect(secondResponse.stopReason).not.toBe("error");
	expect(secondResponse.content.length).toBeGreaterThan(0);

	// The model may answer directly or issue a new tool call — either is
	// acceptable; what matters is the orphaned call didn't break the request.
	const textParts: string[] = [];
	for (const block of secondResponse.content) {
		textParts.push(block.type === "text" ? block.text : "");
	}
	const answerText = textParts
		.filter((_, i) => secondResponse.content[i]?.type === "text")
		.join(" ");
	const toolCallCount = secondResponse.content.filter((block) => block.type === "toolCall").length;
	expect(toolCallCount || answerText.length).toBeGreaterThan(0);
	console.log("Answer:", answerText);

	// Normal completion or a new tool use are the only valid stop reasons.
	expect(["stop", "toolUse"]).toContain(secondResponse.stopReason);
}
// One suite per provider adapter, all running the identical scenario via
// testToolCallWithoutResult. Suites are skipped when the corresponding
// API key (env var) or OAuth token is unavailable, so the file degrades
// gracefully in CI. Each test retries 3x with a 30s timeout because the
// scenario depends on the live model actually emitting a tool call.
describe("Tool Call Without Result Tests", () => {
// =========================================================================
// API Key-based providers
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
const model = getModel("google", "gemini-2.5-flash");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
// Force the completions API for a model that defaults to another api.
// NOTE(review): the `!` suggests getModel can return undefined here —
// other call sites omit it, presumably via overloads; confirm in models.ts.
const model: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
const model = getModel("openai", "gpt-5-mini");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const model = getModel("xai", "grok-3-fast");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
const model = getModel("groq", "openai/gpt-oss-20b");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
const model = getModel("cerebras", "gpt-oss-120b");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
const model = getModel("zai", "glm-4.5-flash");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
const model = getModel("mistral", "devstral-medium-latest");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
// These use it.skipIf (not describe.skipIf) because the tokens are
// resolved by the top-level await above, per test registration.
describe("Anthropic OAuth Provider", () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)(
"should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
// Same model as the API-key suite, but authenticated via OAuth token.
await testToolCallWithoutResult(model, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("github-copilot", "gpt-4o");
await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("github-copilot", "claude-sonnet-4");
await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-gemini-cli", "gemini-2.5-flash");
await testToolCallWithoutResult(model, { apiKey: geminiCliToken });
},
);
});
// Antigravity exposes several backends behind one OAuth token; cover one
// model from each family.
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "gemini-3-flash");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "claude-sonnet-4-5");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "gpt-oss-120b-medium");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
});
describe("OpenAI Codex Provider", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-xhigh - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("openai-codex", "gpt-5.2-xhigh");
await testToolCallWithoutResult(model, { apiKey: openaiCodexToken });
},
);
});
});