mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-15 03:00:44 +00:00
- Copy all pi-mono source into apps/companion-os/ - Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases - Update deploy-staging.yml to build pi from source (bun compile) before Docker build - Add apps/companion-os/** to path triggers - No more cross-repo dispatch needed Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
568 lines
16 KiB
TypeScript
568 lines
16 KiB
TypeScript
/**
|
|
* Cross-Provider Handoff Test
|
|
*
|
|
* Tests that contexts generated by one provider/model can be consumed by another.
|
|
* This catches issues like:
|
|
* - Tool call ID format incompatibilities (e.g., OpenAI Codex pipe characters)
|
|
* - Thinking block transformation issues
|
|
* - Message format incompatibilities
|
|
*
|
|
* Strategy:
|
|
* 1. beforeAll: For each provider/model, generate a "small context" (if not cached):
|
|
* - User message asking to use a tool
|
|
* - Assistant response with thinking + tool call
|
|
* - Tool result
|
|
* - Final assistant response
|
|
*
|
|
* 2. Test: For each target provider/model:
|
|
* - Concatenate ALL other contexts into one
|
|
* - Ask the model to "say hi"
|
|
* - If it fails, there's a compatibility issue
|
|
*
|
|
* Fixtures are generated fresh on each run.
|
|
*/
|
|
|
|
import { Type } from "@sinclair/typebox";
|
|
import { writeFileSync } from "fs";
|
|
import { beforeAll, describe, expect, it } from "vitest";
|
|
import { getModel } from "../src/models.js";
|
|
import { completeSimple, getEnvApiKey } from "../src/stream.js";
|
|
import type {
|
|
Api,
|
|
AssistantMessage,
|
|
Message,
|
|
Model,
|
|
Tool,
|
|
ToolResultMessage,
|
|
} from "../src/types.js";
|
|
import { hasAzureOpenAICredentials } from "./azure-utils.js";
|
|
import { resolveApiKey } from "./oauth.js";
|
|
|
|
// Simple tool for testing
|
|
const testToolSchema = Type.Object({
|
|
value: Type.Number({ description: "A number to double" }),
|
|
});
|
|
|
|
const testTool: Tool<typeof testToolSchema> = {
|
|
name: "double_number",
|
|
description: "Doubles a number and returns the result",
|
|
parameters: testToolSchema,
|
|
};
|
|
|
|
// Provider/model pairs to test
|
|
interface ProviderModelPair {
|
|
provider: string;
|
|
model: string;
|
|
label: string;
|
|
apiOverride?: Api;
|
|
}
|
|
|
|
const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [
|
|
// Anthropic
|
|
{
|
|
provider: "anthropic",
|
|
model: "claude-sonnet-4-5",
|
|
label: "anthropic-claude-sonnet-4-5",
|
|
},
|
|
// Google
|
|
{
|
|
provider: "google",
|
|
model: "gemini-3-flash-preview",
|
|
label: "google-gemini-3-flash-preview",
|
|
},
|
|
// OpenAI
|
|
{
|
|
provider: "openai",
|
|
model: "gpt-4o-mini",
|
|
label: "openai-completions-gpt-4o-mini",
|
|
apiOverride: "openai-completions",
|
|
},
|
|
{
|
|
provider: "openai",
|
|
model: "gpt-5-mini",
|
|
label: "openai-responses-gpt-5-mini",
|
|
},
|
|
{
|
|
provider: "azure-openai-responses",
|
|
model: "gpt-4o-mini",
|
|
label: "azure-openai-responses-gpt-4o-mini",
|
|
},
|
|
// OpenAI Codex
|
|
{
|
|
provider: "openai-codex",
|
|
model: "gpt-5.2-codex",
|
|
label: "openai-codex-gpt-5.2-codex",
|
|
},
|
|
// Google Antigravity
|
|
{
|
|
provider: "google-antigravity",
|
|
model: "gemini-3-flash",
|
|
label: "antigravity-gemini-3-flash",
|
|
},
|
|
{
|
|
provider: "google-antigravity",
|
|
model: "claude-sonnet-4-5",
|
|
label: "antigravity-claude-sonnet-4-5",
|
|
},
|
|
// GitHub Copilot
|
|
{
|
|
provider: "github-copilot",
|
|
model: "claude-sonnet-4.5",
|
|
label: "copilot-claude-sonnet-4.5",
|
|
},
|
|
{
|
|
provider: "github-copilot",
|
|
model: "gpt-5.1-codex",
|
|
label: "copilot-gpt-5.1-codex",
|
|
},
|
|
{
|
|
provider: "github-copilot",
|
|
model: "gemini-3-flash-preview",
|
|
label: "copilot-gemini-3-flash-preview",
|
|
},
|
|
{
|
|
provider: "github-copilot",
|
|
model: "grok-code-fast-1",
|
|
label: "copilot-grok-code-fast-1",
|
|
},
|
|
// Amazon Bedrock
|
|
{
|
|
provider: "amazon-bedrock",
|
|
model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
|
label: "bedrock-claude-sonnet-4-5",
|
|
},
|
|
// xAI
|
|
{ provider: "xai", model: "grok-code-fast-1", label: "xai-grok-code-fast-1" },
|
|
// Cerebras
|
|
{ provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" },
|
|
// Groq
|
|
{
|
|
provider: "groq",
|
|
model: "openai/gpt-oss-120b",
|
|
label: "groq-gpt-oss-120b",
|
|
},
|
|
// Hugging Face
|
|
{
|
|
provider: "huggingface",
|
|
model: "moonshotai/Kimi-K2.5",
|
|
label: "huggingface-kimi-k2.5",
|
|
},
|
|
// Kimi For Coding
|
|
{
|
|
provider: "kimi-coding",
|
|
model: "kimi-k2-thinking",
|
|
label: "kimi-coding-k2-thinking",
|
|
},
|
|
// Mistral
|
|
{
|
|
provider: "mistral",
|
|
model: "devstral-medium-latest",
|
|
label: "mistral-devstral-medium",
|
|
},
|
|
// MiniMax
|
|
{ provider: "minimax", model: "MiniMax-M2.1", label: "minimax-m2.1" },
|
|
// OpenCode Zen
|
|
{ provider: "opencode", model: "big-pickle", label: "zen-big-pickle" },
|
|
{
|
|
provider: "opencode",
|
|
model: "claude-sonnet-4-5",
|
|
label: "zen-claude-sonnet-4-5",
|
|
},
|
|
{
|
|
provider: "opencode",
|
|
model: "gemini-3-flash",
|
|
label: "zen-gemini-3-flash",
|
|
},
|
|
{ provider: "opencode", model: "glm-4.7-free", label: "zen-glm-4.7-free" },
|
|
{ provider: "opencode", model: "gpt-5.2-codex", label: "zen-gpt-5.2-codex" },
|
|
{
|
|
provider: "opencode",
|
|
model: "minimax-m2.1-free",
|
|
label: "zen-minimax-m2.1-free",
|
|
},
|
|
// OpenCode Go
|
|
{ provider: "opencode-go", model: "kimi-k2.5", label: "go-kimi-k2.5" },
|
|
{ provider: "opencode-go", model: "minimax-m2.5", label: "go-minimax-m2.5" },
|
|
];
|
|
|
|
// Cached context structure
|
|
interface CachedContext {
|
|
label: string;
|
|
provider: string;
|
|
model: string;
|
|
api: Api;
|
|
messages: Message[];
|
|
generatedAt: string;
|
|
}
|
|
|
|
/**
|
|
* Get API key for provider - checks OAuth storage first, then env vars
|
|
*/
|
|
async function getApiKey(provider: string): Promise<string | undefined> {
|
|
const oauthKey = await resolveApiKey(provider);
|
|
if (oauthKey) return oauthKey;
|
|
return getEnvApiKey(provider);
|
|
}
|
|
|
|
/**
|
|
* Synchronous check for API key availability (env vars only, for skipIf)
|
|
*/
|
|
function hasApiKey(provider: string): boolean {
|
|
if (provider === "azure-openai-responses") {
|
|
return hasAzureOpenAICredentials();
|
|
}
|
|
return !!getEnvApiKey(provider);
|
|
}
|
|
|
|
/**
|
|
* Check if any provider has API keys available (for skipIf at describe level)
|
|
*/
|
|
function hasAnyApiKey(): boolean {
|
|
return PROVIDER_MODEL_PAIRS.some((pair) => hasApiKey(pair.provider));
|
|
}
|
|
|
|
function dumpFailurePayload(params: {
|
|
label: string;
|
|
error: string;
|
|
payload?: unknown;
|
|
messages: Message[];
|
|
}): void {
|
|
const filename = `/tmp/pi-handoff-${params.label}-${Date.now()}.json`;
|
|
const body = {
|
|
label: params.label,
|
|
error: params.error,
|
|
payload: params.payload,
|
|
messages: params.messages,
|
|
};
|
|
writeFileSync(filename, JSON.stringify(body, null, 2));
|
|
console.log(`Wrote failure payload to ${filename}`);
|
|
}
|
|
|
|
/**
|
|
* Generate a context from a provider/model pair.
|
|
* Makes a real API call to get authentic tool call IDs and thinking blocks.
|
|
*/
|
|
async function generateContext(
|
|
pair: ProviderModelPair,
|
|
apiKey: string,
|
|
): Promise<{ messages: Message[]; api: Api } | null> {
|
|
const baseModel = (
|
|
getModel as (p: string, m: string) => Model<Api> | undefined
|
|
)(pair.provider, pair.model);
|
|
if (!baseModel) {
|
|
console.log(` Model not found: ${pair.provider}/${pair.model}`);
|
|
return null;
|
|
}
|
|
|
|
const model: Model<Api> = pair.apiOverride
|
|
? { ...baseModel, api: pair.apiOverride }
|
|
: baseModel;
|
|
|
|
const userMessage: Message = {
|
|
role: "user",
|
|
content: "Please double the number 21 using the double_number tool.",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
const supportsReasoning = model.reasoning === true;
|
|
let lastPayload: unknown;
|
|
let assistantResponse: AssistantMessage;
|
|
try {
|
|
assistantResponse = await completeSimple(
|
|
model,
|
|
{
|
|
systemPrompt:
|
|
"You are a helpful assistant. Use the provided tool to complete the task.",
|
|
messages: [userMessage],
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoning: supportsReasoning ? "high" : undefined,
|
|
onPayload: (payload) => {
|
|
lastPayload = payload;
|
|
},
|
|
},
|
|
);
|
|
} catch (error) {
|
|
const msg = error instanceof Error ? error.message : String(error);
|
|
console.log(` Initial request failed: ${msg}`);
|
|
dumpFailurePayload({
|
|
label: `${pair.label}-initial`,
|
|
error: msg,
|
|
payload: lastPayload,
|
|
messages: [userMessage],
|
|
});
|
|
return null;
|
|
}
|
|
|
|
if (assistantResponse.stopReason === "error") {
|
|
console.log(` Initial request error: ${assistantResponse.errorMessage}`);
|
|
dumpFailurePayload({
|
|
label: `${pair.label}-initial`,
|
|
error: assistantResponse.errorMessage || "Unknown error",
|
|
payload: lastPayload,
|
|
messages: [userMessage],
|
|
});
|
|
return null;
|
|
}
|
|
|
|
const toolCall = assistantResponse.content.find((c) => c.type === "toolCall");
|
|
if (!toolCall || toolCall.type !== "toolCall") {
|
|
console.log(
|
|
` No tool call in response (stopReason: ${assistantResponse.stopReason})`,
|
|
);
|
|
return {
|
|
messages: [userMessage, assistantResponse],
|
|
api: model.api,
|
|
};
|
|
}
|
|
|
|
console.log(` Tool call ID: ${toolCall.id}`);
|
|
|
|
const toolResult: ToolResultMessage = {
|
|
role: "toolResult",
|
|
toolCallId: toolCall.id,
|
|
toolName: toolCall.name,
|
|
content: [{ type: "text", text: "42" }],
|
|
isError: false,
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
let finalResponse: AssistantMessage;
|
|
const messagesForFinal = [userMessage, assistantResponse, toolResult];
|
|
try {
|
|
finalResponse = await completeSimple(
|
|
model,
|
|
{
|
|
systemPrompt: "You are a helpful assistant.",
|
|
messages: messagesForFinal,
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoning: supportsReasoning ? "high" : undefined,
|
|
onPayload: (payload) => {
|
|
lastPayload = payload;
|
|
},
|
|
},
|
|
);
|
|
} catch (error) {
|
|
const msg = error instanceof Error ? error.message : String(error);
|
|
console.log(` Final request failed: ${msg}`);
|
|
dumpFailurePayload({
|
|
label: `${pair.label}-final`,
|
|
error: msg,
|
|
payload: lastPayload,
|
|
messages: messagesForFinal,
|
|
});
|
|
return null;
|
|
}
|
|
|
|
if (finalResponse.stopReason === "error") {
|
|
console.log(` Final request error: ${finalResponse.errorMessage}`);
|
|
dumpFailurePayload({
|
|
label: `${pair.label}-final`,
|
|
error: finalResponse.errorMessage || "Unknown error",
|
|
payload: lastPayload,
|
|
messages: messagesForFinal,
|
|
});
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
messages: [userMessage, assistantResponse, toolResult, finalResponse],
|
|
api: model.api,
|
|
};
|
|
}
|
|
|
|
describe.skipIf(!hasAnyApiKey())("Cross-Provider Handoff", () => {
|
|
let contexts: Record<string, CachedContext>;
|
|
let availablePairs: ProviderModelPair[];
|
|
|
|
beforeAll(async () => {
|
|
contexts = {};
|
|
availablePairs = [];
|
|
|
|
console.log("\n=== Generating Fixtures ===\n");
|
|
|
|
for (const pair of PROVIDER_MODEL_PAIRS) {
|
|
const apiKey = await getApiKey(pair.provider);
|
|
if (!apiKey) {
|
|
console.log(`[${pair.label}] Skipping - no auth for ${pair.provider}`);
|
|
continue;
|
|
}
|
|
|
|
console.log(`[${pair.label}] Generating fixture...`);
|
|
const result = await generateContext(pair, apiKey);
|
|
|
|
if (!result || result.messages.length < 4) {
|
|
console.log(`[${pair.label}] Failed to generate fixture, skipping`);
|
|
continue;
|
|
}
|
|
|
|
contexts[pair.label] = {
|
|
label: pair.label,
|
|
provider: pair.provider,
|
|
model: pair.model,
|
|
api: result.api,
|
|
messages: result.messages,
|
|
generatedAt: new Date().toISOString(),
|
|
};
|
|
availablePairs.push(pair);
|
|
console.log(
|
|
`[${pair.label}] Generated ${result.messages.length} messages`,
|
|
);
|
|
}
|
|
|
|
console.log(
|
|
`\n=== ${availablePairs.length}/${PROVIDER_MODEL_PAIRS.length} contexts available ===\n`,
|
|
);
|
|
}, 300000);
|
|
|
|
it.skipIf(!hasAnyApiKey())(
|
|
"should have at least 2 fixtures to test handoffs",
|
|
() => {
|
|
expect(Object.keys(contexts).length).toBeGreaterThanOrEqual(2);
|
|
},
|
|
);
|
|
|
|
it.skipIf(!hasAnyApiKey())(
|
|
"should handle cross-provider handoffs for each target",
|
|
async () => {
|
|
const contextLabels = Object.keys(contexts);
|
|
|
|
if (contextLabels.length < 2) {
|
|
console.log("Not enough fixtures for handoff test, skipping");
|
|
return;
|
|
}
|
|
|
|
console.log("\n=== Testing Cross-Provider Handoffs ===\n");
|
|
|
|
const results: { target: string; success: boolean; error?: string }[] =
|
|
[];
|
|
|
|
for (const targetPair of availablePairs) {
|
|
const apiKey = await getApiKey(targetPair.provider);
|
|
if (!apiKey) {
|
|
console.log(`[Target: ${targetPair.label}] Skipping - no auth`);
|
|
continue;
|
|
}
|
|
|
|
// Collect messages from ALL OTHER contexts
|
|
const otherMessages: Message[] = [];
|
|
for (const [label, ctx] of Object.entries(contexts)) {
|
|
if (label === targetPair.label) continue;
|
|
otherMessages.push(...ctx.messages);
|
|
}
|
|
|
|
if (otherMessages.length === 0) {
|
|
console.log(
|
|
`[Target: ${targetPair.label}] Skipping - no other contexts`,
|
|
);
|
|
continue;
|
|
}
|
|
|
|
const allMessages: Message[] = [
|
|
...otherMessages,
|
|
{
|
|
role: "user",
|
|
content:
|
|
"Great, thanks for all that help! Now just say 'Hello, handoff successful!' to confirm you received everything.",
|
|
timestamp: Date.now(),
|
|
},
|
|
];
|
|
|
|
const baseModel = (
|
|
getModel as (p: string, m: string) => Model<Api> | undefined
|
|
)(targetPair.provider, targetPair.model);
|
|
if (!baseModel) {
|
|
console.log(`[Target: ${targetPair.label}] Model not found`);
|
|
continue;
|
|
}
|
|
|
|
const model: Model<Api> = targetPair.apiOverride
|
|
? { ...baseModel, api: targetPair.apiOverride }
|
|
: baseModel;
|
|
const supportsReasoning = model.reasoning === true;
|
|
|
|
console.log(
|
|
`[Target: ${targetPair.label}] Testing with ${otherMessages.length} messages from other providers...`,
|
|
);
|
|
|
|
let lastPayload: unknown;
|
|
try {
|
|
const response = await completeSimple(
|
|
model,
|
|
{
|
|
systemPrompt: "You are a helpful assistant.",
|
|
messages: allMessages,
|
|
tools: [testTool],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoning: supportsReasoning ? "high" : undefined,
|
|
onPayload: (payload) => {
|
|
lastPayload = payload;
|
|
},
|
|
},
|
|
);
|
|
|
|
if (response.stopReason === "error") {
|
|
console.log(
|
|
`[Target: ${targetPair.label}] FAILED: ${response.errorMessage}`,
|
|
);
|
|
dumpFailurePayload({
|
|
label: targetPair.label,
|
|
error: response.errorMessage || "Unknown error",
|
|
payload: lastPayload,
|
|
messages: allMessages,
|
|
});
|
|
results.push({
|
|
target: targetPair.label,
|
|
success: false,
|
|
error: response.errorMessage,
|
|
});
|
|
} else {
|
|
const text = response.content
|
|
.filter((c) => c.type === "text")
|
|
.map((c) => c.text)
|
|
.join(" ");
|
|
const preview = text.slice(0, 100).replace(/\n/g, " ");
|
|
console.log(`[Target: ${targetPair.label}] SUCCESS: ${preview}...`);
|
|
results.push({ target: targetPair.label, success: true });
|
|
}
|
|
} catch (error) {
|
|
const msg = error instanceof Error ? error.message : String(error);
|
|
console.log(`[Target: ${targetPair.label}] EXCEPTION: ${msg}`);
|
|
dumpFailurePayload({
|
|
label: targetPair.label,
|
|
error: msg,
|
|
payload: lastPayload,
|
|
messages: allMessages,
|
|
});
|
|
results.push({
|
|
target: targetPair.label,
|
|
success: false,
|
|
error: msg,
|
|
});
|
|
}
|
|
}
|
|
|
|
console.log("\n=== Results Summary ===\n");
|
|
const successes = results.filter((r) => r.success);
|
|
const failures = results.filter((r) => !r.success);
|
|
|
|
console.log(`Passed: ${successes.length}/${results.length}`);
|
|
if (failures.length > 0) {
|
|
console.log("\nFailures:");
|
|
for (const f of failures) {
|
|
console.log(` - ${f.target}: ${f.error}`);
|
|
}
|
|
}
|
|
|
|
expect(failures.length).toBe(0);
|
|
},
|
|
600000,
|
|
);
|
|
});
|