mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-17 10:02:23 +00:00
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/
- Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases
- Update deploy-staging.yml to build pi from source (bun compile) before Docker build
- Add apps/companion-os/** to path triggers
- No more cross-repo dispatch needed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
0250f72976
579 changed files with 206942 additions and 0 deletions
568
packages/ai/test/cross-provider-handoff.test.ts
Normal file
568
packages/ai/test/cross-provider-handoff.test.ts
Normal file
|
|
@ -0,0 +1,568 @@
|
|||
/**
 * Cross-Provider Handoff Test
 *
 * Tests that contexts generated by one provider/model can be consumed by another.
 * This catches issues like:
 * - Tool call ID format incompatibilities (e.g., OpenAI Codex pipe characters)
 * - Thinking block transformation issues
 * - Message format incompatibilities
 *
 * Strategy:
 * 1. beforeAll: For each provider/model with credentials, generate a "small context":
 *    - User message asking to use a tool
 *    - Assistant response with thinking + tool call
 *    - Tool result
 *    - Final assistant response
 *
 * 2. Test: For each target provider/model:
 *    - Concatenate ALL other contexts into one
 *    - Ask the model to reply with a short confirmation sentence
 *    - If it fails, there's a compatibility issue
 *
 * Fixtures are generated fresh on each run; nothing is cached between runs.
 */
|
||||
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { writeFileSync } from "fs";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { completeSimple, getEnvApiKey } from "../src/stream.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Message,
|
||||
Model,
|
||||
Tool,
|
||||
ToolResultMessage,
|
||||
} from "../src/types.js";
|
||||
import { hasAzureOpenAICredentials } from "./azure-utils.js";
|
||||
import { resolveApiKey } from "./oauth.js";
|
||||
|
||||
/** Parameter schema of the test tool: an object with one numeric `value` field. */
const testToolSchema = Type.Object({
  value: Type.Number({ description: "A number to double" }),
});

/**
 * The single tool offered to every model in this file. It is only ever
 * described to the models; the test fabricates the tool result itself.
 */
const testTool: Tool<typeof testToolSchema> = {
  name: "double_number",
  description: "Doubles a number and returns the result",
  parameters: testToolSchema,
};
|
||||
|
||||
/** One provider/model combination exercised by the handoff test. */
interface ProviderModelPair {
  /** Provider identifier passed to `getModel` and used for credential lookup. */
  provider: string;
  /** Model identifier passed to `getModel`. */
  model: string;
  /** Unique key for this pair; used in log lines, fixture keys and dump file names. */
  label: string;
  /** When set, replaces the `api` field of the model returned by `getModel`. */
  apiOverride?: Api;
}
|
||||
|
||||
/**
 * Every provider/model pair the suite tries to build a fixture for.
 * Pairs without credentials, or whose fixture generation fails, are dropped
 * at runtime in `beforeAll`.
 */
const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [
  // Anthropic
  { provider: "anthropic", model: "claude-sonnet-4-5", label: "anthropic-claude-sonnet-4-5" },

  // Google
  { provider: "google", model: "gemini-3-flash-preview", label: "google-gemini-3-flash-preview" },

  // OpenAI
  {
    provider: "openai",
    model: "gpt-4o-mini",
    label: "openai-completions-gpt-4o-mini",
    apiOverride: "openai-completions",
  },
  { provider: "openai", model: "gpt-5-mini", label: "openai-responses-gpt-5-mini" },
  {
    provider: "azure-openai-responses",
    model: "gpt-4o-mini",
    label: "azure-openai-responses-gpt-4o-mini",
  },

  // OpenAI Codex
  { provider: "openai-codex", model: "gpt-5.2-codex", label: "openai-codex-gpt-5.2-codex" },

  // Google Antigravity
  { provider: "google-antigravity", model: "gemini-3-flash", label: "antigravity-gemini-3-flash" },
  {
    provider: "google-antigravity",
    model: "claude-sonnet-4-5",
    label: "antigravity-claude-sonnet-4-5",
  },

  // GitHub Copilot
  { provider: "github-copilot", model: "claude-sonnet-4.5", label: "copilot-claude-sonnet-4.5" },
  { provider: "github-copilot", model: "gpt-5.1-codex", label: "copilot-gpt-5.1-codex" },
  {
    provider: "github-copilot",
    model: "gemini-3-flash-preview",
    label: "copilot-gemini-3-flash-preview",
  },
  { provider: "github-copilot", model: "grok-code-fast-1", label: "copilot-grok-code-fast-1" },

  // Amazon Bedrock
  {
    provider: "amazon-bedrock",
    model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
    label: "bedrock-claude-sonnet-4-5",
  },

  // xAI
  { provider: "xai", model: "grok-code-fast-1", label: "xai-grok-code-fast-1" },

  // Cerebras
  { provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" },

  // Groq
  { provider: "groq", model: "openai/gpt-oss-120b", label: "groq-gpt-oss-120b" },

  // Hugging Face
  { provider: "huggingface", model: "moonshotai/Kimi-K2.5", label: "huggingface-kimi-k2.5" },

  // Kimi For Coding
  { provider: "kimi-coding", model: "kimi-k2-thinking", label: "kimi-coding-k2-thinking" },

  // Mistral
  { provider: "mistral", model: "devstral-medium-latest", label: "mistral-devstral-medium" },

  // MiniMax
  { provider: "minimax", model: "MiniMax-M2.1", label: "minimax-m2.1" },

  // OpenCode Zen
  { provider: "opencode", model: "big-pickle", label: "zen-big-pickle" },
  { provider: "opencode", model: "claude-sonnet-4-5", label: "zen-claude-sonnet-4-5" },
  { provider: "opencode", model: "gemini-3-flash", label: "zen-gemini-3-flash" },
  { provider: "opencode", model: "glm-4.7-free", label: "zen-glm-4.7-free" },
  { provider: "opencode", model: "gpt-5.2-codex", label: "zen-gpt-5.2-codex" },
  { provider: "opencode", model: "minimax-m2.1-free", label: "zen-minimax-m2.1-free" },

  // OpenCode Go
  { provider: "opencode-go", model: "kimi-k2.5", label: "go-kimi-k2.5" },
  { provider: "opencode-go", model: "minimax-m2.5", label: "go-minimax-m2.5" },
];
|
||||
|
||||
/** A generated conversation fixture, stored in memory under its pair label. */
interface CachedContext {
  /** Label of the pair that produced this fixture. */
  label: string;
  /** Provider identifier of the producing pair. */
  provider: string;
  /** Model identifier of the producing pair. */
  model: string;
  /** API of the model that actually served the requests (after any override). */
  api: Api;
  /** The conversation messages produced during fixture generation, in order. */
  messages: Message[];
  /** ISO-8601 timestamp taken when the fixture was stored. */
  generatedAt: string;
}
|
||||
|
||||
/**
 * Resolve the credential to use for `provider`.
 *
 * `resolveApiKey` (from ./oauth.js) is consulted first; when it yields nothing
 * truthy, the result of `getEnvApiKey` is returned instead.
 *
 * @param provider Provider identifier to look up.
 * @returns The key, or `undefined` when neither source has one.
 */
async function getApiKey(provider: string): Promise<string | undefined> {
  const fromOAuth = await resolveApiKey(provider);
  return fromOAuth ? fromOAuth : getEnvApiKey(provider);
}
|
||||
|
||||
/**
 * Synchronous credential check, usable inside `skipIf` conditions.
 *
 * Does not consult `resolveApiKey`, so a provider that `getApiKey` could
 * authenticate through that path may still report `false` here.
 *
 * @param provider Provider identifier to check.
 * @returns `true` when `getEnvApiKey` returns a truthy key, or, for
 *   "azure-openai-responses", when `hasAzureOpenAICredentials()` is true.
 */
function hasApiKey(provider: string): boolean {
  return provider === "azure-openai-responses"
    ? hasAzureOpenAICredentials()
    : Boolean(getEnvApiKey(provider));
}
|
||||
|
||||
/**
 * Report whether at least one entry of `PROVIDER_MODEL_PAIRS` has credentials
 * according to `hasApiKey`. Used to skip the whole suite when nothing can run.
 */
function hasAnyApiKey(): boolean {
  for (const { provider } of PROVIDER_MODEL_PAIRS) {
    if (hasApiKey(provider)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Write diagnostic details of a failed request to a JSON file under /tmp and
 * log the file name.
 *
 * The dump is best-effort. This function is called from error paths, so a
 * failure to serialize (for example a circular payload) or to write the file
 * is logged and swallowed, leaving the original provider error as the one the
 * caller reports.
 *
 * @param params.label    Identifies the failing pair/stage; becomes part of the file name.
 * @param params.error    Human-readable error text to record.
 * @param params.payload  Last request payload captured via `onPayload`, if any.
 * @param params.messages The messages that were sent with the failing request.
 */
function dumpFailurePayload(params: {
  label: string;
  error: string;
  payload?: unknown;
  messages: Message[];
}): void {
  // Date.now() keeps repeated failures for the same label from overwriting each other.
  const filename = `/tmp/pi-handoff-${params.label}-${Date.now()}.json`;
  const body = {
    label: params.label,
    error: params.error,
    payload: params.payload,
    messages: params.messages,
  };

  try {
    writeFileSync(filename, JSON.stringify(body, null, 2));
    console.log(`Wrote failure payload to ${filename}`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.log(`Could not write failure payload to ${filename}: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Run one `completeSimple` request on behalf of fixture generation.
 *
 * On a thrown error, or a response whose `stopReason` is "error", the failure
 * is logged, the request is dumped via `dumpFailurePayload` under the label
 * `<pairLabel>-<stage>`, and `null` is returned.
 *
 * @returns The assistant message, or `null` when the request failed.
 */
async function requestFixtureCompletion(params: {
  model: Model<Api>;
  apiKey: string;
  systemPrompt: string;
  messages: Message[];
  stage: "initial" | "final";
  pairLabel: string;
}): Promise<AssistantMessage | null> {
  const { model, apiKey, systemPrompt, messages, stage, pairLabel } = params;
  const stageTitle = stage === "initial" ? "Initial" : "Final";

  // Scoped to this one request, so a failure can never be dumped together
  // with a payload that was captured for a different request.
  let lastPayload: unknown;

  const reportFailure = (logLine: string, error: string): null => {
    console.log(logLine);
    dumpFailurePayload({
      label: `${pairLabel}-${stage}`,
      error,
      payload: lastPayload,
      messages,
    });
    return null;
  };

  let response: AssistantMessage;
  try {
    response = await completeSimple(
      model,
      { systemPrompt, messages, tools: [testTool] },
      {
        apiKey,
        // Request reasoning only from models that declare support for it.
        reasoning: model.reasoning === true ? "high" : undefined,
        onPayload: (payload) => {
          lastPayload = payload;
        },
      },
    );
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    return reportFailure(` ${stageTitle} request failed: ${msg}`, msg);
  }

  if (response.stopReason === "error") {
    return reportFailure(
      ` ${stageTitle} request error: ${response.errorMessage}`,
      response.errorMessage || "Unknown error",
    );
  }

  return response;
}

/**
 * Generate a context from a provider/model pair.
 * Makes real API calls so the fixture carries whatever tool call IDs and
 * content blocks the provider actually returns.
 *
 * Flow: user message -> assistant response (expected to contain a tool call)
 * -> fabricated tool result "42" -> final assistant response.
 *
 * @param pair   Provider/model pair to generate the fixture for.
 * @param apiKey Credential passed to `completeSimple`.
 * @returns The messages and the API that served them. The result holds four
 *   messages on full success and only two when the model made no tool call.
 *   Returns `null` when the model is unknown or either request failed.
 */
async function generateContext(
  pair: ProviderModelPair,
  apiKey: string,
): Promise<{ messages: Message[]; api: Api } | null> {
  // getModel is cast to a string-keyed lookup so pairs can be data-driven.
  const baseModel = (
    getModel as (p: string, m: string) => Model<Api> | undefined
  )(pair.provider, pair.model);
  if (!baseModel) {
    console.log(` Model not found: ${pair.provider}/${pair.model}`);
    return null;
  }

  const model: Model<Api> = pair.apiOverride
    ? { ...baseModel, api: pair.apiOverride }
    : baseModel;

  const userMessage: Message = {
    role: "user",
    content: "Please double the number 21 using the double_number tool.",
    timestamp: Date.now(),
  };

  const assistantResponse = await requestFixtureCompletion({
    model,
    apiKey,
    systemPrompt:
      "You are a helpful assistant. Use the provided tool to complete the task.",
    messages: [userMessage],
    stage: "initial",
    pairLabel: pair.label,
  });
  if (!assistantResponse) {
    return null;
  }

  const toolCall = assistantResponse.content.find((c) => c.type === "toolCall");
  // The second check repeats the predicate so the compiler narrows the type.
  if (!toolCall || toolCall.type !== "toolCall") {
    console.log(
      ` No tool call in response (stopReason: ${assistantResponse.stopReason})`,
    );
    return {
      messages: [userMessage, assistantResponse],
      api: model.api,
    };
  }

  console.log(` Tool call ID: ${toolCall.id}`);

  // Fabricated result: the tool is never executed, 21 doubled is hard-coded.
  const toolResult: ToolResultMessage = {
    role: "toolResult",
    toolCallId: toolCall.id,
    toolName: toolCall.name,
    content: [{ type: "text", text: "42" }],
    isError: false,
    timestamp: Date.now(),
  };

  const messagesForFinal: Message[] = [
    userMessage,
    assistantResponse,
    toolResult,
  ];

  const finalResponse = await requestFixtureCompletion({
    model,
    apiKey,
    systemPrompt: "You are a helpful assistant.",
    messages: messagesForFinal,
    stage: "final",
    pairLabel: pair.label,
  });
  if (!finalResponse) {
    return null;
  }

  return {
    messages: [userMessage, assistantResponse, toolResult, finalResponse],
    api: model.api,
  };
}
|
||||
|
||||
/**
 * Suite: build one fixture conversation per available provider/model pair,
 * then replay every other pair's fixture into each target model and require
 * that the target answers without an error.
 *
 * The suite is skipped entirely when `hasAnyApiKey()` is false.
 */
describe.skipIf(!hasAnyApiKey())("Cross-Provider Handoff", () => {
  // Fixtures keyed by pair label, filled in beforeAll.
  let contexts: Record<string, CachedContext>;
  // Pairs for which a complete fixture exists, in generation order.
  let availablePairs: ProviderModelPair[];

  beforeAll(async () => {
    contexts = {};
    availablePairs = [];

    console.log("\n=== Generating Fixtures ===\n");

    // Fixtures are generated one pair at a time, in list order.
    for (const pair of PROVIDER_MODEL_PAIRS) {
      const apiKey = await getApiKey(pair.provider);
      if (!apiKey) {
        console.log(`[${pair.label}] Skipping - no auth for ${pair.provider}`);
        continue;
      }

      console.log(`[${pair.label}] Generating fixture...`);
      const result = await generateContext(pair, apiKey);

      // A usable fixture has all four messages:
      // user, assistant (tool call), tool result, final assistant.
      if (!result || result.messages.length < 4) {
        console.log(`[${pair.label}] Failed to generate fixture, skipping`);
        continue;
      }

      contexts[pair.label] = {
        label: pair.label,
        provider: pair.provider,
        model: pair.model,
        api: result.api,
        messages: result.messages,
        generatedAt: new Date().toISOString(),
      };
      availablePairs.push(pair);
      console.log(
        `[${pair.label}] Generated ${result.messages.length} messages`,
      );
    }

    console.log(
      `\n=== ${availablePairs.length}/${PROVIDER_MODEL_PAIRS.length} contexts available ===\n`,
    );
  }, 300000); // 5 minute budget for all fixture requests

  it.skipIf(!hasAnyApiKey())(
    "should have at least 2 fixtures to test handoffs",
    () => {
      expect(Object.keys(contexts).length).toBeGreaterThanOrEqual(2);
    },
  );

  it.skipIf(!hasAnyApiKey())(
    "should handle cross-provider handoffs for each target",
    async () => {
      const contextLabels = Object.keys(contexts);

      // The fixture-count test above reports this case as a failure.
      if (contextLabels.length < 2) {
        console.log("Not enough fixtures for handoff test, skipping");
        return;
      }

      console.log("\n=== Testing Cross-Provider Handoffs ===\n");

      const results: { target: string; success: boolean; error?: string }[] =
        [];

      for (const targetPair of availablePairs) {
        const apiKey = await getApiKey(targetPair.provider);
        if (!apiKey) {
          console.log(`[Target: ${targetPair.label}] Skipping - no auth`);
          continue;
        }

        // Collect messages from ALL OTHER contexts
        const otherMessages: Message[] = Object.entries(contexts)
          .filter(([label]) => label !== targetPair.label)
          .flatMap(([, ctx]) => ctx.messages);

        if (otherMessages.length === 0) {
          console.log(
            `[Target: ${targetPair.label}] Skipping - no other contexts`,
          );
          continue;
        }

        const allMessages: Message[] = [
          ...otherMessages,
          {
            role: "user",
            content:
              "Great, thanks for all that help! Now just say 'Hello, handoff successful!' to confirm you received everything.",
            timestamp: Date.now(),
          },
        ];

        const baseModel = (
          getModel as (p: string, m: string) => Model<Api> | undefined
        )(targetPair.provider, targetPair.model);
        if (!baseModel) {
          console.log(`[Target: ${targetPair.label}] Model not found`);
          continue;
        }

        const model: Model<Api> = targetPair.apiOverride
          ? { ...baseModel, api: targetPair.apiOverride }
          : baseModel;
        const supportsReasoning = model.reasoning === true;

        console.log(
          `[Target: ${targetPair.label}] Testing with ${otherMessages.length} messages from other providers...`,
        );

        // Declared per target, so a dump never carries another target's payload.
        let lastPayload: unknown;
        try {
          const response = await completeSimple(
            model,
            {
              systemPrompt: "You are a helpful assistant.",
              messages: allMessages,
              tools: [testTool],
            },
            {
              apiKey,
              reasoning: supportsReasoning ? "high" : undefined,
              onPayload: (payload) => {
                lastPayload = payload;
              },
            },
          );

          if (response.stopReason === "error") {
            // One fallback string for the log line, the dump and the summary,
            // so a missing errorMessage never shows up as "undefined".
            const errorText = response.errorMessage || "Unknown error";
            console.log(`[Target: ${targetPair.label}] FAILED: ${errorText}`);
            dumpFailurePayload({
              label: targetPair.label,
              error: errorText,
              payload: lastPayload,
              messages: allMessages,
            });
            results.push({
              target: targetPair.label,
              success: false,
              error: errorText,
            });
          } else {
            const text = response.content
              .filter((c) => c.type === "text")
              .map((c) => c.text)
              .join(" ");
            const preview = text.slice(0, 100).replace(/\n/g, " ");
            console.log(`[Target: ${targetPair.label}] SUCCESS: ${preview}...`);
            results.push({ target: targetPair.label, success: true });
          }
        } catch (error) {
          const msg = error instanceof Error ? error.message : String(error);
          console.log(`[Target: ${targetPair.label}] EXCEPTION: ${msg}`);
          dumpFailurePayload({
            label: targetPair.label,
            error: msg,
            payload: lastPayload,
            messages: allMessages,
          });
          results.push({
            target: targetPair.label,
            success: false,
            error: msg,
          });
        }
      }

      console.log("\n=== Results Summary ===\n");
      const successes = results.filter((r) => r.success);
      const failures = results.filter((r) => !r.success);

      console.log(`Passed: ${successes.length}/${results.length}`);
      if (failures.length > 0) {
        console.log("\nFailures:");
        for (const f of failures) {
          console.log(` - ${f.target}: ${f.error}`);
        }
      }

      // Every target is attempted before asserting, so one run reports all
      // incompatible targets instead of stopping at the first.
      expect(failures.length).toBe(0);
    },
    600000, // 10 minute budget for all handoff requests
  );
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue