Mirror of https://github.com/harivansh-afk/clanker-agent.git (synced 2026-04-17 05:00:17 +00:00)
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/
- Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases
- Update deploy-staging.yml to build pi from source (bun compile) before Docker build
- Add apps/companion-os/** to path triggers
- No more cross-repo dispatch needed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in: commit 0250f72976
579 changed files with 206942 additions and 0 deletions
packages/agent/test/e2e.test.ts (new file, 571 lines)
@@ -0,0 +1,571 @@
import type {
  AssistantMessage,
  Model,
  ToolResultMessage,
  UserMessage,
} from "@mariozechner/pi-ai";
import { getModel } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { Agent } from "../src/index.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { calculateTool } from "./utils/calculate.js";

async function basicPrompt(model: Model<any>) {
  const agent = new Agent({
    initialState: {
      systemPrompt: "You are a helpful assistant. Keep your responses concise.",
      model,
      thinkingLevel: "off",
      tools: [],
    },
  });

  await agent.prompt("What is 2+2? Answer with just the number.");

  expect(agent.state.isStreaming).toBe(false);
  expect(agent.state.messages.length).toBe(2);
  expect(agent.state.messages[0].role).toBe("user");
  expect(agent.state.messages[1].role).toBe("assistant");

  const assistantMessage = agent.state.messages[1];
  if (assistantMessage.role !== "assistant")
    throw new Error("Expected assistant message");
  expect(assistantMessage.content.length).toBeGreaterThan(0);

  const textContent = assistantMessage.content.find((c) => c.type === "text");
  expect(textContent).toBeDefined();
  if (textContent?.type !== "text") throw new Error("Expected text content");
  expect(textContent.text).toContain("4");
}

async function toolExecution(model: Model<any>) {
  const agent = new Agent({
    initialState: {
      systemPrompt:
        "You are a helpful assistant. Always use the calculator tool for math.",
      model,
      thinkingLevel: "off",
      tools: [calculateTool],
    },
  });

  await agent.prompt("Calculate 123 * 456 using the calculator tool.");

  expect(agent.state.isStreaming).toBe(false);
  expect(agent.state.messages.length).toBeGreaterThanOrEqual(3);

  const toolResultMsg = agent.state.messages.find(
    (m) => m.role === "toolResult",
  );
  expect(toolResultMsg).toBeDefined();
  if (toolResultMsg?.role !== "toolResult")
    throw new Error("Expected tool result message");
  const textContent =
    toolResultMsg.content
      ?.filter((c) => c.type === "text")
      .map((c: any) => c.text)
      .join("\n") || "";
  expect(textContent).toBeDefined();

  const expectedResult = 123 * 456;
  expect(textContent).toContain(String(expectedResult));

  const finalMessage = agent.state.messages[agent.state.messages.length - 1];
  if (finalMessage.role !== "assistant")
    throw new Error("Expected final assistant message");
  const finalText = finalMessage.content.find((c) => c.type === "text");
  expect(finalText).toBeDefined();
  if (finalText?.type !== "text") throw new Error("Expected text content");
  // Check for number with or without comma formatting
  const hasNumber =
    finalText.text.includes(String(expectedResult)) ||
    finalText.text.includes("56,088") ||
    finalText.text.includes("56088");
  expect(hasNumber).toBe(true);
}

async function abortExecution(model: Model<any>) {
  const agent = new Agent({
    initialState: {
      systemPrompt: "You are a helpful assistant.",
      model,
      thinkingLevel: "off",
      tools: [calculateTool],
    },
  });

  const promptPromise = agent.prompt(
    "Calculate 100 * 200, then 300 * 400, then sum the results.",
  );

  setTimeout(() => {
    agent.abort();
  }, 100);

  await promptPromise;

  expect(agent.state.isStreaming).toBe(false);
  expect(agent.state.messages.length).toBeGreaterThanOrEqual(2);

  const lastMessage = agent.state.messages[agent.state.messages.length - 1];
  if (lastMessage.role !== "assistant")
    throw new Error("Expected assistant message");
  expect(lastMessage.stopReason).toBe("aborted");
  expect(lastMessage.errorMessage).toBeDefined();
  expect(agent.state.error).toBeDefined();
  expect(agent.state.error).toBe(lastMessage.errorMessage);
}

async function stateUpdates(model: Model<any>) {
  const agent = new Agent({
    initialState: {
      systemPrompt: "You are a helpful assistant.",
      model,
      thinkingLevel: "off",
      tools: [],
    },
  });

  const events: Array<string> = [];

  agent.subscribe((event) => {
    events.push(event.type);
  });

  await agent.prompt("Count from 1 to 5.");

  // Should have received lifecycle events
  expect(events).toContain("agent_start");
  expect(events).toContain("agent_end");
  expect(events).toContain("message_start");
  expect(events).toContain("message_end");
  // May have message_update events during streaming
  const hasMessageUpdates = events.some((e) => e === "message_update");
  expect(hasMessageUpdates).toBe(true);

  // Check final state
  expect(agent.state.isStreaming).toBe(false);
  expect(agent.state.messages.length).toBe(2); // User message + assistant response
}

async function multiTurnConversation(model: Model<any>) {
  const agent = new Agent({
    initialState: {
      systemPrompt: "You are a helpful assistant.",
      model,
      thinkingLevel: "off",
      tools: [],
    },
  });

  await agent.prompt("My name is Alice.");
  expect(agent.state.messages.length).toBe(2);

  await agent.prompt("What is my name?");
  expect(agent.state.messages.length).toBe(4);

  const lastMessage = agent.state.messages[3];
  if (lastMessage.role !== "assistant")
    throw new Error("Expected assistant message");
  const lastText = lastMessage.content.find((c) => c.type === "text");
  if (lastText?.type !== "text") throw new Error("Expected text content");
  expect(lastText.text.toLowerCase()).toContain("alice");
}

describe("Agent E2E Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)(
|
||||
"Google Provider (gemini-2.5-flash)",
|
||||
() => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)(
|
||||
"OpenAI Provider (gpt-4o-mini)",
|
||||
() => {
|
||||
const model = getModel("openai", "gpt-4o-mini");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
|
||||
"Anthropic Provider (claude-haiku-4-5)",
|
||||
() => {
|
||||
const model = getModel("anthropic", "claude-haiku-4-5");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-3)", () => {
|
||||
const model = getModel("xai", "grok-3");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)(
|
||||
"Groq Provider (openai/gpt-oss-20b)",
|
||||
() => {
|
||||
const model = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)(
|
||||
"Cerebras Provider (gpt-oss-120b)",
|
||||
() => {
|
||||
const model = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)(
|
||||
"zAI Provider (glm-4.5-air)",
|
||||
() => {
|
||||
const model = getModel("zai", "glm-4.5-air");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!hasBedrockCredentials())(
|
||||
"Amazon Bedrock Provider (claude-sonnet-4-5)",
|
||||
() => {
|
||||
const model = getModel(
|
||||
"amazon-bedrock",
|
||||
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
);
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
describe("Agent.continue()", () => {
|
||||
describe("validation", () => {
|
||||
it("should throw when no messages in context", async () => {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt: "Test",
|
||||
model: getModel("anthropic", "claude-haiku-4-5"),
|
||||
},
|
||||
});
|
||||
|
||||
await expect(agent.continue()).rejects.toThrow(
|
||||
"No messages to continue from",
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw when last message is assistant", async () => {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt: "Test",
|
||||
model: getModel("anthropic", "claude-haiku-4-5"),
|
||||
},
|
||||
});
|
||||
|
||||
const assistantMessage: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Hello" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-haiku-4-5",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
agent.replaceMessages([assistantMessage]);
|
||||
|
||||
await expect(agent.continue()).rejects.toThrow(
|
||||
"Cannot continue from message role: assistant",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
|
||||
"continue from user message",
|
||||
() => {
|
||||
const model = getModel("anthropic", "claude-haiku-4-5");
|
||||
|
||||
it("should continue and get response when last message is user", async () => {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt:
|
||||
"You are a helpful assistant. Follow instructions exactly.",
|
||||
model,
|
||||
thinkingLevel: "off",
|
||||
tools: [],
|
||||
},
|
||||
});
|
||||
|
||||
// Manually add a user message without calling prompt()
|
||||
const userMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "Say exactly: HELLO WORLD" }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
agent.replaceMessages([userMessage]);
|
||||
|
||||
// Continue from the user message
|
||||
await agent.continue();
|
||||
|
||||
expect(agent.state.isStreaming).toBe(false);
|
||||
expect(agent.state.messages.length).toBe(2);
|
||||
expect(agent.state.messages[0].role).toBe("user");
|
||||
expect(agent.state.messages[1].role).toBe("assistant");
|
||||
|
||||
const assistantMsg = agent.state.messages[1] as AssistantMessage;
|
||||
const textContent = assistantMsg.content.find((c) => c.type === "text");
|
||||
expect(textContent).toBeDefined();
|
||||
if (textContent?.type === "text") {
|
||||
expect(textContent.text.toUpperCase()).toContain("HELLO WORLD");
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
|
||||
"continue from tool result",
|
||||
() => {
|
||||
const model = getModel("anthropic", "claude-haiku-4-5");
|
||||
|
||||
it("should continue and process tool results", async () => {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt:
|
||||
"You are a helpful assistant. After getting a calculation result, state the answer clearly.",
|
||||
model,
|
||||
thinkingLevel: "off",
|
||||
tools: [calculateTool],
|
||||
},
|
||||
});
|
||||
|
||||
// Set up a conversation state as if tool was just executed
|
||||
const userMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "What is 5 + 3?" }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
const assistantMessage: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "text", text: "Let me calculate that." },
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "calc-1",
|
||||
name: "calculate",
|
||||
arguments: { expression: "5 + 3" },
|
||||
},
|
||||
],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-haiku-4-5",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
const toolResult: ToolResultMessage = {
|
||||
role: "toolResult",
|
||||
toolCallId: "calc-1",
|
||||
toolName: "calculate",
|
||||
content: [{ type: "text", text: "5 + 3 = 8" }],
|
||||
isError: false,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
agent.replaceMessages([userMessage, assistantMessage, toolResult]);
|
||||
|
||||
// Continue from the tool result
|
||||
await agent.continue();
|
||||
|
||||
expect(agent.state.isStreaming).toBe(false);
|
||||
// Should have added an assistant response
|
||||
expect(agent.state.messages.length).toBeGreaterThanOrEqual(4);
|
||||
|
||||
const lastMessage =
|
||||
agent.state.messages[agent.state.messages.length - 1];
|
||||
expect(lastMessage.role).toBe("assistant");
|
||||
|
||||
if (lastMessage.role === "assistant") {
|
||||
const textContent = lastMessage.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => (c as { type: "text"; text: string }).text)
|
||||
.join(" ");
|
||||
// Should mention 8 in the response
|
||||
expect(textContent).toMatch(/8/);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
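
Note: the suite imports calculateTool from ./utils/calculate.js, which is not part of this file. As a rough sketch only (the real helper lives elsewhere in the commit, and the exact Tool shape expected by @mariozechner/pi-ai is not shown in this hunk, so the name/description/parameters/execute fields below are assumptions), a test-only calculator tool could look like this:

// Hypothetical sketch of the calculate test tool; the real ./utils/calculate.ts
// is not included in this hunk, and the Tool interface of @mariozechner/pi-ai
// may differ from the shape assumed here.
export const calculateTool = {
  name: "calculate",
  description: "Evaluate a simple arithmetic expression, e.g. '123 * 456'.",
  parameters: {
    type: "object",
    properties: {
      expression: { type: "string", description: "Arithmetic expression to evaluate" },
    },
    required: ["expression"],
  },
  async execute(args: { expression: string }) {
    // Only digits, whitespace, and basic operators are allowed, so the
    // Function-based evaluation below cannot run arbitrary code.
    if (!/^[\d\s+\-*/().]+$/.test(args.expression)) {
      throw new Error(`Unsupported expression: ${args.expression}`);
    }
    const value = Function(`"use strict"; return (${args.expression});`)() as number;
    // Produces text like "5 + 3 = 8", the format the tests assert against.
    return [{ type: "text", text: `${args.expression} = ${value}` }];
  },
};

The guard regex keeps evaluation limited to plain arithmetic, which is all the e2e tests exercise.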
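
Likewise, hasBedrockCredentials() from ./bedrock-utils.js gates the Bedrock suite but is not shown in this hunk. A minimal sketch, assuming the check is purely based on the standard AWS environment variables:

// Hypothetical sketch; the real ./bedrock-utils.ts is not part of this hunk.
// Assumes the check only looks at static credentials or a configured profile.
export function hasBedrockCredentials(): boolean {
  const env = process.env;
  const hasStaticKeys = Boolean(env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY);
  const hasProfile = Boolean(env.AWS_PROFILE);
  return hasStaticKeys || hasProfile;
}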