mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-17 04:02:22 +00:00
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/ - Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases - Update deploy-staging.yml to build pi from source (bun compile) before Docker build - Add apps/companion-os/** to path triggers - No more cross-repo dispatch needed Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
0250f72976
579 changed files with 206942 additions and 0 deletions
629
packages/agent/test/agent-loop.test.ts
Normal file
629
packages/agent/test/agent-loop.test.ts
Normal file
|
|
@ -0,0 +1,629 @@
|
|||
import {
|
||||
type AssistantMessage,
|
||||
type AssistantMessageEvent,
|
||||
EventStream,
|
||||
type Message,
|
||||
type Model,
|
||||
type UserMessage,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { agentLoop, agentLoopContinue } from "../src/agent-loop.js";
|
||||
import type {
|
||||
AgentContext,
|
||||
AgentEvent,
|
||||
AgentLoopConfig,
|
||||
AgentMessage,
|
||||
AgentTool,
|
||||
} from "../src/types.js";
|
||||
|
||||
// Mock stream for testing - mimics MockAssistantStream
// Configures the EventStream base with a terminal-event predicate and a
// result extractor so the stream resolves with the final AssistantMessage
// (on "done") or the error payload (on "error").
class MockAssistantStream extends EventStream<
	AssistantMessageEvent,
	AssistantMessage
> {
	constructor() {
		super(
			// A "done" or "error" event terminates the stream.
			(event) => event.type === "done" || event.type === "error",
			// Map the terminal event to the stream's final value.
			(event) => {
				if (event.type === "done") return event.message;
				if (event.type === "error") return event.error;
				throw new Error("Unexpected event type");
			},
		);
	}
}
|
||||
|
||||
function createUsage() {
|
||||
return {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
||||
function createModel(): Model<"openai-responses"> {
|
||||
return {
|
||||
id: "mock",
|
||||
name: "mock",
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
baseUrl: "https://example.invalid",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 8192,
|
||||
maxTokens: 2048,
|
||||
};
|
||||
}
|
||||
|
||||
function createAssistantMessage(
|
||||
content: AssistantMessage["content"],
|
||||
stopReason: AssistantMessage["stopReason"] = "stop",
|
||||
): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content,
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
model: "mock",
|
||||
usage: createUsage(),
|
||||
stopReason,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function createUserMessage(text: string): UserMessage {
|
||||
return {
|
||||
role: "user",
|
||||
content: text,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
// Simple identity converter for tests - just passes through standard messages
|
||||
function identityConverter(messages: AgentMessage[]): Message[] {
|
||||
return messages.filter(
|
||||
(m) =>
|
||||
m.role === "user" || m.role === "assistant" || m.role === "toolResult",
|
||||
) as Message[];
|
||||
}
|
||||
|
||||
// Integration-style tests for agentLoop driven by a mock streamFn; each test
// pushes canned AssistantMessage events via queueMicrotask instead of hitting
// a real provider.
describe("agentLoop with AgentMessage", () => {
	it("should emit events with AgentMessage types", async () => {
		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [],
			tools: [],
		};

		const userPrompt: AgentMessage = createUserMessage("Hello");

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: identityConverter,
		};

		// Mock LLM: immediately responds with a single text message.
		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				const message = createAssistantMessage([
					{ type: "text", text: "Hi there!" },
				]);
				stream.push({ type: "done", reason: "stop", message });
			});
			return stream;
		};

		const events: AgentEvent[] = [];
		const stream = agentLoop(
			[userPrompt],
			context,
			config,
			undefined,
			streamFn,
		);

		for await (const event of stream) {
			events.push(event);
		}

		const messages = await stream.result();

		// Should have user message and assistant message
		expect(messages.length).toBe(2);
		expect(messages[0].role).toBe("user");
		expect(messages[1].role).toBe("assistant");

		// Verify event sequence
		const eventTypes = events.map((e) => e.type);
		expect(eventTypes).toContain("agent_start");
		expect(eventTypes).toContain("turn_start");
		expect(eventTypes).toContain("message_start");
		expect(eventTypes).toContain("message_end");
		expect(eventTypes).toContain("turn_end");
		expect(eventTypes).toContain("agent_end");
	});

	it("should handle custom message types via convertToLlm", async () => {
		// Create a custom message type
		interface CustomNotification {
			role: "notification";
			text: string;
			timestamp: number;
		}

		const notification: CustomNotification = {
			role: "notification",
			text: "This is a notification",
			timestamp: Date.now(),
		};

		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [notification as unknown as AgentMessage], // Custom message in context
			tools: [],
		};

		const userPrompt: AgentMessage = createUserMessage("Hello");

		// Captures what convertToLlm actually produced so the test can inspect it.
		let convertedMessages: Message[] = [];
		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: (messages) => {
				// Filter out notifications, convert rest
				convertedMessages = messages
					.filter((m) => (m as { role: string }).role !== "notification")
					.filter(
						(m) =>
							m.role === "user" ||
							m.role === "assistant" ||
							m.role === "toolResult",
					) as Message[];
				return convertedMessages;
			},
		};

		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				const message = createAssistantMessage([
					{ type: "text", text: "Response" },
				]);
				stream.push({ type: "done", reason: "stop", message });
			});
			return stream;
		};

		const events: AgentEvent[] = [];
		const stream = agentLoop(
			[userPrompt],
			context,
			config,
			undefined,
			streamFn,
		);

		for await (const event of stream) {
			events.push(event);
		}

		// The notification should have been filtered out in convertToLlm
		expect(convertedMessages.length).toBe(1); // Only user message
		expect(convertedMessages[0].role).toBe("user");
	});

	it("should apply transformContext before convertToLlm", async () => {
		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [
				createUserMessage("old message 1"),
				createAssistantMessage([{ type: "text", text: "old response 1" }]),
				createUserMessage("old message 2"),
				createAssistantMessage([{ type: "text", text: "old response 2" }]),
			],
			tools: [],
		};

		const userPrompt: AgentMessage = createUserMessage("new message");

		let transformedMessages: AgentMessage[] = [];
		let convertedMessages: Message[] = [];

		const config: AgentLoopConfig = {
			model: createModel(),
			transformContext: async (messages) => {
				// Keep only last 2 messages (prune old ones)
				transformedMessages = messages.slice(-2);
				return transformedMessages;
			},
			convertToLlm: (messages) => {
				convertedMessages = messages.filter(
					(m) =>
						m.role === "user" ||
						m.role === "assistant" ||
						m.role === "toolResult",
				) as Message[];
				return convertedMessages;
			},
		};

		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				const message = createAssistantMessage([
					{ type: "text", text: "Response" },
				]);
				stream.push({ type: "done", reason: "stop", message });
			});
			return stream;
		};

		const stream = agentLoop(
			[userPrompt],
			context,
			config,
			undefined,
			streamFn,
		);

		for await (const _ of stream) {
			// consume
		}

		// transformContext should have been called first, keeping only last 2
		expect(transformedMessages.length).toBe(2);
		// Then convertToLlm receives the pruned messages
		expect(convertedMessages.length).toBe(2);
	});

	it("should handle tool calls and results", async () => {
		const toolSchema = Type.Object({ value: Type.String() });
		// Records each tool invocation's argument for later assertions.
		const executed: string[] = [];
		const tool: AgentTool<typeof toolSchema, { value: string }> = {
			name: "echo",
			label: "Echo",
			description: "Echo tool",
			parameters: toolSchema,
			async execute(_toolCallId, params) {
				executed.push(params.value);
				return {
					content: [{ type: "text", text: `echoed: ${params.value}` }],
					details: { value: params.value },
				};
			},
		};

		const context: AgentContext = {
			systemPrompt: "",
			messages: [],
			tools: [tool],
		};

		const userPrompt: AgentMessage = createUserMessage("echo something");

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: identityConverter,
		};

		// Two-phase mock: first LLM call returns a tool call, second a final answer.
		let callIndex = 0;
		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				if (callIndex === 0) {
					// First call: return tool call
					const message = createAssistantMessage(
						[
							{
								type: "toolCall",
								id: "tool-1",
								name: "echo",
								arguments: { value: "hello" },
							},
						],
						"toolUse",
					);
					stream.push({ type: "done", reason: "toolUse", message });
				} else {
					// Second call: return final response
					const message = createAssistantMessage([
						{ type: "text", text: "done" },
					]);
					stream.push({ type: "done", reason: "stop", message });
				}
				callIndex++;
			});
			return stream;
		};

		const events: AgentEvent[] = [];
		const stream = agentLoop(
			[userPrompt],
			context,
			config,
			undefined,
			streamFn,
		);

		for await (const event of stream) {
			events.push(event);
		}

		// Tool should have been executed
		expect(executed).toEqual(["hello"]);

		// Should have tool execution events
		const toolStart = events.find((e) => e.type === "tool_execution_start");
		const toolEnd = events.find((e) => e.type === "tool_execution_end");
		expect(toolStart).toBeDefined();
		expect(toolEnd).toBeDefined();
		if (toolEnd?.type === "tool_execution_end") {
			expect(toolEnd.isError).toBe(false);
		}
	});

	it("should inject queued messages and skip remaining tool calls", async () => {
		const toolSchema = Type.Object({ value: Type.String() });
		const executed: string[] = [];
		const tool: AgentTool<typeof toolSchema, { value: string }> = {
			name: "echo",
			label: "Echo",
			description: "Echo tool",
			parameters: toolSchema,
			async execute(_toolCallId, params) {
				executed.push(params.value);
				return {
					content: [{ type: "text", text: `ok:${params.value}` }],
					details: { value: params.value },
				};
			},
		};

		const context: AgentContext = {
			systemPrompt: "",
			messages: [],
			tools: [tool],
		};

		const userPrompt: AgentMessage = createUserMessage("start");
		const queuedUserMessage: AgentMessage = createUserMessage("interrupt");

		let queuedDelivered = false;
		let callIndex = 0;
		let sawInterruptInContext = false;

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: identityConverter,
			getSteeringMessages: async () => {
				// Return steering message after first tool executes
				if (executed.length === 1 && !queuedDelivered) {
					queuedDelivered = true;
					return [queuedUserMessage];
				}
				return [];
			},
		};

		const events: AgentEvent[] = [];
		const stream = agentLoop(
			[userPrompt],
			context,
			config,
			undefined,
			(_model, ctx, _options) => {
				// Check if interrupt message is in context on second call
				if (callIndex === 1) {
					sawInterruptInContext = ctx.messages.some(
						(m) =>
							m.role === "user" &&
							typeof m.content === "string" &&
							m.content === "interrupt",
					);
				}

				const mockStream = new MockAssistantStream();
				queueMicrotask(() => {
					if (callIndex === 0) {
						// First call: return two tool calls
						const message = createAssistantMessage(
							[
								{
									type: "toolCall",
									id: "tool-1",
									name: "echo",
									arguments: { value: "first" },
								},
								{
									type: "toolCall",
									id: "tool-2",
									name: "echo",
									arguments: { value: "second" },
								},
							],
							"toolUse",
						);
						mockStream.push({ type: "done", reason: "toolUse", message });
					} else {
						// Second call: return final response
						const message = createAssistantMessage([
							{ type: "text", text: "done" },
						]);
						mockStream.push({ type: "done", reason: "stop", message });
					}
					callIndex++;
				});
				return mockStream;
			},
		);

		for await (const event of stream) {
			events.push(event);
		}

		// Only first tool should have executed
		expect(executed).toEqual(["first"]);

		// Second tool should be skipped
		const toolEnds = events.filter(
			(e): e is Extract<AgentEvent, { type: "tool_execution_end" }> =>
				e.type === "tool_execution_end",
		);
		expect(toolEnds.length).toBe(2);
		expect(toolEnds[0].isError).toBe(false);
		expect(toolEnds[1].isError).toBe(true);
		if (toolEnds[1].result.content[0]?.type === "text") {
			expect(toolEnds[1].result.content[0].text).toContain(
				"Skipped due to queued user message",
			);
		}

		// Queued message should appear in events
		const queuedMessageEvent = events.find(
			(e) =>
				e.type === "message_start" &&
				e.message.role === "user" &&
				typeof e.message.content === "string" &&
				e.message.content === "interrupt",
		);
		expect(queuedMessageEvent).toBeDefined();

		// Interrupt message should be in context when second LLM call is made
		expect(sawInterruptInContext).toBe(true);
	});
});
|
||||
|
||||
// Tests for agentLoopContinue: resuming the loop from an existing context
// instead of injecting a fresh user prompt.
describe("agentLoopContinue with AgentMessage", () => {
	it("should throw when context has no messages", () => {
		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [],
			tools: [],
		};

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: identityConverter,
		};

		expect(() => agentLoopContinue(context, config)).toThrow(
			"Cannot continue: no messages in context",
		);
	});

	it("should continue from existing context without emitting user message events", async () => {
		const userMessage: AgentMessage = createUserMessage("Hello");

		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [userMessage],
			tools: [],
		};

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: identityConverter,
		};

		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				const message = createAssistantMessage([
					{ type: "text", text: "Response" },
				]);
				stream.push({ type: "done", reason: "stop", message });
			});
			return stream;
		};

		const events: AgentEvent[] = [];
		const stream = agentLoopContinue(context, config, undefined, streamFn);

		for await (const event of stream) {
			events.push(event);
		}

		const messages = await stream.result();

		// Should only return the new assistant message (not the existing user message)
		expect(messages.length).toBe(1);
		expect(messages[0].role).toBe("assistant");

		// Should NOT have user message events (that's the key difference from agentLoop)
		const messageEndEvents = events.filter((e) => e.type === "message_end");
		expect(messageEndEvents.length).toBe(1);
		expect((messageEndEvents[0] as any).message.role).toBe("assistant");
	});

	it("should allow custom message types as last message (caller responsibility)", async () => {
		// Custom message that will be converted to user message by convertToLlm
		interface CustomMessage {
			role: "custom";
			text: string;
			timestamp: number;
		}

		const customMessage: CustomMessage = {
			role: "custom",
			text: "Hook content",
			timestamp: Date.now(),
		};

		const context: AgentContext = {
			systemPrompt: "You are helpful.",
			messages: [customMessage as unknown as AgentMessage],
			tools: [],
		};

		const config: AgentLoopConfig = {
			model: createModel(),
			convertToLlm: (messages) => {
				// Convert custom to user message
				return messages
					.map((m) => {
						if ((m as any).role === "custom") {
							return {
								role: "user" as const,
								content: (m as any).text,
								timestamp: m.timestamp,
							};
						}
						return m;
					})
					.filter(
						(m) =>
							m.role === "user" ||
							m.role === "assistant" ||
							m.role === "toolResult",
					) as Message[];
			},
		};

		const streamFn = () => {
			const stream = new MockAssistantStream();
			queueMicrotask(() => {
				const message = createAssistantMessage([
					{ type: "text", text: "Response to custom message" },
				]);
				stream.push({ type: "done", reason: "stop", message });
			});
			return stream;
		};

		// Should not throw - the custom message will be converted to user message
		const stream = agentLoopContinue(context, config, undefined, streamFn);

		const events: AgentEvent[] = [];
		for await (const event of stream) {
			events.push(event);
		}

		const messages = await stream.result();
		expect(messages.length).toBe(1);
		expect(messages[0].role).toBe("assistant");
	});
});
|
||||
383
packages/agent/test/agent.test.ts
Normal file
383
packages/agent/test/agent.test.ts
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
import {
|
||||
type AssistantMessage,
|
||||
type AssistantMessageEvent,
|
||||
EventStream,
|
||||
getModel,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { Agent } from "../src/index.js";
|
||||
|
||||
// Mock stream that mimics AssistantMessageEventStream
// Resolves with the final AssistantMessage on "done" or the error payload
// on "error"; any other terminal event type is a test bug.
class MockAssistantStream extends EventStream<
	AssistantMessageEvent,
	AssistantMessage
> {
	constructor() {
		super(
			// A "done" or "error" event terminates the stream.
			(event) => event.type === "done" || event.type === "error",
			// Map the terminal event to the stream's final value.
			(event) => {
				if (event.type === "done") return event.message;
				if (event.type === "error") return event.error;
				throw new Error("Unexpected event type");
			},
		);
	}
}
|
||||
|
||||
function createAssistantMessage(text: string): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text }],
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
model: "mock",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
// Unit tests for the Agent wrapper: state defaults and mutators, event
// subscription, steering/follow-up queues, abort handling, streaming
// re-entrancy guards, continue() semantics, and sessionId forwarding.
describe("Agent", () => {
	it("should create an agent instance with default state", () => {
		const agent = new Agent();

		expect(agent.state).toBeDefined();
		expect(agent.state.systemPrompt).toBe("");
		expect(agent.state.model).toBeDefined();
		expect(agent.state.thinkingLevel).toBe("off");
		expect(agent.state.tools).toEqual([]);
		expect(agent.state.messages).toEqual([]);
		expect(agent.state.isStreaming).toBe(false);
		expect(agent.state.streamMessage).toBe(null);
		expect(agent.state.pendingToolCalls).toEqual(new Set());
		expect(agent.state.error).toBeUndefined();
	});

	it("should create an agent instance with custom initial state", () => {
		const customModel = getModel("openai", "gpt-4o-mini");
		const agent = new Agent({
			initialState: {
				systemPrompt: "You are a helpful assistant.",
				model: customModel,
				thinkingLevel: "low",
			},
		});

		expect(agent.state.systemPrompt).toBe("You are a helpful assistant.");
		expect(agent.state.model).toBe(customModel);
		expect(agent.state.thinkingLevel).toBe("low");
	});

	it("should subscribe to events", () => {
		const agent = new Agent();

		let eventCount = 0;
		const unsubscribe = agent.subscribe((_event) => {
			eventCount++;
		});

		// No initial event on subscribe
		expect(eventCount).toBe(0);

		// State mutators don't emit events
		agent.setSystemPrompt("Test prompt");
		expect(eventCount).toBe(0);
		expect(agent.state.systemPrompt).toBe("Test prompt");

		// Unsubscribe should work
		unsubscribe();
		agent.setSystemPrompt("Another prompt");
		expect(eventCount).toBe(0); // Should not increase
	});

	it("should update state with mutators", () => {
		const agent = new Agent();

		// Test setSystemPrompt
		agent.setSystemPrompt("Custom prompt");
		expect(agent.state.systemPrompt).toBe("Custom prompt");

		// Test setModel
		const newModel = getModel("google", "gemini-2.5-flash");
		agent.setModel(newModel);
		expect(agent.state.model).toBe(newModel);

		// Test setThinkingLevel
		agent.setThinkingLevel("high");
		expect(agent.state.thinkingLevel).toBe("high");

		// Test setTools
		const tools = [{ name: "test", description: "test tool" } as any];
		agent.setTools(tools);
		expect(agent.state.tools).toBe(tools);

		// Test replaceMessages
		const messages = [
			{ role: "user" as const, content: "Hello", timestamp: Date.now() },
		];
		agent.replaceMessages(messages);
		expect(agent.state.messages).toEqual(messages);
		expect(agent.state.messages).not.toBe(messages); // Should be a copy

		// Test appendMessage
		const newMessage = {
			role: "assistant" as const,
			content: [{ type: "text" as const, text: "Hi" }],
		};
		agent.appendMessage(newMessage as any);
		expect(agent.state.messages).toHaveLength(2);
		expect(agent.state.messages[1]).toBe(newMessage);

		// Test clearMessages
		agent.clearMessages();
		expect(agent.state.messages).toEqual([]);
	});

	it("should support steering message queue", async () => {
		const agent = new Agent();

		const message = {
			role: "user" as const,
			content: "Steering message",
			timestamp: Date.now(),
		};
		agent.steer(message);

		// The message is queued but not yet in state.messages
		expect(agent.state.messages).not.toContainEqual(message);
	});

	it("should support follow-up message queue", async () => {
		const agent = new Agent();

		const message = {
			role: "user" as const,
			content: "Follow-up message",
			timestamp: Date.now(),
		};
		agent.followUp(message);

		// The message is queued but not yet in state.messages
		expect(agent.state.messages).not.toContainEqual(message);
	});

	it("should handle abort controller", () => {
		const agent = new Agent();

		// Should not throw even if nothing is running
		expect(() => agent.abort()).not.toThrow();
	});

	it("should throw when prompt() called while streaming", async () => {
		let abortSignal: AbortSignal | undefined;
		const agent = new Agent({
			// Use a stream function that responds to abort
			streamFn: (_model, _context, options) => {
				abortSignal = options?.signal;
				const stream = new MockAssistantStream();
				queueMicrotask(() => {
					stream.push({ type: "start", partial: createAssistantMessage("") });
					// Check abort signal periodically
					const checkAbort = () => {
						if (abortSignal?.aborted) {
							stream.push({
								type: "error",
								reason: "aborted",
								error: createAssistantMessage("Aborted"),
							});
						} else {
							setTimeout(checkAbort, 5);
						}
					};
					checkAbort();
				});
				return stream;
			},
		});

		// Start first prompt (don't await, it will block until abort)
		const firstPrompt = agent.prompt("First message");

		// Wait a tick for isStreaming to be set
		await new Promise((resolve) => setTimeout(resolve, 10));
		expect(agent.state.isStreaming).toBe(true);

		// Second prompt should reject
		await expect(agent.prompt("Second message")).rejects.toThrow(
			"Agent is already processing a prompt. Use steer() or followUp() to queue messages, or wait for completion.",
		);

		// Cleanup - abort to stop the stream
		agent.abort();
		await firstPrompt.catch(() => {}); // Ignore abort error
	});

	it("should throw when continue() called while streaming", async () => {
		let abortSignal: AbortSignal | undefined;
		const agent = new Agent({
			streamFn: (_model, _context, options) => {
				abortSignal = options?.signal;
				const stream = new MockAssistantStream();
				queueMicrotask(() => {
					stream.push({ type: "start", partial: createAssistantMessage("") });
					const checkAbort = () => {
						if (abortSignal?.aborted) {
							stream.push({
								type: "error",
								reason: "aborted",
								error: createAssistantMessage("Aborted"),
							});
						} else {
							setTimeout(checkAbort, 5);
						}
					};
					checkAbort();
				});
				return stream;
			},
		});

		// Start first prompt
		const firstPrompt = agent.prompt("First message");
		await new Promise((resolve) => setTimeout(resolve, 10));
		expect(agent.state.isStreaming).toBe(true);

		// continue() should reject
		await expect(agent.continue()).rejects.toThrow(
			"Agent is already processing. Wait for completion before continuing.",
		);

		// Cleanup
		agent.abort();
		await firstPrompt.catch(() => {});
	});

	it("continue() should process queued follow-up messages after an assistant turn", async () => {
		const agent = new Agent({
			streamFn: () => {
				const stream = new MockAssistantStream();
				queueMicrotask(() => {
					stream.push({
						type: "done",
						reason: "stop",
						message: createAssistantMessage("Processed"),
					});
				});
				return stream;
			},
		});

		// Seed history ending with an assistant turn so continue() is legal.
		agent.replaceMessages([
			{
				role: "user",
				content: [{ type: "text", text: "Initial" }],
				timestamp: Date.now() - 10,
			},
			createAssistantMessage("Initial response"),
		]);

		agent.followUp({
			role: "user",
			content: [{ type: "text", text: "Queued follow-up" }],
			timestamp: Date.now(),
		});

		await expect(agent.continue()).resolves.toBeUndefined();

		const hasQueuedFollowUp = agent.state.messages.some((message) => {
			if (message.role !== "user") return false;
			if (typeof message.content === "string")
				return message.content === "Queued follow-up";
			return message.content.some(
				(part) => part.type === "text" && part.text === "Queued follow-up",
			);
		});

		expect(hasQueuedFollowUp).toBe(true);
		expect(agent.state.messages[agent.state.messages.length - 1].role).toBe(
			"assistant",
		);
	});

	it("continue() should keep one-at-a-time steering semantics from assistant tail", async () => {
		let responseCount = 0;
		const agent = new Agent({
			streamFn: () => {
				const stream = new MockAssistantStream();
				responseCount++;
				queueMicrotask(() => {
					stream.push({
						type: "done",
						reason: "stop",
						message: createAssistantMessage(`Processed ${responseCount}`),
					});
				});
				return stream;
			},
		});

		agent.replaceMessages([
			{
				role: "user",
				content: [{ type: "text", text: "Initial" }],
				timestamp: Date.now() - 10,
			},
			createAssistantMessage("Initial response"),
		]);

		agent.steer({
			role: "user",
			content: [{ type: "text", text: "Steering 1" }],
			timestamp: Date.now(),
		});
		agent.steer({
			role: "user",
			content: [{ type: "text", text: "Steering 2" }],
			timestamp: Date.now() + 1,
		});

		await expect(agent.continue()).resolves.toBeUndefined();

		// Expect user/assistant alternation: each steering message got its own turn.
		const recentMessages = agent.state.messages.slice(-4);
		expect(recentMessages.map((m) => m.role)).toEqual([
			"user",
			"assistant",
			"user",
			"assistant",
		]);
		expect(responseCount).toBe(2);
	});

	it("forwards sessionId to streamFn options", async () => {
		let receivedSessionId: string | undefined;
		const agent = new Agent({
			sessionId: "session-abc",
			streamFn: (_model, _context, options) => {
				receivedSessionId = options?.sessionId;
				const stream = new MockAssistantStream();
				queueMicrotask(() => {
					const message = createAssistantMessage("ok");
					stream.push({ type: "done", reason: "stop", message });
				});
				return stream;
			},
		});

		await agent.prompt("hello");
		expect(receivedSessionId).toBe("session-abc");

		// Test setter
		agent.sessionId = "session-def";
		expect(agent.sessionId).toBe("session-def");

		await agent.prompt("hello again");
		expect(receivedSessionId).toBe("session-def");
	});
});
|
||||
316
packages/agent/test/bedrock-models.test.ts
Normal file
316
packages/agent/test/bedrock-models.test.ts
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
/**
|
||||
* A test suite to ensure Amazon Bedrock models work correctly with the agent loop.
|
||||
*
|
||||
* Some Bedrock models don't support all features (e.g., reasoning signatures).
|
||||
* This test suite verifies that the agent loop works with various Bedrock models.
|
||||
*
|
||||
* This test suite is not enabled by default unless AWS credentials and
|
||||
* `BEDROCK_EXTENSIVE_MODEL_TEST` environment variables are set.
|
||||
*
|
||||
* You can run this test suite with:
|
||||
* ```bash
|
||||
* $ AWS_REGION=us-east-1 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=pi npm test -- ./test/bedrock-models.test.ts
|
||||
* ```
|
||||
*
|
||||
* ## Known Issues by Category
|
||||
*
|
||||
* 1. **Inference Profile Required**: Some models require an inference profile ARN instead of on-demand.
|
||||
* 2. **Invalid Model ID**: Model identifiers that don't exist in the current region.
|
||||
* 3. **Max Tokens Exceeded**: Model's maxTokens in our config exceeds the actual limit.
|
||||
* 4. **No Reasoning in User Messages**: Model rejects reasoning content when replayed in conversation.
|
||||
* 5. **Invalid Signature Format**: Model validates signature format (Anthropic newer models).
|
||||
*/
|
||||
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { getModels } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { Agent } from "../src/index.js";
|
||||
import { hasBedrockCredentials } from "./bedrock-utils.js";
|
||||
|
||||
// =============================================================================
|
||||
// Known Issue Categories
|
||||
// =============================================================================
|
||||
|
||||
/** Models that require inference profile ARN (not available on-demand in us-east-1) */
|
||||
const REQUIRES_INFERENCE_PROFILE = new Set([
|
||||
"anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"anthropic.claude-3-5-sonnet-20241022-v2:0",
|
||||
"anthropic.claude-3-opus-20240229-v1:0",
|
||||
"meta.llama3-1-70b-instruct-v1:0",
|
||||
"meta.llama3-1-8b-instruct-v1:0",
|
||||
]);
|
||||
|
||||
/** Models with invalid identifiers (not available in us-east-1 or don't exist) */
|
||||
const INVALID_MODEL_ID = new Set([
|
||||
"deepseek.v3-v1:0",
|
||||
"eu.anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||
"eu.anthropic.claude-opus-4-5-20251101-v1:0",
|
||||
"eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"qwen.qwen3-235b-a22b-2507-v1:0",
|
||||
"qwen.qwen3-coder-480b-a35b-v1:0",
|
||||
]);
|
||||
|
||||
/** Models where our maxTokens config exceeds the model's actual limit */
|
||||
const MAX_TOKENS_EXCEEDED = new Set([
|
||||
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Models that reject reasoning content in user messages (when replaying conversation).
|
||||
* These work for multi-turn but fail when synthetic thinking is injected.
|
||||
*/
|
||||
const NO_REASONING_IN_USER_MESSAGES = new Set([
|
||||
// Mistral models
|
||||
"mistral.ministral-3-14b-instruct",
|
||||
"mistral.ministral-3-8b-instruct",
|
||||
"mistral.mistral-large-2402-v1:0",
|
||||
"mistral.voxtral-mini-3b-2507",
|
||||
"mistral.voxtral-small-24b-2507",
|
||||
// Nvidia models
|
||||
"nvidia.nemotron-nano-12b-v2",
|
||||
"nvidia.nemotron-nano-9b-v2",
|
||||
// Qwen models
|
||||
"qwen.qwen3-coder-30b-a3b-v1:0",
|
||||
// Amazon Nova models
|
||||
"us.amazon.nova-lite-v1:0",
|
||||
"us.amazon.nova-micro-v1:0",
|
||||
"us.amazon.nova-premier-v1:0",
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
// Meta Llama models
|
||||
"us.meta.llama3-2-11b-instruct-v1:0",
|
||||
"us.meta.llama3-2-1b-instruct-v1:0",
|
||||
"us.meta.llama3-2-3b-instruct-v1:0",
|
||||
"us.meta.llama3-2-90b-instruct-v1:0",
|
||||
"us.meta.llama3-3-70b-instruct-v1:0",
|
||||
// DeepSeek
|
||||
"us.deepseek.r1-v1:0",
|
||||
// Older Anthropic models
|
||||
"anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
"anthropic.claude-3-haiku-20240307-v1:0",
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
// Cohere models
|
||||
"cohere.command-r-plus-v1:0",
|
||||
"cohere.command-r-v1:0",
|
||||
// Google models
|
||||
"google.gemma-3-27b-it",
|
||||
"google.gemma-3-4b-it",
|
||||
// Non-Anthropic models that don't support signatures (now handled by omitting signature)
|
||||
// but still reject reasoning content in user messages
|
||||
"global.amazon.nova-2-lite-v1:0",
|
||||
"minimax.minimax-m2",
|
||||
"moonshot.kimi-k2-thinking",
|
||||
"openai.gpt-oss-120b-1:0",
|
||||
"openai.gpt-oss-20b-1:0",
|
||||
"openai.gpt-oss-safeguard-120b",
|
||||
"openai.gpt-oss-safeguard-20b",
|
||||
"qwen.qwen3-32b-v1:0",
|
||||
"qwen.qwen3-next-80b-a3b",
|
||||
"qwen.qwen3-vl-235b-a22b",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Models that validate signature format (Anthropic newer models).
|
||||
* These work for multi-turn but fail when synthetic/invalid signature is injected.
|
||||
*/
|
||||
const VALIDATES_SIGNATURE_FORMAT = new Set([
|
||||
"global.anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||
"global.anthropic.claude-opus-4-5-20251101-v1:0",
|
||||
"global.anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
"us.anthropic.claude-opus-4-1-20250805-v1:0",
|
||||
"us.anthropic.claude-opus-4-20250514-v1:0",
|
||||
]);
|
||||
|
||||
/**
|
||||
* DeepSeek R1 fails multi-turn because it rejects reasoning in the replayed assistant message.
|
||||
*/
|
||||
const REJECTS_REASONING_ON_REPLAY = new Set(["us.deepseek.r1-v1:0"]);
|
||||
|
||||
// =============================================================================
|
||||
// Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
function isModelUnavailable(modelId: string): boolean {
|
||||
return (
|
||||
REQUIRES_INFERENCE_PROFILE.has(modelId) ||
|
||||
INVALID_MODEL_ID.has(modelId) ||
|
||||
MAX_TOKENS_EXCEEDED.has(modelId)
|
||||
);
|
||||
}
|
||||
|
||||
function failsMultiTurnWithThinking(modelId: string): boolean {
|
||||
return REJECTS_REASONING_ON_REPLAY.has(modelId);
|
||||
}
|
||||
|
||||
function failsSyntheticSignature(modelId: string): boolean {
|
||||
return (
|
||||
NO_REASONING_IN_USER_MESSAGES.has(modelId) ||
|
||||
VALIDATES_SIGNATURE_FORMAT.has(modelId)
|
||||
);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tests
|
||||
// =============================================================================
|
||||
|
||||
// Iterates every configured Bedrock model and runs up to three probes per
// model, skipping the ones flagged in the known-issue sets above. Only runs
// when credentials AND the opt-in env var are present.
describe("Amazon Bedrock Models - Agent Loop", () => {
	const shouldRunExtensiveTests =
		hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST;

	// Get all Amazon Bedrock models
	const allBedrockModels = getModels("amazon-bedrock");

	if (shouldRunExtensiveTests) {
		for (const model of allBedrockModels) {
			const modelId = model.id;

			describe(`Model: ${modelId}`, () => {
				// Skip entirely unavailable models
				const unavailable = isModelUnavailable(modelId);

				// Probe 1: single prompt, no tools, no thinking.
				it.skipIf(unavailable)(
					"should handle basic text prompt",
					{ timeout: 60_000 },
					async () => {
						const agent = new Agent({
							initialState: {
								systemPrompt:
									"You are a helpful assistant. Be extremely concise.",
								model,
								thinkingLevel: "off",
								tools: [],
							},
						});

						await agent.prompt("Reply with exactly: 'OK'");

						if (agent.state.error) {
							throw new Error(`Basic prompt error: ${agent.state.error}`);
						}

						expect(agent.state.isStreaming).toBe(false);
						expect(agent.state.messages.length).toBe(2);

						const assistantMessage = agent.state.messages[1];
						if (assistantMessage.role !== "assistant")
							throw new Error("Expected assistant message");

						console.log(`${modelId}: OK`);
					},
				);

				// Skip if model is unavailable or known to fail multi-turn with thinking
				const skipMultiTurn =
					unavailable || failsMultiTurnWithThinking(modelId);

				// Probe 2: two prompts with thinking enabled, so turn two replays
				// an assistant message that may carry reasoning content.
				it.skipIf(skipMultiTurn)(
					"should handle multi-turn conversation with thinking content in history",
					{ timeout: 120_000 },
					async () => {
						const agent = new Agent({
							initialState: {
								systemPrompt:
									"You are a helpful assistant. Be extremely concise.",
								model,
								thinkingLevel: "medium",
								tools: [],
							},
						});

						// First turn
						await agent.prompt("My name is Alice.");

						if (agent.state.error) {
							throw new Error(`First turn error: ${agent.state.error}`);
						}

						// Second turn - this should replay the first assistant message which may contain thinking
						await agent.prompt("What is my name?");

						if (agent.state.error) {
							throw new Error(`Second turn error: ${agent.state.error}`);
						}

						expect(agent.state.messages.length).toBe(4);
						console.log(`${modelId}: multi-turn OK`);
					},
				);

				// Skip if model is unavailable or known to fail synthetic signature
				const skipSynthetic = unavailable || failsSyntheticSignature(modelId);

				// Probe 3: inject a fabricated thinking block + signature into
				// history, then prompt — exercises signature validation paths.
				it.skipIf(skipSynthetic)(
					"should handle conversation with synthetic thinking signature in history",
					{ timeout: 60_000 },
					async () => {
						const agent = new Agent({
							initialState: {
								systemPrompt:
									"You are a helpful assistant. Be extremely concise.",
								model,
								thinkingLevel: "off",
								tools: [],
							},
						});

						// Inject a message with a thinking block that has a signature
						const syntheticAssistantMessage: AssistantMessage = {
							role: "assistant",
							content: [
								{
									type: "thinking",
									thinking: "I need to remember the user's name.",
									thinkingSignature: "synthetic-signature-123",
								},
								{ type: "text", text: "Nice to meet you, Alice!" },
							],
							api: "bedrock-converse-stream",
							provider: "amazon-bedrock",
							model: modelId,
							usage: {
								input: 10,
								output: 20,
								cacheRead: 0,
								cacheWrite: 0,
								totalTokens: 30,
								cost: {
									input: 0,
									output: 0,
									cacheRead: 0,
									cacheWrite: 0,
									total: 0,
								},
							},
							stopReason: "stop",
							timestamp: Date.now(),
						};

						agent.replaceMessages([
							{
								role: "user",
								content: "My name is Alice.",
								timestamp: Date.now(),
							},
							syntheticAssistantMessage,
						]);

						await agent.prompt("What is my name?");

						if (agent.state.error) {
							throw new Error(
								`Synthetic signature error: ${agent.state.error}`,
							);
						}

						expect(agent.state.messages.length).toBe(4);
						console.log(`${modelId}: synthetic signature OK`);
					},
				);
			});
		}
	} else {
		it.skip("skipped - set AWS credentials and BEDROCK_EXTENSIVE_MODEL_TEST=1 to run", () => {});
	}
});
|
||||
18
packages/agent/test/bedrock-utils.ts
Normal file
18
packages/agent/test/bedrock-utils.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
/**
|
||||
* Utility functions for Amazon Bedrock tests
|
||||
*/
|
||||
|
||||
/**
|
||||
* Check if any valid AWS credentials are configured for Bedrock.
|
||||
* Returns true if any of the following are set:
|
||||
* - AWS_PROFILE (named profile from ~/.aws/credentials)
|
||||
* - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
|
||||
* - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
|
||||
*/
|
||||
export function hasBedrockCredentials(): boolean {
|
||||
return !!(
|
||||
process.env.AWS_PROFILE ||
|
||||
(process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY) ||
|
||||
process.env.AWS_BEARER_TOKEN_BEDROCK
|
||||
);
|
||||
}
|
||||
571
packages/agent/test/e2e.test.ts
Normal file
571
packages/agent/test/e2e.test.ts
Normal file
|
|
@ -0,0 +1,571 @@
|
|||
import type {
|
||||
AssistantMessage,
|
||||
Model,
|
||||
ToolResultMessage,
|
||||
UserMessage,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { Agent } from "../src/index.js";
|
||||
import { hasBedrockCredentials } from "./bedrock-utils.js";
|
||||
import { calculateTool } from "./utils/calculate.js";
|
||||
|
||||
async function basicPrompt(model: Model<any>) {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt: "You are a helpful assistant. Keep your responses concise.",
|
||||
model,
|
||||
thinkingLevel: "off",
|
||||
tools: [],
|
||||
},
|
||||
});
|
||||
|
||||
await agent.prompt("What is 2+2? Answer with just the number.");
|
||||
|
||||
expect(agent.state.isStreaming).toBe(false);
|
||||
expect(agent.state.messages.length).toBe(2);
|
||||
expect(agent.state.messages[0].role).toBe("user");
|
||||
expect(agent.state.messages[1].role).toBe("assistant");
|
||||
|
||||
const assistantMessage = agent.state.messages[1];
|
||||
if (assistantMessage.role !== "assistant")
|
||||
throw new Error("Expected assistant message");
|
||||
expect(assistantMessage.content.length).toBeGreaterThan(0);
|
||||
|
||||
const textContent = assistantMessage.content.find((c) => c.type === "text");
|
||||
expect(textContent).toBeDefined();
|
||||
if (textContent?.type !== "text") throw new Error("Expected text content");
|
||||
expect(textContent.text).toContain("4");
|
||||
}
|
||||
|
||||
/**
 * Shared scenario: the model must call the calculator tool for 123 * 456,
 * the tool result must carry 56088, and the final assistant message must
 * restate the number (with or without comma grouping).
 */
async function toolExecution(model: Model<any>) {
	const agent = new Agent({
		initialState: {
			systemPrompt:
				"You are a helpful assistant. Always use the calculator tool for math.",
			model,
			thinkingLevel: "off",
			tools: [calculateTool],
		},
	});

	await agent.prompt("Calculate 123 * 456 using the calculator tool.");

	expect(agent.state.isStreaming).toBe(false);
	// user + assistant(toolCall) + toolResult at minimum.
	expect(agent.state.messages.length).toBeGreaterThanOrEqual(3);

	const toolResultMsg = agent.state.messages.find(
		(m) => m.role === "toolResult",
	);
	expect(toolResultMsg).toBeDefined();
	if (toolResultMsg?.role !== "toolResult")
		throw new Error("Expected tool result message");
	// Flatten the result's text parts into one string for substring checks.
	const textContent =
		toolResultMsg.content
			?.filter((c) => c.type === "text")
			.map((c: any) => c.text)
			.join("\n") || "";
	expect(textContent).toBeDefined();

	const expectedResult = 123 * 456;
	expect(textContent).toContain(String(expectedResult));

	const finalMessage = agent.state.messages[agent.state.messages.length - 1];
	if (finalMessage.role !== "assistant")
		throw new Error("Expected final assistant message");
	const finalText = finalMessage.content.find((c) => c.type === "text");
	expect(finalText).toBeDefined();
	if (finalText?.type !== "text") throw new Error("Expected text content");
	// Check for number with or without comma formatting
	const hasNumber =
		finalText.text.includes(String(expectedResult)) ||
		finalText.text.includes("56,088") ||
		finalText.text.includes("56088");
	expect(hasNumber).toBe(true);
}
|
||||
|
||||
/**
 * Shared scenario: abort() fired 100ms into a multi-step prompt must surface
 * as a final assistant message with stopReason "aborted" and a matching
 * error on agent state.
 */
async function abortExecution(model: Model<any>) {
	const agent = new Agent({
		initialState: {
			systemPrompt: "You are a helpful assistant.",
			model,
			thinkingLevel: "off",
			tools: [calculateTool],
		},
	});

	const promptPromise = agent.prompt(
		"Calculate 100 * 200, then 300 * 400, then sum the results.",
	);

	// Abort while the stream is (presumably) still in flight.
	setTimeout(() => {
		agent.abort();
	}, 100);

	await promptPromise;

	expect(agent.state.isStreaming).toBe(false);
	expect(agent.state.messages.length).toBeGreaterThanOrEqual(2);

	const lastMessage = agent.state.messages[agent.state.messages.length - 1];
	if (lastMessage.role !== "assistant")
		throw new Error("Expected assistant message");
	expect(lastMessage.stopReason).toBe("aborted");
	expect(lastMessage.errorMessage).toBeDefined();
	// State-level error mirrors the message-level one.
	expect(agent.state.error).toBeDefined();
	expect(agent.state.error).toBe(lastMessage.errorMessage);
}
|
||||
|
||||
/**
 * Shared scenario: a subscriber must observe the full lifecycle event
 * sequence (agent_start/end, message_start/end, at least one
 * message_update) over a single prompt.
 */
async function stateUpdates(model: Model<any>) {
	const agent = new Agent({
		initialState: {
			systemPrompt: "You are a helpful assistant.",
			model,
			thinkingLevel: "off",
			tools: [],
		},
	});

	// Record every event type in arrival order.
	const events: Array<string> = [];

	agent.subscribe((event) => {
		events.push(event.type);
	});

	await agent.prompt("Count from 1 to 5.");

	// Should have received lifecycle events
	expect(events).toContain("agent_start");
	expect(events).toContain("agent_end");
	expect(events).toContain("message_start");
	expect(events).toContain("message_end");
	// May have message_update events during streaming
	const hasMessageUpdates = events.some((e) => e === "message_update");
	expect(hasMessageUpdates).toBe(true);

	// Check final state
	expect(agent.state.isStreaming).toBe(false);
	expect(agent.state.messages.length).toBe(2); // User message + assistant response
}
|
||||
|
||||
async function multiTurnConversation(model: Model<any>) {
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
systemPrompt: "You are a helpful assistant.",
|
||||
model,
|
||||
thinkingLevel: "off",
|
||||
tools: [],
|
||||
},
|
||||
});
|
||||
|
||||
await agent.prompt("My name is Alice.");
|
||||
expect(agent.state.messages.length).toBe(2);
|
||||
|
||||
await agent.prompt("What is my name?");
|
||||
expect(agent.state.messages.length).toBe(4);
|
||||
|
||||
const lastMessage = agent.state.messages[3];
|
||||
if (lastMessage.role !== "assistant")
|
||||
throw new Error("Expected assistant message");
|
||||
const lastText = lastMessage.content.find((c) => c.type === "text");
|
||||
if (lastText?.type !== "text") throw new Error("Expected text content");
|
||||
expect(lastText.text.toLowerCase()).toContain("alice");
|
||||
}
|
||||
|
||||
describe("Agent E2E Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)(
|
||||
"Google Provider (gemini-2.5-flash)",
|
||||
() => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)(
|
||||
"OpenAI Provider (gpt-4o-mini)",
|
||||
() => {
|
||||
const model = getModel("openai", "gpt-4o-mini");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
|
||||
"Anthropic Provider (claude-haiku-4-5)",
|
||||
() => {
|
||||
const model = getModel("anthropic", "claude-haiku-4-5");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-3)", () => {
|
||||
const model = getModel("xai", "grok-3");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)(
|
||||
"Groq Provider (openai/gpt-oss-20b)",
|
||||
() => {
|
||||
const model = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)(
|
||||
"Cerebras Provider (gpt-oss-120b)",
|
||||
() => {
|
||||
const model = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)(
|
||||
"zAI Provider (glm-4.5-air)",
|
||||
() => {
|
||||
const model = getModel("zai", "glm-4.5-air");
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
describe.skipIf(!hasBedrockCredentials())(
|
||||
"Amazon Bedrock Provider (claude-sonnet-4-5)",
|
||||
() => {
|
||||
const model = getModel(
|
||||
"amazon-bedrock",
|
||||
"global.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
);
|
||||
|
||||
it("should handle basic text prompt", async () => {
|
||||
await basicPrompt(model);
|
||||
});
|
||||
|
||||
it("should execute tools correctly", async () => {
|
||||
await toolExecution(model);
|
||||
});
|
||||
|
||||
it("should handle abort during execution", async () => {
|
||||
await abortExecution(model);
|
||||
});
|
||||
|
||||
it("should emit state updates during streaming", async () => {
|
||||
await stateUpdates(model);
|
||||
});
|
||||
|
||||
it("should maintain context across multiple turns", async () => {
|
||||
await multiTurnConversation(model);
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
// Covers Agent.continue(): input validation (offline), then live
// continuation from a trailing user message and from a trailing tool result
// (both gated on ANTHROPIC_API_KEY).
describe("Agent.continue()", () => {
	describe("validation", () => {
		it("should throw when no messages in context", async () => {
			const agent = new Agent({
				initialState: {
					systemPrompt: "Test",
					model: getModel("anthropic", "claude-haiku-4-5"),
				},
			});

			await expect(agent.continue()).rejects.toThrow(
				"No messages to continue from",
			);
		});

		it("should throw when last message is assistant", async () => {
			const agent = new Agent({
				initialState: {
					systemPrompt: "Test",
					model: getModel("anthropic", "claude-haiku-4-5"),
				},
			});

			// A completed assistant turn is not a valid continuation point.
			const assistantMessage: AssistantMessage = {
				role: "assistant",
				content: [{ type: "text", text: "Hello" }],
				api: "anthropic-messages",
				provider: "anthropic",
				model: "claude-haiku-4-5",
				usage: {
					input: 0,
					output: 0,
					cacheRead: 0,
					cacheWrite: 0,
					totalTokens: 0,
					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
				},
				stopReason: "stop",
				timestamp: Date.now(),
			};
			agent.replaceMessages([assistantMessage]);

			await expect(agent.continue()).rejects.toThrow(
				"Cannot continue from message role: assistant",
			);
		});
	});

	describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
		"continue from user message",
		() => {
			const model = getModel("anthropic", "claude-haiku-4-5");

			it("should continue and get response when last message is user", async () => {
				const agent = new Agent({
					initialState: {
						systemPrompt:
							"You are a helpful assistant. Follow instructions exactly.",
						model,
						thinkingLevel: "off",
						tools: [],
					},
				});

				// Manually add a user message without calling prompt()
				const userMessage: UserMessage = {
					role: "user",
					content: [{ type: "text", text: "Say exactly: HELLO WORLD" }],
					timestamp: Date.now(),
				};
				agent.replaceMessages([userMessage]);

				// Continue from the user message
				await agent.continue();

				expect(agent.state.isStreaming).toBe(false);
				expect(agent.state.messages.length).toBe(2);
				expect(agent.state.messages[0].role).toBe("user");
				expect(agent.state.messages[1].role).toBe("assistant");

				const assistantMsg = agent.state.messages[1] as AssistantMessage;
				const textContent = assistantMsg.content.find((c) => c.type === "text");
				expect(textContent).toBeDefined();
				if (textContent?.type === "text") {
					expect(textContent.text.toUpperCase()).toContain("HELLO WORLD");
				}
			});
		},
	);

	describe.skipIf(!process.env.ANTHROPIC_API_KEY)(
		"continue from tool result",
		() => {
			const model = getModel("anthropic", "claude-haiku-4-5");

			it("should continue and process tool results", async () => {
				const agent = new Agent({
					initialState: {
						systemPrompt:
							"You are a helpful assistant. After getting a calculation result, state the answer clearly.",
						model,
						thinkingLevel: "off",
						tools: [calculateTool],
					},
				});

				// Set up a conversation state as if tool was just executed
				const userMessage: UserMessage = {
					role: "user",
					content: [{ type: "text", text: "What is 5 + 3?" }],
					timestamp: Date.now(),
				};

				// Assistant turn that ended on a tool call (stopReason "toolUse").
				const assistantMessage: AssistantMessage = {
					role: "assistant",
					content: [
						{ type: "text", text: "Let me calculate that." },
						{
							type: "toolCall",
							id: "calc-1",
							name: "calculate",
							arguments: { expression: "5 + 3" },
						},
					],
					api: "anthropic-messages",
					provider: "anthropic",
					model: "claude-haiku-4-5",
					usage: {
						input: 0,
						output: 0,
						cacheRead: 0,
						cacheWrite: 0,
						totalTokens: 0,
						cost: {
							input: 0,
							output: 0,
							cacheRead: 0,
							cacheWrite: 0,
							total: 0,
						},
					},
					stopReason: "toolUse",
					timestamp: Date.now(),
				};

				// The tool result answering the pending call above.
				const toolResult: ToolResultMessage = {
					role: "toolResult",
					toolCallId: "calc-1",
					toolName: "calculate",
					content: [{ type: "text", text: "5 + 3 = 8" }],
					isError: false,
					timestamp: Date.now(),
				};

				agent.replaceMessages([userMessage, assistantMessage, toolResult]);

				// Continue from the tool result
				await agent.continue();

				expect(agent.state.isStreaming).toBe(false);
				// Should have added an assistant response
				expect(agent.state.messages.length).toBeGreaterThanOrEqual(4);

				const lastMessage =
					agent.state.messages[agent.state.messages.length - 1];
				expect(lastMessage.role).toBe("assistant");

				if (lastMessage.role === "assistant") {
					const textContent = lastMessage.content
						.filter((c) => c.type === "text")
						.map((c) => (c as { type: "text"; text: string }).text)
						.join(" ");
					// Should mention 8 in the response
					expect(textContent).toMatch(/8/);
				}
			});
		},
	);
});
|
||||
37
packages/agent/test/utils/calculate.ts
Normal file
37
packages/agent/test/utils/calculate.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import { type Static, Type } from "@sinclair/typebox";
|
||||
import type { AgentTool, AgentToolResult } from "../../src/types.js";
|
||||
|
||||
export interface CalculateResult extends AgentToolResult<undefined> {
|
||||
content: Array<{ type: "text"; text: string }>;
|
||||
details: undefined;
|
||||
}
|
||||
|
||||
export function calculate(expression: string): CalculateResult {
|
||||
try {
|
||||
const result = new Function(`return ${expression}`)();
|
||||
return {
|
||||
content: [{ type: "text", text: `${expression} = ${result}` }],
|
||||
details: undefined,
|
||||
};
|
||||
} catch (e: any) {
|
||||
throw new Error(e.message || String(e));
|
||||
}
|
||||
}
|
||||
|
||||
// TypeBox schema for the tool's single argument.
const calculateSchema = Type.Object({
	expression: Type.String({
		description: "The mathematical expression to evaluate",
	}),
});

type CalculateParams = Static<typeof calculateSchema>;

// Agent-facing wrapper that delegates to calculate() above.
export const calculateTool: AgentTool<typeof calculateSchema, undefined> = {
	label: "Calculator",
	name: "calculate",
	description: "Evaluate mathematical expressions",
	parameters: calculateSchema,
	execute: async (_toolCallId: string, args: CalculateParams) => {
		return calculate(args.expression);
	},
};
|
||||
61
packages/agent/test/utils/get-current-time.ts
Normal file
61
packages/agent/test/utils/get-current-time.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import { type Static, Type } from "@sinclair/typebox";
|
||||
import type { AgentTool, AgentToolResult } from "../../src/types.js";
|
||||
|
||||
export interface GetCurrentTimeResult extends AgentToolResult<{
|
||||
utcTimestamp: number;
|
||||
}> {}
|
||||
|
||||
export async function getCurrentTime(
|
||||
timezone?: string,
|
||||
): Promise<GetCurrentTimeResult> {
|
||||
const date = new Date();
|
||||
if (timezone) {
|
||||
try {
|
||||
const timeStr = date.toLocaleString("en-US", {
|
||||
timeZone: timezone,
|
||||
dateStyle: "full",
|
||||
timeStyle: "long",
|
||||
});
|
||||
return {
|
||||
content: [{ type: "text", text: timeStr }],
|
||||
details: { utcTimestamp: date.getTime() },
|
||||
};
|
||||
} catch (_e) {
|
||||
throw new Error(
|
||||
`Invalid timezone: ${timezone}. Current UTC time: ${date.toISOString()}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const timeStr = date.toLocaleString("en-US", {
|
||||
dateStyle: "full",
|
||||
timeStyle: "long",
|
||||
});
|
||||
return {
|
||||
content: [{ type: "text", text: timeStr }],
|
||||
details: { utcTimestamp: date.getTime() },
|
||||
};
|
||||
}
|
||||
|
||||
// TypeBox schema for the optional timezone argument.
const getCurrentTimeSchema = Type.Object({
	timezone: Type.Optional(
		Type.String({
			description:
				"Optional timezone (e.g., 'America/New_York', 'Europe/London')",
		}),
	),
});

type GetCurrentTimeParams = Static<typeof getCurrentTimeSchema>;

// Agent-facing wrapper that delegates to getCurrentTime() above.
export const getCurrentTimeTool: AgentTool<
	typeof getCurrentTimeSchema,
	{ utcTimestamp: number }
> = {
	label: "Current Time",
	name: "get_current_time",
	description: "Get the current date and time",
	parameters: getCurrentTimeSchema,
	execute: async (_toolCallId: string, args: GetCurrentTimeParams) => {
		return getCurrentTime(args.timezone);
	},
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue