move pi-mono into companion-cloud as apps/companion-os

- Copy all pi-mono source into apps/companion-os/
- Update Dockerfile to COPY the pre-built binary instead of downloading it from GitHub Releases
- Update deploy-staging.yml to build pi from source (bun compile) before the Docker build
- Add apps/companion-os/** to path triggers
- No more cross-repo dispatch needed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-21 11:04:38 +00:00 · 2026-03-07 09:22:50 -08:00 · 2026-03-07 09:22:50 -08:00 · 0250f72976
commit 0250f72976
579 changed files with 206942 additions and 0 deletions
--- a/packages/ai/test/interleaved-thinking.test.ts
+++ b/packages/ai/test/interleaved-thinking.test.ts
@ -0,0 +1,206 @@
+import { Type } from "@sinclair/typebox";
+import { describe, expect, it } from "vitest";
+import { getEnvApiKey } from "../src/env-api-keys.js";
+import { getModel } from "../src/models.js";
+import { completeSimple } from "../src/stream.js";
+import type {
+  Api,
+  Context,
+  Model,
+  StopReason,
+  Tool,
+  ToolCall,
+  ToolResultMessage,
+} from "../src/types.js";
+import { StringEnum } from "../src/utils/typebox-helpers.js";
+import { hasBedrockCredentials } from "./bedrock-utils.js";
+
+/**
+ * TypeBox schema for the calculator tool's arguments: two numeric operands
+ * (`a`, `b`) and the name of the operation to apply to them.
+ */
+const calculatorSchema = Type.Object({
+  a: Type.Number({ description: "First number" }),
+  b: Type.Number({ description: "Second number" }),
+  // StringEnum is a project helper; presumably it yields a string schema
+  // restricted to the listed values — confirm in utils/typebox-helpers.
+  operation: StringEnum(["add", "subtract", "multiply", "divide"], {
+    description: "The operation to perform.",
+  }),
+});
+
+/**
+ * Tool definition passed in the test context's `tools` list; its parameters
+ * are described by `calculatorSchema`.
+ */
+const calculatorTool: Tool<typeof calculatorSchema> = {
+  name: "calculator",
+  description: "Perform basic arithmetic operations",
+  parameters: calculatorSchema,
+};
+
+/**
+ * Operation names the local calculator can evaluate. Keep in sync with the
+ * list given to `StringEnum` in `calculatorSchema`.
+ */
+type CalculatorOperation = "add" | "subtract" | "multiply" | "divide";
+
+/** Statically typed shape of calculator arguments after runtime validation. */
+type CalculatorArguments = {
+  a: number;
+  b: number;
+  operation: CalculatorOperation;
+};
+
+/**
+ * Checks raw tool-call arguments at runtime and returns them typed as
+ * `CalculatorArguments`.
+ *
+ * @param args - Arguments taken from a tool call; treated as untrusted.
+ * @returns A fresh object holding the validated `a`, `b` and `operation`.
+ * @throws Error "Tool arguments must be an object" when `args` is `null` or
+ *   not an object.
+ * @throws Error "Invalid calculator arguments" when `a` or `b` is not a
+ *   number, or `operation` is not add / subtract / multiply / divide.
+ */
+function asCalculatorArguments(
+  args: ToolCall["arguments"],
+): CalculatorArguments {
+  if (args === null || typeof args !== "object") {
+    throw new Error("Tool arguments must be an object");
+  }
+
+  // Read the fields as `unknown` so each one has to be narrowed before use.
+  const { a, b, operation } = args as Record<string, unknown>;
+
+  if (typeof a !== "number" || typeof b !== "number") {
+    throw new Error("Invalid calculator arguments");
+  }
+
+  // Positive whitelist: only the four supported operation names get through.
+  if (
+    operation === "add" ||
+    operation === "subtract" ||
+    operation === "multiply" ||
+    operation === "divide"
+  ) {
+    return { a, b, operation };
+  }
+
+  throw new Error("Invalid calculator arguments");
+}
+
+/**
+ * Evaluates a calculator tool call locally.
+ *
+ * @param toolCall - Tool call whose `arguments` are validated with
+ *   `asCalculatorArguments` before use.
+ * @returns The result of applying the requested operation to `a` and `b`
+ *   using plain JavaScript number arithmetic.
+ * @throws Error when the arguments fail validation.
+ */
+function evaluateCalculatorCall(toolCall: ToolCall): number {
+  const { a, b, operation } = asCalculatorArguments(toolCall.arguments);
+
+  // One entry per supported operation; the Record type forces the table to
+  // cover every operation name.
+  const handlers: Record<
+    typeof operation,
+    (left: number, right: number) => number
+  > = {
+    add: (left, right) => left + right,
+    subtract: (left, right) => left - right,
+    multiply: (left, right) => left * right,
+    divide: (left, right) => left / right,
+  };
+
+  return handlers[operation](a, b);
+}
+
+/**
+ * Drives a two-turn calculator conversation against `llm` and asserts that
+ * thinking content is present in both turns.
+ *
+ * Turn 1 must stop with "toolUse" and contain a thinking block and a tool
+ * call. That call is evaluated locally and answered with a deliberately
+ * ambiguous tool result ("X or 2X"). Turn 2 must stop with "stop" and contain
+ * both a thinking block and a text block.
+ *
+ * @param llm - Model to exercise.
+ * @param reasoning - Reasoning level forwarded to `completeSimple` on both turns.
+ * @throws Error when the first response contains no tool call, or when the
+ *   tool-call arguments fail validation; `expect` failures also propagate.
+ */
+async function assertSecondToolCallWithInterleavedThinking<TApi extends Api>(
+  llm: Model<TApi>,
+  reasoning: "high" | "xhigh",
+) {
+  const systemInstructions = [
+    "You are a helpful assistant that must use tools for arithmetic.",
+    "Always think before every tool call, not just the first one.",
+    "Do not answer with plain text when a tool call is required.",
+  ];
+  const userInstructions = [
+    "Use calculator to calculate 328 * 29.",
+    "You must call the calculator tool exactly once.",
+    "Provide the final answer based on the best guess given the tool result, even if it seems unreliable.",
+    "Start by thinking about the steps you will take to solve the problem.",
+  ];
+
+  const conversation: Context = {
+    systemPrompt: systemInstructions.join(" "),
+    messages: [
+      {
+        role: "user",
+        content: userInstructions.join(" "),
+        timestamp: Date.now(),
+      },
+    ],
+    tools: [calculatorTool],
+  };
+
+  // Turn 1: the model is expected to think, then request the calculator.
+  const turnOne = await completeSimple(llm, conversation, { reasoning });
+
+  expect(turnOne.stopReason, `Error: ${turnOne.errorMessage}`).toBe(
+    "toolUse" satisfies StopReason,
+  );
+  expect(turnOne.content.some(({ type }) => type === "thinking")).toBe(true);
+  expect(turnOne.content.some(({ type }) => type === "toolCall")).toBe(true);
+
+  const requestedCall = turnOne.content.find(
+    (block) => block.type === "toolCall",
+  );
+  expect(requestedCall?.type).toBe("toolCall");
+  // Explicit guard so the compiler narrows the block to a tool call below.
+  if (requestedCall === undefined || requestedCall.type !== "toolCall") {
+    throw new Error("Expected first response to include a tool call");
+  }
+
+  conversation.messages.push(turnOne);
+
+  // Reply with two candidate answers so the model has something to reason
+  // about before it produces its final text.
+  const computed = evaluateCalculatorCall(requestedCall);
+  const ambiguousResult: ToolResultMessage = {
+    role: "toolResult",
+    toolCallId: requestedCall.id,
+    toolName: requestedCall.name,
+    content: [
+      {
+        type: "text",
+        text: `The answer is ${computed} or ${computed * 2}.`,
+      },
+    ],
+    isError: false,
+    timestamp: Date.now(),
+  };
+  conversation.messages.push(ambiguousResult);
+
+  // Turn 2: the model should think again and then answer in plain text.
+  const turnTwo = await completeSimple(llm, conversation, { reasoning });
+
+  expect(turnTwo.stopReason, `Error: ${turnTwo.errorMessage}`).toBe(
+    "stop" satisfies StopReason,
+  );
+  expect(turnTwo.content.some(({ type }) => type === "thinking")).toBe(true);
+  expect(turnTwo.content.some(({ type }) => type === "text")).toBe(true);
+}
+
+// True when getEnvApiKey("anthropic") returns a truthy value; gates the Anthropic suite.
+const hasAnthropicCredentials = Boolean(getEnvApiKey("anthropic"));
+
+// Bedrock suite: skipped when hasBedrockCredentials() reports no credentials.
+describe.skipIf(!hasBedrockCredentials())(
+  "Amazon Bedrock interleaved thinking",
+  () => {
+    // Each test is configured with `retry: 3`.
+    it(
+      "should do interleaved thinking on Claude Opus 4.5",
+      { retry: 3 },
+      async () =>
+        assertSecondToolCallWithInterleavedThinking(
+          getModel(
+            "amazon-bedrock",
+            "global.anthropic.claude-opus-4-5-20251101-v1:0",
+          ),
+          "high",
+        ),
+    );
+
+    it(
+      "should do interleaved thinking on Claude Opus 4.6",
+      { retry: 3 },
+      async () =>
+        assertSecondToolCallWithInterleavedThinking(
+          getModel("amazon-bedrock", "global.anthropic.claude-opus-4-6-v1"),
+          "high",
+        ),
+    );
+  },
+);
+
+// Anthropic suite: skipped when hasAnthropicCredentials is false.
+describe.skipIf(!hasAnthropicCredentials)(
+  "Anthropic interleaved thinking",
+  () => {
+    // Each test is configured with `retry: 3`.
+    it(
+      "should do interleaved thinking on Claude Opus 4.5",
+      { retry: 3 },
+      async () =>
+        assertSecondToolCallWithInterleavedThinking(
+          getModel("anthropic", "claude-opus-4-5"),
+          "high",
+        ),
+    );
+
+    it(
+      "should do interleaved thinking on Claude Opus 4.6",
+      { retry: 3 },
+      async () =>
+        assertSecondToolCallWithInterleavedThinking(
+          getModel("anthropic", "claude-opus-4-6"),
+          "high",
+        ),
+    );
+  },
+);