diff --git a/packages/ai/README.md b/packages/ai/README.md index dc378075..8223eeb1 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -4,22 +4,6 @@ Unified LLM API with automatic model discovery, provider configuration, token an **Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows. -## API Changes in v0.5.15+ - -The `AssistantMessage` response structure has been updated to support multiple content blocks of different types. Instead of separate fields for `text`, `thinking`, and `toolCalls`, responses now have a unified `content` array that can contain multiple blocks of each type in any order. - -```typescript -// Old API (pre-0.5.15) -response.text // single text string -response.thinking // single thinking string -response.toolCalls // array of tool calls - -// New API (0.5.15+) -response.content // array of TextContent | ThinkingContent | ToolCall blocks -``` - -This change allows models to return multiple thinking and text blocks, which is especially useful for complex reasoning tasks. - ## Supported Providers - **OpenAI** diff --git a/packages/pods/src/models.json b/packages/pods/src/models.json index 450f1447..2ab3546e 100644 --- a/packages/pods/src/models.json +++ b/packages/pods/src/models.json @@ -114,7 +114,7 @@ } } ], - "notes": "Requires vLLM 0.10.1+gptoss. Tools/functoin calls only via /v1/responses endpoint." + "notes": "Tools/function calls only via /v1/responses endpoint." }, "openai/gpt-oss-120b": { "name": "GPT-OSS-120B", @@ -123,25 +123,25 @@ "gpuCount": 1, "gpuTypes": ["H100", "H200"], "args": ["--async-scheduling", "--gpu-memory-utilization", "0.95", "--max-num-batched-tokens", "1024"], - "notes": "Single GPU deployment. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint." + "notes": "Single GPU deployment. Tools/function calls only via /v1/responses endpoint." }, { "gpuCount": 2, "gpuTypes": ["H100", "H200"], "args": ["--tensor-parallel-size", "2", "--async-scheduling", "--gpu-memory-utilization", "0.94"], - "notes": "Recommended for H100/H200. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint." + "notes": "Recommended for H100/H200. Tools/function calls only via /v1/responses endpoint." }, { "gpuCount": 4, "gpuTypes": ["H100", "H200"], "args": ["--tensor-parallel-size", "4", "--async-scheduling"], - "notes": "Higher throughput. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint." + "notes": "Higher throughput. Tools/function calls only via /v1/responses endpoint." }, { "gpuCount": 8, "gpuTypes": ["H100", "H200"], "args": ["--tensor-parallel-size", "8", "--async-scheduling"], - "notes": "Maximum throughput for evaluation workloads. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint." + "notes": "Maximum throughput for evaluation workloads. Tools/function calls only via /v1/responses endpoint." } ] },