mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-20 18:02:11 +00:00
docs(ai): Remove old API reference from README
This commit is contained in:
parent
8f4e96f207
commit
c960d36da1
2 changed files with 5 additions and 21 deletions
|
|
@@ -4,22 +4,6 @@ Unified LLM API with automatic model discovery, provider configuration, token an
|
||||||
|
|
||||||
**Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows.
|
**Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows.
|
||||||
|
|
||||||
## API Changes in v0.5.15+
|
|
||||||
|
|
||||||
The `AssistantMessage` response structure has been updated to support multiple content blocks of different types. Instead of separate fields for `text`, `thinking`, and `toolCalls`, responses now have a unified `content` array that can contain multiple blocks of each type in any order.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// Old API (pre-0.5.15)
|
|
||||||
response.text // single text string
|
|
||||||
response.thinking // single thinking string
|
|
||||||
response.toolCalls // array of tool calls
|
|
||||||
|
|
||||||
// New API (0.5.15+)
|
|
||||||
response.content // array of TextContent | ThinkingContent | ToolCall blocks
|
|
||||||
```
|
|
||||||
|
|
||||||
This change allows models to return multiple thinking and text blocks, which is especially useful for complex reasoning tasks.
|
|
||||||
|
|
||||||
## Supported Providers
|
## Supported Providers
|
||||||
|
|
||||||
- **OpenAI**
|
- **OpenAI**
|
||||||
|
|
|
||||||
|
|
@@ -114,7 +114,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"notes": "Requires vLLM 0.10.1+gptoss. Tools/functoin calls only via /v1/responses endpoint."
|
"notes": "Tools/function calls only via /v1/responses endpoint."
|
||||||
},
|
},
|
||||||
"openai/gpt-oss-120b": {
|
"openai/gpt-oss-120b": {
|
||||||
"name": "GPT-OSS-120B",
|
"name": "GPT-OSS-120B",
|
||||||
|
|
@@ -123,25 +123,25 @@
|
||||||
"gpuCount": 1,
|
"gpuCount": 1,
|
||||||
"gpuTypes": ["H100", "H200"],
|
"gpuTypes": ["H100", "H200"],
|
||||||
"args": ["--async-scheduling", "--gpu-memory-utilization", "0.95", "--max-num-batched-tokens", "1024"],
|
"args": ["--async-scheduling", "--gpu-memory-utilization", "0.95", "--max-num-batched-tokens", "1024"],
|
||||||
"notes": "Single GPU deployment. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
"notes": "Single GPU deployment. Tools/function calls only via /v1/responses endpoint."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"gpuCount": 2,
|
"gpuCount": 2,
|
||||||
"gpuTypes": ["H100", "H200"],
|
"gpuTypes": ["H100", "H200"],
|
||||||
"args": ["--tensor-parallel-size", "2", "--async-scheduling", "--gpu-memory-utilization", "0.94"],
|
"args": ["--tensor-parallel-size", "2", "--async-scheduling", "--gpu-memory-utilization", "0.94"],
|
||||||
"notes": "Recommended for H100/H200. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
"notes": "Recommended for H100/H200. Tools/function calls only via /v1/responses endpoint."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"gpuCount": 4,
|
"gpuCount": 4,
|
||||||
"gpuTypes": ["H100", "H200"],
|
"gpuTypes": ["H100", "H200"],
|
||||||
"args": ["--tensor-parallel-size", "4", "--async-scheduling"],
|
"args": ["--tensor-parallel-size", "4", "--async-scheduling"],
|
||||||
"notes": "Higher throughput. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
"notes": "Higher throughput. Tools/function calls only via /v1/responses endpoint."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"gpuCount": 8,
|
"gpuCount": 8,
|
||||||
"gpuTypes": ["H100", "H200"],
|
"gpuTypes": ["H100", "H200"],
|
||||||
"args": ["--tensor-parallel-size", "8", "--async-scheduling"],
|
"args": ["--tensor-parallel-size", "8", "--async-scheduling"],
|
||||||
"notes": "Maximum throughput for evaluation workloads. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
"notes": "Maximum throughput for evaluation workloads. Tools/function calls only via /v1/responses endpoint."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue