From f064ea0e14d969ff2108c37af7f9cbaefad44bf3 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sun, 17 Aug 2025 20:18:45 +0200 Subject: [PATCH] feat(ai): Create unified AI package with OpenAI, Anthropic, and Gemini support - Set up @mariozechner/ai package structure following monorepo patterns - Install OpenAI, Anthropic, and Google Gemini SDK dependencies - Document comprehensive API investigation for all three providers - Design minimal unified API with streaming-first architecture - Add models.dev integration for pricing and capabilities - Implement automatic caching strategy for all providers - Update project documentation with package creation guide --- package-lock.json | 390 ++- package.json | 2 +- packages/ai/README.md | 62 + packages/ai/anthropic-api.md | 1706 ++++++++++++ packages/ai/gemini-api.md | 1233 +++++++++ packages/ai/openai-api.md | 2320 +++++++++++++++++ packages/ai/package.json | 32 + packages/ai/plan.md | 950 +++++++ packages/ai/src/index.ts | 5 + packages/ai/tsconfig.build.json | 9 + ...-183528-ai-unified-api-package-analysis.md | 606 +++++ .../20250817-183528-ai-unified-api-package.md | 46 + todos/project-description.md | 96 +- tsconfig.json | 1 + 14 files changed, 7437 insertions(+), 21 deletions(-) create mode 100644 packages/ai/README.md create mode 100644 packages/ai/anthropic-api.md create mode 100644 packages/ai/gemini-api.md create mode 100644 packages/ai/openai-api.md create mode 100644 packages/ai/package.json create mode 100644 packages/ai/plan.md create mode 100644 packages/ai/src/index.ts create mode 100644 packages/ai/tsconfig.build.json create mode 100644 todos/done/20250817-183528-ai-unified-api-package-analysis.md create mode 100644 todos/done/20250817-183528-ai-unified-api-package.md diff --git a/package-lock.json b/package-lock.json index fe555185..c434192c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,15 @@ "node": ">=20.0.0" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.60.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.60.0.tgz", + "integrity": "sha512-9zu/TXaUy8BZhXedDtt1wT3H4LOlpKDO1/ftiFpeR3N1PCr3KJFKkxxlQWWt1NNp08xSwUNJ3JNY8yhl8av6eQ==", + "license": "MIT", + "bin": { + "anthropic-ai-sdk": "bin/cli" + } + }, "node_modules/@biomejs/biome": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/@biomejs/biome/-/biome-2.1.4.tgz", @@ -624,6 +633,31 @@ "node": ">=18" } }, + "node_modules/@google/genai": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.14.0.tgz", + "integrity": "sha512-jirYprAAJU1svjwSDVCzyVq+FrJpJd5CSxR/g2Ga/gZ0ZYZpcWjMS75KJl9y71K1mDN+tcx6s21CzCbB2R840g==", + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^9.14.2", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.11.0" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + } + }, + "node_modules/@mariozechner/ai": { + "resolved": "packages/ai", + "link": true + }, "node_modules/@mariozechner/pi": { "resolved": "packages/pods", "link": true @@ -659,6 +693,15 @@ "dev": true, "license": "MIT" }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ansi-regex": { "version": 
"6.1.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", @@ -683,6 +726,41 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, "node_modules/chalk": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.5.0.tgz", @@ -695,6 +773,32 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/esbuild": { "version": "0.25.8", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.8.tgz", @@ -737,6 +841,12 @@ "@esbuild/win32-x64": "0.25.8" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -752,6 +862,36 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": 
"sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, "node_modules/get-tsconfig": { "version": "4.10.1", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz", @@ -765,6 +905,58 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/husky": { "version": "9.1.7", "resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz", @@ -781,6 +973,95 @@ "url": "https://github.com/sponsors/typicode" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": 
"sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/openai": { + "version": "5.12.2", + "resolved": "https://registry.npmjs.org/openai/-/openai-5.12.2.tgz", + "integrity": "sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -791,6 +1072,26 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -818,6 +1119,12 @@ "url": "https://github.com/chalk/strip-ansi?sponsor=1" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/tsx": { "version": "4.20.3", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.3.tgz", @@ -858,6 +1165,56 @@ "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", "license": "MIT" }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": 
"BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/ws": { + "version": "8.18.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "packages/agent": { "name": "@mariozechner/pi-agent", "version": "0.5.8", @@ -1041,25 +1398,6 @@ "node": ">=16 || 14 >=14.17" } }, - "packages/agent/node_modules/openai": { - "version": "5.12.2", - "license": "Apache-2.0", - "bin": { - "openai": "bin/cli" - }, - "peerDependencies": { - "ws": "^8.18.0", - "zod": "^3.23.8" - }, - "peerDependenciesMeta": { - "ws": { - "optional": true - }, - "zod": { - "optional": true - } - } - }, "packages/agent/node_modules/package-json-from-dist": { "version": "1.0.1", "license": "BlueOak-1.0.0" @@ -1259,6 +1597,20 @@ "node": ">=8" } }, + "packages/ai": { + "name": "@mariozechner/ai", + "version": "0.5.8", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": "0.60.0", + "@google/genai": "1.14.0", + "openai": "5.12.2" + }, + "devDependencies": {}, + "engines": { + "node": ">=20.0.0" + } + }, "packages/pods": { "name": "@mariozechner/pi", "version": "0.5.8", diff --git a/package.json b/package.json index 186dc90d..1ccf027d 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ ], "scripts": { "clean": "npm run clean --workspaces", - "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi", + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/ai && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi", "check": "biome check --write . && npm run check --workspaces && tsc --noEmit", "test": "npm run test --workspaces --if-present", "version:patch": "npm version patch -ws --no-git-tag-version && node scripts/sync-versions.js", diff --git a/packages/ai/README.md b/packages/ai/README.md new file mode 100644 index 00000000..9cff0dab --- /dev/null +++ b/packages/ai/README.md @@ -0,0 +1,62 @@ +# @mariozechner/ai + +Unified API for OpenAI, Anthropic, and Google Gemini LLM providers. This package provides a common interface for working with multiple LLM providers, handling their differences transparently while exposing a consistent, minimal API. 
+ +## Features (Planned) + +- **Unified Interface**: Single API for OpenAI, Anthropic, and Google Gemini +- **Streaming Support**: Real-time response streaming with delta events +- **Tool Calling**: Consistent tool/function calling across providers +- **Reasoning/Thinking**: Support for reasoning tokens where available +- **Session Management**: Serializable conversation state across providers +- **Token Tracking**: Unified token counting (input, output, cached, reasoning) +- **Interrupt Handling**: Graceful cancellation of requests +- **Provider Detection**: Automatic configuration based on endpoint +- **Caching Support**: Provider-specific caching strategies + +## Installation + +```bash +npm install @mariozechner/ai +``` + +## Quick Start (Coming Soon) + +```typescript +import { createClient } from '@mariozechner/ai'; + +// Automatically detects provider from configuration +const client = createClient({ + provider: 'openai', + apiKey: process.env.OPENAI_API_KEY, + model: 'gpt-4' +}); + +// Same API works for all providers +const response = await client.complete({ + messages: [ + { role: 'user', content: 'Hello!' } + ], + stream: true +}); + +for await (const event of response) { + if (event.type === 'content') { + process.stdout.write(event.text); + } +} +``` + +## Supported Providers + +- **OpenAI**: GPT-3.5, GPT-4, o1, o3 models +- **Anthropic**: Claude models via native SDK +- **Google Gemini**: Gemini models with thinking support + +## Development + +This package is part of the pi monorepo. See the main README for development instructions. + +## License + +MIT \ No newline at end of file diff --git a/packages/ai/anthropic-api.md b/packages/ai/anthropic-api.md new file mode 100644 index 00000000..7ede2cb2 --- /dev/null +++ b/packages/ai/anthropic-api.md @@ -0,0 +1,1706 @@ +# Anthropic SDK Implementation Guide + +This document provides a comprehensive guide for implementing the required features using the Anthropic SDK. All examples use TypeScript and include actual code that works with the SDK. + +## Table of Contents + +1. [Basic Client Setup](#basic-client-setup) +2. [Streaming Responses](#streaming-responses) +3. [Request Abortion](#request-abortion) +4. [Error Handling](#error-handling) +5. [Stop Reasons](#stop-reasons) +6. [Context and Message History](#context-and-message-history) +7. [Token Counting](#token-counting) +8. [Prompt Caching](#prompt-caching) +9. [Tool Use (Function Calling)](#tool-use-function-calling) +10. [System Prompts](#system-prompts) +11. [Content Block System](#content-block-system) +12. [MessageStream Helper Class](#messagestream-helper-class) +13. [Thinking Tokens and Extended Reasoning](#thinking-tokens-and-extended-reasoning) +14. 
[Complete Implementation Example](#complete-implementation-example) + +## Basic Client Setup + +```typescript +import Anthropic from '@anthropic-ai/sdk'; + +// Create client with configuration +const anthropic = new Anthropic({ + apiKey: process.env.ANTHROPIC_API_KEY, // Required + baseURL: 'https://api.anthropic.com', // Optional, this is the default + timeout: 60000, // Optional, in milliseconds + maxRetries: 3, // Optional, default is 2 +}); +``` + +### Environment Variables + +The SDK automatically reads from these environment variables: +- `ANTHROPIC_API_KEY` - Your API key +- `ANTHROPIC_BASE_URL` - Custom base URL (optional) + +## Streaming Responses + +### Basic Streaming with MessageStream + +```typescript +import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream'; + +async function basicStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello, Claude!' }], + }); + + // Listen to different event types + stream.on('text', (text, snapshot) => { + process.stdout.write(text); // text is the delta, snapshot is accumulated + }); + + stream.on('message', (message) => { + console.log('\nFinal message:', message); + }); + + stream.on('error', (error) => { + console.error('Error:', error); + }); + + // Wait for completion + const finalMessage = await stream.finalMessage(); + return finalMessage; +} +``` + +### Raw Streaming with create() + +```typescript +import { RawMessageStreamEvent } from '@anthropic-ai/sdk'; + +async function rawStreaming() { + const stream = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + stream: true, + }); + + let content = ''; + let usage: any = null; + + for await (const chunk of stream) { + switch (chunk.type) { + case 'message_start': + console.log('Message started:', chunk.message); + break; + + case 'content_block_delta': + if (chunk.delta.type === 'text_delta') { + content += chunk.delta.text; + process.stdout.write(chunk.delta.text); + } + break; + + case 'message_delta': + if (chunk.usage) { + usage = chunk.usage; + } + console.log('\nStop reason:', chunk.delta.stop_reason); + break; + + case 'message_stop': + console.log('\nStream ended'); + break; + } + } + + return { content, usage }; +} +``` + +### Handling Thinking Tokens in Streams + +```typescript +async function streamWithThinking() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 4000, + thinking: { + type: 'enabled', + budget_tokens: 2000, + }, + messages: [{ role: 'user', content: 'Solve this complex math problem: ...' 
}], + }); + + stream.on('thinking', (thinking, snapshot) => { + console.log('[Thinking]', thinking); // Delta thinking content + }); + + stream.on('text', (text, snapshot) => { + process.stdout.write(text); // Regular response text + }); + + const message = await stream.finalMessage(); + + // Access thinking content from final message + for (const block of message.content) { + if (block.type === 'thinking') { + console.log('Final thinking:', block.thinking); + } + } +} +``` + +## Request Abortion + +### AbortController Integration + +```typescript +async function abortableRequest() { + const controller = new AbortController(); + + // Abort after 5 seconds + const timeoutId = setTimeout(() => controller.abort(), 5000); + + try { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Long task...' }], + }, { + // Pass abort signal in request options + signal: controller.signal, + }); + + stream.on('error', (error) => { + if (error.name === 'AbortError') { + console.log('Request was aborted'); + } else { + console.error('Other error:', error); + } + }); + + const result = await stream.finalMessage(); + clearTimeout(timeoutId); + return result; + + } catch (error) { + clearTimeout(timeoutId); + + if (error.name === 'AbortError') { + console.log('Request aborted by user'); + } else { + throw error; + } + } +} + +// Manual abort from MessageStream +async function manualAbort() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Start a story...' }], + }); + + // Abort after receiving some content + stream.on('text', (text, snapshot) => { + if (snapshot.length > 100) { + stream.abort(); // Built-in abort method + } + }); + + try { + await stream.finalMessage(); + } catch (error) { + if (stream.aborted) { + console.log('Stream was manually aborted'); + } + } +} +``` + +## Error Handling + +### Comprehensive Error Types + +```typescript +import { + AnthropicError, + APIError, + APIConnectionError, + APIConnectionTimeoutError, + APIUserAbortError, + NotFoundError, + ConflictError, + RateLimitError, + BadRequestError, + AuthenticationError, + InternalServerError, + PermissionDeniedError, + UnprocessableEntityError, +} from '@anthropic-ai/sdk'; + +async function handleErrors() { + try { + const message = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' 
}], + }); + + return message; + + } catch (error) { + // Handle specific error types + if (error instanceof RateLimitError) { + console.error('Rate limit exceeded:', { + status: error.status, + headers: error.headers, + retryAfter: error.headers.get('retry-after'), + }); + + // Wait and retry logic + const retryAfter = parseInt(error.headers.get('retry-after') || '60'); + await new Promise(resolve => setTimeout(resolve, retryAfter * 1000)); + + } else if (error instanceof AuthenticationError) { + console.error('Authentication failed:', error.status); + throw new Error('Invalid API key'); + + } else if (error instanceof BadRequestError) { + console.error('Bad request:', { + status: error.status, + error: error.error, + message: error.message, + }); + + } else if (error instanceof APIConnectionTimeoutError) { + console.error('Request timed out'); + // Retry with longer timeout + + } else if (error instanceof APIConnectionError) { + console.error('Network error:', error.message); + // Retry with backoff + + } else if (error instanceof APIUserAbortError) { + console.log('Request was aborted by user'); + + } else if (error instanceof InternalServerError) { + console.error('Server error:', error.status); + // Retry with exponential backoff + + } else if (error instanceof APIError) { + console.error('API error:', { + status: error.status, + error: error.error, + requestId: error.requestID, + }); + + } else { + console.error('Unexpected error:', error); + throw error; + } + } +} + +// Error handling in streams +function handleStreamErrors() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + }); + + stream.on('error', (error) => { + if (error instanceof RateLimitError) { + console.log('Rate limited during stream'); + } else if (error instanceof APIConnectionError) { + console.log('Connection lost during stream'); + } else { + console.error('Stream error:', error); + } + }); + + return stream; +} +``` + +## Stop Reasons + +### Understanding Stop Reasons + +```typescript +import { StopReason } from '@anthropic-ai/sdk'; + +async function handleStopReasons() { + const message = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 100, // Intentionally low to trigger max_tokens + messages: [{ role: 'user', content: 'Write a long story...' 
}], + stop_sequences: ['THE END'], // Custom stop sequence + }); + + // Extract and handle stop reason + const stopReason: StopReason = message.stop_reason; + + switch (stopReason) { + case 'end_turn': + console.log('Model completed naturally'); + break; + + case 'max_tokens': + console.log('Hit token limit, response may be incomplete'); + // Consider continuing with a follow-up request + break; + + case 'stop_sequence': + console.log('Hit custom stop sequence:', message.stop_sequence); + break; + + case 'tool_use': + console.log('Model wants to use tools'); + // Handle tool calls (see Tool Use section) + break; + + case 'pause_turn': + console.log('Long turn paused, can continue'); + // Continue with the partial response as context + break; + + case 'refusal': + console.log('Model refused to respond due to safety'); + break; + + default: + console.log('Unknown stop reason:', stopReason); + } + + return { message, stopReason }; +} + +// In streaming mode +function handleStopReasonsInStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + }); + + stream.on('message', (message) => { + const stopReason = message.stop_reason; + console.log('Final stop reason:', stopReason); + + if (stopReason === 'max_tokens') { + console.log('Response was truncated'); + } + }); + + return stream; +} +``` + +## Context and Message History + +### Message Format and Serialization + +```typescript +import { MessageParam, Message } from '@anthropic-ai/sdk'; + +interface ConversationState { + messages: MessageParam[]; + totalTokens: number; + model: string; + systemPrompt?: string; +} + +class ConversationManager { + private state: ConversationState; + + constructor(model: string, systemPrompt?: string) { + this.state = { + messages: [], + totalTokens: 0, + model, + systemPrompt, + }; + } + + // Add user message + addUserMessage(content: string | any[]) { + this.state.messages.push({ + role: 'user', + content, + }); + } + + // Add assistant message from API response + addAssistantMessage(message: Message) { + this.state.messages.push({ + role: 'assistant', + content: message.content, + }); + + // Update token count + this.state.totalTokens += message.usage.input_tokens + message.usage.output_tokens; + } + + // Add tool results + addToolResult(toolUseId: string, result: string, isError = false) { + // Find the last message and ensure it has tool use + const lastMessage = this.state.messages[this.state.messages.length - 1]; + if (lastMessage?.role === 'assistant') { + // Add tool result as new user message + this.state.messages.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: toolUseId, + content: result, + is_error: isError, + }], + }); + } + } + + // Get messages for API call + getMessages(): MessageParam[] { + return [...this.state.messages]; + } + + // Serialize for persistence + serialize(): string { + return JSON.stringify(this.state); + } + + // Deserialize from storage + static deserialize(json: string): ConversationManager { + const state = JSON.parse(json); + const manager = new ConversationManager(state.model, state.systemPrompt); + manager.state = state; + return manager; + } + + // Create request parameters + createRequestParams(newMessage?: string): any { + if (newMessage) { + this.addUserMessage(newMessage); + } + + const params: any = { + model: this.state.model, + max_tokens: 4000, + messages: this.getMessages(), + }; + + if (this.state.systemPrompt) { + 
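        // Anthropic takes the system prompt as a top-level `system` request parameter, not as a message role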
params.system = this.state.systemPrompt; + } + + return params; + } + + // Get conversation stats + getStats() { + return { + messageCount: this.state.messages.length, + totalTokens: this.state.totalTokens, + userMessages: this.state.messages.filter(m => m.role === 'user').length, + assistantMessages: this.state.messages.filter(m => m.role === 'assistant').length, + }; + } +} + +// Usage example +async function conversationExample() { + const conversation = new ConversationManager( + 'claude-sonnet-4-20250514', + 'You are a helpful coding assistant.' + ); + + // First exchange + const params1 = conversation.createRequestParams('Hello, can you help me with Python?'); + const response1 = await anthropic.messages.create(params1); + conversation.addAssistantMessage(response1); + + // Second exchange + const params2 = conversation.createRequestParams('Show me a simple function.'); + const response2 = await anthropic.messages.create(params2); + conversation.addAssistantMessage(response2); + + // Save conversation + const saved = conversation.serialize(); + localStorage.setItem('conversation', saved); + + // Later: restore conversation + const restored = ConversationManager.deserialize(saved); + console.log('Conversation stats:', restored.getStats()); +} +``` + +## Token Counting + +### Using the Count Tokens API + +```typescript +import { MessageCountTokensParams, MessageTokensCount } from '@anthropic-ai/sdk'; + +async function countTokens() { + const messages = [ + { role: 'user', content: 'Hello, how are you?' }, + { role: 'assistant', content: 'I am doing well, thank you for asking!' }, + { role: 'user', content: 'Can you help me write some code?' }, + ] as const; + + // Count tokens for messages + const tokenCount: MessageTokensCount = await anthropic.messages.countTokens({ + model: 'claude-sonnet-4-20250514', + messages, + system: 'You are a helpful coding assistant.', + }); + + console.log('Input tokens:', tokenCount.input_tokens); + return tokenCount.input_tokens; +} + +// Count tokens with tools +async function countTokensWithTools() { + const tools = [ + { + name: 'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { type: 'string' }, + }, + required: ['expression'], + }, + }, + ]; + + const tokenCount = await anthropic.messages.countTokens({ + model: 'claude-sonnet-4-20250514', + messages: [{ role: 'user', content: 'Calculate 2+2' }], + tools, + }); + + return tokenCount.input_tokens; +} + +// Extract usage from responses +function extractUsageFromResponse(message: Message) { + const usage = message.usage; + + return { + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || 0, + totalTokens: usage.input_tokens + usage.output_tokens, + serviceTier: usage.service_tier, + cacheCreation: usage.cache_creation, + }; +} + +// Token usage in streaming +function trackTokensInStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' 
}], + }); + + let finalUsage: any = null; + + stream.on('message', (message) => { + finalUsage = extractUsageFromResponse(message); + console.log('Final usage:', finalUsage); + }); + + return stream; +} +``` + +## Prompt Caching + +### Basic Caching Implementation + +```typescript +import { CacheControlEphemeral } from '@anthropic-ai/sdk'; + +async function usePromptCaching() { + // Cache control for system prompt + const systemPrompt = [ + { + type: 'text', + text: 'You are an expert software engineer with deep knowledge of...', + cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, + }, + ]; + + // Cache control for large document + const messages = [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Here is a large codebase to analyze:', + }, + { + type: 'document', + source: { + type: 'text', + data: '// Large codebase content...', + media_type: 'text/plain', + }, + cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, + }, + { + type: 'text', + text: 'Please analyze this code for bugs.', + }, + ], + }, + ] as const; + + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages, + }); + + // Check cache usage + const usage = response.usage; + console.log('Cache read tokens:', usage.cache_read_input_tokens); + console.log('Cache write tokens:', usage.cache_creation_input_tokens); + + return response; +} + +// Caching with different TTL options +async function cachingWithTTL() { + const shortCache = { + type: 'ephemeral', + ttl: '5m', // 5 minutes + } as CacheControlEphemeral; + + const longCache = { + type: 'ephemeral', + ttl: '1h', // 1 hour (default) + } as CacheControlEphemeral; + + const messages = [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Short-lived context', + cache_control: shortCache, + }, + { + type: 'text', + text: 'Long-lived context that should be cached longer', + cache_control: longCache, + }, + { + type: 'text', + text: 'What can you tell me about this?', + }, + ], + }, + ] as const; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages, + }); +} +``` + +## Tool Use (Function Calling) + +### Complete Tool Implementation + +```typescript +import { Tool, ToolUseBlock, ToolChoice } from '@anthropic-ai/sdk'; + +// Define tools +const tools: Tool[] = [ + { + name: 'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { + type: 'string', + description: 'Mathematical expression to evaluate', + }, + }, + required: ['expression'], + }, + }, + { + name: 'weather', + description: 'Get weather information for a location', + input_schema: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'City name or coordinates', + }, + units: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature units', + }, + }, + required: ['location'], + }, + }, +]; + +// Tool implementations +const toolImplementations = { + calculator: (args: { expression: string }) => { + try { + // Simple eval - in production, use a safe math parser + const result = eval(args.expression); + return `Result: ${result}`; + } catch (error) { + return `Error: Invalid expression - ${error.message}`; + } + }, + + weather: async (args: { location: string; units?: string }) => { + // Mock weather API call + return `Weather in ${args.location}: 22°C, sunny with light clouds`; + }, 
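+  // Additional tool implementations can be registered here; keys must match the `name` fields declared in the Tool definitions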
+}; + +async function toolUseExample() { + const conversation = new ConversationManager('claude-sonnet-4-20250514'); + + // Send initial message with tools + conversation.addUserMessage('What is 15 * 23 and what is the weather in Paris?'); + + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: conversation.getMessages(), + tools, + tool_choice: { type: 'auto' } as ToolChoice, + }); + + conversation.addAssistantMessage(response); + + // Handle tool calls + const toolCalls: ToolUseBlock[] = response.content.filter( + (block): block is ToolUseBlock => block.type === 'tool_use' + ); + + // Execute each tool call + for (const toolCall of toolCalls) { + const toolName = toolCall.name; + const toolArgs = toolCall.input; + const toolId = toolCall.id; + + console.log(`Executing tool: ${toolName} with args:`, toolArgs); + + try { + let result: string; + + if (toolName in toolImplementations) { + result = await toolImplementations[toolName](toolArgs as any); + } else { + result = `Error: Unknown tool "${toolName}"`; + } + + // Add tool result to conversation + conversation.addToolResult(toolId, result); + + } catch (error) { + // Add error result + conversation.addToolResult(toolId, `Error: ${error.message}`, true); + } + } + + // Get final response after tool execution + if (toolCalls.length > 0) { + const finalResponse = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: conversation.getMessages(), + tools, + }); + + conversation.addAssistantMessage(finalResponse); + return finalResponse; + } + + return response; +} + +// Streaming with tools +async function streamingWithTools() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Calculate 42 * 17' }], + tools, + }); + + const toolCalls: ToolUseBlock[] = []; + + stream.on('contentBlock', (block) => { + if (block.type === 'tool_use') { + toolCalls.push(block); + } + }); + + stream.on('message', async (message) => { + if (message.stop_reason === 'tool_use') { + console.log('Tool calls detected:', toolCalls); + // Handle tools... + } + }); + + return stream; +} + +// Force specific tool usage +async function forceToolUsage() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'I need to do some math' }], + tools, + tool_choice: { + type: 'tool', + name: 'calculator', + } as ToolChoice, + }); +} +``` + +## System Prompts + +### System Prompt Variations + +```typescript +// Simple string system prompt +async function basicSystemPrompt() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: 'You are a helpful coding assistant specialized in Python.', + messages: [{ role: 'user', content: 'Help me write a function' }], + }); +} + +// Complex system prompt with caching +async function complexSystemPrompt() { + const systemPrompt = [ + { + type: 'text', + text: `You are an expert software engineer with the following expertise: + +1. Python development and best practices +2. Web frameworks like Django and FastAPI +3. Database design and optimization +4. Testing strategies and TDD +5. 
Code review and refactoring + +Guidelines for your responses: +- Always write clean, readable code +- Include proper error handling +- Add type hints when using Python +- Explain your reasoning +- Suggest improvements when applicable + +When reviewing code: +- Focus on functionality, performance, and maintainability +- Point out potential bugs or edge cases +- Suggest more pythonic approaches when relevant`, + cache_control: { type: 'ephemeral', ttl: '1h' }, + }, + ] as const; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages: [{ role: 'user', content: 'Review this Python function for me' }], + }); +} + +// Dynamic system prompt based on context +function buildSystemPrompt(userRole: string, expertise: string[]): string { + const basePrompt = `You are an AI assistant helping a ${userRole}.`; + + const expertisePrompt = expertise.length > 0 + ? `\n\nYour areas of expertise include: ${expertise.join(', ')}.` + : ''; + + const guidelines = ` + +Guidelines: +- Be helpful and accurate +- Explain complex concepts clearly +- Provide practical examples +- Ask for clarification when needed`; + + return basePrompt + expertisePrompt + guidelines; +} + +async function dynamicSystemPrompt() { + const systemPrompt = buildSystemPrompt('software developer', [ + 'JavaScript', 'TypeScript', 'React', 'Node.js' + ]); + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages: [{ role: 'user', content: 'Help me optimize this React component' }], + }); +} +``` + +## Content Block System + +### Understanding Content Blocks + +The Anthropic API uses a content block system where message content is always an array, even for simple text. 
+ +```typescript +import { + ContentBlockParam, + TextBlockParam, + ImageBlockParam, + DocumentBlockParam, + ToolUseBlockParam, + ToolResultBlockParam +} from '@anthropic-ai/sdk'; + +// Text content (most common) +const textContent: TextBlockParam = { + type: 'text', + text: 'Hello, Claude!', +}; + +// Image content +const imageContent: ImageBlockParam = { + type: 'image', + source: { + type: 'base64', + media_type: 'image/jpeg', + data: '/9j/4AAQSkZJRg...', // base64 encoded image + }, +}; + +// Document content with caching +const documentContent: DocumentBlockParam = { + type: 'document', + source: { + type: 'text', + data: 'Large document content...', + media_type: 'text/plain', + }, + cache_control: { type: 'ephemeral', ttl: '1h' }, + title: 'Important Document', + context: 'This document contains key information for the project', +}; + +// Tool use block (from assistant) +const toolUseContent: ToolUseBlockParam = { + type: 'tool_use', + id: 'tool_123', + name: 'calculator', + input: { expression: '2 + 2' }, +}; + +// Tool result block (from user) +const toolResultContent: ToolResultBlockParam = { + type: 'tool_result', + tool_use_id: 'tool_123', + content: 'Result: 4', +}; + +// Mixed content message +async function mixedContentExample() { + const mixedMessage: ContentBlockParam[] = [ + { + type: 'text', + text: 'Here is an image and a document to analyze:', + }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw0KGgoAAAANSUhEUgA...', // base64 image + }, + }, + { + type: 'document', + source: { + type: 'text', + data: 'Document content here...', + media_type: 'text/plain', + }, + title: 'Analysis Document', + }, + { + type: 'text', + text: 'What insights can you provide from these?', + }, + ]; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: mixedMessage }], + }); +} + +// Helper functions for content manipulation +function createTextBlock(text: string, cached = false): TextBlockParam { + const block: TextBlockParam = { + type: 'text', + text, + }; + + if (cached) { + block.cache_control = { type: 'ephemeral', ttl: '1h' }; + } + + return block; +} + +function createImageBlock(base64Data: string, mimeType: string): ImageBlockParam { + return { + type: 'image', + source: { + type: 'base64', + media_type: mimeType as any, + data: base64Data, + }, + }; +} + +// Extract text from response content blocks +function extractTextFromResponse(content: any[]): string { + return content + .filter(block => block.type === 'text') + .map(block => block.text) + .join('\n'); +} + +// Extract thinking content +function extractThinkingFromResponse(content: any[]): string | null { + const thinkingBlock = content.find(block => block.type === 'thinking'); + return thinkingBlock?.thinking || null; +} +``` + +## MessageStream Helper Class + +### Advanced MessageStream Usage + +```typescript +import { MessageStream, MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream'; + +class AdvancedMessageHandler { + private stream: MessageStream; + private content = ''; + private thinking = ''; + private toolCalls: any[] = []; + private citations: any[] = []; + + constructor(stream: MessageStream) { + this.stream = stream; + this.setupEventHandlers(); + } + + private setupEventHandlers() { + // Connection established + this.stream.on('connect', () => { + console.log('Stream connected'); + }); + + // Text content (delta and snapshot) + this.stream.on('text', (delta: string, 
snapshot: string) => { + process.stdout.write(delta); + this.content = snapshot; + }); + + // Thinking content (Claude's internal reasoning) + this.stream.on('thinking', (delta: string, snapshot: string) => { + console.log('[Thinking]', delta); + this.thinking = snapshot; + }); + + // Citations (when referencing documents) + this.stream.on('citation', (citation, citations) => { + console.log('Citation:', citation); + this.citations = citations; + }); + + // Content blocks (including tool calls) + this.stream.on('contentBlock', (block) => { + if (block.type === 'tool_use') { + console.log('Tool call:', block); + this.toolCalls.push(block); + } + }); + + // Raw stream events + this.stream.on('streamEvent', (event, snapshot) => { + // Handle any stream event + console.log('Stream event:', event.type); + }); + + // Final message + this.stream.on('finalMessage', (message) => { + console.log('\nFinal message received'); + this.handleFinalMessage(message); + }); + + // Error handling + this.stream.on('error', (error) => { + console.error('Stream error:', error); + }); + + // Stream end + this.stream.on('end', () => { + console.log('\nStream ended'); + }); + + // User abort + this.stream.on('abort', (error) => { + console.log('Stream aborted by user'); + }); + } + + private handleFinalMessage(message: any) { + console.log('Stop reason:', message.stop_reason); + console.log('Token usage:', message.usage); + + // Process thinking content if available + for (const block of message.content) { + if (block.type === 'thinking') { + console.log('Final thinking content:', block.thinking); + } + } + } + + async waitForCompletion() { + try { + const finalMessage = await this.stream.finalMessage(); + return { + message: finalMessage, + content: this.content, + thinking: this.thinking, + toolCalls: this.toolCalls, + citations: this.citations, + }; + } catch (error) { + if (this.stream.aborted) { + console.log('Stream was aborted'); + } else { + throw error; + } + } + } + + abort() { + this.stream.abort(); + } + + // Get request ID for debugging + getRequestId() { + return this.stream.request_id; + } + + // Access the underlying Response object + async getResponse() { + const { response } = await this.stream.withResponse(); + return response; + } +} + +// Usage example +async function advancedStreamExample() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 2000, + thinking: { + type: 'enabled', + budget_tokens: 1000, + }, + messages: [{ + role: 'user', + content: 'Analyze this complex problem and show your reasoning...' 
+ }], + }); + + const handler = new AdvancedMessageHandler(stream); + + // Optional: abort after 30 seconds + const timeoutId = setTimeout(() => { + handler.abort(); + }, 30000); + + try { + const result = await handler.waitForCompletion(); + clearTimeout(timeoutId); + + console.log('Final result:', { + contentLength: result.content.length, + thinkingLength: result.thinking.length, + toolCallCount: result.toolCalls.length, + citationCount: result.citations.length, + }); + + return result; + } catch (error) { + clearTimeout(timeoutId); + throw error; + } +} +``` + +## Thinking Tokens and Extended Reasoning + +### Enabling Extended Thinking + +```typescript +async function extendedThinkingExample() { + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 4000, + thinking: { + type: 'enabled', + budget_tokens: 2000, // Minimum 1024, must be < max_tokens + }, + messages: [{ + role: 'user', + content: `Solve this complex problem step by step: + +A company has 3 factories. Factory A produces 100 units/day, +Factory B produces 150 units/day, and Factory C produces 200 units/day. +If the company needs to fulfill an order of 10,000 units in the most +cost-efficient way, and the costs per unit are $5, $4, and $6 respectively, +what's the optimal production strategy?` + }], + }); + + // Extract thinking content + for (const block of response.content) { + if (block.type === 'thinking') { + console.log('Claude\'s thinking process:'); + console.log(block.thinking); + console.log('Signature:', block.signature); + } else if (block.type === 'text') { + console.log('\nFinal answer:'); + console.log(block.text); + } + } + + return response; +} + +// Disable thinking +async function disableThinking() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + thinking: { + type: 'disabled', + }, + messages: [{ role: 'user', content: 'Quick answer please' }], + }); +} + +// Streaming with thinking +async function streamThinking() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 3000, + thinking: { + type: 'enabled', + budget_tokens: 1500, + }, + messages: [{ + role: 'user', + content: 'Think through this carefully: How would you design a distributed cache?' 
+ }], + }); + + let thinkingContent = ''; + let responseContent = ''; + + stream.on('thinking', (delta, snapshot) => { + // Stream thinking content as it comes + process.stdout.write(`[THINKING] ${delta}`); + thinkingContent = snapshot; + }); + + stream.on('text', (delta, snapshot) => { + // Stream final response + process.stdout.write(delta); + responseContent = snapshot; + }); + + const finalMessage = await stream.finalMessage(); + + return { + thinking: thinkingContent, + response: responseContent, + usage: finalMessage.usage, + }; +} +``` + +## Complete Implementation Example + +Here's a comprehensive example that combines all the features: + +```typescript +import Anthropic, { + MessageParam, + Message, + Tool, + ToolUseBlock, + AnthropicError +} from '@anthropic-ai/sdk'; + +class AnthropicClient { + private client: Anthropic; + private conversation: MessageParam[] = []; + private totalTokens = 0; + + constructor(apiKey: string) { + this.client = new Anthropic({ apiKey }); + } + + async sendMessage( + content: string, + options: { + stream?: boolean; + tools?: Tool[]; + thinking?: boolean; + systemPrompt?: string; + maxTokens?: number; + temperature?: number; + cached?: boolean; + } = {} + ) { + const { + stream = false, + tools = [], + thinking = false, + systemPrompt, + maxTokens = 1024, + temperature = 1.0, + cached = false, + } = options; + + // Add user message + this.conversation.push({ + role: 'user', + content: cached + ? [{ type: 'text', text: content, cache_control: { type: 'ephemeral', ttl: '1h' } }] + : content, + }); + + const params: any = { + model: 'claude-sonnet-4-20250514', + max_tokens: maxTokens, + temperature, + messages: [...this.conversation], + }; + + if (systemPrompt) { + params.system = systemPrompt; + } + + if (tools.length > 0) { + params.tools = tools; + params.tool_choice = { type: 'auto' }; + } + + if (thinking) { + params.thinking = { + type: 'enabled', + budget_tokens: Math.min(maxTokens / 2, 2000), + }; + } + + try { + if (stream) { + return await this.handleStreamingResponse(params, tools); + } else { + return await this.handleSingleResponse(params, tools); + } + } catch (error) { + return this.handleError(error); + } + } + + private async handleSingleResponse(params: any, tools: Tool[]) { + const response = await this.client.messages.create(params); + + // Track tokens + this.totalTokens += response.usage.input_tokens + response.usage.output_tokens; + + // Add assistant response + this.conversation.push({ + role: 'assistant', + content: response.content, + }); + + // Handle tool calls + const toolCalls = response.content.filter( + (block): block is ToolUseBlock => block.type === 'tool_use' + ); + + if (toolCalls.length > 0 && tools.length > 0) { + await this.handleToolCalls(toolCalls, params, tools); + } + + return { + content: this.extractText(response.content), + thinking: this.extractThinking(response.content), + toolCalls, + usage: response.usage, + stopReason: response.stop_reason, + }; + } + + private async handleStreamingResponse(params: any, tools: Tool[]) { + const stream = this.client.messages.stream(params); + + let content = ''; + let thinking = ''; + const toolCalls: ToolUseBlock[] = []; + let finalMessage: Message; + + return new Promise((resolve, reject) => { + stream.on('text', (delta, snapshot) => { + process.stdout.write(delta); + content = snapshot; + }); + + stream.on('thinking', (delta, snapshot) => { + console.log(`[THINKING] ${delta}`); + thinking = snapshot; + }); + + stream.on('contentBlock', (block) => { + if (block.type 
=== 'tool_use') { + toolCalls.push(block); + } + }); + + stream.on('finalMessage', async (message) => { + finalMessage = message; + this.totalTokens += message.usage.input_tokens + message.usage.output_tokens; + + this.conversation.push({ + role: 'assistant', + content: message.content, + }); + + if (toolCalls.length > 0 && tools.length > 0) { + try { + await this.handleToolCalls(toolCalls, params, tools); + } catch (error) { + reject(error); + return; + } + } + + resolve({ + content, + thinking, + toolCalls, + usage: message.usage, + stopReason: message.stop_reason, + }); + }); + + stream.on('error', reject); + }); + } + + private async handleToolCalls(toolCalls: ToolUseBlock[], params: any, tools: Tool[]) { + // Execute tool calls + for (const toolCall of toolCalls) { + const result = await this.executeToolCall(toolCall); + + this.conversation.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: toolCall.id, + content: result.content, + is_error: result.isError, + }], + }); + } + + // Get response after tool execution + const followUpResponse = await this.client.messages.create({ + ...params, + messages: [...this.conversation], + }); + + this.conversation.push({ + role: 'assistant', + content: followUpResponse.content, + }); + + this.totalTokens += followUpResponse.usage.input_tokens + followUpResponse.usage.output_tokens; + } + + private async executeToolCall(toolCall: ToolUseBlock): Promise<{ content: string; isError: boolean }> { + // Mock tool implementations + const tools = { + calculator: (args: any) => { + try { + const result = eval(args.expression); + return { content: `Result: ${result}`, isError: false }; + } catch (error) { + return { content: `Error: ${error.message}`, isError: true }; + } + }, + weather: (args: any) => { + return { content: `Weather in ${args.location}: 22°C, sunny`, isError: false }; + }, + }; + + const toolName = toolCall.name; + if (toolName in tools) { + return tools[toolName](toolCall.input); + } else { + return { content: `Unknown tool: ${toolName}`, isError: true }; + } + } + + private extractText(content: any[]): string { + return content + .filter(block => block.type === 'text') + .map(block => block.text) + .join('\n'); + } + + private extractThinking(content: any[]): string { + const thinkingBlock = content.find(block => block.type === 'thinking'); + return thinkingBlock?.thinking || ''; + } + + private handleError(error: any) { + if (error instanceof AnthropicError) { + console.error('Anthropic API error:', error.message); + + if (error.status === 429) { + console.log('Rate limited - should retry with backoff'); + } else if (error.status === 401) { + console.log('Authentication failed - check API key'); + } + } else { + console.error('Unexpected error:', error); + } + + throw error; + } + + // Utility methods + getConversationHistory(): MessageParam[] { + return [...this.conversation]; + } + + getTotalTokens(): number { + return this.totalTokens; + } + + clearConversation(): void { + this.conversation = []; + this.totalTokens = 0; + } + + async countTokens(messages: MessageParam[], systemPrompt?: string): Promise { + const params: any = { + model: 'claude-sonnet-4-20250514', + messages, + }; + + if (systemPrompt) { + params.system = systemPrompt; + } + + const result = await this.client.messages.countTokens(params); + return result.input_tokens; + } +} + +// Usage example +async function completeExample() { + const client = new AnthropicClient(process.env.ANTHROPIC_API_KEY!); + + const tools: Tool[] = [ + { + name: 
'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { type: 'string' }, + }, + required: ['expression'], + }, + }, + ]; + + // Simple message + let result = await client.sendMessage('Hello, Claude!'); + console.log('Response:', result.content); + + // Message with thinking + result = await client.sendMessage( + 'Solve this complex math problem: What is the optimal way to arrange 10 people around a circular table?', + { thinking: true, maxTokens: 2000 } + ); + console.log('Thinking:', result.thinking); + console.log('Response:', result.content); + + // Streaming with tools + result = await client.sendMessage( + 'Calculate 15 * 23 and explain the steps', + { stream: true, tools, thinking: true } + ); + + console.log('Total tokens used:', client.getTotalTokens()); +} +``` + +## Key Implementation Notes + +1. **Content is Always an Array**: Even simple text messages use the content block system +2. **Error Handling**: The SDK provides specific error types for different HTTP status codes +3. **Streaming Events**: Use MessageStream for easier event handling, or raw streaming for more control +4. **Token Counting**: Use the dedicated countTokens API for accurate estimates +5. **Caching**: Add cache_control to content blocks, not to the message level +6. **Tool Calls**: Always check stop_reason for 'tool_use' and handle the tool execution flow +7. **Thinking**: Requires explicit configuration and sufficient token budget +8. **Abort**: Use AbortController for request cancellation, or MessageStream.abort() for streams + +This guide covers all the essential patterns for working with the Anthropic SDK effectively. \ No newline at end of file diff --git a/packages/ai/gemini-api.md b/packages/ai/gemini-api.md new file mode 100644 index 00000000..6b8ff549 --- /dev/null +++ b/packages/ai/gemini-api.md @@ -0,0 +1,1233 @@ +# Google Gemini SDK Implementation Guide + +This document provides comprehensive implementation guidance for the Google Gemini SDK (`@google/genai`) showing exactly how to implement all required features for our unified AI API. + +## Table of Contents + +1. [Setup and Basic Usage](#setup-and-basic-usage) +2. [Streaming Responses](#streaming-responses) +3. [Aborting Requests](#aborting-requests) +4. [Error Handling](#error-handling) +5. [Stop Reasons](#stop-reasons) +6. [Message History and Serialization](#message-history-and-serialization) +7. [Token Counting](#token-counting) +8. [Context Caching](#context-caching) +9. [Function Calling (Tools)](#function-calling-tools) +10. [System Instructions](#system-instructions) +11. [Parts System for Content](#parts-system-for-content) +12. [Thinking Tokens](#thinking-tokens) +13. [Peculiarities and Gotchas](#peculiarities-and-gotchas) + +## Setup and Basic Usage + +### Installation and Initialization + +```typescript +import { GoogleGenAI, type GenerateContentResponse } from '@google/genai'; + +// Initialize client +const client = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + // Optional: Use Vertex AI instead + // vertexai: true, + // project: 'your-project-id', + // location: 'us-central1', +}); + +// Basic non-streaming request +const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: 'Hello, how are you?' 
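+  // contents can also be an array of Content objects, as in the conversation examples below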
+});
+
+console.log(response.text);
+```
+
+### Key Types and Interfaces
+
+```typescript
+// Core types from the SDK
+interface GoogleGenAIOptions {
+  apiKey?: string;
+  vertexai?: boolean;
+  project?: string;
+  location?: string;
+  apiVersion?: string;
+}
+
+interface Content {
+  parts?: Part[];
+  role?: string; // 'user' | 'model'
+}
+
+interface Part {
+  text?: string;
+  thought?: boolean; // For thinking content
+  functionCall?: FunctionCall;
+  functionResponse?: FunctionResponse;
+  inlineData?: Blob;
+  fileData?: FileData;
+}
+
+interface GenerateContentResponse {
+  candidates?: Candidate[];
+  usageMetadata?: GenerateContentResponseUsageMetadata;
+  promptFeedback?: GenerateContentResponsePromptFeedback;
+  text: string | undefined; // Convenience getter
+}
+```
+
+## Streaming Responses
+
+Gemini supports streaming via `generateContentStream` which returns an `AsyncGenerator`:
+
+```typescript
+async function streamContent() {
+  const stream = await client.models.generateContentStream({
+    model: 'gemini-2.0-flash-exp',
+    contents: 'Write a short story about a robot.'
+  });
+
+  let fullText = '';
+  for await (const chunk of stream) {
+    // Each chunk is a GenerateContentResponse
+    const chunkText = chunk.text;
+    if (chunkText) {
+      fullText += chunkText;
+      process.stdout.write(chunkText); // Stream to output
+    }
+
+    // Check for function calls in streaming
+    if (chunk.candidates?.[0]?.content?.parts) {
+      for (const part of chunk.candidates[0].content.parts) {
+        if (part.functionCall) {
+          console.log('Function call:', part.functionCall);
+        }
+        if (part.thought) {
+          console.log('Thinking:', part.text);
+        }
+      }
+    }
+  }
+
+  return fullText;
+}
+```
+
+### Streaming with Thinking Tokens
+
+```typescript
+async function streamWithThinking() {
+  const stream = await client.models.generateContentStream({
+    model: 'gemini-2.0-flash-thinking-exp-1219',
+    contents: 'Solve this math problem: 2x + 5 = 13'
+  });
+
+  let thinking = '';
+  let response = '';
+
+  for await (const chunk of stream) {
+    if (chunk.candidates?.[0]?.content?.parts) {
+      for (const part of chunk.candidates[0].content.parts) {
+        if (part.thought && part.text) {
+          thinking += part.text;
+          console.log('[THINKING]', part.text);
+        } else if (part.text && !part.thought) {
+          response += part.text;
+          console.log('[RESPONSE]', part.text);
+        }
+      }
+    }
+  }
+
+  return { thinking, response };
+}
+```
+
+## Aborting Requests
+
+Gemini supports request cancellation via `AbortSignal`:
+
+```typescript
+class GeminiClient {
+  private currentController: AbortController | null = null;
+
+  async generateWithCancellation(prompt: string): Promise<string> {
+    // Create new abort controller
+    this.currentController = new AbortController();
+
+    try {
+      const response = await client.models.generateContent({
+        model: 'gemini-2.0-flash-exp',
+        contents: prompt,
+        abortSignal: this.currentController.signal
+      });
+
+      return response.text || '';
+    } catch (error) {
+      if (error.name === 'AbortError') {
+        console.log('Request was cancelled');
+        throw new Error('Request cancelled by user');
+      }
+      throw error;
+    } finally {
+      this.currentController = null;
+    }
+  }
+
+  async generateStreamWithCancellation(prompt: string): Promise<AsyncGenerator<string>> {
+    this.currentController = new AbortController();
+
+    try {
+      const stream = await client.models.generateContentStream({
+        model: 'gemini-2.0-flash-exp',
+        contents: prompt,
+        abortSignal: this.currentController.signal
+      });
+
+      return this.processStream(stream);
+    } catch (error) {
+      if (error.name === 'AbortError') {
+        throw new
Error('Request cancelled by user'); + } + throw error; + } + } + + private async* processStream(stream: AsyncGenerator): AsyncGenerator { + try { + for await (const chunk of stream) { + if (chunk.text) { + yield chunk.text; + } + } + } catch (error) { + if (error.name === 'AbortError') { + return; // Exit generator cleanly + } + throw error; + } finally { + this.currentController = null; + } + } + + // Cancel current request + cancel(): void { + if (this.currentController) { + this.currentController.abort(); + } + } +} +``` + +## Error Handling + +### Error Types and Handling + +```typescript +import { ApiError } from '@google/genai'; + +interface GeminiErrorInfo { + type: 'rate_limit' | 'auth' | 'invalid_request' | 'network' | 'server' | 'unknown'; + message: string; + statusCode?: number; + retryable: boolean; +} + +function handleGeminiError(error: unknown): GeminiErrorInfo { + if (error instanceof ApiError) { + const statusCode = error.status; + + switch (statusCode) { + case 401: + case 403: + return { + type: 'auth', + message: 'Authentication failed - check API key', + statusCode, + retryable: false + }; + + case 429: + return { + type: 'rate_limit', + message: 'Rate limit exceeded', + statusCode, + retryable: true + }; + + case 400: + return { + type: 'invalid_request', + message: error.message || 'Invalid request parameters', + statusCode, + retryable: false + }; + + case 500: + case 502: + case 503: + case 504: + return { + type: 'server', + message: 'Server error - try again later', + statusCode, + retryable: true + }; + + default: + return { + type: 'unknown', + message: error.message || 'Unknown API error', + statusCode, + retryable: false + }; + } + } + + if (error instanceof Error) { + if (error.name === 'AbortError') { + return { + type: 'network', + message: 'Request was cancelled', + retryable: false + }; + } + + return { + type: 'network', + message: error.message, + retryable: true + }; + } + + return { + type: 'unknown', + message: 'Unknown error occurred', + retryable: false + }; +} + +// Usage with retry logic +async function generateWithRetry(prompt: string, maxRetries = 3): Promise { + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: prompt + }); + + return response.text || ''; + } catch (error) { + const errorInfo = handleGeminiError(error); + + if (!errorInfo.retryable || attempt === maxRetries) { + throw new Error(`${errorInfo.type}: ${errorInfo.message}`); + } + + // Exponential backoff for retryable errors + const delay = Math.pow(2, attempt - 1) * 1000; + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + + throw new Error('Max retries exceeded'); +} +``` + +## Stop Reasons + +Gemini provides finish reasons in the response candidates: + +```typescript +enum FinishReason { + FINISH_REASON_UNSPECIFIED = 'FINISH_REASON_UNSPECIFIED', + STOP = 'STOP', // Natural stop + MAX_TOKENS = 'MAX_TOKENS', // Hit token limit + SAFETY = 'SAFETY', // Safety filter triggered + RECITATION = 'RECITATION', // Recitation filter + LANGUAGE = 'LANGUAGE', // Language not supported + OTHER = 'OTHER' +} + +function extractStopReason(response: GenerateContentResponse): string | null { + const candidate = response.candidates?.[0]; + if (!candidate) return null; + + return candidate.finishReason || null; +} + +// Handle different stop reasons +function handleStopReason(response: GenerateContentResponse): void { + const reason = extractStopReason(response); 
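+  // Note: with generateContentStream, finishReason typically appears only on the final chunk's candidate.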
+ + switch (reason) { + case 'STOP': + console.log('Response completed naturally'); + break; + + case 'MAX_TOKENS': + console.log('Response truncated due to token limit'); + break; + + case 'SAFETY': + console.log('Response blocked by safety filters'); + // Check promptFeedback for details + if (response.promptFeedback?.blockReason) { + console.log('Block reason:', response.promptFeedback.blockReason); + } + break; + + case 'RECITATION': + console.log('Response blocked due to recitation concerns'); + break; + + default: + if (reason) { + console.log('Unexpected finish reason:', reason); + } + } +} +``` + +## Message History and Serialization + +### Managing Conversation History + +```typescript +interface SerializableMessage { + role: 'user' | 'model'; + content: string; + functionCalls?: FunctionCall[]; + functionResponses?: FunctionResponse[]; + thinking?: string; +} + +interface SerializableSession { + messages: SerializableMessage[]; + totalUsage: { + promptTokens: number; + candidatesTokens: number; + totalTokens: number; + thoughtsTokens?: number; + }; +} + +class GeminiConversation { + private messages: Content[] = []; + private totalUsage = { + promptTokens: 0, + candidatesTokens: 0, + totalTokens: 0, + thoughtsTokens: 0 + }; + + addUserMessage(text: string): void { + this.messages.push({ + role: 'user', + parts: [{ text }] + }); + } + + addAssistantMessage(response: GenerateContentResponse): void { + const candidate = response.candidates?.[0]; + if (!candidate?.content) return; + + this.messages.push(candidate.content); + + // Update usage + if (response.usageMetadata) { + this.totalUsage.promptTokens += response.usageMetadata.promptTokenCount || 0; + this.totalUsage.candidatesTokens += response.usageMetadata.candidatesTokenCount || 0; + this.totalUsage.totalTokens += response.usageMetadata.totalTokenCount || 0; + this.totalUsage.thoughtsTokens += response.usageMetadata.thoughtsTokenCount || 0; + } + } + + async sendMessage(text: string): Promise { + this.addUserMessage(text); + + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: this.messages + }); + + this.addAssistantMessage(response); + return response.text || ''; + } + + // Serialize for persistence + serialize(): SerializableSession { + const messages: SerializableMessage[] = []; + + for (const content of this.messages) { + const message: SerializableMessage = { + role: (content.role as 'user' | 'model') || 'user', + content: '', + functionCalls: [], + functionResponses: [], + thinking: '' + }; + + for (const part of content.parts || []) { + if (part.text) { + if (part.thought) { + message.thinking += part.text; + } else { + message.content += part.text; + } + } + if (part.functionCall) { + message.functionCalls!.push(part.functionCall); + } + if (part.functionResponse) { + message.functionResponses!.push(part.functionResponse); + } + } + + messages.push(message); + } + + return { + messages, + totalUsage: { ...this.totalUsage } + }; + } + + // Deserialize from storage + static fromSerialized(session: SerializableSession): GeminiConversation { + const conversation = new GeminiConversation(); + conversation.totalUsage = { ...session.totalUsage }; + + for (const msg of session.messages) { + const parts: Part[] = []; + + if (msg.content) { + parts.push({ text: msg.content }); + } + + if (msg.thinking) { + parts.push({ text: msg.thinking, thought: true }); + } + + for (const funcCall of msg.functionCalls || []) { + parts.push({ functionCall: funcCall }); + } + + for (const 
funcResp of msg.functionResponses || []) { + parts.push({ functionResponse: funcResp }); + } + + conversation.messages.push({ + role: msg.role, + parts + }); + } + + return conversation; + } +} +``` + +## Token Counting + +### Understanding Gemini Token Usage + +```typescript +interface TokenUsage { + promptTokens: number; + candidatesTokens: number; // Output tokens + totalTokens: number; + thoughtsTokens?: number; // Thinking tokens (reasoning models) + cachedContentTokens?: number; // Cache read tokens +} + +function extractTokenUsage(response: GenerateContentResponse): TokenUsage { + const usage = response.usageMetadata; + + return { + promptTokens: usage?.promptTokenCount || 0, + candidatesTokens: usage?.candidatesTokenCount || 0, + totalTokens: usage?.totalTokenCount || 0, + thoughtsTokens: usage?.thoughtsTokenCount || 0, + cachedContentTokens: usage?.cachedContentTokenCount || 0 + }; +} + +// Count tokens before sending (estimation) +async function countTokens(content: string | Content[]): Promise { + const response = await client.models.computeTokens({ + model: 'gemini-2.0-flash-exp', + contents: typeof content === 'string' + ? [{ parts: [{ text: content }] }] + : content + }); + + return response.totalTokens || 0; +} + +// Token usage accumulation +class TokenTracker { + private usage = { + totalPromptTokens: 0, + totalCandidatesTokens: 0, + totalThoughtsTokens: 0, + totalCachedTokens: 0, + totalRequests: 0 + }; + + addUsage(response: GenerateContentResponse): void { + const tokenUsage = extractTokenUsage(response); + + this.usage.totalPromptTokens += tokenUsage.promptTokens; + this.usage.totalCandidatesTokens += tokenUsage.candidatesTokens; + this.usage.totalThoughtsTokens += tokenUsage.thoughtsTokens || 0; + this.usage.totalCachedTokens += tokenUsage.cachedContentTokens || 0; + this.usage.totalRequests++; + } + + getStats() { + return { + ...this.usage, + totalTokens: this.usage.totalPromptTokens + this.usage.totalCandidatesTokens, + averageTokensPerRequest: this.usage.totalRequests > 0 + ? (this.usage.totalPromptTokens + this.usage.totalCandidatesTokens) / this.usage.totalRequests + : 0 + }; + } +} +``` + +## Context Caching + +Gemini supports context caching to reduce costs for repeated large prompts: + +```typescript +import { type CachedContent } from '@google/genai'; + +class GeminiCache { + async createCache( + systemInstruction: string, + contents: Content[], + ttlHours = 1 + ): Promise { + const cache = await client.caches.create({ + model: 'gemini-2.0-flash-exp', + systemInstruction: { parts: [{ text: systemInstruction }] }, + contents, + ttl: `${ttlHours * 3600}s` // Convert hours to seconds + }); + + return cache; + } + + async generateWithCache( + cachedContent: CachedContent, + userMessage: string + ): Promise { + return await client.models.generateContent({ + model: cachedContent.model || 'gemini-2.0-flash-exp', + cachedContent: cachedContent.name, + contents: [{ + role: 'user', + parts: [{ text: userMessage }] + }] + }); + } + + async listCaches(): Promise { + const caches = []; + for await (const cache of client.caches.list()) { + caches.push(cache); + } + return caches; + } + + async deleteCache(cacheName: string): Promise { + await client.caches.delete({ name: cacheName }); + } + + // Example: Cache a large document for repeated analysis + async createDocumentCache(document: string): Promise { + const systemInstruction = ` + You are a document analysis assistant. 
The user will provide a large document, + and you should be ready to answer questions about it, summarize it, or extract + information from it. + `; + + const contents = [{ + role: 'user' as const, + parts: [{ text: `Please analyze this document:\n\n${document}` }] + }]; + + return this.createCache(systemInstruction, contents, 24); // Cache for 24 hours + } +} + +// Usage example +async function demonstrateCache() { + const cache = new GeminiCache(); + + // Create cache with large document + const document = "... very large document content ..."; + const cachedContent = await cache.createDocumentCache(document); + + // Now ask questions using the cache (saves tokens!) + const response1 = await cache.generateWithCache( + cachedContent, + "What are the key points in this document?" + ); + + const response2 = await cache.generateWithCache( + cachedContent, + "Can you summarize the conclusions?" + ); + + // Clean up when done + await cache.deleteCache(cachedContent.name!); +} +``` + +## Function Calling (Tools) + +### Basic Function Calling Setup + +```typescript +interface ToolDefinition { + name: string; + description: string; + parameters: { + type: 'object'; + properties: Record; + required: string[]; + }; +} + +// Define tools +const tools: ToolDefinition[] = [{ + name: 'get_weather', + description: 'Get current weather for a location', + parameters: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'City name or location' + }, + units: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature units' + } + }, + required: ['location'] + } +}]; + +// Convert to Gemini format +function createGeminiTools(tools: ToolDefinition[]) { + return [{ + functionDeclarations: tools.map(tool => ({ + name: tool.name, + description: tool.description, + parametersJsonSchema: tool.parameters + })) + }]; +} + +// Function call handler +async function executeFunction(functionCall: FunctionCall): Promise { + const { name, args } = functionCall; + const params = typeof args === 'string' ? 
JSON.parse(args) : args; + + switch (name) { + case 'get_weather': + return await getWeatherData(params.location, params.units); + default: + throw new Error(`Unknown function: ${name}`); + } +} + +// Mock weather function +async function getWeatherData(location: string, units = 'celsius') { + return { + location, + temperature: 22, + conditions: 'sunny', + units + }; +} +``` + +### Complete Function Calling Flow + +```typescript +class GeminiFunctionCalling { + private tools: ToolDefinition[]; + + constructor(tools: ToolDefinition[]) { + this.tools = tools; + } + + async processWithTools(messages: Content[]): Promise { + let currentMessages = [...messages]; + let iterations = 0; + const maxIterations = 5; + + while (iterations < maxIterations) { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: currentMessages, + tools: createGeminiTools(this.tools), + toolConfig: { + functionCallingConfig: { + mode: 'AUTO' // Let model decide when to call functions + } + } + }); + + const candidate = response.candidates?.[0]; + if (!candidate?.content) break; + + // Add assistant response to conversation + currentMessages.push(candidate.content); + + // Check for function calls + const functionCalls = this.extractFunctionCalls(candidate.content); + + if (functionCalls.length === 0) { + // No more function calls, return final response + return response.text || ''; + } + + // Execute function calls + for (const functionCall of functionCalls) { + try { + const result = await executeFunction(functionCall); + + // Add function response to conversation + currentMessages.push({ + role: 'user', + parts: [{ + functionResponse: { + name: functionCall.name, + id: functionCall.id, + response: { result } + } + }] + }); + } catch (error) { + // Add error response + currentMessages.push({ + role: 'user', + parts: [{ + functionResponse: { + name: functionCall.name, + id: functionCall.id, + response: { error: error.message } + } + }] + }); + } + } + + iterations++; + } + + throw new Error('Max function calling iterations exceeded'); + } + + private extractFunctionCalls(content: Content): FunctionCall[] { + const calls: FunctionCall[] = []; + + for (const part of content.parts || []) { + if (part.functionCall) { + calls.push(part.functionCall); + } + } + + return calls; + } + + // Streaming version with function calls + async *processStreamWithTools(messages: Content[]): AsyncGenerator<{ + type: 'content' | 'function_call' | 'function_result'; + content?: string; + functionCall?: FunctionCall; + functionResult?: any; + }> { + const stream = await client.models.generateContentStream({ + model: 'gemini-2.0-flash-exp', + contents: messages, + tools: createGeminiTools(this.tools), + toolConfig: { + functionCallingConfig: { mode: 'AUTO' } + } + }); + + let pendingFunctionCalls: FunctionCall[] = []; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content) continue; + + for (const part of candidate.content.parts || []) { + if (part.text && !part.thought) { + yield { type: 'content', content: part.text }; + } + + if (part.functionCall) { + pendingFunctionCalls.push(part.functionCall); + yield { type: 'function_call', functionCall: part.functionCall }; + } + } + } + + // Execute any pending function calls + for (const functionCall of pendingFunctionCalls) { + try { + const result = await executeFunction(functionCall); + yield { type: 'function_result', functionResult: result }; + } catch (error) { + yield { + type: 
'function_result', + functionResult: { error: error.message } + }; + } + } + } +} +``` + +## System Instructions + +Gemini handles system instructions differently from other providers: + +```typescript +// System instruction is a separate parameter, not part of messages +async function generateWithSystemInstruction( + systemPrompt: string, + userMessage: string +): Promise { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + systemInstruction: { + parts: [{ text: systemPrompt }] + }, + contents: [{ + role: 'user', + parts: [{ text: userMessage }] + }] + }); + + return response.text || ''; +} + +// For conversation with system instruction +class GeminiConversationWithSystem { + private systemInstruction: Content; + private messages: Content[] = []; + + constructor(systemPrompt: string) { + this.systemInstruction = { + parts: [{ text: systemPrompt }] + }; + } + + async sendMessage(text: string): Promise { + this.messages.push({ + role: 'user', + parts: [{ text }] + }); + + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + systemInstruction: this.systemInstruction, + contents: this.messages + }); + + const candidate = response.candidates?.[0]; + if (candidate?.content) { + this.messages.push(candidate.content); + } + + return response.text || ''; + } + + updateSystemInstruction(newPrompt: string): void { + this.systemInstruction = { + parts: [{ text: newPrompt }] + }; + } +} +``` + +## Parts System for Content + +Understanding Gemini's parts-based content system: + +```typescript +// Text content +const textPart: Part = { + text: 'Hello, world!' +}; + +// Thinking content (for reasoning models) +const thinkingPart: Part = { + text: 'Let me think about this problem...', + thought: true +}; + +// Function call +const functionCallPart: Part = { + functionCall: { + name: 'get_weather', + args: { location: 'San Francisco' } + } +}; + +// Function response +const functionResponsePart: Part = { + functionResponse: { + name: 'get_weather', + response: { temperature: 72, conditions: 'sunny' } + } +}; + +// Image data (inline) +const imagePart: Part = { + inlineData: { + mimeType: 'image/jpeg', + data: 'base64-encoded-image-data' + } +}; + +// File reference +const filePart: Part = { + fileData: { + mimeType: 'image/jpeg', + fileUri: 'gs://bucket/image.jpg' + } +}; + +// Creating multi-part content +const multiPartContent: Content = { + role: 'user', + parts: [ + { text: 'What is in this image?' 
}, + { + inlineData: { + mimeType: 'image/jpeg', + data: await imageToBase64('path/to/image.jpg') + } + } + ] +}; + +// Utility functions for parts +function createTextPart(text: string): Part { + return { text }; +} + +function createThinkingPart(text: string): Part { + return { text, thought: true }; +} + +function createImagePart(imageData: string, mimeType: string): Part { + return { + inlineData: { + mimeType, + data: imageData + } + }; +} + +async function imageToBase64(filePath: string): Promise { + const fs = await import('fs/promises'); + const buffer = await fs.readFile(filePath); + return buffer.toString('base64'); +} +``` + +## Thinking Tokens + +Gemini thinking models (like `gemini-2.0-flash-thinking-exp-1219`) provide reasoning traces: + +```typescript +interface ThinkingExtractor { + thinking: string; + response: string; + thinkingTokens: number; + responseTokens: number; +} + +function extractThinking(response: GenerateContentResponse): ThinkingExtractor { + let thinking = ''; + let responseText = ''; + + const candidate = response.candidates?.[0]; + if (candidate?.content?.parts) { + for (const part of candidate.content.parts) { + if (part.text) { + if (part.thought) { + thinking += part.text; + } else { + responseText += part.text; + } + } + } + } + + const usage = response.usageMetadata; + + return { + thinking, + response: responseText, + thinkingTokens: usage?.thoughtsTokenCount || 0, + responseTokens: usage?.candidatesTokenCount || 0 + }; +} + +// Streaming thinking extraction +async function streamWithThinkingExtraction(prompt: string) { + const stream = await client.models.generateContentStream({ + model: 'gemini-2.0-flash-thinking-exp-1219', + contents: prompt + }); + + let thinkingContent = ''; + let responseContent = ''; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content?.parts) continue; + + for (const part of candidate.content.parts) { + if (part.text) { + if (part.thought) { + thinkingContent += part.text; + console.log('[THINKING DELTA]', part.text); + } else { + responseContent += part.text; + console.log('[RESPONSE DELTA]', part.text); + } + } + } + } + + return { + thinking: thinkingContent, + response: responseContent + }; +} + +// Enable thinking for models that support it +async function generateWithThinking(prompt: string, model = 'gemini-2.0-flash-thinking-exp-1219') { + const response = await client.models.generateContent({ + model, + contents: prompt + }); + + return extractThinking(response); +} +``` + +## Peculiarities and Gotchas + +### Key Differences from Other APIs + +1. **System Instructions**: Separate parameter, not part of message history +2. **Parts-based Content**: Content is split into parts, each with specific types +3. **Thinking Detection**: Must check `part.thought` flag to identify reasoning content +4. **Function Calls**: Embedded in parts, not separate message types +5. **Role Names**: Uses 'model' instead of 'assistant' for AI responses +6. 
**Streaming**: Returns full `GenerateContentResponse` objects, not deltas
+
+### Common Pitfalls
+
+```typescript
+// ❌ Wrong: Treating text as complete response
+const response = await client.models.generateContent({...});
+console.log(response.candidates[0].content.parts[0].text); // May miss other parts
+
+// ✅ Correct: Use convenience getter or process all parts
+console.log(response.text); // Concatenates all text parts automatically
+
+// ❌ Wrong: Mixing system instruction with messages
+const messages = [
+  { role: 'system', parts: [{ text: 'You are helpful' }] }, // Not supported
+  { role: 'user', parts: [{ text: 'Hello' }] }
+];
+
+// ✅ Correct: Separate system instruction
+const response = await client.models.generateContent({
+  systemInstruction: { parts: [{ text: 'You are helpful' }] },
+  contents: [{ role: 'user', parts: [{ text: 'Hello' }] }]
+});
+
+// ❌ Wrong: Assuming single part responses
+for await (const chunk of stream) {
+  console.log(chunk.text); // May miss function calls or thinking
+}
+
+// ✅ Correct: Process all parts
+for await (const chunk of stream) {
+  const candidate = chunk.candidates?.[0];
+  if (candidate?.content?.parts) {
+    for (const part of candidate.content.parts) {
+      if (part.text && !part.thought) {
+        console.log('[RESPONSE]', part.text);
+      } else if (part.text && part.thought) {
+        console.log('[THINKING]', part.text);
+      } else if (part.functionCall) {
+        console.log('[FUNCTION CALL]', part.functionCall);
+      }
+    }
+  }
+}
+```
+
+### Performance Tips
+
+1. **Use streaming** for better user experience with long responses
+2. **Cache large prompts** to reduce token costs
+3. **Batch token counting** when possible
+4. **Set appropriate `abortSignal` timeouts** for long-running requests
+5. **Handle function calls efficiently** to avoid timeout issues
+
+### Model-Specific Behaviors
+
+```typescript
+// Different models have different capabilities
+const modelCapabilities = {
+  'gemini-2.0-flash-exp': {
+    thinking: false,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 1000000
+  },
+  'gemini-2.0-flash-thinking-exp-1219': {
+    thinking: true,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 32768
+  },
+  'gemini-1.5-pro': {
+    thinking: false,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 2000000
+  }
+};
+
+// Check model capabilities before using features
+function supportsThinking(model: string): boolean {
+  return model.includes('thinking');
+}
+
+function getMaxTokens(model: string): number {
+  return modelCapabilities[model]?.maxTokens || 32768;
+}
+```
+
+This comprehensive guide covers all the essential aspects of implementing Gemini API features. The key is understanding Gemini's parts-based content system and properly handling the different types of content (text, thinking, function calls) that can appear in responses.
\ No newline at end of file
diff --git a/packages/ai/openai-api.md b/packages/ai/openai-api.md
new file mode 100644
index 00000000..cefe9ac3
--- /dev/null
+++ b/packages/ai/openai-api.md
@@ -0,0 +1,2320 @@
+# OpenAI SDK Implementation Guide
+
+This document provides a comprehensive guide to implementing the required features using the OpenAI SDK v5.12.2. All examples are based on actual usage patterns from the pi-mono codebase and include real TypeScript types from the SDK.
+
+## Table of Contents
+
+1. [Basic Setup](#basic-setup)
+2. [Streaming Responses](#streaming-responses)
+3. [Aborting Requests](#aborting-requests)
+4. [Error Handling](#error-handling)
+5. [Stop Reasons](#stop-reasons)
+6. [Message History & Serialization](#message-history--serialization)
+7. [Token Counting](#token-counting)
+8. [Caching](#caching)
+9. [Chat Completions vs Responses API](#chat-completions-vs-responses-api)
+10. [Tool/Function Calling](#toolfunction-calling)
+11. [System Prompts](#system-prompts)
+12. [Provider-Specific Features](#provider-specific-features)
+13. [Complete Implementation Examples](#complete-implementation-examples)
+
+## Basic Setup
+
+```typescript
+import OpenAI from "openai";
+
+// Basic client setup
+const client = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+  baseURL: "https://api.openai.com/v1", // Optional, default shown
+});
+
+// For other providers (Groq, Anthropic OpenAI-compatible, etc.)
+const groqClient = new OpenAI({
+  apiKey: process.env.GROQ_API_KEY,
+  baseURL: "https://api.groq.com/openai/v1",
+});
+```
+
+### Client Configuration Options
+
+```typescript
+interface ClientOptions {
+  apiKey?: string;
+  baseURL?: string;
+  timeout?: number; // Request timeout in milliseconds
+  maxRetries?: number; // Number of retry attempts
+  defaultHeaders?: Record<string, string>;
+  defaultQuery?: Record<string, string>;
+}
+```
+
+## Streaming Responses
+
+### Chat Completions Streaming
+
+```typescript
+import type {
+  ChatCompletionChunk,
+  ChatCompletionCreateParamsStreaming
+} from "openai/resources/chat/completions";
+import { Stream } from "openai/core/streaming";
+
+async function streamChatCompletion() {
+  const params: ChatCompletionCreateParamsStreaming = {
+    model: "gpt-4o",
+    messages: [
+      { role: "user", content: "Tell me a story" }
+    ],
+    stream: true,
+    max_completion_tokens: 1000,
+  };
+
+  const stream: Stream<ChatCompletionChunk> = await client.chat.completions.create(params);
+
+  for await (const chunk of stream) {
+    const delta = chunk.choices[0]?.delta;
+
+    if (delta?.content) {
+      process.stdout.write(delta.content);
+    }
+
+    if (delta?.tool_calls) {
+      console.log("Tool call delta:", delta.tool_calls);
+    }
+
+    if (chunk.choices[0]?.finish_reason) {
+      console.log("\nFinish reason:", chunk.choices[0].finish_reason);
+    }
+  }
+}
+```
+
+### Responses API Streaming
+
+```typescript
+import type {
+  ResponseCreateParamsStreaming,
+  ResponseStreamEvent
+} from "openai/resources/responses";
+
+async function streamResponsesAPI() {
+  const params: ResponseCreateParamsStreaming = {
+    model: "o1-mini",
+    input: [
+      {
+        role: "user",
+        content: [{ type: "input_text", text: "Solve this math problem: 2x + 5 = 11" }]
+      }
+    ],
+    stream: true,
+    max_output_tokens: 2000,
+    reasoning: {
+      effort: "low",
+      summary: "detailed"
+    }
+  };
+
+  const stream: Stream<ResponseStreamEvent> = await client.responses.create(params);
+
+  for await (const event of stream) {
+    switch (event.type) {
+      case "response.reasoning.text.delta":
+        // Reasoning/thinking tokens (o1/o3)
+        process.stdout.write(`[thinking] ${event.delta}`);
+        break;
+
+      case "response.text.delta":
+        // Output content
+        process.stdout.write(event.delta);
+        break;
+
+      case "response.function_call.arguments.delta":
+        // Tool call arguments being built
+        console.log("Tool call delta:", event.delta);
+        break;
+
+      case "response.completed":
+        console.log("\nResponse completed");
+        break;
+    }
+  }
+}
+```
+
+### Streaming Patterns
+
+```typescript
+// Pattern 1: Simple content streaming
+async function simpleStream(messages: any[]) {
+  const stream = await client.chat.completions.create({
+    model: "gpt-4o",
+    messages,
+    stream: true,
+  });
+
+  let fullContent = "";
+  for await (const chunk of stream) {
+    const content = chunk.choices[0]?.delta?.content || "";
+    fullContent += content;
+
process.stdout.write(content); + } + + return fullContent; +} + +// Pattern 2: Event-driven streaming with handlers +interface StreamHandlers { + onContent?: (delta: string) => void; + onToolCall?: (toolCall: any) => void; + onFinish?: (reason: string) => void; +} + +async function eventDrivenStream(messages: any[], handlers: StreamHandlers) { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages, + stream: true, + }); + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + if (choice.delta?.content) { + handlers.onContent?.(choice.delta.content); + } + + if (choice.delta?.tool_calls) { + handlers.onToolCall?.(choice.delta.tool_calls); + } + + if (choice.finish_reason) { + handlers.onFinish?.(choice.finish_reason); + } + } +} +``` + +## Aborting Requests + +### Using AbortController + +```typescript +class AbortableClient { + private client: OpenAI; + private abortController: AbortController | null = null; + + constructor(config: { apiKey: string; baseURL?: string }) { + this.client = new OpenAI(config); + } + + async askWithAbort(message: string): Promise { + // Create new AbortController for this request + this.abortController = new AbortController(); + + try { + const response = await this.client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: message }], + max_completion_tokens: 1000, + }, { + signal: this.abortController.signal // Pass abort signal + }); + + return response.choices[0]?.message?.content || ""; + } catch (error) { + if (this.abortController.signal.aborted) { + throw new Error("Request was interrupted"); + } + throw error; + } finally { + this.abortController = null; + } + } + + // Call this to abort the current request + interrupt(): void { + this.abortController?.abort(); + } +} + +// Usage example +const abortableClient = new AbortableClient({ + apiKey: process.env.OPENAI_API_KEY! 
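+  // baseURL could be added here as well when targeting an OpenAI-compatible provider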
+}); + +// Start request +const responsePromise = abortableClient.askWithAbort("Write a long essay"); + +// Abort after 5 seconds +setTimeout(() => { + abortableClient.interrupt(); +}, 5000); + +try { + const response = await responsePromise; + console.log(response); +} catch (error) { + console.log("Request was aborted:", error.message); +} +``` + +### Aborting Streaming Requests + +```typescript +async function abortableStream(messages: any[]) { + const abortController = new AbortController(); + + // Abort after 10 seconds + const timeoutId = setTimeout(() => { + abortController.abort(); + }, 10000); + + try { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages, + stream: true, + }, { + signal: abortController.signal + }); + + for await (const chunk of stream) { + // Check if aborted before processing each chunk + if (abortController.signal.aborted) { + break; + } + + const content = chunk.choices[0]?.delta?.content; + if (content) { + process.stdout.write(content); + } + } + } catch (error) { + if (abortController.signal.aborted) { + console.log("\nStream was aborted"); + } else { + throw error; + } + } finally { + clearTimeout(timeoutId); + } +} +``` + +## Error Handling + +### Error Types from OpenAI SDK + +```typescript +import { + OpenAIError, + APIError, + APIConnectionError, + APIConnectionTimeoutError, + APIUserAbortError, + AuthenticationError, + BadRequestError, + RateLimitError, + InternalServerError, + NotFoundError, + PermissionDeniedError, + UnprocessableEntityError +} from "openai"; + +// Comprehensive error handler +async function handleAPICall(apiCall: () => Promise): Promise { + try { + return await apiCall(); + } catch (error) { + if (error instanceof APIUserAbortError) { + console.log("Request was aborted by user"); + throw new Error("Request interrupted"); + } + + if (error instanceof AuthenticationError) { + console.error("Authentication failed:", error.message); + throw new Error("Invalid API key"); + } + + if (error instanceof RateLimitError) { + console.error("Rate limit exceeded:", error.message); + // Could implement exponential backoff here + throw new Error("Rate limited - try again later"); + } + + if (error instanceof APIConnectionError) { + console.error("Connection error:", error.message); + throw new Error("Network connection failed"); + } + + if (error instanceof APIConnectionTimeoutError) { + console.error("Request timeout:", error.message); + throw new Error("Request timed out"); + } + + if (error instanceof BadRequestError) { + console.error("Bad request:", error.message); + console.error("Error details:", error.error); + throw new Error(`Invalid request: ${error.message}`); + } + + if (error instanceof UnprocessableEntityError) { + console.error("Unprocessable entity:", error.message); + throw new Error(`Validation error: ${error.message}`); + } + + if (error instanceof APIError) { + console.error(`API Error ${error.status}:`, error.message); + console.error("Error code:", error.code); + console.error("Error type:", error.type); + throw new Error(`API error: ${error.message}`); + } + + if (error instanceof OpenAIError) { + console.error("OpenAI SDK error:", error.message); + throw new Error(`SDK error: ${error.message}`); + } + + // Unknown error + console.error("Unexpected error:", error); + throw error; + } +} + +// Usage with retry logic +async function apiCallWithRetry( + apiCall: () => Promise, + maxRetries: number = 3 +): Promise { + let lastError: Error; + + for (let attempt = 0; attempt < maxRetries; 
attempt++) { + try { + return await handleAPICall(apiCall); + } catch (error) { + lastError = error as Error; + + // Don't retry on certain errors + if (error instanceof AuthenticationError || + error instanceof BadRequestError || + error instanceof APIUserAbortError) { + throw error; + } + + // Exponential backoff for retryable errors + if (attempt < maxRetries - 1) { + const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + throw lastError!; +} +``` + +### Error Context Extraction + +```typescript +function extractErrorDetails(error: unknown): { + message: string; + code?: string; + type?: string; + status?: number; + retryable: boolean; +} { + if (error instanceof APIError) { + return { + message: error.message, + code: error.code || undefined, + type: error.type, + status: error.status, + retryable: error instanceof RateLimitError || + error instanceof APIConnectionError || + error instanceof InternalServerError + }; + } + + if (error instanceof APIUserAbortError) { + return { + message: "Request was aborted", + retryable: false + }; + } + + if (error instanceof OpenAIError) { + return { + message: error.message, + retryable: false + }; + } + + return { + message: error instanceof Error ? error.message : "Unknown error", + retryable: false + }; +} +``` + +## Stop Reasons + +### Chat Completions Stop Reasons + +```typescript +type ChatCompletionFinishReason = + | "stop" // Natural stopping point or stop sequence + | "length" // Maximum token limit reached + | "content_filter" // Content filtered + | "tool_calls" // Model wants to call tools + | "function_call"; // Legacy function calling + +async function handleStopReasons() { + const response = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Hello" }], + max_completion_tokens: 10, // Low limit to trigger "length" stop + stop: ["END"], // Custom stop sequence + }); + + const choice = response.choices[0]; + const finishReason = choice.finish_reason; + + switch (finishReason) { + case "stop": + console.log("Completed naturally or hit stop sequence"); + break; + + case "length": + console.log("Hit token limit - response may be incomplete"); + // Could request more tokens or continue conversation + break; + + case "content_filter": + console.log("Content was filtered"); + break; + + case "tool_calls": + console.log("Model wants to call tools"); + // Handle tool calls (see Tool Calling section) + break; + + default: + console.log("Unknown finish reason:", finishReason); + } + + return { + content: choice.message.content, + finishReason, + complete: finishReason === "stop" + }; +} +``` + +### Responses API Stop Reasons + +```typescript +// Responses API uses different event types to indicate completion +async function handleResponsesStopReasons() { + const response = await client.responses.create({ + model: "o1-mini", + input: [{ role: "user", content: [{ type: "input_text", text: "Hello" }] }], + max_output_tokens: 100, + }); + + for (const item of response.output || []) { + switch (item.type) { + case "message": + // Check for refusal in content + for (const content of item.content || []) { + if (content.type === "refusal") { + console.log("Response was refused:", content.refusal); + } else if (content.type === "output_text") { + console.log("Response completed normally"); + } + } + break; + + case "function_call": + console.log("Tool call requested"); + break; + } + } +} +``` + +### Streaming Stop Reason Detection + 
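+When streaming, the finish reason is not known until the model stops, so accumulate the content deltas and record `finish_reason` from the chunk that carries it, as the example below does:
+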
+```typescript +async function streamWithStopReasonHandling() { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Count to 10" }], + stream: true, + max_completion_tokens: 50, + }); + + let content = ""; + let finishReason: string | null = null; + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + if (choice.delta?.content) { + content += choice.delta.content; + process.stdout.write(choice.delta.content); + } + + if (choice.finish_reason) { + finishReason = choice.finish_reason; + break; + } + } + + console.log(`\nStreaming finished. Reason: ${finishReason}`); + + if (finishReason === "length") { + console.log("Response was cut off due to token limit"); + // Could continue the conversation to get the rest + } + + return { content, finishReason }; +} +``` + +## Message History & Serialization + +### Message Types and Formats + +```typescript +// Chat Completions message format +interface ChatMessage { + role: "system" | "user" | "assistant" | "tool" | "developer"; + content: string | null; + name?: string; + tool_calls?: Array<{ + id: string; + type: "function"; + function: { + name: string; + arguments: string; + }; + }>; + tool_call_id?: string; // For tool response messages +} + +// Responses API message format +interface ResponseMessage { + role: "user" | "developer"; + content: Array<{ + type: "input_text" | "input_image" | "input_audio"; + text?: string; + image?: { url: string }; + audio?: { data: string }; + }>; +} + +// Unified conversation history +interface ConversationHistory { + api: "completions" | "responses"; + model: string; + systemPrompt?: string; + messages: any[]; // API-specific format + totalTokens: number; + metadata: { + created: number; + lastUpdated: number; + provider: string; + }; +} +``` + +### Serialization Implementation + +```typescript +class ConversationManager { + private messages: any[] = []; + private api: "completions" | "responses"; + private systemPrompt?: string; + private totalTokens = 0; + + constructor(api: "completions" | "responses", systemPrompt?: string) { + this.api = api; + this.systemPrompt = systemPrompt; + + if (systemPrompt) { + if (api === "completions") { + this.messages.push({ role: "system", content: systemPrompt }); + } else { + this.messages.push({ role: "developer", content: systemPrompt }); + } + } + } + + addUserMessage(content: string) { + if (this.api === "completions") { + this.messages.push({ role: "user", content }); + } else { + this.messages.push({ + role: "user", + content: [{ type: "input_text", text: content }] + }); + } + } + + addAssistantMessage(content: string) { + if (this.api === "completions") { + this.messages.push({ role: "assistant", content }); + } else { + this.messages.push({ + type: "message", + content: [{ type: "output_text", text: content }] + }); + } + } + + addToolCall(id: string, name: string, args: string) { + if (this.api === "completions") { + // Add assistant message with tool calls + this.messages.push({ + role: "assistant", + content: null, + tool_calls: [{ + id, + type: "function" as const, + function: { name, arguments: args } + }] + }); + } else { + // Add function call to responses format + this.messages.push({ + type: "function_call", + call_id: id, + name, + arguments: args + }); + } + } + + addToolResult(id: string, result: string) { + if (this.api === "completions") { + this.messages.push({ + role: "tool", + tool_call_id: id, + content: result + }); + } else { + 
this.messages.push({ + type: "function_call_output", + call_id: id, + output: result + }); + } + } + + // Serialize to JSON + serialize(): string { + const data: ConversationHistory = { + api: this.api, + model: "unknown", // Set externally + systemPrompt: this.systemPrompt, + messages: this.messages, + totalTokens: this.totalTokens, + metadata: { + created: Date.now(), + lastUpdated: Date.now(), + provider: "openai" + } + }; + return JSON.stringify(data, null, 2); + } + + // Deserialize from JSON + static deserialize(json: string): ConversationManager { + const data: ConversationHistory = JSON.parse(json); + const manager = new ConversationManager(data.api, data.systemPrompt); + manager.messages = data.messages; + manager.totalTokens = data.totalTokens; + return manager; + } + + getMessages() { + return this.messages; + } + + updateTokenUsage(tokens: number) { + this.totalTokens += tokens; + } +} + +// Usage example +const conversation = new ConversationManager("completions", "You are a helpful assistant"); +conversation.addUserMessage("Hello"); +conversation.addAssistantMessage("Hi there!"); +conversation.updateTokenUsage(25); + +// Save to file +const serialized = conversation.serialize(); +await fs.writeFile("conversation.json", serialized); + +// Load from file +const loaded = await fs.readFile("conversation.json", "utf-8"); +const restored = ConversationManager.deserialize(loaded); +``` + +### Event-Based History Reconstruction + +```typescript +// From pi-agent codebase - reconstruct messages from events +type AgentEvent = + | { type: "user_message"; text: string } + | { type: "assistant_message"; text: string } + | { type: "tool_call"; toolCallId: string; name: string; args: string } + | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } + | { type: "reasoning"; text: string } + | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number }; + +function reconstructMessagesFromEvents( + events: AgentEvent[], + api: "completions" | "responses", + systemPrompt?: string +): any[] { + const messages: any[] = []; + + // Add system prompt + if (systemPrompt) { + if (api === "completions") { + messages.push({ role: "system", content: systemPrompt }); + } else { + messages.push({ role: "developer", content: systemPrompt }); + } + } + + if (api === "responses") { + // Responses API format reconstruction + for (const event of events) { + switch (event.type) { + case "user_message": + messages.push({ + role: "user", + content: [{ type: "input_text", text: event.text }] + }); + break; + + case "reasoning": + messages.push({ + type: "reasoning", + content: [{ type: "reasoning_text", text: event.text }] + }); + break; + + case "tool_call": + messages.push({ + type: "function_call", + call_id: event.toolCallId, + name: event.name, + arguments: event.args + }); + break; + + case "tool_result": + messages.push({ + type: "function_call_output", + call_id: event.toolCallId, + output: event.result + }); + break; + + case "assistant_message": + messages.push({ + type: "message", + content: [{ type: "output_text", text: event.text }] + }); + break; + } + } + } else { + // Chat Completions format reconstruction + let pendingToolCalls: any[] = []; + + for (const event of events) { + switch (event.type) { + case "user_message": + messages.push({ role: "user", content: event.text }); + break; + + case "tool_call": + pendingToolCalls.push({ + id: event.toolCallId, + type: "function", + function: { + name: event.name, + arguments: event.args + } + }); + 
break; + + case "tool_result": + // Add assistant message with tool calls when we see first result + if (pendingToolCalls.length > 0) { + messages.push({ + role: "assistant", + content: null, + tool_calls: pendingToolCalls + }); + pendingToolCalls = []; + } + + messages.push({ + role: "tool", + tool_call_id: event.toolCallId, + content: event.result + }); + break; + + case "assistant_message": + messages.push({ role: "assistant", content: event.text }); + break; + } + } + } + + return messages; +} +``` + +## Token Counting + +### Usage Types from OpenAI SDK + +```typescript +// Chat Completions usage +interface CompletionUsage { + completion_tokens: number; + prompt_tokens: number; + total_tokens: number; + completion_tokens_details?: { + reasoning_tokens?: number; // o1/o3 reasoning tokens + cached_tokens?: number; + }; + prompt_tokens_details?: { + cached_tokens?: number; + }; +} + +// Responses API usage +interface ResponseUsage { + input_tokens: number; + output_tokens: number; + total_tokens: number; + input_tokens_details: { + cached_tokens?: number; + }; + output_tokens_details: { + reasoning_tokens?: number; // o1/o3 reasoning tokens + }; +} +``` + +### Token Counting Implementation + +```typescript +interface TokenUsage { + inputTokens: number; + outputTokens: number; + totalTokens: number; + reasoningTokens: number; + cacheReadTokens: number; + cacheWriteTokens: number; +} + +class TokenCounter { + private totalUsage: TokenUsage = { + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + reasoningTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0 + }; + + // Extract tokens from Chat Completions response + extractChatCompletionUsage(usage?: CompletionUsage): TokenUsage | null { + if (!usage) return null; + + const extracted: TokenUsage = { + inputTokens: usage.prompt_tokens || 0, + outputTokens: usage.completion_tokens || 0, + totalTokens: usage.total_tokens || 0, + reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0, + cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0, + cacheWriteTokens: 0 // Not available in this format + }; + + this.addUsage(extracted); + return extracted; + } + + // Extract tokens from Responses API response + extractResponseUsage(usage?: ResponseUsage): TokenUsage | null { + if (!usage) return null; + + const extracted: TokenUsage = { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + totalTokens: usage.total_tokens || 0, + reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0, + cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0, + cacheWriteTokens: 0 // Not available in current API + }; + + this.addUsage(extracted); + return extracted; + } + + private addUsage(usage: TokenUsage) { + this.totalUsage.inputTokens += usage.inputTokens; + this.totalUsage.outputTokens += usage.outputTokens; + this.totalUsage.totalTokens += usage.totalTokens; + this.totalUsage.reasoningTokens += usage.reasoningTokens; + this.totalUsage.cacheReadTokens += usage.cacheReadTokens; + this.totalUsage.cacheWriteTokens += usage.cacheWriteTokens; + } + + getTotalUsage(): TokenUsage { + return { ...this.totalUsage }; + } + + reset() { + this.totalUsage = { + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + reasoningTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0 + }; + } + + // Format for display + formatUsage(usage?: TokenUsage): string { + const u = usage || this.totalUsage; + let parts = [`↑${u.inputTokens}`, `↓${u.outputTokens}`]; + + if (u.reasoningTokens > 0) { + 
parts.push(`⚔${u.reasoningTokens}`); + } + + if (u.cacheReadTokens > 0) { + parts.push(`šŸ“–${u.cacheReadTokens}`); + } + + if (u.cacheWriteTokens > 0) { + parts.push(`šŸ“${u.cacheWriteTokens}`); + } + + return parts.join(" "); + } +} + +// Usage with streaming +async function countTokensInStream() { + const tokenCounter = new TokenCounter(); + + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Tell me about AI" }], + stream: true, + stream_options: { include_usage: true } // Important for token counts + }); + + for await (const chunk of stream) { + // Token usage comes in final chunk when stream_options.include_usage = true + if (chunk.usage) { + const usage = tokenCounter.extractChatCompletionUsage(chunk.usage); + console.log("Token usage:", tokenCounter.formatUsage(usage)); + } + } + + console.log("Total usage:", tokenCounter.formatUsage()); +} +``` + +### Token Estimation (for planning) + +```typescript +// Rough token estimation for planning purposes +function estimateTokens(text: string): number { + // Very rough approximation: ~4 characters per token for English + return Math.ceil(text.length / 4); +} + +function estimateMessageTokens(messages: any[]): number { + let total = 0; + + for (const message of messages) { + if (typeof message.content === "string") { + total += estimateTokens(message.content); + } else if (Array.isArray(message.content)) { + for (const content of message.content) { + if (content.text) { + total += estimateTokens(content.text); + } + } + } + + // Add overhead for message formatting + total += 10; + } + + return total; +} + +// Check if request will fit in context window +function checkContextLimit(messages: any[], maxTokens: number = 128000): boolean { + const estimated = estimateMessageTokens(messages); + const safetyMargin = 1000; // Reserve tokens for response + + return estimated + safetyMargin < maxTokens; +} +``` + +## Caching + +### Cache Headers and Configuration + +```typescript +// OpenAI supports prompt caching via special message formatting +// Cache is automatically used when messages are repeated + +async function demonstrateCaching() { + const longSystemPrompt = ` + You are an expert software engineer with deep knowledge of TypeScript, React, Node.js... 
+ [Very long system prompt - 1000+ tokens] + `; + + // First request - will cache the system prompt + const response1 = await client.chat.completions.create({ + model: "gpt-4o", + messages: [ + { role: "system", content: longSystemPrompt }, + { role: "user", content: "Explain TypeScript generics" } + ] + }); + + console.log("First request usage:", response1.usage); + + // Second request with same system prompt - will use cache + const response2 = await client.chat.completions.create({ + model: "gpt-4o", + messages: [ + { role: "system", content: longSystemPrompt }, // Cached + { role: "user", content: "Explain React hooks" } + ] + }); + + console.log("Second request usage:", response2.usage); + console.log("Cache read tokens:", response2.usage?.prompt_tokens_details?.cached_tokens); +} +``` + +### Manual Cache Control + +```typescript +// For providers that support explicit cache control +interface CacheConfig { + enabled: boolean; + ttl?: number; // Time to live in seconds +} + +class CachedClient { + private client: OpenAI; + private cache = new Map(); + + constructor(apiKey: string, baseURL?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + } + + private getCacheKey(messages: any[], model: string): string { + return JSON.stringify({ messages, model }); + } + + private isCacheValid(entry: { timestamp: number; ttl: number }): boolean { + return Date.now() - entry.timestamp < entry.ttl * 1000; + } + + async completionWithCache( + messages: any[], + model: string, + cacheConfig: CacheConfig = { enabled: true, ttl: 3600 } + ) { + if (cacheConfig.enabled) { + const cacheKey = this.getCacheKey(messages, model); + const cached = this.cache.get(cacheKey); + + if (cached && this.isCacheValid(cached)) { + console.log("Cache hit"); + return cached.response; + } + } + + const response = await this.client.chat.completions.create({ + model, + messages + }); + + if (cacheConfig.enabled) { + const cacheKey = this.getCacheKey(messages, model); + this.cache.set(cacheKey, { + response, + timestamp: Date.now(), + ttl: cacheConfig.ttl || 3600 + }); + } + + return response; + } + + clearCache() { + this.cache.clear(); + } +} +``` + +## Chat Completions vs Responses API + +### When to Use Each API + +```typescript +// Chat Completions API - Traditional conversational interface +// Use for: Most general chat/completion tasks +interface ChatCompletionsUseCase { + // āœ… Good for: + // - Regular conversations + // - Function/tool calling + // - Most models (gpt-4o, claude, gemini via compatibility) + // - Streaming text generation + // - File uploads and vision + + // āŒ Limitations: + // - No access to reasoning/thinking tokens for o1/o3 + // - Less structured for complex workflows +} + +// Responses API - Structured response interface +// Use for: Complex reasoning tasks, tool workflows +interface ResponsesAPIUseCase { + // āœ… Good for: + // - o1/o3 models with reasoning access + // - Complex tool calling workflows + // - Structured output requirements + // - Background processing + // - Access to reasoning tokens + + // āŒ Limitations: + // - Newer API with less ecosystem support + // - More complex message format + // - Not all models supported +} +``` + +### API Decision Logic + +```typescript +function selectAPI( + model: string, + requiresReasoning: boolean, + hasComplexTools: boolean +): "completions" | "responses" { + // Use Responses API for o1/o3 when reasoning is needed + if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { + return "responses"; + } + + // Use 
Responses API for complex tool workflows + if (hasComplexTools && model.includes("gpt-4")) { + return "responses"; + } + + // Default to Chat Completions for broader compatibility + return "completions"; +} + +// Usage example +const model = "o1-mini"; +const needsReasoning = true; +const api = selectAPI(model, needsReasoning, false); + +if (api === "responses") { + console.log("Using Responses API for reasoning access"); +} else { + console.log("Using Chat Completions API for compatibility"); +} +``` + +### Dual API Client + +```typescript +class DualAPIClient { + private client: OpenAI; + + constructor(apiKey: string, baseURL?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + } + + async complete(params: { + model: string; + messages: any[]; + tools?: any[]; + maxTokens?: number; + temperature?: number; + stream?: boolean; + reasoning?: boolean; + }) { + const api = this.selectAPI(params.model, params.reasoning || false); + + if (api === "responses") { + return this.callResponsesAPI(params); + } else { + return this.callChatCompletionsAPI(params); + } + } + + private selectAPI(model: string, requiresReasoning: boolean): "completions" | "responses" { + if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { + return "responses"; + } + return "completions"; + } + + private async callChatCompletionsAPI(params: any) { + const requestParams = { + model: params.model, + messages: params.messages, + max_completion_tokens: params.maxTokens, + temperature: params.temperature, + tools: params.tools, + stream: params.stream + }; + + if (params.stream) { + return this.client.chat.completions.create(requestParams); + } else { + return this.client.chat.completions.create(requestParams); + } + } + + private async callResponsesAPI(params: any) { + // Convert messages to Responses API format + const input = params.messages.map((msg: any) => { + if (msg.role === "user") { + return { + role: "user", + content: [{ type: "input_text", text: msg.content }] + }; + } else if (msg.role === "system") { + return { + role: "developer", + content: msg.content + }; + } + return msg; + }); + + const requestParams = { + model: params.model, + input, + max_output_tokens: params.maxTokens, + tools: params.tools, + stream: params.stream, + reasoning: params.reasoning ? 
{ effort: "low" } : undefined + }; + + return this.client.responses.create(requestParams); + } +} +``` + +## Tool/Function Calling + +### Tool Definition Format + +```typescript +// OpenAI tool definition format (JSON Schema) +interface ToolDefinition { + type: "function"; + function: { + name: string; + description: string; + parameters: { + type: "object"; + properties: Record; + required: string[]; + }; + }; +} + +// Example tool definitions +const tools: ToolDefinition[] = [ + { + type: "function", + function: { + name: "read_file", + description: "Read the contents of a file", + parameters: { + type: "object", + properties: { + path: { + type: "string", + description: "The file path to read" + } + }, + required: ["path"] + } + } + }, + { + type: "function", + function: { + name: "execute_command", + description: "Execute a shell command", + parameters: { + type: "object", + properties: { + command: { + type: "string", + description: "The command to execute" + }, + timeout: { + type: "number", + description: "Timeout in seconds", + default: 30 + } + }, + required: ["command"] + } + } + } +]; +``` + +### Tool Execution Engine + +```typescript +type ToolFunction = (args: any) => Promise; + +class ToolExecutor { + private tools = new Map(); + + register(name: string, fn: ToolFunction) { + this.tools.set(name, fn); + } + + async execute(name: string, argsJson: string): Promise { + const tool = this.tools.get(name); + if (!tool) { + throw new Error(`Unknown tool: ${name}`); + } + + try { + const args = JSON.parse(argsJson); + return await tool(args); + } catch (error) { + throw new Error(`Tool execution failed: ${error.message}`); + } + } + + getAvailableTools(): string[] { + return Array.from(this.tools.keys()); + } +} + +// Register tool implementations +const toolExecutor = new ToolExecutor(); + +toolExecutor.register("read_file", async (args: { path: string }) => { + const fs = await import("fs/promises"); + try { + const content = await fs.readFile(args.path, "utf-8"); + return content; + } catch (error) { + return `Error reading file: ${error.message}`; + } +}); + +toolExecutor.register("execute_command", async (args: { command: string; timeout?: number }) => { + const { exec } = await import("child_process"); + const { promisify } = await import("util"); + const execAsync = promisify(exec); + + try { + const { stdout, stderr } = await execAsync(args.command, { + timeout: (args.timeout || 30) * 1000 + }); + return stdout + (stderr ? 
`\nSTDERR: ${stderr}` : ""); + } catch (error) { + return `Command failed: ${error.message}`; + } +}); +``` + +### Complete Tool Calling Flow + +```typescript +async function completeChatWithTools(userMessage: string) { + const conversation = new ConversationManager("completions", "You are a helpful assistant with file system access."); + const tokenCounter = new TokenCounter(); + + conversation.addUserMessage(userMessage); + + while (true) { + const response = await client.chat.completions.create({ + model: "gpt-4o", + messages: conversation.getMessages(), + tools, + tool_choice: "auto", + max_completion_tokens: 1000 + }); + + // Track token usage + if (response.usage) { + tokenCounter.extractChatCompletionUsage(response.usage); + } + + const message = response.choices[0].message; + + if (message.tool_calls && message.tool_calls.length > 0) { + // Add assistant message with tool calls to conversation + conversation.getMessages().push({ + role: "assistant", + content: message.content, + tool_calls: message.tool_calls + }); + + // Execute each tool call + for (const toolCall of message.tool_calls) { + console.log(`šŸ”§ Calling ${toolCall.function.name}...`); + + try { + const result = await toolExecutor.execute( + toolCall.function.name, + toolCall.function.arguments + ); + + console.log(`āœ… Tool result: ${result.substring(0, 100)}...`); + conversation.addToolResult(toolCall.id, result); + + } catch (error) { + console.log(`āŒ Tool error: ${error.message}`); + conversation.addToolResult(toolCall.id, `Error: ${error.message}`); + } + } + + // Continue conversation with tool results + continue; + } else { + // Final response + const content = message.content || ""; + conversation.addAssistantMessage(content); + + console.log("šŸ¤– Assistant:", content); + console.log("šŸ“Š Token usage:", tokenCounter.formatUsage()); + + return content; + } + } +} + +// Usage +await completeChatWithTools("Read the package.json file and tell me about this project"); +``` + +### Streaming Tool Calls + +```typescript +async function streamingToolCalls(userMessage: string) { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: userMessage }], + tools, + tool_choice: "auto", + stream: true + }); + + let currentToolCalls: Map<string, { name: string; args: string }> = new Map(); + let assistantMessage = ""; + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + const delta = choice.delta; + + // Regular content + if (delta.content) { + assistantMessage += delta.content; + process.stdout.write(delta.content); + } + + // Tool call deltas + if (delta.tool_calls) { + for (const toolCallDelta of delta.tool_calls) { + const id = toolCallDelta.id; + if (!id) continue; + + if (!currentToolCalls.has(id)) { + currentToolCalls.set(id, { name: "", args: "" }); + } + + const toolCall = currentToolCalls.get(id)!; + + if (toolCallDelta.function?.name) { + toolCall.name += toolCallDelta.function.name; + } + + if (toolCallDelta.function?.arguments) { + toolCall.args += toolCallDelta.function.arguments; + } + } + } + + // When finished, execute accumulated tool calls + if (choice.finish_reason === "tool_calls") { + console.log("\nšŸ”§ Executing tools..."); + + for (const [id, toolCall] of currentToolCalls) { + try { + const result = await toolExecutor.execute(toolCall.name, toolCall.args); + console.log(`āœ… ${toolCall.name}: ${result.substring(0, 100)}...`); + } catch (error) { + console.log(`āŒ ${toolCall.name}: ${error.message}`); + } + } + + break; + } + } +}
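+ +// Usage sketch (hypothetical prompt; assumes the `tools` array and `toolExecutor` defined above) +await streamingToolCalls("List the files in the current directory and summarize them");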
+``` + +### Responses API Tool Calling + +```typescript +async function responsesAPIToolCalling() { + const response = await client.responses.create({ + model: "gpt-4o", + input: [ + { + role: "user", + content: [{ type: "input_text", text: "List files in current directory" }] + } + ], + tools: [ + { + type: "function", + function: { + name: "list_directory", + description: "List files in a directory", + parameters: { + type: "object", + properties: { + path: { type: "string", description: "Directory path" } + }, + required: ["path"] + } + } + } + ] + }); + + for (const item of response.output || []) { + switch (item.type) { + case "function_call": + console.log(`šŸ”§ Tool call: ${item.name}`); + console.log(`šŸ“ Arguments: ${item.arguments}`); + + try { + const result = await toolExecutor.execute(item.name, item.arguments); + console.log(`āœ… Result: ${result}`); + + // In a real implementation, you'd add this result back to the conversation + // and continue the response + } catch (error) { + console.log(`āŒ Error: ${error.message}`); + } + break; + + case "message": + for (const content of item.content || []) { + if (content.type === "output_text") { + console.log("šŸ¤– Response:", content.text); + } + } + break; + } + } +} +``` + +## System Prompts + +### System Prompt Handling by Model Type + +```typescript +interface SystemPromptConfig { + content: string; + role: "system" | "developer"; // Different models use different roles +} + +function formatSystemPrompt(prompt: string, model: string, api: "completions" | "responses"): any { + // Chat Completions API + if (api === "completions") { + // Most models use "system" role + if (model.includes("claude") || model.includes("gemini")) { + // Some providers via OpenAI compatibility might expect "system" + return { role: "system", content: prompt }; + } + + // OpenAI native models + return { role: "system", content: prompt }; + } + + // Responses API uses "developer" role for system messages + return { role: "developer", content: prompt }; +} + +// System prompt best practices +const systemPrompts = { + // General assistant + assistant: "You are a helpful, accurate, and reliable AI assistant. Provide clear, concise, and helpful responses.", + + // Code assistant + coder: `You are an expert software engineer with deep knowledge of multiple programming languages, frameworks, and best practices. + +Key principles: +- Write clean, maintainable, and well-documented code +- Follow language-specific conventions and best practices +- Explain your reasoning and trade-offs +- Suggest improvements and alternatives when appropriate +- Always test your code mentally before providing it + +When helping with code: +1. Understand the requirements fully +2. Choose appropriate tools and patterns +3. Provide working, tested solutions +4. Explain key concepts and decisions`, + + // Research assistant + researcher: `You are a thorough research assistant. When answering questions: + +1. Provide accurate, well-sourced information +2. Acknowledge limitations in your knowledge +3. Structure responses clearly with headings and bullet points +4. Cite sources when possible +5. Distinguish between facts, analysis, and opinions +6. Ask clarifying questions when the request is ambiguous`, + + // Tool-enabled assistant + toolEnabled: `You are an AI assistant with access to various tools for file operations, web searches, and code execution. 
+ +Guidelines for tool use: +- Use tools when they would be helpful to answer the user's question +- Always explain what you're doing before calling a tool +- Interpret and summarize tool results for the user +- If a tool fails, try alternative approaches +- Be transparent about what information comes from tools vs your training + +Available capabilities: +- Read and write files +- Execute shell commands +- Search the web +- Analyze code and data` +}; +``` + +### Dynamic System Prompt Building + +```typescript +class SystemPromptBuilder { + private sections: string[] = []; + + addRole(role: string): this { + this.sections.push(`You are ${role}.`); + return this; + } + + addCapabilities(capabilities: string[]): this { + if (capabilities.length > 0) { + this.sections.push(`You have access to: ${capabilities.join(", ")}.`); + } + return this; + } + + addGuidelines(guidelines: string[]): this { + if (guidelines.length > 0) { + this.sections.push("Guidelines:\n" + guidelines.map(g => `- ${g}`).join("\n")); + } + return this; + } + + addContext(context: string): this { + if (context.trim()) { + this.sections.push(`Context: ${context}`); + } + return this; + } + + build(): string { + return this.sections.join("\n\n"); + } + + reset(): this { + this.sections = []; + return this; + } +} + +// Usage examples +const codeAssistantPrompt = new SystemPromptBuilder() + .addRole("an expert TypeScript developer") + .addCapabilities(["file system access", "code execution", "documentation lookup"]) + .addGuidelines([ + "Write clean, type-safe code", + "Explain complex concepts clearly", + "Suggest best practices", + "Test code before providing it" + ]) + .build(); + +const customerServicePrompt = new SystemPromptBuilder() + .addRole("a helpful customer service representative") + .addGuidelines([ + "Be polite and professional", + "Listen carefully to customer concerns", + "Provide accurate information", + "Escalate complex issues when needed" + ]) + .addContext("You work for TechCorp, a software company that makes productivity tools.") + .build(); +``` + +### Model-Specific System Prompt Optimization + +```typescript +function optimizeSystemPromptForModel(basePrompt: string, model: string): string { + // OpenAI models (especially o1/o3) work well with detailed, structured prompts + if (model.includes("gpt") || model.includes("o1") || model.includes("o3")) { + return `${basePrompt} + +Think step by step when solving complex problems. Show your reasoning process clearly.`; + } + + // Claude models prefer more conversational, principle-based prompts + if (model.includes("claude")) { + return `${basePrompt} + +I value helpful, harmless, and honest responses. 
Please be thoughtful and thorough in your analysis.`; + } + + // Gemini models work well with structured instructions + if (model.includes("gemini")) { + return `${basePrompt} + +Please structure your responses clearly and provide specific, actionable advice.`; + } + + // Default: return as-is + return basePrompt; +} + +// Provider-specific prompt injection handling +function detectAndMitigatePromptInjection(userInput: string): { safe: boolean; cleaned?: string } { + const injectionPatterns = [ + /ignore.*previous.*instruction/i, + /forget.*system.*prompt/i, + /act.*as.*different/i, + /pretend.*you.*are/i, + /new.*role.*now/i + ]; + + for (const pattern of injectionPatterns) { + if (pattern.test(userInput)) { + return { + safe: false, + cleaned: userInput.replace(pattern, "[FILTERED]") + }; + } + } + + return { safe: true }; +} +``` + +## Provider-Specific Features + +### Reasoning Support Detection + +```typescript +// From pi-agent codebase - detect and handle reasoning support per provider +type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other"; + +function detectProvider(baseURL?: string): Provider { + if (!baseURL) return "openai"; + if (baseURL.includes("api.openai.com")) return "openai"; + if (baseURL.includes("generativelanguage.googleapis.com")) return "gemini"; + if (baseURL.includes("api.groq.com")) return "groq"; + if (baseURL.includes("api.anthropic.com")) return "anthropic"; + if (baseURL.includes("openrouter.ai")) return "openrouter"; + return "other"; +} + +// Provider-specific reasoning parameter handling +function adjustRequestForReasoning( + requestOptions: any, + api: "completions" | "responses", + provider: Provider, + supportsReasoning: boolean +): any { + if (!supportsReasoning) return requestOptions; + + switch (provider) { + case "openai": + // OpenAI standard format + if (api === "responses") { + requestOptions.reasoning = { + effort: "low", + summary: "detailed" + }; + } else { + requestOptions.reasoning_effort = "low"; + } + break; + + case "gemini": + // Gemini uses extra_body for thinking configuration + if (api === "completions") { + requestOptions.extra_body = { + google: { + thinking_config: { + thinking_budget: 1024, + include_thoughts: true + } + } + }; + // Remove reasoning_effort when using thinking_config + delete requestOptions.reasoning_effort; + } + break; + + case "groq": + // Groq uses reasoning_format for Chat Completions + if (api === "completions") { + requestOptions.reasoning_format = "parsed"; + requestOptions.reasoning_effort = "low"; + } else { + // Groq Responses API doesn't support reasoning.summary + requestOptions.reasoning = { effort: "low" }; + } + break; + + case "openrouter": + // OpenRouter unified reasoning format + if (api === "completions") { + requestOptions.reasoning = { effort: "low" }; + delete requestOptions.reasoning_effort; + } + break; + + default: + // Standard OpenAI format for others + if (api === "responses") { + requestOptions.reasoning = { effort: "low" }; + } else { + requestOptions.reasoning_effort = "low"; + } + } + + return requestOptions; +} +``` + +### Provider-Specific Response Parsing + +```typescript +// Extract reasoning content from provider-specific response formats +function parseReasoningFromMessage(message: any, provider: Provider): { + cleanContent: string; + reasoningTexts: string[]; +} { + const reasoningTexts: string[] = []; + let cleanContent = message.content || ""; + + switch (provider) { + case "gemini": + // Gemini returns thinking in tags + if 
(cleanContent.includes("<thought>")) { + const thoughtMatches = cleanContent.matchAll(/<thought>([\s\S]*?)<\/thought>/g); + for (const match of thoughtMatches) { + reasoningTexts.push(match[1].trim()); + } + // Remove thought tags from response + cleanContent = cleanContent.replace(/<thought>[\s\S]*?<\/thought>/g, "").trim(); + } + break; + + case "groq": + // Groq returns reasoning in separate field + if (message.reasoning) { + reasoningTexts.push(message.reasoning); + } + break; + + case "openrouter": + // OpenRouter uses message.reasoning field + if (message.reasoning) { + reasoningTexts.push(message.reasoning); + } + break; + + default: + // OpenAI and others handle reasoning via events + break; + } + + return { cleanContent, reasoningTexts }; +} +``` + +### Provider-Specific Error Handling + +```typescript +function handleProviderSpecificErrors(error: any, provider: Provider): Error { + switch (provider) { + case "groq": + if (error.message?.includes("reasoning_format")) { + return new Error("Reasoning not supported by this Groq model"); + } + break; + + case "gemini": + if (error.message?.includes("thinking_config")) { + return new Error("Thinking mode not supported by this Gemini model"); + } + break; + + case "anthropic": + if (error.message?.includes("reasoning")) { + return new Error("Reasoning not available via Anthropic's OpenAI compatibility layer"); + } + break; + + case "openrouter": + // OpenRouter passes through underlying provider errors + if (error.message?.includes("not supported")) { + return new Error("Feature not supported by the selected model on OpenRouter"); + } + break; + } + + return error; +} +``` + +## Complete Implementation Examples + +### Basic Chat Client + +```typescript +import OpenAI from "openai"; +import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; + +class BasicChatClient { + private client: OpenAI; + private messages: ChatCompletionMessageParam[] = []; + + constructor(apiKey: string, baseURL?: string, systemPrompt?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + + if (systemPrompt) { + this.messages.push({ role: "system", content: systemPrompt }); + } + } + + async chat(userMessage: string): Promise<string> { + this.messages.push({ role: "user", content: userMessage }); + + try { + const response = await this.client.chat.completions.create({ + model: "gpt-4o", + messages: this.messages, + max_completion_tokens: 1000, + temperature: 0.7 + }); + + const assistantMessage = response.choices[0]?.message?.content || ""; + this.messages.push({ role: "assistant", content: assistantMessage }); + + return assistantMessage; + } catch (error) { + console.error("Chat error:", error); + throw error; + } + } + + getHistory(): ChatCompletionMessageParam[] { + return [...this.messages]; + } + + clearHistory(): void { + this.messages = this.messages.filter(m => m.role === "system"); + } +} +``` + +### Advanced Streaming Client with All Features + +```typescript +import OpenAI from "openai"; +import type { + ChatCompletionCreateParamsStreaming, + ChatCompletionChunk +} from "openai/resources/chat/completions"; + +interface StreamingClientConfig { + apiKey: string; + baseURL?: string; + model: string; + systemPrompt?: string; + tools?: any[]; + maxTokens?: number; + temperature?: number; +} + +interface StreamEvent { + type: "content" | "tool_call" | "reasoning" | "usage" | "error" | "complete"; + data: any; +} + +class AdvancedStreamingClient { + private client: OpenAI; + private config: StreamingClientConfig; + private messages: any[] = []; + private
abortController: AbortController | null = null; + private tokenCounter = new TokenCounter(); + + constructor(config: StreamingClientConfig) { + this.config = config; + this.client = new OpenAI({ + apiKey: config.apiKey, + baseURL: config.baseURL + }); + + if (config.systemPrompt) { + this.messages.push({ role: "system", content: config.systemPrompt }); + } + } + + async *streamChat(userMessage: string): AsyncGenerator { + this.messages.push({ role: "user", content: userMessage }); + this.abortController = new AbortController(); + + try { + const params: ChatCompletionCreateParamsStreaming = { + model: this.config.model, + messages: this.messages, + stream: true, + max_completion_tokens: this.config.maxTokens || 1000, + temperature: this.config.temperature || 0.7, + tools: this.config.tools, + tool_choice: this.config.tools ? "auto" : undefined, + stream_options: { include_usage: true } + }; + + const stream = await this.client.chat.completions.create(params, { + signal: this.abortController.signal + }); + + let assistantContent = ""; + let currentToolCalls = new Map(); + + for await (const chunk of stream) { + if (this.abortController.signal.aborted) break; + + const choice = chunk.choices[0]; + if (!choice) continue; + + // Handle content + if (choice.delta?.content) { + assistantContent += choice.delta.content; + yield { + type: "content", + data: { delta: choice.delta.content, content: assistantContent } + }; + } + + // Handle tool calls + if (choice.delta?.tool_calls) { + for (const toolCall of choice.delta.tool_calls) { + if (!toolCall.id) continue; + + if (!currentToolCalls.has(toolCall.id)) { + currentToolCalls.set(toolCall.id, { + id: toolCall.id, + name: "", + arguments: "" + }); + } + + const call = currentToolCalls.get(toolCall.id); + if (toolCall.function?.name) { + call.name += toolCall.function.name; + } + if (toolCall.function?.arguments) { + call.arguments += toolCall.function.arguments; + } + + yield { + type: "tool_call", + data: { id: toolCall.id, delta: toolCall, current: call } + }; + } + } + + // Handle usage + if (chunk.usage) { + const usage = this.tokenCounter.extractChatCompletionUsage(chunk.usage); + yield { + type: "usage", + data: usage + }; + } + + // Handle completion + if (choice.finish_reason) { + if (choice.finish_reason === "tool_calls") { + // Execute tool calls + const toolResults = await this.executeToolCalls(Array.from(currentToolCalls.values())); + + // Add messages and continue + this.messages.push({ + role: "assistant", + content: assistantContent || null, + tool_calls: Array.from(currentToolCalls.values()).map(call => ({ + id: call.id, + type: "function", + function: { + name: call.name, + arguments: call.arguments + } + })) + }); + + for (const result of toolResults) { + this.messages.push({ + role: "tool", + tool_call_id: result.id, + content: result.content + }); + } + + // Continue stream for final response + yield* this.streamChat(""); + return; + } else { + // Regular completion + if (assistantContent) { + this.messages.push({ role: "assistant", content: assistantContent }); + } + + yield { + type: "complete", + data: { reason: choice.finish_reason, content: assistantContent } + }; + } + } + } + } catch (error) { + yield { + type: "error", + data: { error: error.message } + }; + } finally { + this.abortController = null; + } + } + + private async executeToolCalls(toolCalls: any[]): Promise> { + const results = []; + + for (const call of toolCalls) { + try { + // Tool execution would be implemented here + const result = await 
this.executeTool(call.name, call.arguments); + results.push({ id: call.id, content: result }); + } catch (error) { + results.push({ id: call.id, content: `Error: ${error.message}` }); + } + } + + return results; + } + + private async executeTool(name: string, argsJson: string): Promise<string> { + // Implement tool execution logic + return `Tool ${name} executed with args: ${argsJson}`; + } + + interrupt(): void { + this.abortController?.abort(); + } + + getUsage() { + return this.tokenCounter.getTotalUsage(); + } +} + +// Usage example +const client = new AdvancedStreamingClient({ + apiKey: process.env.OPENAI_API_KEY!, + model: "gpt-4o", + systemPrompt: "You are a helpful assistant.", + tools: [/* tool definitions */] +}); + +for await (const event of client.streamChat("Help me write a TypeScript function")) { + switch (event.type) { + case "content": + process.stdout.write(event.data.delta); + break; + case "tool_call": + console.log(`\nšŸ”§ Tool: ${event.data.current.name}`); + break; + case "usage": + console.log(`\nšŸ“Š Tokens: ${event.data.totalTokens}`); + break; + case "complete": + console.log(`\nāœ… Complete (${event.data.reason})`); + break; + case "error": + console.log(`\nāŒ Error: ${event.data.error}`); + break; + } +} +``` + +This comprehensive guide covers all the essential features needed to implement a robust OpenAI SDK integration. Each section provides working code examples, actual types from the SDK, and real-world patterns from the pi-mono codebase. + +## Key Takeaways + +1. **Always use AbortController** for request cancellation +2. **Handle both Chat Completions and Responses APIs** depending on model capabilities +3. **Implement comprehensive error handling** with proper error types +4. **Track token usage** for cost management and optimization +5. **Support streaming** for better user experience +6. **Handle provider-specific features** like reasoning and caching +7. **Implement proper tool calling workflows** for agentic applications +8. **Serialize conversation state** for session persistence +9. **Use appropriate system prompts** for different model types +10.
**Test reasoning support** dynamically for each provider/model combination \ No newline at end of file diff --git a/packages/ai/package.json b/packages/ai/package.json new file mode 100644 index 00000000..ce3f2273 --- /dev/null +++ b/packages/ai/package.json @@ -0,0 +1,32 @@ +{ + "name": "@mariozechner/ai", + "version": "0.5.8", + "description": "Unified API for OpenAI, Anthropic, and Google Gemini LLM providers", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist", "README.md"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": { + "openai": "5.12.2", + "@anthropic-ai/sdk": "0.60.0", + "@google/genai": "1.14.0" + }, + "devDependencies": {}, + "keywords": ["ai", "llm", "openai", "anthropic", "gemini", "unified", "api"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/ai" + }, + "engines": { + "node": ">=20.0.0" + } +} \ No newline at end of file diff --git a/packages/ai/plan.md b/packages/ai/plan.md new file mode 100644 index 00000000..d09177ac --- /dev/null +++ b/packages/ai/plan.md @@ -0,0 +1,950 @@ +# Unified AI API Design Plan + +Based on comprehensive investigation of OpenAI, Anthropic, and Gemini SDKs with actual implementation examples. + +## Key API Differences Summary + +### OpenAI +- **Dual APIs**: Chat Completions (broad support) vs Responses API (o1/o3 thinking content) +- **Thinking**: Only Responses API gives actual content, Chat Completions only gives counts +- **Roles**: `system`, `user`, `assistant`, `tool` (o1/o3 use `developer` instead of `system`) +- **Streaming**: Deltas in chunks with `stream_options.include_usage` for token usage + +### Anthropic +- **Single API**: Messages API with comprehensive streaming +- **Content Blocks**: Always arrays, even for simple text +- **System**: Separate parameter, not in messages array +- **Tool Use**: Content blocks, not separate message role +- **Thinking**: Explicit budget allocation, appears as content blocks +- **Caching**: Per-block cache control with TTL options + +### Gemini +- **Parts System**: All content split into typed parts +- **System**: Separate `systemInstruction` parameter +- **Roles**: Uses `model` instead of `assistant` +- **Thinking**: `part.thought: true` flag identifies reasoning +- **Streaming**: Returns complete responses, not deltas +- **Function Calls**: Embedded in parts array + +## Unified API Design + +### Core Client + +```typescript +interface AIConfig { + provider: 'openai' | 'anthropic' | 'gemini'; + apiKey: string; + model: string; + baseURL?: string; // For OpenAI-compatible endpoints +} + +interface ModelInfo { + id: string; + name: string; + provider: string; + capabilities: { + reasoning: boolean; + toolCall: boolean; + vision: boolean; + audio?: boolean; + }; + cost: { + input: number; // per million tokens + output: number; // per million tokens + cacheRead?: number; + cacheWrite?: number; + }; + limits: { + context: number; + output: number; + }; + knowledge?: string; // Knowledge cutoff date +} + +class AI { + constructor(config: AIConfig); + + // Main streaming interface - everything else builds on this + async *stream(request: Request): AsyncGenerator; + + // Convenience method for non-streaming + async complete(request: Request): Promise; + + // Get model information + getModelInfo(): 
ModelInfo; + + // Abort current request + abort(): void; +} +``` + +### Message Format + +```typescript +type Message = + | { + role: 'user'; + content: string | Content[]; + } + | { + role: 'assistant'; + content: string | Content[]; + model: string; + usage: TokenUsage; + toolCalls?: { + id: string; + name: string; + arguments: Record; + }[]; + } + | { + role: 'tool'; + content: string | Content[]; + toolCallId: string; + }; + +interface Content { + type: 'text' | 'image'; + text?: string; + image?: { + data: string; // base64 + mimeType: string; + }; +} +``` + +### Request Format + +```typescript +interface Request { + messages: Message[]; + + // System prompt (separated for Anthropic/Gemini compatibility) + systemPrompt?: string; + + // Common parameters + temperature?: number; + maxTokens?: number; + stopSequences?: string[]; + + // Tools + tools?: { + name: string; + description: string; + parameters: Record; // JSON Schema + }[]; + toolChoice?: 'auto' | 'none' | 'required' | { name: string }; + + // Thinking/reasoning + reasoning?: { + enabled: boolean; + effort?: 'low' | 'medium' | 'high'; // OpenAI reasoning_effort + maxTokens?: number; // Anthropic thinking budget + }; + + // Abort signal + signal?: AbortSignal; +} +``` + +### Event Stream + +```typescript +type Event = + | { type: 'start'; model: string; provider: string } + | { type: 'text'; content: string; delta: string } + | { type: 'thinking'; content: string; delta: string } + | { type: 'toolCall'; toolCall: ToolCall } + | { type: 'usage'; usage: TokenUsage } + | { type: 'done'; reason: StopReason; message: Message } // message includes model and usage + | { type: 'error'; error: Error }; + +interface TokenUsage { + input: number; + output: number; + total: number; + thinking?: number; + cacheRead?: number; + cacheWrite?: number; + cost?: { + input: number; + output: number; + cache?: number; + total: number; + }; +} + +type StopReason = 'stop' | 'length' | 'toolUse' | 'safety' | 'error'; +``` + +## Caching Strategy + +Caching is handled automatically by each provider adapter: + +- **OpenAI**: Automatic prompt caching (no configuration needed) +- **Gemini**: Automatic context caching (no configuration needed) +- **Anthropic**: We automatically add cache_control to the system prompt and older messages + +```typescript +class AnthropicAdapter { + private addCaching(messages: Message[]): any[] { + const anthropicMessages = []; + + // Automatically cache older messages (assuming incremental context) + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + const isOld = i < messages.length - 2; // Cache all but last 2 messages + + // Convert to Anthropic format with automatic caching + const blocks = this.toContentBlocks(msg); + if (isOld && blocks.length > 0) { + blocks[0].cache_control = { type: 'ephemeral' }; + } + + anthropicMessages.push({ + role: msg.role === 'assistant' ? 
'assistant' : 'user', + content: blocks + }); + } + + return anthropicMessages; + } +} +``` + +## Provider Adapter Implementation + +### OpenAI Adapter + +```typescript +class OpenAIAdapter { + private client: OpenAI; + private useResponsesAPI: boolean = false; + + async *stream(request: Request): AsyncGenerator { + // Determine which API to use + if (request.reasoning?.enabled && this.isReasoningModel()) { + yield* this.streamResponsesAPI(request); + } else { + yield* this.streamChatCompletions(request); + } + } + + private async *streamChatCompletions(request: Request) { + const stream = await this.client.chat.completions.create({ + model: this.model, + messages: this.toOpenAIMessages(request), + tools: this.toOpenAITools(request.tools), + reasoning_effort: request.reasoning?.effort, + stream: true, + stream_options: { include_usage: true } + }); + + let content = ''; + let toolCalls: any[] = []; + + for await (const chunk of stream) { + if (chunk.choices[0]?.delta?.content) { + const delta = chunk.choices[0].delta.content; + content += delta; + yield { type: 'text', content, delta }; + } + + if (chunk.choices[0]?.delta?.tool_calls) { + // Accumulate tool calls + this.mergeToolCalls(toolCalls, chunk.choices[0].delta.tool_calls); + for (const tc of toolCalls) { + yield { type: 'toolCall', toolCall: tc, partial: true }; + } + } + + if (chunk.usage) { + yield { + type: 'usage', + usage: { + input: chunk.usage.prompt_tokens, + output: chunk.usage.completion_tokens, + total: chunk.usage.total_tokens, + thinking: chunk.usage.completion_tokens_details?.reasoning_tokens + } + }; + } + } + } + + private async *streamResponsesAPI(request: Request) { + // Use Responses API for actual thinking content + const response = await this.client.responses.create({ + model: this.model, + input: this.toResponsesInput(request), + tools: this.toResponsesTools(request.tools), + stream: true + }); + + for await (const event of response) { + if (event.type === 'response.reasoning_text.delta') { + yield { + type: 'thinking', + content: event.text, + delta: event.delta + }; + } + // Handle other event types... + } + } + + private toOpenAIMessages(request: Request): any[] { + const messages: any[] = []; + + // Handle system prompt + if (request.systemPrompt) { + const role = this.isReasoningModel() ? 'developer' : 'system'; + messages.push({ role, content: request.systemPrompt }); + } + + // Convert unified messages + for (const msg of request.messages) { + if (msg.role === 'tool') { + messages.push({ + role: 'tool', + content: msg.content, + tool_call_id: msg.toolCallId + }); + } else { + messages.push({ + role: msg.role, + content: this.contentToString(msg.content), + tool_calls: msg.toolCalls + }); + } + } + + return messages; + } +} +``` + +### Anthropic Adapter + +```typescript +class AnthropicAdapter { + private client: Anthropic; + + async *stream(request: Request): AsyncGenerator { + const stream = this.client.messages.stream({ + model: this.model, + max_tokens: request.maxTokens || 1024, + messages: this.addCaching(request.messages), + system: request.systemPrompt, + tools: this.toAnthropicTools(request.tools), + thinking: request.reasoning?.enabled ? 
{ + type: 'enabled', + budget_tokens: request.reasoning.maxTokens || 2000 + } : undefined + }); + + let content = ''; + let thinking = ''; + + stream.on('text', (delta, snapshot) => { + content = snapshot; + // Note: Can't yield from callback, need different approach + }); + + stream.on('thinking', (delta, snapshot) => { + thinking = snapshot; + }); + + // Use raw streaming instead for proper async generator + const rawStream = await this.client.messages.create({ + ...params, + stream: true + }); + + for await (const chunk of rawStream) { + switch (chunk.type) { + case 'content_block_delta': + if (chunk.delta.type === 'text_delta') { + content += chunk.delta.text; + yield { + type: 'text', + content, + delta: chunk.delta.text + }; + } + break; + + case 'message_delta': + if (chunk.usage) { + yield { + type: 'usage', + usage: { + input: chunk.usage.input_tokens, + output: chunk.usage.output_tokens, + total: chunk.usage.input_tokens + chunk.usage.output_tokens, + cacheRead: chunk.usage.cache_read_input_tokens, + cacheWrite: chunk.usage.cache_creation_input_tokens + } + }; + } + break; + } + } + } + + private toAnthropicMessages(request: Request): any[] { + return request.messages.map(msg => { + if (msg.role === 'tool') { + // Tool results go as user messages with tool_result blocks + return { + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: msg.toolCallId, + content: msg.content + }] + }; + } + + // Always use content blocks + const blocks: any[] = []; + + if (typeof msg.content === 'string') { + blocks.push({ + type: 'text', + text: msg.content, + cache_control: msg.cacheControl + }); + } else { + // Convert unified content to blocks + for (const part of msg.content) { + if (part.type === 'text') { + blocks.push({ type: 'text', text: part.text }); + } else if (part.type === 'image') { + blocks.push({ + type: 'image', + source: { + type: 'base64', + media_type: part.image.mimeType, + data: part.image.data + } + }); + } + } + } + + // Add tool calls as blocks + if (msg.toolCalls) { + for (const tc of msg.toolCalls) { + blocks.push({ + type: 'tool_use', + id: tc.id, + name: tc.name, + input: tc.arguments + }); + } + } + + return { + role: msg.role === 'assistant' ? 'assistant' : 'user', + content: blocks + }; + }); + } +} +``` + +### Gemini Adapter + +```typescript +class GeminiAdapter { + private client: GoogleGenAI; + + async *stream(request: Request): AsyncGenerator { + const stream = await this.client.models.generateContentStream({ + model: this.model, + systemInstruction: request.systemPrompt ? 
{ + parts: [{ text: request.systemPrompt }] + } : undefined, + contents: this.toGeminiContents(request), + tools: this.toGeminiTools(request.tools), + abortSignal: request.signal + }); + + let content = ''; + let thinking = ''; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content?.parts) continue; + + for (const part of candidate.content.parts) { + if (part.text && !part.thought) { + content += part.text; + yield { + type: 'text', + content, + delta: part.text + }; + } else if (part.text && part.thought) { + thinking += part.text; + yield { + type: 'thinking', + content: thinking, + delta: part.text + }; + } else if (part.functionCall) { + yield { + type: 'toolCall', + toolCall: { + id: part.functionCall.id || crypto.randomUUID(), + name: part.functionCall.name, + arguments: part.functionCall.args + } + }; + } + } + + if (chunk.usageMetadata) { + yield { + type: 'usage', + usage: { + input: chunk.usageMetadata.promptTokenCount || 0, + output: chunk.usageMetadata.candidatesTokenCount || 0, + total: chunk.usageMetadata.totalTokenCount || 0, + thinking: chunk.usageMetadata.thoughtsTokenCount, + cacheRead: chunk.usageMetadata.cachedContentTokenCount + } + }; + } + } + } + + private toGeminiContents(request: Request): any[] { + return request.messages.map(msg => { + const parts: any[] = []; + + if (typeof msg.content === 'string') { + parts.push({ text: msg.content }); + } else { + for (const part of msg.content) { + if (part.type === 'text') { + parts.push({ text: part.text }); + } else if (part.type === 'image') { + parts.push({ + inlineData: { + mimeType: part.image.mimeType, + data: part.image.data + } + }); + } + } + } + + // Add function calls as parts + if (msg.toolCalls) { + for (const tc of msg.toolCalls) { + parts.push({ + functionCall: { + name: tc.name, + args: tc.arguments + } + }); + } + } + + // Add tool results as function responses + if (msg.role === 'tool') { + parts.push({ + functionResponse: { + name: msg.toolCallId, + response: { result: msg.content } + } + }); + } + + return { + role: msg.role === 'assistant' ? 'model' : msg.role === 'tool' ? 'user' : msg.role, + parts + }; + }); + } +} +``` + +## Usage Examples + +### Basic Streaming + +```typescript +const ai = new AI({ + provider: 'openai', + apiKey: process.env.OPENAI_API_KEY, + model: 'gpt-4' +}); + +const stream = ai.stream({ + messages: [ + { role: 'user', content: 'Write a haiku about coding' } + ], + systemPrompt: 'You are a poetic programmer' +}); + +for await (const event of stream) { + switch (event.type) { + case 'text': + process.stdout.write(event.delta); + break; + case 'usage': + console.log(`\nTokens: ${event.usage.total}`); + break; + case 'done': + console.log(`\nFinished: ${event.reason}`); + break; + } +} +``` + +### Cross-Provider Tool Calling + +```typescript +async function callWithTools(provider: 'openai' | 'anthropic' | 'gemini') { + const ai = new AI({ + provider, + apiKey: process.env[`${provider.toUpperCase()}_API_KEY`], + model: getDefaultModel(provider) + }); + + const messages: Message[] = [{ + role: 'user', + content: 'What is the weather in Paris and calculate 15 * 23?' 
+ }]; + + const stream = ai.stream({ + messages, + tools: [ + { + name: 'weather', + description: 'Get weather for a location', + parameters: { + type: 'object', + properties: { + location: { type: 'string' } + }, + required: ['location'] + } + }, + { + name: 'calculator', + description: 'Calculate math expressions', + parameters: { + type: 'object', + properties: { + expression: { type: 'string' } + }, + required: ['expression'] + } + } + ] + }); + + const toolCalls: any[] = []; + + for await (const event of stream) { + if (event.type === 'toolCall') { + toolCalls.push(event.toolCall); + + // Execute tool + const result = await executeToolCall(event.toolCall); + + // Add tool result to conversation + messages.push({ + role: 'assistant', + toolCalls: [event.toolCall] + }); + + messages.push({ + role: 'tool', + content: JSON.stringify(result), + toolCallId: event.toolCall.id + }); + } + } + + // Continue conversation with tool results + if (toolCalls.length > 0) { + const finalStream = ai.stream({ messages }); + + for await (const event of finalStream) { + if (event.type === 'text') { + process.stdout.write(event.delta); + } + } + } +} +``` + +### Thinking/Reasoning + +```typescript +async function withThinking() { + // OpenAI o1 + const openai = new AI({ + provider: 'openai', + model: 'o1-preview' + }); + + // Anthropic Claude + const anthropic = new AI({ + provider: 'anthropic', + model: 'claude-3-opus-20240229' + }); + + // Gemini thinking model + const gemini = new AI({ + provider: 'gemini', + model: 'gemini-2.0-flash-thinking-exp-1219' + }); + + for (const ai of [openai, anthropic, gemini]) { + const stream = ai.stream({ + messages: [{ + role: 'user', + content: 'Solve this step by step: If a tree falls in a forest...' + }], + reasoning: { + enabled: true, + effort: 'high', // OpenAI reasoning_effort + maxTokens: 2000 // Anthropic budget + } + }); + + for await (const event of stream) { + if (event.type === 'thinking') { + console.log('[THINKING]', event.delta); + } else if (event.type === 'text') { + console.log('[RESPONSE]', event.delta); + } else if (event.type === 'done') { + // Final message includes model and usage with cost + console.log('Model:', event.message.model); + console.log('Tokens:', event.message.usage?.total); + console.log('Cost: $', event.message.usage?.cost?.total); + } + } + } +} +``` + +## Implementation Notes + +### Critical Decisions + +1. **Streaming First**: All providers support streaming, non-streaming is just collected events +2. **Unified Events**: Same event types across all providers for consistent handling +3. **Separate System Prompt**: Required for Anthropic/Gemini compatibility +4. **Tool Role**: Unified way to handle tool responses across providers +5. **Content Arrays**: Support both string and structured content +6. 
**Thinking Extraction**: Normalize reasoning across different provider formats + +### Provider-Specific Handling + +**OpenAI**: +- Choose between Chat Completions and Responses API based on model and thinking needs +- Map `developer` role for o1/o3 models +- Handle streaming tool call deltas + +**Anthropic**: +- Convert to content blocks (always arrays) +- Tool results as user messages with tool_result blocks +- Handle MessageStream events or raw streaming + +**Gemini**: +- Convert to parts system +- Extract thinking from `part.thought` flag +- Map `assistant` to `model` role +- Handle function calls/responses in parts + +### Error Handling + +```typescript +class AIError extends Error { + constructor( + message: string, + public code: string, + public provider: string, + public retryable: boolean, + public statusCode?: number + ) { + super(message); + } +} + +// In adapters +try { + // API call +} catch (error) { + if (error instanceof RateLimitError) { + throw new AIError( + 'Rate limit exceeded', + 'rate_limit', + this.provider, + true, + 429 + ); + } + // Map other errors... +} +``` + +## Model Information & Cost Tracking + +### Models Database + +We cache the models.dev API data at build time for fast, offline access: + +```typescript +// scripts/update-models.ts - Run during build or manually +async function updateModels() { + const response = await fetch('https://models.dev/api.json'); + const data = await response.json(); + + // Transform to our format + const models: ModelsDatabase = transformModelsData(data); + + // Generate TypeScript file + const content = `// Auto-generated from models.dev API +// Last updated: ${new Date().toISOString()} +// Run 'npm run update-models' to refresh + +export const MODELS_DATABASE: ModelsDatabase = ${JSON.stringify(models, null, 2)}; +`; + + await fs.writeFile('src/models-data.ts', content); +} + +// src/models.ts - Runtime model lookup +import { MODELS_DATABASE } from './models-data.js'; + +// Simple lookup with fallback +export function getModelInfo(provider: string, model: string): ModelInfo { + const info = MODELS_DATABASE.providers[provider]?.models[model]; + + if (!info) { + // Fallback for unknown models + return { + id: model, + name: model, + provider, + capabilities: { + reasoning: false, + toolCall: true, + vision: false + }, + cost: { input: 0, output: 0 }, + limits: { context: 128000, output: 4096 } + }; + } + + return info; +} + +// Optional: Runtime override for testing new models +const runtimeOverrides = new Map(); + +export function registerModel(provider: string, model: string, info: ModelInfo) { + runtimeOverrides.set(`${provider}:${model}`, info); +} +``` + +### Cost Calculation + +```typescript +class CostTracker { + private usage: TokenUsage = { + input: 0, + output: 0, + total: 0, + cacheRead: 0, + cacheWrite: 0 + }; + + private modelInfo: ModelInfo; + + constructor(modelInfo: ModelInfo) { + this.modelInfo = modelInfo; + } + + addUsage(tokens: Partial): TokenUsage { + this.usage.input += tokens.input || 0; + this.usage.output += tokens.output || 0; + this.usage.thinking += tokens.thinking || 0; + this.usage.cacheRead += tokens.cacheRead || 0; + this.usage.cacheWrite += tokens.cacheWrite || 0; + this.usage.total = this.usage.input + this.usage.output + (this.usage.thinking || 0); + + // Calculate costs (per million tokens) + const cost = this.modelInfo.cost; + this.usage.cost = { + input: (this.usage.input / 1_000_000) * cost.input, + output: (this.usage.output / 1_000_000) * cost.output, + cache: + ((this.usage.cacheRead 
|| 0) / 1_000_000) * (cost.cacheRead || 0) + + ((this.usage.cacheWrite || 0) / 1_000_000) * (cost.cacheWrite || 0), + total: 0 + }; + + this.usage.cost.total = + this.usage.cost.input + + this.usage.cost.output + + this.usage.cost.cache; + + return { ...this.usage }; + } + + getTotalCost(): number { + return this.usage.cost?.total || 0; + } + + getUsageSummary(): string { + return `Tokens: ${this.usage.total} (${this.usage.input}→${this.usage.output}) | Cost: $${this.getTotalCost().toFixed(4)}`; + } +} +``` + +### Integration in Adapters + +```typescript +class OpenAIAdapter { + private costTracker: CostTracker; + + constructor(config: AIConfig) { + const modelInfo = getModelInfo('openai', config.model); + this.costTracker = new CostTracker(modelInfo); + } + + async *stream(request: Request): AsyncGenerator { + // ... streaming logic ... + + if (chunk.usage) { + const usage = this.costTracker.addUsage({ + input: chunk.usage.prompt_tokens, + output: chunk.usage.completion_tokens, + thinking: chunk.usage.completion_tokens_details?.reasoning_tokens, + cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens + }); + + yield { type: 'usage', usage }; + } + } +} +``` + +## Next Steps + +1. Create models.ts with models.dev integration +2. Implement base `AI` class with adapter pattern +3. Create three provider adapters with full streaming support +4. Add comprehensive error mapping +5. Implement token counting and cost tracking +6. Add test suite for each provider +7. Create migration guide from native SDKs \ No newline at end of file diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts new file mode 100644 index 00000000..46691ff0 --- /dev/null +++ b/packages/ai/src/index.ts @@ -0,0 +1,5 @@ +// @mariozechner/ai - Unified API for OpenAI, Anthropic, and Google Gemini +// This package provides a common interface for working with multiple LLM providers + +// TODO: Export types and implementations once defined +export const version = "0.5.8"; diff --git a/packages/ai/tsconfig.build.json b/packages/ai/tsconfig.build.json new file mode 100644 index 00000000..5ce43029 --- /dev/null +++ b/packages/ai/tsconfig.build.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} \ No newline at end of file diff --git a/todos/done/20250817-183528-ai-unified-api-package-analysis.md b/todos/done/20250817-183528-ai-unified-api-package-analysis.md new file mode 100644 index 00000000..750c96cb --- /dev/null +++ b/todos/done/20250817-183528-ai-unified-api-package-analysis.md @@ -0,0 +1,606 @@ +# Analysis: Creating Unified AI Package + +## Package Structure Analysis for Pi Monorepo + +Based on my examination of the existing packages (`tui`, `agent`, and `pods`), here are the comprehensive patterns and conventions used in this monorepo: + +### 1. Package Naming Conventions + +**Scoped NPM packages with consistent naming:** +- All packages use the `@mariozechner/` scope +- Package names follow the pattern: `@mariozechner/pi-` +- Special case: the main CLI package is simply `@mariozechner/pi` (not `pi-pods`) + +**Directory structure:** +- Packages are located in `/packages//` +- Directory names match the suffix of the npm package name (e.g., `tui`, `agent`, `pods`) + +### 2. 
Package.json Structure Patterns + +**Common fields across all packages:** +```json +{ + "name": "@mariozechner/pi-", + "version": "0.5.8", // Lockstep versioning - all packages share same version + "description": "...", + "type": "module", // All packages use ES modules + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/" + }, + "engines": { + "node": ">=20.0.0" // Consistent Node.js requirement + } +} +``` + +**Binary packages (agent, pods):** +- Include `"bin"` field with CLI command mapping +- Examples: `"pi-agent": "dist/cli.js"` and `"pi": "dist/cli.js"` + +**Library packages (tui):** +- Include `"main"` field pointing to built entry point +- Include `"types"` field for TypeScript definitions + +### 3. Scripts Configuration + +**Universal scripts across all packages:** +- `"clean": "rm -rf dist"` - Removes build artifacts +- `"build": "tsc -p tsconfig.build.json"` - Builds with dedicated build config +- `"check": "biome check --write ."` - Linting and formatting +- `"prepublishOnly": "npm run clean && npm run build"` - Pre-publish cleanup + +**CLI-specific build scripts:** +- Add `&& chmod +x dist/cli.js` for executable permissions +- Copy additional assets (e.g., `&& cp src/models.json dist/` for pods package) + +### 4. Dependencies Structure + +**Dependency hierarchy follows a clear pattern:** +``` +pi-tui (foundation) -> pi-agent (uses tui) -> pi (uses agent) +``` + +**Internal dependencies:** +- Use exact version matching for internal packages (e.g., `"^0.5.8"`) +- Agent depends on TUI: `"@mariozechner/pi-tui": "^0.5.8"` +- Pods depends on Agent: `"@mariozechner/pi-agent": "^0.5.8"` + +**External dependencies:** +- Common dependencies like `chalk` are used across multiple packages +- Specialized dependencies are package-specific (e.g., `marked` for tui, `openai` for agent) + +### 5. TypeScript Configuration + +**Dual TypeScript configuration approach:** + +**`tsconfig.build.json` (for production builds):** +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +**Root `tsconfig.json` (for development and type checking):** +- Contains path mappings for cross-package imports during development +- Includes all source and test files +- Uses `"noEmit": true` for type checking without building + +### 6. Source Directory Structure + +**Standard structure across all packages:** +``` +src/ +ā”œā”€ā”€ index.ts # Main export file +ā”œā”€ā”€ cli.ts # CLI entry point (if applicable) +ā”œā”€ā”€ .ts # Core functionality +ā”œā”€ā”€ components/ # Components (for tui) +ā”œā”€ā”€ tools/ # Tool implementations (for agent) +ā”œā”€ā”€ commands/ # Command implementations (for pods) +└── renderers/ # Output renderers (for agent) +``` + +### 7. Export Patterns (index.ts) + +**Comprehensive type and function exports:** +- Export both types and implementation classes +- Use `export type` for type-only exports +- Group exports logically with comments +- Example from tui: exports components, interfaces, and utilities +- Example from agent: exports core classes, types, and utilities + +### 8. Files Configuration + +**Files included in NPM packages:** +- `"files": ["dist"]` or `"files": ["dist/**/*", "README.md"]` +- All packages include built `dist/` directory +- Some include additional files like README.md or scripts + +### 9. 
README.md Structure + +**Comprehensive documentation pattern:** +- Feature overview with key capabilities +- Quick start section with code examples +- Detailed API documentation +- Installation instructions +- Development setup +- Testing information (especially for tui) +- Examples and usage patterns + +### 10. Testing Structure (TUI package) + +**Dedicated test directory:** +- `test/` directory with `.test.ts` files for unit tests +- Example applications (e.g., `chat-app.ts`, `file-browser.ts`) +- Custom testing infrastructure (e.g., `virtual-terminal.ts`) +- Test script: `"test": "node --test --import tsx test/*.test.ts"` + +### 11. Version Management + +**Lockstep versioning:** +- All packages share the same version number +- Root package.json scripts handle version bumping across all packages +- Version sync script ensures internal dependency versions match + +### 12. Build Order + +**Dependency-aware build order:** +- Root build script builds packages in dependency order +- `"build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi"` + +### 13. Common Configuration Files + +**Shared across monorepo:** +- `biome.json` - Unified linting and formatting configuration +- `tsconfig.base.json` - Base TypeScript configuration +- `.gitignore` - Ignores `dist/`, `node_modules/`, and other build artifacts +- Husky pre-commit hooks for formatting and type checking + +### 14. Keywords and Metadata + +**Descriptive keywords for NPM discovery:** +- Each package includes relevant keywords (e.g., "tui", "terminal", "agent", "ai", "llm") +- Keywords help with package discoverability + +This analysis shows a well-structured monorepo with consistent patterns that would make adding new packages straightforward by following these established conventions. + +## Monorepo Configuration Analysis + +Based on my analysis of the pi-mono monorepo configuration, here's a comprehensive guide on how to properly integrate a new package: + +### 1. Root Package.json Configuration + +**Workspace Configuration:** +- Uses npm workspaces with `"workspaces": ["packages/*"]` +- All packages are located under `/packages/` directory +- Private monorepo (`"private": true`) with ESM modules (`"type": "module"`) + +**Build System:** +- **Sequential Build Order**: The build script explicitly defines dependency order: + ```json + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi" + ``` +- **Dependency Chain**: `pi-tui` → `pi-agent` → `pi` (pods) +- **Important**: New packages must be inserted in the correct dependency order in the build script + +**Scripts Available:** +- `clean`: Cleans all package dist folders +- `build`: Sequential build respecting dependencies +- `check`: Runs Biome formatting, package checks, and TypeScript checking +- `test`: Runs tests across all packages +- Version management scripts (lockstep versioning) +- Publishing scripts with dry-run capability + +### 2. 
Root TypeScript Configuration + +**Dual Configuration System:** +- **`tsconfig.base.json`**: Base TypeScript settings for all packages +- **`tsconfig.json`**: Development configuration with path mappings for cross-package imports +- **Package `tsconfig.build.json`**: Clean build configs per package + +**Path Mappings** (in `/Users/badlogic/workspaces/pi-mono/tsconfig.json`): +```json +"paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], + "@mariozechner/pi": ["./packages/pods/src/index.ts"] +} +``` + +### 3. Package Dependencies and Structure + +**Dependency Structure:** +- `pi-tui` (base library) - no internal dependencies +- `pi-agent` depends on `pi-tui` +- `pi` (pods) depends on `pi-agent` + +**Standard Package Structure:** +``` +packages/new-package/ +ā”œā”€ā”€ src/ +│ ā”œā”€ā”€ index.ts # Main export file +│ └── ... # Implementation files +ā”œā”€ā”€ package.json # Package configuration +ā”œā”€ā”€ tsconfig.build.json # Build-specific TypeScript config +ā”œā”€ā”€ README.md # Package documentation +└── dist/ # Build output (gitignored) +``` + +### 4. Version Management + +**Lockstep Versioning:** +- All packages share the same version number (currently 0.5.8) +- Automated version sync script: `/Users/badlogic/workspaces/pi-mono/scripts/sync-versions.js` +- Inter-package dependencies are automatically updated to match current versions + +**Version Scripts:** +- `npm run version:patch/minor/major` - Updates all package versions and syncs dependencies +- Automatic dependency version synchronization + +### 5. GitIgnore Patterns + +**Package-Level Ignores:** +``` +packages/*/node_modules/ +packages/*/dist/ +``` +Plus standard ignores for logs, IDE files, environment files, etc. 
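The lockstep versioning and dependency sync described in the Version Management section above can be hard to picture from prose alone. Below is a minimal, illustrative sketch of that step — not the repository's actual `scripts/sync-versions.js` — assuming only Node's built-in `fs`/`path` modules and the `packages/*` layout:

```typescript
// sync-versions-sketch.ts — illustrative only, not the real scripts/sync-versions.js.
// Pins every packages/*/package.json to one shared version and rewrites
// internal @mariozechner/* dependency ranges to match (lockstep versioning).
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";

const version = process.argv[2]; // e.g. "0.5.8"
if (!version) throw new Error("usage: tsx sync-versions-sketch.ts <version>");

for (const dir of readdirSync("packages")) {
	const file = join("packages", dir, "package.json");
	if (!existsSync(file)) continue;

	const pkg = JSON.parse(readFileSync(file, "utf8"));
	pkg.version = version;

	for (const field of ["dependencies", "devDependencies"]) {
		for (const name of Object.keys(pkg[field] ?? {})) {
			if (name.startsWith("@mariozechner/")) {
				pkg[field][name] = `^${version}`;
			}
		}
	}

	writeFileSync(file, `${JSON.stringify(pkg, null, "\t")}\n`);
}
```

The real script may differ in its details (error handling, which dependency fields it touches), but the overall shape — read every workspace manifest, stamp the shared version, realign internal ranges — is what the version scripts above rely on.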
+ +## How to Integrate a New Package + +### Step 1: Create Package Structure +```bash +mkdir packages/your-new-package +cd packages/your-new-package +``` + +### Step 2: Create package.json +```json +{ + "name": "@mariozechner/your-new-package", + "version": "0.5.8", + "description": "Your package description", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": { + // Add dependencies on other packages in the monorepo if needed + // "@mariozechner/pi-tui": "^0.5.8" + }, + "devDependencies": {}, + "keywords": ["relevant", "keywords"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/your-new-package" + }, + "engines": { + "node": ">=20.0.0" + } +} +``` + +### Step 3: Create tsconfig.build.json +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +### Step 4: Create src/index.ts +```typescript +// Main exports for your package +export * from './your-main-module.js'; +``` + +### Step 5: Update Root Configuration + +**Add to `/Users/badlogic/workspaces/pi-mono/tsconfig.json` paths:** +```json +"paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], + "@mariozechner/pi": ["./packages/pods/src/index.ts"], + "@mariozechner/your-new-package": ["./packages/your-new-package/src/index.ts"] +} +``` + +**Update build script in root `/Users/badlogic/workspaces/pi-mono/package.json`:** +```json +"build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/your-new-package && npm run build -w @mariozechner/pi" +``` +(Insert in correct dependency order) + +### Step 6: Update sync-versions.js +If your package depends on other monorepo packages, add synchronization logic to `/Users/badlogic/workspaces/pi-mono/scripts/sync-versions.js`. + +### Step 7: Install and Test +```bash +# From monorepo root +npm install +npm run build +npm run check +``` + +## Key Requirements for New Packages + +1. **Must use ESM modules** (`"type": "module"`) +2. **Must follow lockstep versioning** (same version as other packages) +3. **Must be placed in correct build order** based on dependencies +4. **Must use tab indentation** (Biome config: `"indentStyle": "tab"`) +5. **Must avoid `any` types** unless absolutely necessary (project instruction) +6. **Must include proper TypeScript declarations** (`"declaration": true`) +7. **Must use Node.js >= 20.0.0** (engine requirement) +8. **Must follow the standard package structure** with src/, dist/, proper exports + +## Development Workflow + +1. **Development**: Use `tsx` to run source files directly (no build needed) +2. **Type Checking**: `npm run check` works across all packages +3. **Building**: Sequential builds respect dependency order +4. **Publishing**: Automatic version sync and cross-package dependency updates +5. **Testing**: Each package can have its own test suite + +This monorepo is well-structured for maintaining multiple related packages with clean dependency management and automated version synchronization. 
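To make workflow item 5 concrete, here is a hypothetical package-level test using the Node.js built-in runner, in the same style as the tui package's `node --test --import tsx test/*.test.ts` setup; it assumes only the `version` constant that this patch exports from `packages/ai/src/index.ts`:

```typescript
// packages/ai/test/index.test.ts — hypothetical example; run from packages/ai with:
//   node --test --import tsx test/*.test.ts
import assert from "node:assert/strict";
import { test } from "node:test";

import { version } from "../src/index.js";

test("exports the lockstep package version", () => {
	assert.equal(version, "0.5.8");
});
```

A test along these lines would also catch a package whose version drifts out of lockstep after a release bump.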
+ +## Detailed Findings: Unified AI API Requirements Based on Current pi-agent Usage + +After thoroughly analyzing the existing agent package (`/Users/badlogic/workspaces/pi-mono/packages/agent`), here are the comprehensive requirements for a unified AI API based on current usage patterns: + +### **1. Core API Structure & Event System** + +**Current Pattern:** +- Event-driven architecture using `AgentEvent` types +- Single `AgentEventReceiver` interface for all output handling +- Support for both single-shot and interactive modes + +**Required API Features:** +```typescript +type AgentEvent = + | { type: "session_start"; sessionId: string; model: string; api: string; baseURL: string; systemPrompt: string } + | { type: "assistant_start" } + | { type: "reasoning"; text: string } + | { type: "tool_call"; toolCallId: string; name: string; args: string } + | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } + | { type: "assistant_message"; text: string } + | { type: "error"; message: string } + | { type: "user_message"; text: string } + | { type: "interrupted" } + | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number; cacheReadTokens: number; cacheWriteTokens: number; reasoningTokens: number } +``` + +### **2. OpenAI API Integration Patterns** + +**Current Implementation:** +- Uses OpenAI SDK v5.12.2 (`import OpenAI from "openai"`) +- Supports both Chat Completions (`/v1/chat/completions`) and Responses API (`/v1/responses`) +- Provider detection based on base URL patterns + +**Provider Support Required:** +```typescript +// Detected providers based on baseURL patterns +type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other" + +// Provider-specific configurations +interface ProviderConfig { + openai: { reasoning_effort: "minimal" | "low" | "medium" | "high" } + gemini: { extra_body: { google: { thinking_config: { thinking_budget: number, include_thoughts: boolean } } } } + groq: { reasoning_format: "parsed", reasoning_effort: string } + openrouter: { reasoning: { effort: "low" | "medium" | "high" } } +} +``` + +### **3. Streaming vs Non-Streaming** + +**Current Status:** +- **No streaming currently implemented** - uses standard request/response +- All API calls are non-streaming: `await client.chat.completions.create()` and `await client.responses.create()` +- Events are emitted synchronously after full response + +**Streaming Requirements for Unified API:** +- Support for streaming responses with partial content updates +- Event-driven streaming with `assistant_message_delta` events +- Proper handling of tool call streaming +- Reasoning token streaming for supported models + +### **4. Tool Calling Architecture** + +**Current Implementation:** +```typescript +// Tool definitions for both APIs +toolsForResponses: Array<{type: "function", name: string, description: string, parameters: object}> +toolsForChat: ChatCompletionTool[] + +// Tool execution with abort support +async function executeTool(name: string, args: string, signal?: AbortSignal): Promise + +// Built-in tools: read, list, bash, glob, rg (ripgrep) +``` + +**Unified API Requirements:** +- Automatic tool format conversion between Chat Completions and Responses API +- Built-in tools with filesystem and shell access +- Custom tool registration capability +- Tool execution with proper abort/interrupt handling +- Tool result streaming for long-running operations + +### **5. 
Message Structure Handling** + +**Current Pattern:** +- Dual message format support based on API type +- Automatic conversion between formats in `setEvents()` method + +**Chat Completions Format:** +```typescript +{ role: "system" | "user" | "assistant" | "tool", content: string, tool_calls?: any[] } +``` + +**Responses API Format:** +```typescript +{ type: "message" | "function_call" | "function_call_output", content: any[] } +``` + +### **6. Session Persistence System** + +**Current Implementation:** +```typescript +interface SessionData { + config: AgentConfig + events: SessionEvent[] + totalUsage: TokenUsage +} + +// File-based persistence in ~/.pi/sessions/ +// JSONL format with session headers and event entries +// Automatic session continuation support +``` + +**Requirements:** +- Directory-based session organization +- Event replay capability for session restoration +- Cumulative token usage tracking +- Session metadata (config, timestamps, working directory) + +### **7. Token Counting & Usage Tracking** + +**Current Implementation:** +```typescript +interface TokenUsage { + inputTokens: number + outputTokens: number + totalTokens: number + cacheReadTokens: number + cacheWriteTokens: number + reasoningTokens: number // For o1/o3 and reasoning models +} +``` + +**Provider-Specific Token Mapping:** +- OpenAI: `prompt_tokens`, `completion_tokens`, `cached_tokens`, `reasoning_tokens` +- Responses API: `input_tokens`, `output_tokens`, `cached_tokens`, `reasoning_tokens` +- Cumulative tracking across conversations + +### **8. Abort/Interrupt Handling** + +**Current Pattern:** +```typescript +class Agent { + private abortController: AbortController | null = null + + async ask(message: string) { + this.abortController = new AbortController() + // Pass signal to all API calls and tool executions + } + + interrupt(): void { + this.abortController?.abort() + } +} +``` + +**Requirements:** +- AbortController integration for all async operations +- Graceful interruption of API calls, tool execution, and streaming +- Proper cleanup and "interrupted" event emission +- Signal propagation to nested operations + +### **9. Reasoning/Thinking Support** + +**Current Implementation:** +```typescript +// Provider-specific reasoning extraction +function parseReasoningFromMessage(message: any, baseURL?: string): { + cleanContent: string + reasoningTexts: string[] +} + +// Automatic reasoning support detection +async function checkReasoningSupport(client, model, api, baseURL, signal): Promise +``` + +**Provider Support:** +- **OpenAI o1/o3**: Full thinking content via Responses API +- **Groq GPT-OSS**: Reasoning via `reasoning_format: "parsed"` +- **Gemini 2.5**: Thinking content via `` tags +- **OpenRouter**: Model-dependent reasoning support + +### **10. Error Handling Patterns** + +**Current Approach:** +- Try/catch blocks around all API calls +- Error events emitted through event system +- Specific error handling for reasoning model failures +- Provider-specific error interpretation + +### **11. Configuration Management** + +**Current Structure:** +```typescript +interface AgentConfig { + apiKey: string + baseURL: string + model: string + api: "completions" | "responses" + systemPrompt: string +} +``` + +**Provider Detection:** +```typescript +function detectProvider(baseURL?: string): Provider { + // URL pattern matching for automatic provider configuration +} +``` + +### **12. 
Output Rendering System** + +**Current Renderers:** +- **ConsoleRenderer**: Terminal output with animations, token display +- **TuiRenderer**: Full interactive TUI with pi-tui integration +- **JsonRenderer**: JSONL event stream output + +**Requirements:** +- Event-based rendering architecture +- Real-time token usage display +- Loading animations for async operations +- Markdown rendering support +- Tool execution progress indication + +### **Summary: Key Unified API Requirements** + +1. **Event-driven architecture** with standardized event types +2. **Dual API support** (Chat Completions + Responses API) with automatic format conversion +3. **Provider abstraction** with automatic detection and configuration +4. **Comprehensive tool system** with abort support and built-in tools +5. **Session persistence** with event replay and token tracking +6. **Reasoning/thinking support** across multiple providers +7. **Interrupt handling** with AbortController integration +8. **Token usage tracking** with provider-specific mapping +9. **Flexible rendering** through event receiver pattern +10. **Configuration management** with provider-specific settings + +The unified API should maintain this event-driven, provider-agnostic approach while adding streaming capabilities and enhanced tool execution features that the current implementation lacks. \ No newline at end of file diff --git a/todos/done/20250817-183528-ai-unified-api-package.md b/todos/done/20250817-183528-ai-unified-api-package.md new file mode 100644 index 00000000..eca2bbc1 --- /dev/null +++ b/todos/done/20250817-183528-ai-unified-api-package.md @@ -0,0 +1,46 @@ +# Create AI Package with Unified API + +**Status:** Done +**Agent PID:** 10965 + +## Original Todo +ai: create a new package ai (package name @mariozechner/ai) which implements a common api for the openai, anthropic, and google gemini apis + - look at the other packages and how they are set up, mirror that setup for ai + - install the latest version of each dependency via npm in the ai package + - openai@5.12.2 + - @anthropic-ai/sdk@0.60.0 + - @google/genai@1.14.0 + - investigate the APIs in their respective node_modules folder so you understand how to use them. specifically, we need to understand how to: + - stream responses, including reasoning/thinking tokens and tool calls + - abort requests + - handle errors + - handle stop reasons + - maintain the context (message history) such that it can be serialized in a uniform format to disk, and deserialized again later and used with the other api + - count tokens (input, output, cached read, cached write) + - enable caching + - Create a plan.md in the ai package that details how the unified API on top of all three could look like. we want the most minimal api possible, which allows serialization/deserialization, turning on/off reasoning/thinking, and handle system prompt and tool specifications + +## Description +Create the initial package scaffold for @mariozechner/ai following the established monorepo patterns, install the required dependencies (openai, anthropic, google genai SDKs), and create a plan.md file that details the unified API design for all three providers. 
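Before the implementation plan below, it helps to sketch what the "most minimal API possible" from the original todo could look like. This is a hypothetical illustration — the names and shapes are not the design actually written to plan.md — covering the required pieces: streaming with reasoning and tool calls, abort support, stop reasons, serializable message history, and the four token counters:

```typescript
// Hypothetical sketch only — not the API defined in plan.md.
type Role = "system" | "user" | "assistant" | "tool";

interface Message {
	role: Role;
	content: string; // plain data, so history serializes to JSON and back
}

interface Usage {
	input: number;
	output: number;
	cacheRead: number;
	cacheWrite: number;
	reasoning: number;
}

type StopReason = "stop" | "length" | "toolCall" | "aborted";

type StreamEvent =
	| { type: "text"; delta: string }
	| { type: "reasoning"; delta: string }
	| { type: "toolCall"; id: string; name: string; args: string }
	| { type: "usage"; usage: Usage }
	| { type: "stop"; reason: StopReason };

interface StreamOptions {
	reasoning?: boolean; // toggle thinking/reasoning where the provider supports it
	tools?: unknown[]; // provider-agnostic tool specifications
	signal?: AbortSignal; // abort an in-flight request
}

interface UnifiedClient {
	stream(messages: Message[], options?: StreamOptions): AsyncIterable<StreamEvent>;
}
```

Because `Message` and `Usage` are plain data, a conversation could be written to disk as JSON and later replayed against any of the three providers, which is the serialization requirement in the todo.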
+ +*Read [analysis.md](./analysis.md) in full for detailed codebase research and context* + +## Implementation Plan +- [x] Create package directory structure at packages/ai/ +- [x] Create package.json with proper configuration following monorepo patterns +- [x] Create tsconfig.build.json for build configuration +- [x] Create initial src/index.ts file +- [x] Add package to root tsconfig.json path mappings +- [x] Update root package.json build script to include ai package +- [x] Install dependencies: openai@5.12.2, @anthropic-ai/sdk@0.60.0, @google/genai@1.14.0 +- [x] Create README.md with package description +- [x] Create plan.md detailing the unified API design +- [x] Investigate OpenAI, Anthropic, and Gemini APIs in detail +- [x] Document implementation details for each API +- [x] Update todos/project-description.md with "How to Create a New Package" section +- [x] Update todos/project-description.md Testing section to reflect that tui has Node.js built-in tests +- [x] Run npm install from root to link everything +- [x] Verify package builds correctly with npm run build + +## Notes +[Implementation notes] \ No newline at end of file diff --git a/todos/project-description.md b/todos/project-description.md index c75c7237..b888a76b 100644 --- a/todos/project-description.md +++ b/todos/project-description.md @@ -39,4 +39,98 @@ A comprehensive toolkit for managing Large Language Model (LLM) deployments and - Publish: `npm run publish` ## Testing -Currently no formal testing framework is configured. Test infrastructure exists but no actual test files or framework dependencies are present. \ No newline at end of file +The TUI package includes comprehensive tests using Node.js built-in test framework: +- Unit tests in `packages/tui/test/*.test.ts` +- Test runner: `node --test --import tsx test/*.test.ts` +- Virtual terminal for TUI testing via `@xterm/headless` +- Example applications for manual testing + +## How to Create a New Package + +Follow these steps to add a new package to the monorepo: + +1. **Create package directory structure:** + ```bash + mkdir -p packages/your-package/src + ``` + +2. **Create package.json:** + ```json + { + "name": "@mariozechner/your-package", + "version": "0.5.8", + "description": "Package description", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist", "README.md"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": {}, + "devDependencies": {}, + "keywords": ["relevant", "keywords"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/your-package" + }, + "engines": { + "node": ">=20.0.0" + } + } + ``` + +3. **Create tsconfig.build.json:** + ```json + { + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] + } + ``` + +4. **Create src/index.ts:** + ```typescript + // Main exports for your package + export const version = "0.5.8"; + ``` + +5. **Update root tsconfig.json paths:** + Add your package to the `paths` mapping in the correct dependency order: + ```json + "paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/your-package": ["./packages/your-package/src/index.ts"], + // ... 
other packages + } + ``` + +6. **Update root package.json build script:** + Insert your package in the correct dependency order: + ```json + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/your-package && ..." + ``` + +7. **Install and verify:** + ```bash + npm install + npm run build + npm run check + ``` + +**Important Notes:** +- All packages use lockstep versioning (same version number) +- Follow dependency order: foundational packages build first +- Use ESM modules (`"type": "module"`) +- No `any` types unless absolutely necessary +- Include README.md with package documentation \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index 311ac672..464072cb 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,6 +4,7 @@ "noEmit": true, "paths": { "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-ai": ["./packages/ai/src/index.ts"], "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], "@mariozechner/pi": ["./packages/pods/src/index.ts"] }
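The final hunk above adds the development path mapping for the new package. As a small illustration (hypothetical scratch file, using the specifier exactly as the hunk maps it), this is what the mapping buys you: during development, another file in the repo can import the package by its mapped specifier and type checking resolves it straight to `packages/ai/src/index.ts` — no build step — picking up the `version` constant this patch adds:

```typescript
// scratch.ts — hypothetical; relies on the root tsconfig "paths" entry so the
// specifier resolves to packages/ai/src/index.ts during development/type checking.
import { version } from "@mariozechner/pi-ai";

console.log(`@mariozechner/pi-ai scaffold, version ${version}`);
```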