From f064ea0e14d969ff2108c37af7f9cbaefad44bf3 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sun, 17 Aug 2025 20:18:45 +0200 Subject: [PATCH] feat(ai): Create unified AI package with OpenAI, Anthropic, and Gemini support - Set up @mariozechner/ai package structure following monorepo patterns - Install OpenAI, Anthropic, and Google Gemini SDK dependencies - Document comprehensive API investigation for all three providers - Design minimal unified API with streaming-first architecture - Add models.dev integration for pricing and capabilities - Implement automatic caching strategy for all providers - Update project documentation with package creation guide --- package-lock.json | 390 ++- package.json | 2 +- packages/ai/README.md | 62 + packages/ai/anthropic-api.md | 1706 ++++++++++++ packages/ai/gemini-api.md | 1233 +++++++++ packages/ai/openai-api.md | 2320 +++++++++++++++++ packages/ai/package.json | 32 + packages/ai/plan.md | 950 +++++++ packages/ai/src/index.ts | 5 + packages/ai/tsconfig.build.json | 9 + ...-183528-ai-unified-api-package-analysis.md | 606 +++++ .../20250817-183528-ai-unified-api-package.md | 46 + todos/project-description.md | 96 +- tsconfig.json | 1 + 14 files changed, 7437 insertions(+), 21 deletions(-) create mode 100644 packages/ai/README.md create mode 100644 packages/ai/anthropic-api.md create mode 100644 packages/ai/gemini-api.md create mode 100644 packages/ai/openai-api.md create mode 100644 packages/ai/package.json create mode 100644 packages/ai/plan.md create mode 100644 packages/ai/src/index.ts create mode 100644 packages/ai/tsconfig.build.json create mode 100644 todos/done/20250817-183528-ai-unified-api-package-analysis.md create mode 100644 todos/done/20250817-183528-ai-unified-api-package.md diff --git a/package-lock.json b/package-lock.json index fe555185..c434192c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,15 @@ "node": ">=20.0.0" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.60.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.60.0.tgz", + "integrity": "sha512-9zu/TXaUy8BZhXedDtt1wT3H4LOlpKDO1/ftiFpeR3N1PCr3KJFKkxxlQWWt1NNp08xSwUNJ3JNY8yhl8av6eQ==", + "license": "MIT", + "bin": { + "anthropic-ai-sdk": "bin/cli" + } + }, "node_modules/@biomejs/biome": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/@biomejs/biome/-/biome-2.1.4.tgz", @@ -624,6 +633,31 @@ "node": ">=18" } }, + "node_modules/@google/genai": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.14.0.tgz", + "integrity": "sha512-jirYprAAJU1svjwSDVCzyVq+FrJpJd5CSxR/g2Ga/gZ0ZYZpcWjMS75KJl9y71K1mDN+tcx6s21CzCbB2R840g==", + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^9.14.2", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.11.0" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + } + }, + "node_modules/@mariozechner/ai": { + "resolved": "packages/ai", + "link": true + }, "node_modules/@mariozechner/pi": { "resolved": "packages/pods", "link": true @@ -659,6 +693,15 @@ "dev": true, "license": "MIT" }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ansi-regex": { "version": 
"6.1.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", @@ -683,6 +726,41 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, "node_modules/chalk": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.5.0.tgz", @@ -695,6 +773,32 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/esbuild": { "version": "0.25.8", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.8.tgz", @@ -737,6 +841,12 @@ "@esbuild/win32-x64": "0.25.8" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -752,6 +862,36 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": 
"sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, "node_modules/get-tsconfig": { "version": "4.10.1", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz", @@ -765,6 +905,58 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/husky": { "version": "9.1.7", "resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz", @@ -781,6 +973,95 @@ "url": "https://github.com/sponsors/typicode" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": 
"sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/openai": { + "version": "5.12.2", + "resolved": "https://registry.npmjs.org/openai/-/openai-5.12.2.tgz", + "integrity": "sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -791,6 +1072,26 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -818,6 +1119,12 @@ "url": "https://github.com/chalk/strip-ansi?sponsor=1" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/tsx": { "version": "4.20.3", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.3.tgz", @@ -858,6 +1165,56 @@ "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", "license": "MIT" }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": 
"BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/ws": { + "version": "8.18.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "packages/agent": { "name": "@mariozechner/pi-agent", "version": "0.5.8", @@ -1041,25 +1398,6 @@ "node": ">=16 || 14 >=14.17" } }, - "packages/agent/node_modules/openai": { - "version": "5.12.2", - "license": "Apache-2.0", - "bin": { - "openai": "bin/cli" - }, - "peerDependencies": { - "ws": "^8.18.0", - "zod": "^3.23.8" - }, - "peerDependenciesMeta": { - "ws": { - "optional": true - }, - "zod": { - "optional": true - } - } - }, "packages/agent/node_modules/package-json-from-dist": { "version": "1.0.1", "license": "BlueOak-1.0.0" @@ -1259,6 +1597,20 @@ "node": ">=8" } }, + "packages/ai": { + "name": "@mariozechner/ai", + "version": "0.5.8", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": "0.60.0", + "@google/genai": "1.14.0", + "openai": "5.12.2" + }, + "devDependencies": {}, + "engines": { + "node": ">=20.0.0" + } + }, "packages/pods": { "name": "@mariozechner/pi", "version": "0.5.8", diff --git a/package.json b/package.json index 186dc90d..1ccf027d 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ ], "scripts": { "clean": "npm run clean --workspaces", - "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi", + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/ai && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi", "check": "biome check --write . && npm run check --workspaces && tsc --noEmit", "test": "npm run test --workspaces --if-present", "version:patch": "npm version patch -ws --no-git-tag-version && node scripts/sync-versions.js", diff --git a/packages/ai/README.md b/packages/ai/README.md new file mode 100644 index 00000000..9cff0dab --- /dev/null +++ b/packages/ai/README.md @@ -0,0 +1,62 @@ +# @mariozechner/ai + +Unified API for OpenAI, Anthropic, and Google Gemini LLM providers. This package provides a common interface for working with multiple LLM providers, handling their differences transparently while exposing a consistent, minimal API. 
+ +## Features (Planned) + +- **Unified Interface**: Single API for OpenAI, Anthropic, and Google Gemini +- **Streaming Support**: Real-time response streaming with delta events +- **Tool Calling**: Consistent tool/function calling across providers +- **Reasoning/Thinking**: Support for reasoning tokens where available +- **Session Management**: Serializable conversation state across providers +- **Token Tracking**: Unified token counting (input, output, cached, reasoning) +- **Interrupt Handling**: Graceful cancellation of requests +- **Provider Detection**: Automatic configuration based on endpoint +- **Caching Support**: Provider-specific caching strategies + +## Installation + +```bash +npm install @mariozechner/ai +``` + +## Quick Start (Coming Soon) + +```typescript +import { createClient } from '@mariozechner/ai'; + +// Automatically detects provider from configuration +const client = createClient({ + provider: 'openai', + apiKey: process.env.OPENAI_API_KEY, + model: 'gpt-4' +}); + +// Same API works for all providers +const response = await client.complete({ + messages: [ + { role: 'user', content: 'Hello!' } + ], + stream: true +}); + +for await (const event of response) { + if (event.type === 'content') { + process.stdout.write(event.text); + } +} +``` + +## Supported Providers + +- **OpenAI**: GPT-3.5, GPT-4, o1, o3 models +- **Anthropic**: Claude models via native SDK +- **Google Gemini**: Gemini models with thinking support + +## Development + +This package is part of the pi monorepo. See the main README for development instructions. + +## License + +MIT \ No newline at end of file diff --git a/packages/ai/anthropic-api.md b/packages/ai/anthropic-api.md new file mode 100644 index 00000000..7ede2cb2 --- /dev/null +++ b/packages/ai/anthropic-api.md @@ -0,0 +1,1706 @@ +# Anthropic SDK Implementation Guide + +This document provides a comprehensive guide for implementing the required features using the Anthropic SDK. All examples use TypeScript and include actual code that works with the SDK. + +## Table of Contents + +1. [Basic Client Setup](#basic-client-setup) +2. [Streaming Responses](#streaming-responses) +3. [Request Abortion](#request-abortion) +4. [Error Handling](#error-handling) +5. [Stop Reasons](#stop-reasons) +6. [Context and Message History](#context-and-message-history) +7. [Token Counting](#token-counting) +8. [Prompt Caching](#prompt-caching) +9. [Tool Use (Function Calling)](#tool-use-function-calling) +10. [System Prompts](#system-prompts) +11. [Content Block System](#content-block-system) +12. [MessageStream Helper Class](#messagestream-helper-class) +13. [Thinking Tokens and Extended Reasoning](#thinking-tokens-and-extended-reasoning) +14. 
[Complete Implementation Example](#complete-implementation-example) + +## Basic Client Setup + +```typescript +import Anthropic from '@anthropic-ai/sdk'; + +// Create client with configuration +const anthropic = new Anthropic({ + apiKey: process.env.ANTHROPIC_API_KEY, // Required + baseURL: 'https://api.anthropic.com', // Optional, this is the default + timeout: 60000, // Optional, in milliseconds + maxRetries: 3, // Optional, default is 2 +}); +``` + +### Environment Variables + +The SDK automatically reads from these environment variables: +- `ANTHROPIC_API_KEY` - Your API key +- `ANTHROPIC_BASE_URL` - Custom base URL (optional) + +## Streaming Responses + +### Basic Streaming with MessageStream + +```typescript +import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream'; + +async function basicStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello, Claude!' }], + }); + + // Listen to different event types + stream.on('text', (text, snapshot) => { + process.stdout.write(text); // text is the delta, snapshot is accumulated + }); + + stream.on('message', (message) => { + console.log('\nFinal message:', message); + }); + + stream.on('error', (error) => { + console.error('Error:', error); + }); + + // Wait for completion + const finalMessage = await stream.finalMessage(); + return finalMessage; +} +``` + +### Raw Streaming with create() + +```typescript +import { RawMessageStreamEvent } from '@anthropic-ai/sdk'; + +async function rawStreaming() { + const stream = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + stream: true, + }); + + let content = ''; + let usage: any = null; + + for await (const chunk of stream) { + switch (chunk.type) { + case 'message_start': + console.log('Message started:', chunk.message); + break; + + case 'content_block_delta': + if (chunk.delta.type === 'text_delta') { + content += chunk.delta.text; + process.stdout.write(chunk.delta.text); + } + break; + + case 'message_delta': + if (chunk.usage) { + usage = chunk.usage; + } + console.log('\nStop reason:', chunk.delta.stop_reason); + break; + + case 'message_stop': + console.log('\nStream ended'); + break; + } + } + + return { content, usage }; +} +``` + +### Handling Thinking Tokens in Streams + +```typescript +async function streamWithThinking() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 4000, + thinking: { + type: 'enabled', + budget_tokens: 2000, + }, + messages: [{ role: 'user', content: 'Solve this complex math problem: ...' 
}], + }); + + stream.on('thinking', (thinking, snapshot) => { + console.log('[Thinking]', thinking); // Delta thinking content + }); + + stream.on('text', (text, snapshot) => { + process.stdout.write(text); // Regular response text + }); + + const message = await stream.finalMessage(); + + // Access thinking content from final message + for (const block of message.content) { + if (block.type === 'thinking') { + console.log('Final thinking:', block.thinking); + } + } +} +``` + +## Request Abortion + +### AbortController Integration + +```typescript +async function abortableRequest() { + const controller = new AbortController(); + + // Abort after 5 seconds + const timeoutId = setTimeout(() => controller.abort(), 5000); + + try { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Long task...' }], + }, { + // Pass abort signal in request options + signal: controller.signal, + }); + + stream.on('error', (error) => { + if (error.name === 'AbortError') { + console.log('Request was aborted'); + } else { + console.error('Other error:', error); + } + }); + + const result = await stream.finalMessage(); + clearTimeout(timeoutId); + return result; + + } catch (error) { + clearTimeout(timeoutId); + + if (error.name === 'AbortError') { + console.log('Request aborted by user'); + } else { + throw error; + } + } +} + +// Manual abort from MessageStream +async function manualAbort() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Start a story...' }], + }); + + // Abort after receiving some content + stream.on('text', (text, snapshot) => { + if (snapshot.length > 100) { + stream.abort(); // Built-in abort method + } + }); + + try { + await stream.finalMessage(); + } catch (error) { + if (stream.aborted) { + console.log('Stream was manually aborted'); + } + } +} +``` + +## Error Handling + +### Comprehensive Error Types + +```typescript +import { + AnthropicError, + APIError, + APIConnectionError, + APIConnectionTimeoutError, + APIUserAbortError, + NotFoundError, + ConflictError, + RateLimitError, + BadRequestError, + AuthenticationError, + InternalServerError, + PermissionDeniedError, + UnprocessableEntityError, +} from '@anthropic-ai/sdk'; + +async function handleErrors() { + try { + const message = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' 
}], + }); + + return message; + + } catch (error) { + // Handle specific error types + if (error instanceof RateLimitError) { + console.error('Rate limit exceeded:', { + status: error.status, + headers: error.headers, + retryAfter: error.headers.get('retry-after'), + }); + + // Wait and retry logic + const retryAfter = parseInt(error.headers.get('retry-after') || '60'); + await new Promise(resolve => setTimeout(resolve, retryAfter * 1000)); + + } else if (error instanceof AuthenticationError) { + console.error('Authentication failed:', error.status); + throw new Error('Invalid API key'); + + } else if (error instanceof BadRequestError) { + console.error('Bad request:', { + status: error.status, + error: error.error, + message: error.message, + }); + + } else if (error instanceof APIConnectionTimeoutError) { + console.error('Request timed out'); + // Retry with longer timeout + + } else if (error instanceof APIConnectionError) { + console.error('Network error:', error.message); + // Retry with backoff + + } else if (error instanceof APIUserAbortError) { + console.log('Request was aborted by user'); + + } else if (error instanceof InternalServerError) { + console.error('Server error:', error.status); + // Retry with exponential backoff + + } else if (error instanceof APIError) { + console.error('API error:', { + status: error.status, + error: error.error, + requestId: error.requestID, + }); + + } else { + console.error('Unexpected error:', error); + throw error; + } + } +} + +// Error handling in streams +function handleStreamErrors() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + }); + + stream.on('error', (error) => { + if (error instanceof RateLimitError) { + console.log('Rate limited during stream'); + } else if (error instanceof APIConnectionError) { + console.log('Connection lost during stream'); + } else { + console.error('Stream error:', error); + } + }); + + return stream; +} +``` + +## Stop Reasons + +### Understanding Stop Reasons + +```typescript +import { StopReason } from '@anthropic-ai/sdk'; + +async function handleStopReasons() { + const message = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 100, // Intentionally low to trigger max_tokens + messages: [{ role: 'user', content: 'Write a long story...' 
}], + stop_sequences: ['THE END'], // Custom stop sequence + }); + + // Extract and handle stop reason + const stopReason: StopReason = message.stop_reason; + + switch (stopReason) { + case 'end_turn': + console.log('Model completed naturally'); + break; + + case 'max_tokens': + console.log('Hit token limit, response may be incomplete'); + // Consider continuing with a follow-up request + break; + + case 'stop_sequence': + console.log('Hit custom stop sequence:', message.stop_sequence); + break; + + case 'tool_use': + console.log('Model wants to use tools'); + // Handle tool calls (see Tool Use section) + break; + + case 'pause_turn': + console.log('Long turn paused, can continue'); + // Continue with the partial response as context + break; + + case 'refusal': + console.log('Model refused to respond due to safety'); + break; + + default: + console.log('Unknown stop reason:', stopReason); + } + + return { message, stopReason }; +} + +// In streaming mode +function handleStopReasonsInStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' }], + }); + + stream.on('message', (message) => { + const stopReason = message.stop_reason; + console.log('Final stop reason:', stopReason); + + if (stopReason === 'max_tokens') { + console.log('Response was truncated'); + } + }); + + return stream; +} +``` + +## Context and Message History + +### Message Format and Serialization + +```typescript +import { MessageParam, Message } from '@anthropic-ai/sdk'; + +interface ConversationState { + messages: MessageParam[]; + totalTokens: number; + model: string; + systemPrompt?: string; +} + +class ConversationManager { + private state: ConversationState; + + constructor(model: string, systemPrompt?: string) { + this.state = { + messages: [], + totalTokens: 0, + model, + systemPrompt, + }; + } + + // Add user message + addUserMessage(content: string | any[]) { + this.state.messages.push({ + role: 'user', + content, + }); + } + + // Add assistant message from API response + addAssistantMessage(message: Message) { + this.state.messages.push({ + role: 'assistant', + content: message.content, + }); + + // Update token count + this.state.totalTokens += message.usage.input_tokens + message.usage.output_tokens; + } + + // Add tool results + addToolResult(toolUseId: string, result: string, isError = false) { + // Find the last message and ensure it has tool use + const lastMessage = this.state.messages[this.state.messages.length - 1]; + if (lastMessage?.role === 'assistant') { + // Add tool result as new user message + this.state.messages.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: toolUseId, + content: result, + is_error: isError, + }], + }); + } + } + + // Get messages for API call + getMessages(): MessageParam[] { + return [...this.state.messages]; + } + + // Serialize for persistence + serialize(): string { + return JSON.stringify(this.state); + } + + // Deserialize from storage + static deserialize(json: string): ConversationManager { + const state = JSON.parse(json); + const manager = new ConversationManager(state.model, state.systemPrompt); + manager.state = state; + return manager; + } + + // Create request parameters + createRequestParams(newMessage?: string): any { + if (newMessage) { + this.addUserMessage(newMessage); + } + + const params: any = { + model: this.state.model, + max_tokens: 4000, + messages: this.getMessages(), + }; + + if (this.state.systemPrompt) { + 
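        // Anthropic takes the system prompt as a top-level `system` request parameter, not as a message role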
params.system = this.state.systemPrompt; + } + + return params; + } + + // Get conversation stats + getStats() { + return { + messageCount: this.state.messages.length, + totalTokens: this.state.totalTokens, + userMessages: this.state.messages.filter(m => m.role === 'user').length, + assistantMessages: this.state.messages.filter(m => m.role === 'assistant').length, + }; + } +} + +// Usage example +async function conversationExample() { + const conversation = new ConversationManager( + 'claude-sonnet-4-20250514', + 'You are a helpful coding assistant.' + ); + + // First exchange + const params1 = conversation.createRequestParams('Hello, can you help me with Python?'); + const response1 = await anthropic.messages.create(params1); + conversation.addAssistantMessage(response1); + + // Second exchange + const params2 = conversation.createRequestParams('Show me a simple function.'); + const response2 = await anthropic.messages.create(params2); + conversation.addAssistantMessage(response2); + + // Save conversation + const saved = conversation.serialize(); + localStorage.setItem('conversation', saved); + + // Later: restore conversation + const restored = ConversationManager.deserialize(saved); + console.log('Conversation stats:', restored.getStats()); +} +``` + +## Token Counting + +### Using the Count Tokens API + +```typescript +import { MessageCountTokensParams, MessageTokensCount } from '@anthropic-ai/sdk'; + +async function countTokens() { + const messages = [ + { role: 'user', content: 'Hello, how are you?' }, + { role: 'assistant', content: 'I am doing well, thank you for asking!' }, + { role: 'user', content: 'Can you help me write some code?' }, + ] as const; + + // Count tokens for messages + const tokenCount: MessageTokensCount = await anthropic.messages.countTokens({ + model: 'claude-sonnet-4-20250514', + messages, + system: 'You are a helpful coding assistant.', + }); + + console.log('Input tokens:', tokenCount.input_tokens); + return tokenCount.input_tokens; +} + +// Count tokens with tools +async function countTokensWithTools() { + const tools = [ + { + name: 'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { type: 'string' }, + }, + required: ['expression'], + }, + }, + ]; + + const tokenCount = await anthropic.messages.countTokens({ + model: 'claude-sonnet-4-20250514', + messages: [{ role: 'user', content: 'Calculate 2+2' }], + tools, + }); + + return tokenCount.input_tokens; +} + +// Extract usage from responses +function extractUsageFromResponse(message: Message) { + const usage = message.usage; + + return { + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || 0, + totalTokens: usage.input_tokens + usage.output_tokens, + serviceTier: usage.service_tier, + cacheCreation: usage.cache_creation, + }; +} + +// Token usage in streaming +function trackTokensInStream() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Hello!' 
}], + }); + + let finalUsage: any = null; + + stream.on('message', (message) => { + finalUsage = extractUsageFromResponse(message); + console.log('Final usage:', finalUsage); + }); + + return stream; +} +``` + +## Prompt Caching + +### Basic Caching Implementation + +```typescript +import { CacheControlEphemeral } from '@anthropic-ai/sdk'; + +async function usePromptCaching() { + // Cache control for system prompt + const systemPrompt = [ + { + type: 'text', + text: 'You are an expert software engineer with deep knowledge of...', + cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, + }, + ]; + + // Cache control for large document + const messages = [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Here is a large codebase to analyze:', + }, + { + type: 'document', + source: { + type: 'text', + data: '// Large codebase content...', + media_type: 'text/plain', + }, + cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, + }, + { + type: 'text', + text: 'Please analyze this code for bugs.', + }, + ], + }, + ] as const; + + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages, + }); + + // Check cache usage + const usage = response.usage; + console.log('Cache read tokens:', usage.cache_read_input_tokens); + console.log('Cache write tokens:', usage.cache_creation_input_tokens); + + return response; +} + +// Caching with different TTL options +async function cachingWithTTL() { + const shortCache = { + type: 'ephemeral', + ttl: '5m', // 5 minutes + } as CacheControlEphemeral; + + const longCache = { + type: 'ephemeral', + ttl: '1h', // 1 hour (default) + } as CacheControlEphemeral; + + const messages = [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Short-lived context', + cache_control: shortCache, + }, + { + type: 'text', + text: 'Long-lived context that should be cached longer', + cache_control: longCache, + }, + { + type: 'text', + text: 'What can you tell me about this?', + }, + ], + }, + ] as const; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages, + }); +} +``` + +## Tool Use (Function Calling) + +### Complete Tool Implementation + +```typescript +import { Tool, ToolUseBlock, ToolChoice } from '@anthropic-ai/sdk'; + +// Define tools +const tools: Tool[] = [ + { + name: 'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { + type: 'string', + description: 'Mathematical expression to evaluate', + }, + }, + required: ['expression'], + }, + }, + { + name: 'weather', + description: 'Get weather information for a location', + input_schema: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'City name or coordinates', + }, + units: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature units', + }, + }, + required: ['location'], + }, + }, +]; + +// Tool implementations +const toolImplementations = { + calculator: (args: { expression: string }) => { + try { + // Simple eval - in production, use a safe math parser + const result = eval(args.expression); + return `Result: ${result}`; + } catch (error) { + return `Error: Invalid expression - ${error.message}`; + } + }, + + weather: async (args: { location: string; units?: string }) => { + // Mock weather API call + return `Weather in ${args.location}: 22°C, sunny with light clouds`; + }, 
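+  // Additional tool implementations can be registered here; keys must match the `name` fields declared in the Tool definitions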
+}; + +async function toolUseExample() { + const conversation = new ConversationManager('claude-sonnet-4-20250514'); + + // Send initial message with tools + conversation.addUserMessage('What is 15 * 23 and what is the weather in Paris?'); + + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: conversation.getMessages(), + tools, + tool_choice: { type: 'auto' } as ToolChoice, + }); + + conversation.addAssistantMessage(response); + + // Handle tool calls + const toolCalls: ToolUseBlock[] = response.content.filter( + (block): block is ToolUseBlock => block.type === 'tool_use' + ); + + // Execute each tool call + for (const toolCall of toolCalls) { + const toolName = toolCall.name; + const toolArgs = toolCall.input; + const toolId = toolCall.id; + + console.log(`Executing tool: ${toolName} with args:`, toolArgs); + + try { + let result: string; + + if (toolName in toolImplementations) { + result = await toolImplementations[toolName](toolArgs as any); + } else { + result = `Error: Unknown tool "${toolName}"`; + } + + // Add tool result to conversation + conversation.addToolResult(toolId, result); + + } catch (error) { + // Add error result + conversation.addToolResult(toolId, `Error: ${error.message}`, true); + } + } + + // Get final response after tool execution + if (toolCalls.length > 0) { + const finalResponse = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: conversation.getMessages(), + tools, + }); + + conversation.addAssistantMessage(finalResponse); + return finalResponse; + } + + return response; +} + +// Streaming with tools +async function streamingWithTools() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Calculate 42 * 17' }], + tools, + }); + + const toolCalls: ToolUseBlock[] = []; + + stream.on('contentBlock', (block) => { + if (block.type === 'tool_use') { + toolCalls.push(block); + } + }); + + stream.on('message', async (message) => { + if (message.stop_reason === 'tool_use') { + console.log('Tool calls detected:', toolCalls); + // Handle tools... + } + }); + + return stream; +} + +// Force specific tool usage +async function forceToolUsage() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: 'I need to do some math' }], + tools, + tool_choice: { + type: 'tool', + name: 'calculator', + } as ToolChoice, + }); +} +``` + +## System Prompts + +### System Prompt Variations + +```typescript +// Simple string system prompt +async function basicSystemPrompt() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: 'You are a helpful coding assistant specialized in Python.', + messages: [{ role: 'user', content: 'Help me write a function' }], + }); +} + +// Complex system prompt with caching +async function complexSystemPrompt() { + const systemPrompt = [ + { + type: 'text', + text: `You are an expert software engineer with the following expertise: + +1. Python development and best practices +2. Web frameworks like Django and FastAPI +3. Database design and optimization +4. Testing strategies and TDD +5. 
Code review and refactoring + +Guidelines for your responses: +- Always write clean, readable code +- Include proper error handling +- Add type hints when using Python +- Explain your reasoning +- Suggest improvements when applicable + +When reviewing code: +- Focus on functionality, performance, and maintainability +- Point out potential bugs or edge cases +- Suggest more pythonic approaches when relevant`, + cache_control: { type: 'ephemeral', ttl: '1h' }, + }, + ] as const; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages: [{ role: 'user', content: 'Review this Python function for me' }], + }); +} + +// Dynamic system prompt based on context +function buildSystemPrompt(userRole: string, expertise: string[]): string { + const basePrompt = `You are an AI assistant helping a ${userRole}.`; + + const expertisePrompt = expertise.length > 0 + ? `\n\nYour areas of expertise include: ${expertise.join(', ')}.` + : ''; + + const guidelines = ` + +Guidelines: +- Be helpful and accurate +- Explain complex concepts clearly +- Provide practical examples +- Ask for clarification when needed`; + + return basePrompt + expertisePrompt + guidelines; +} + +async function dynamicSystemPrompt() { + const systemPrompt = buildSystemPrompt('software developer', [ + 'JavaScript', 'TypeScript', 'React', 'Node.js' + ]); + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + system: systemPrompt, + messages: [{ role: 'user', content: 'Help me optimize this React component' }], + }); +} +``` + +## Content Block System + +### Understanding Content Blocks + +The Anthropic API uses a content block system where message content is always an array, even for simple text. 
+ +```typescript +import { + ContentBlockParam, + TextBlockParam, + ImageBlockParam, + DocumentBlockParam, + ToolUseBlockParam, + ToolResultBlockParam +} from '@anthropic-ai/sdk'; + +// Text content (most common) +const textContent: TextBlockParam = { + type: 'text', + text: 'Hello, Claude!', +}; + +// Image content +const imageContent: ImageBlockParam = { + type: 'image', + source: { + type: 'base64', + media_type: 'image/jpeg', + data: '/9j/4AAQSkZJRg...', // base64 encoded image + }, +}; + +// Document content with caching +const documentContent: DocumentBlockParam = { + type: 'document', + source: { + type: 'text', + data: 'Large document content...', + media_type: 'text/plain', + }, + cache_control: { type: 'ephemeral', ttl: '1h' }, + title: 'Important Document', + context: 'This document contains key information for the project', +}; + +// Tool use block (from assistant) +const toolUseContent: ToolUseBlockParam = { + type: 'tool_use', + id: 'tool_123', + name: 'calculator', + input: { expression: '2 + 2' }, +}; + +// Tool result block (from user) +const toolResultContent: ToolResultBlockParam = { + type: 'tool_result', + tool_use_id: 'tool_123', + content: 'Result: 4', +}; + +// Mixed content message +async function mixedContentExample() { + const mixedMessage: ContentBlockParam[] = [ + { + type: 'text', + text: 'Here is an image and a document to analyze:', + }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw0KGgoAAAANSUhEUgA...', // base64 image + }, + }, + { + type: 'document', + source: { + type: 'text', + data: 'Document content here...', + media_type: 'text/plain', + }, + title: 'Analysis Document', + }, + { + type: 'text', + text: 'What insights can you provide from these?', + }, + ]; + + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + messages: [{ role: 'user', content: mixedMessage }], + }); +} + +// Helper functions for content manipulation +function createTextBlock(text: string, cached = false): TextBlockParam { + const block: TextBlockParam = { + type: 'text', + text, + }; + + if (cached) { + block.cache_control = { type: 'ephemeral', ttl: '1h' }; + } + + return block; +} + +function createImageBlock(base64Data: string, mimeType: string): ImageBlockParam { + return { + type: 'image', + source: { + type: 'base64', + media_type: mimeType as any, + data: base64Data, + }, + }; +} + +// Extract text from response content blocks +function extractTextFromResponse(content: any[]): string { + return content + .filter(block => block.type === 'text') + .map(block => block.text) + .join('\n'); +} + +// Extract thinking content +function extractThinkingFromResponse(content: any[]): string | null { + const thinkingBlock = content.find(block => block.type === 'thinking'); + return thinkingBlock?.thinking || null; +} +``` + +## MessageStream Helper Class + +### Advanced MessageStream Usage + +```typescript +import { MessageStream, MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream'; + +class AdvancedMessageHandler { + private stream: MessageStream; + private content = ''; + private thinking = ''; + private toolCalls: any[] = []; + private citations: any[] = []; + + constructor(stream: MessageStream) { + this.stream = stream; + this.setupEventHandlers(); + } + + private setupEventHandlers() { + // Connection established + this.stream.on('connect', () => { + console.log('Stream connected'); + }); + + // Text content (delta and snapshot) + this.stream.on('text', (delta: string, 
snapshot: string) => { + process.stdout.write(delta); + this.content = snapshot; + }); + + // Thinking content (Claude's internal reasoning) + this.stream.on('thinking', (delta: string, snapshot: string) => { + console.log('[Thinking]', delta); + this.thinking = snapshot; + }); + + // Citations (when referencing documents) + this.stream.on('citation', (citation, citations) => { + console.log('Citation:', citation); + this.citations = citations; + }); + + // Content blocks (including tool calls) + this.stream.on('contentBlock', (block) => { + if (block.type === 'tool_use') { + console.log('Tool call:', block); + this.toolCalls.push(block); + } + }); + + // Raw stream events + this.stream.on('streamEvent', (event, snapshot) => { + // Handle any stream event + console.log('Stream event:', event.type); + }); + + // Final message + this.stream.on('finalMessage', (message) => { + console.log('\nFinal message received'); + this.handleFinalMessage(message); + }); + + // Error handling + this.stream.on('error', (error) => { + console.error('Stream error:', error); + }); + + // Stream end + this.stream.on('end', () => { + console.log('\nStream ended'); + }); + + // User abort + this.stream.on('abort', (error) => { + console.log('Stream aborted by user'); + }); + } + + private handleFinalMessage(message: any) { + console.log('Stop reason:', message.stop_reason); + console.log('Token usage:', message.usage); + + // Process thinking content if available + for (const block of message.content) { + if (block.type === 'thinking') { + console.log('Final thinking content:', block.thinking); + } + } + } + + async waitForCompletion() { + try { + const finalMessage = await this.stream.finalMessage(); + return { + message: finalMessage, + content: this.content, + thinking: this.thinking, + toolCalls: this.toolCalls, + citations: this.citations, + }; + } catch (error) { + if (this.stream.aborted) { + console.log('Stream was aborted'); + } else { + throw error; + } + } + } + + abort() { + this.stream.abort(); + } + + // Get request ID for debugging + getRequestId() { + return this.stream.request_id; + } + + // Access the underlying Response object + async getResponse() { + const { response } = await this.stream.withResponse(); + return response; + } +} + +// Usage example +async function advancedStreamExample() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 2000, + thinking: { + type: 'enabled', + budget_tokens: 1000, + }, + messages: [{ + role: 'user', + content: 'Analyze this complex problem and show your reasoning...' 
+ }], + }); + + const handler = new AdvancedMessageHandler(stream); + + // Optional: abort after 30 seconds + const timeoutId = setTimeout(() => { + handler.abort(); + }, 30000); + + try { + const result = await handler.waitForCompletion(); + clearTimeout(timeoutId); + + console.log('Final result:', { + contentLength: result.content.length, + thinkingLength: result.thinking.length, + toolCallCount: result.toolCalls.length, + citationCount: result.citations.length, + }); + + return result; + } catch (error) { + clearTimeout(timeoutId); + throw error; + } +} +``` + +## Thinking Tokens and Extended Reasoning + +### Enabling Extended Thinking + +```typescript +async function extendedThinkingExample() { + const response = await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 4000, + thinking: { + type: 'enabled', + budget_tokens: 2000, // Minimum 1024, must be < max_tokens + }, + messages: [{ + role: 'user', + content: `Solve this complex problem step by step: + +A company has 3 factories. Factory A produces 100 units/day, +Factory B produces 150 units/day, and Factory C produces 200 units/day. +If the company needs to fulfill an order of 10,000 units in the most +cost-efficient way, and the costs per unit are $5, $4, and $6 respectively, +what's the optimal production strategy?` + }], + }); + + // Extract thinking content + for (const block of response.content) { + if (block.type === 'thinking') { + console.log('Claude\'s thinking process:'); + console.log(block.thinking); + console.log('Signature:', block.signature); + } else if (block.type === 'text') { + console.log('\nFinal answer:'); + console.log(block.text); + } + } + + return response; +} + +// Disable thinking +async function disableThinking() { + return await anthropic.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + thinking: { + type: 'disabled', + }, + messages: [{ role: 'user', content: 'Quick answer please' }], + }); +} + +// Streaming with thinking +async function streamThinking() { + const stream = anthropic.messages.stream({ + model: 'claude-sonnet-4-20250514', + max_tokens: 3000, + thinking: { + type: 'enabled', + budget_tokens: 1500, + }, + messages: [{ + role: 'user', + content: 'Think through this carefully: How would you design a distributed cache?' 
+ }], + }); + + let thinkingContent = ''; + let responseContent = ''; + + stream.on('thinking', (delta, snapshot) => { + // Stream thinking content as it comes + process.stdout.write(`[THINKING] ${delta}`); + thinkingContent = snapshot; + }); + + stream.on('text', (delta, snapshot) => { + // Stream final response + process.stdout.write(delta); + responseContent = snapshot; + }); + + const finalMessage = await stream.finalMessage(); + + return { + thinking: thinkingContent, + response: responseContent, + usage: finalMessage.usage, + }; +} +``` + +## Complete Implementation Example + +Here's a comprehensive example that combines all the features: + +```typescript +import Anthropic, { + MessageParam, + Message, + Tool, + ToolUseBlock, + AnthropicError +} from '@anthropic-ai/sdk'; + +class AnthropicClient { + private client: Anthropic; + private conversation: MessageParam[] = []; + private totalTokens = 0; + + constructor(apiKey: string) { + this.client = new Anthropic({ apiKey }); + } + + async sendMessage( + content: string, + options: { + stream?: boolean; + tools?: Tool[]; + thinking?: boolean; + systemPrompt?: string; + maxTokens?: number; + temperature?: number; + cached?: boolean; + } = {} + ) { + const { + stream = false, + tools = [], + thinking = false, + systemPrompt, + maxTokens = 1024, + temperature = 1.0, + cached = false, + } = options; + + // Add user message + this.conversation.push({ + role: 'user', + content: cached + ? [{ type: 'text', text: content, cache_control: { type: 'ephemeral', ttl: '1h' } }] + : content, + }); + + const params: any = { + model: 'claude-sonnet-4-20250514', + max_tokens: maxTokens, + temperature, + messages: [...this.conversation], + }; + + if (systemPrompt) { + params.system = systemPrompt; + } + + if (tools.length > 0) { + params.tools = tools; + params.tool_choice = { type: 'auto' }; + } + + if (thinking) { + params.thinking = { + type: 'enabled', + budget_tokens: Math.min(maxTokens / 2, 2000), + }; + } + + try { + if (stream) { + return await this.handleStreamingResponse(params, tools); + } else { + return await this.handleSingleResponse(params, tools); + } + } catch (error) { + return this.handleError(error); + } + } + + private async handleSingleResponse(params: any, tools: Tool[]) { + const response = await this.client.messages.create(params); + + // Track tokens + this.totalTokens += response.usage.input_tokens + response.usage.output_tokens; + + // Add assistant response + this.conversation.push({ + role: 'assistant', + content: response.content, + }); + + // Handle tool calls + const toolCalls = response.content.filter( + (block): block is ToolUseBlock => block.type === 'tool_use' + ); + + if (toolCalls.length > 0 && tools.length > 0) { + await this.handleToolCalls(toolCalls, params, tools); + } + + return { + content: this.extractText(response.content), + thinking: this.extractThinking(response.content), + toolCalls, + usage: response.usage, + stopReason: response.stop_reason, + }; + } + + private async handleStreamingResponse(params: any, tools: Tool[]) { + const stream = this.client.messages.stream(params); + + let content = ''; + let thinking = ''; + const toolCalls: ToolUseBlock[] = []; + let finalMessage: Message; + + return new Promise((resolve, reject) => { + stream.on('text', (delta, snapshot) => { + process.stdout.write(delta); + content = snapshot; + }); + + stream.on('thinking', (delta, snapshot) => { + console.log(`[THINKING] ${delta}`); + thinking = snapshot; + }); + + stream.on('contentBlock', (block) => { + if (block.type 
=== 'tool_use') { + toolCalls.push(block); + } + }); + + stream.on('finalMessage', async (message) => { + finalMessage = message; + this.totalTokens += message.usage.input_tokens + message.usage.output_tokens; + + this.conversation.push({ + role: 'assistant', + content: message.content, + }); + + if (toolCalls.length > 0 && tools.length > 0) { + try { + await this.handleToolCalls(toolCalls, params, tools); + } catch (error) { + reject(error); + return; + } + } + + resolve({ + content, + thinking, + toolCalls, + usage: message.usage, + stopReason: message.stop_reason, + }); + }); + + stream.on('error', reject); + }); + } + + private async handleToolCalls(toolCalls: ToolUseBlock[], params: any, tools: Tool[]) { + // Execute tool calls + for (const toolCall of toolCalls) { + const result = await this.executeToolCall(toolCall); + + this.conversation.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: toolCall.id, + content: result.content, + is_error: result.isError, + }], + }); + } + + // Get response after tool execution + const followUpResponse = await this.client.messages.create({ + ...params, + messages: [...this.conversation], + }); + + this.conversation.push({ + role: 'assistant', + content: followUpResponse.content, + }); + + this.totalTokens += followUpResponse.usage.input_tokens + followUpResponse.usage.output_tokens; + } + + private async executeToolCall(toolCall: ToolUseBlock): Promise<{ content: string; isError: boolean }> { + // Mock tool implementations + const tools = { + calculator: (args: any) => { + try { + const result = eval(args.expression); + return { content: `Result: ${result}`, isError: false }; + } catch (error) { + return { content: `Error: ${error.message}`, isError: true }; + } + }, + weather: (args: any) => { + return { content: `Weather in ${args.location}: 22°C, sunny`, isError: false }; + }, + }; + + const toolName = toolCall.name; + if (toolName in tools) { + return tools[toolName](toolCall.input); + } else { + return { content: `Unknown tool: ${toolName}`, isError: true }; + } + } + + private extractText(content: any[]): string { + return content + .filter(block => block.type === 'text') + .map(block => block.text) + .join('\n'); + } + + private extractThinking(content: any[]): string { + const thinkingBlock = content.find(block => block.type === 'thinking'); + return thinkingBlock?.thinking || ''; + } + + private handleError(error: any) { + if (error instanceof AnthropicError) { + console.error('Anthropic API error:', error.message); + + if (error.status === 429) { + console.log('Rate limited - should retry with backoff'); + } else if (error.status === 401) { + console.log('Authentication failed - check API key'); + } + } else { + console.error('Unexpected error:', error); + } + + throw error; + } + + // Utility methods + getConversationHistory(): MessageParam[] { + return [...this.conversation]; + } + + getTotalTokens(): number { + return this.totalTokens; + } + + clearConversation(): void { + this.conversation = []; + this.totalTokens = 0; + } + + async countTokens(messages: MessageParam[], systemPrompt?: string): Promise { + const params: any = { + model: 'claude-sonnet-4-20250514', + messages, + }; + + if (systemPrompt) { + params.system = systemPrompt; + } + + const result = await this.client.messages.countTokens(params); + return result.input_tokens; + } +} + +// Usage example +async function completeExample() { + const client = new AnthropicClient(process.env.ANTHROPIC_API_KEY!); + + const tools: Tool[] = [ + { + name: 
'calculator', + description: 'Perform mathematical calculations', + input_schema: { + type: 'object', + properties: { + expression: { type: 'string' }, + }, + required: ['expression'], + }, + }, + ]; + + // Simple message + let result = await client.sendMessage('Hello, Claude!'); + console.log('Response:', result.content); + + // Message with thinking + result = await client.sendMessage( + 'Solve this complex math problem: What is the optimal way to arrange 10 people around a circular table?', + { thinking: true, maxTokens: 2000 } + ); + console.log('Thinking:', result.thinking); + console.log('Response:', result.content); + + // Streaming with tools + result = await client.sendMessage( + 'Calculate 15 * 23 and explain the steps', + { stream: true, tools, thinking: true } + ); + + console.log('Total tokens used:', client.getTotalTokens()); +} +``` + +## Key Implementation Notes + +1. **Content is Always an Array**: Even simple text messages use the content block system +2. **Error Handling**: The SDK provides specific error types for different HTTP status codes +3. **Streaming Events**: Use MessageStream for easier event handling, or raw streaming for more control +4. **Token Counting**: Use the dedicated countTokens API for accurate estimates +5. **Caching**: Add cache_control to content blocks, not to the message level +6. **Tool Calls**: Always check stop_reason for 'tool_use' and handle the tool execution flow +7. **Thinking**: Requires explicit configuration and sufficient token budget +8. **Abort**: Use AbortController for request cancellation, or MessageStream.abort() for streams + +This guide covers all the essential patterns for working with the Anthropic SDK effectively. \ No newline at end of file diff --git a/packages/ai/gemini-api.md b/packages/ai/gemini-api.md new file mode 100644 index 00000000..6b8ff549 --- /dev/null +++ b/packages/ai/gemini-api.md @@ -0,0 +1,1233 @@ +# Google Gemini SDK Implementation Guide + +This document provides comprehensive implementation guidance for the Google Gemini SDK (`@google/genai`) showing exactly how to implement all required features for our unified AI API. + +## Table of Contents + +1. [Setup and Basic Usage](#setup-and-basic-usage) +2. [Streaming Responses](#streaming-responses) +3. [Aborting Requests](#aborting-requests) +4. [Error Handling](#error-handling) +5. [Stop Reasons](#stop-reasons) +6. [Message History and Serialization](#message-history-and-serialization) +7. [Token Counting](#token-counting) +8. [Context Caching](#context-caching) +9. [Function Calling (Tools)](#function-calling-tools) +10. [System Instructions](#system-instructions) +11. [Parts System for Content](#parts-system-for-content) +12. [Thinking Tokens](#thinking-tokens) +13. [Peculiarities and Gotchas](#peculiarities-and-gotchas) + +## Setup and Basic Usage + +### Installation and Initialization + +```typescript +import { GoogleGenAI, type GenerateContentResponse } from '@google/genai'; + +// Initialize client +const client = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + // Optional: Use Vertex AI instead + // vertexai: true, + // project: 'your-project-id', + // location: 'us-central1', +}); + +// Basic non-streaming request +const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: 'Hello, how are you?' 
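+  // contents can also be an array of Content objects, as in the conversation examples below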
+});
+
+console.log(response.text);
+```
+
+### Key Types and Interfaces
+
+```typescript
+// Core types from the SDK
+interface GoogleGenAIOptions {
+  apiKey?: string;
+  vertexai?: boolean;
+  project?: string;
+  location?: string;
+  apiVersion?: string;
+}
+
+interface Content {
+  parts?: Part[];
+  role?: string; // 'user' | 'model'
+}
+
+interface Part {
+  text?: string;
+  thought?: boolean; // For thinking content
+  functionCall?: FunctionCall;
+  functionResponse?: FunctionResponse;
+  inlineData?: Blob;
+  fileData?: FileData;
+}
+
+interface GenerateContentResponse {
+  candidates?: Candidate[];
+  usageMetadata?: GenerateContentResponseUsageMetadata;
+  promptFeedback?: GenerateContentResponsePromptFeedback;
+  text: string | undefined; // Convenience getter
+}
+```
+
+## Streaming Responses
+
+Gemini supports streaming via `generateContentStream` which returns an `AsyncGenerator`:
+
+```typescript
+async function streamContent() {
+  const stream = await client.models.generateContentStream({
+    model: 'gemini-2.0-flash-exp',
+    contents: 'Write a short story about a robot.'
+  });
+
+  let fullText = '';
+  for await (const chunk of stream) {
+    // Each chunk is a GenerateContentResponse
+    const chunkText = chunk.text;
+    if (chunkText) {
+      fullText += chunkText;
+      process.stdout.write(chunkText); // Stream to output
+    }
+
+    // Check for function calls in streaming
+    if (chunk.candidates?.[0]?.content?.parts) {
+      for (const part of chunk.candidates[0].content.parts) {
+        if (part.functionCall) {
+          console.log('Function call:', part.functionCall);
+        }
+        if (part.thought) {
+          console.log('Thinking:', part.text);
+        }
+      }
+    }
+  }
+
+  return fullText;
+}
+```
+
+### Streaming with Thinking Tokens
+
+```typescript
+async function streamWithThinking() {
+  const stream = await client.models.generateContentStream({
+    model: 'gemini-2.0-flash-thinking-exp-1219',
+    contents: 'Solve this math problem: 2x + 5 = 13'
+  });
+
+  let thinking = '';
+  let response = '';
+
+  for await (const chunk of stream) {
+    if (chunk.candidates?.[0]?.content?.parts) {
+      for (const part of chunk.candidates[0].content.parts) {
+        if (part.thought && part.text) {
+          thinking += part.text;
+          console.log('[THINKING]', part.text);
+        } else if (part.text && !part.thought) {
+          response += part.text;
+          console.log('[RESPONSE]', part.text);
+        }
+      }
+    }
+  }
+
+  return { thinking, response };
+}
+```
+
+## Aborting Requests
+
+Gemini supports request cancellation via `AbortSignal`:
+
+```typescript
+class GeminiClient {
+  private currentController: AbortController | null = null;
+
+  async generateWithCancellation(prompt: string): Promise<string> {
+    // Create new abort controller
+    this.currentController = new AbortController();
+
+    try {
+      const response = await client.models.generateContent({
+        model: 'gemini-2.0-flash-exp',
+        contents: prompt,
+        abortSignal: this.currentController.signal
+      });
+
+      return response.text || '';
+    } catch (error) {
+      if (error.name === 'AbortError') {
+        console.log('Request was cancelled');
+        throw new Error('Request cancelled by user');
+      }
+      throw error;
+    } finally {
+      this.currentController = null;
+    }
+  }
+
+  async generateStreamWithCancellation(prompt: string): Promise<AsyncGenerator<string>> {
+    this.currentController = new AbortController();
+
+    try {
+      const stream = await client.models.generateContentStream({
+        model: 'gemini-2.0-flash-exp',
+        contents: prompt,
+        abortSignal: this.currentController.signal
+      });
+
+      return this.processStream(stream);
+    } catch (error) {
+      if (error.name === 'AbortError') {
+        throw new
Error('Request cancelled by user'); + } + throw error; + } + } + + private async* processStream(stream: AsyncGenerator): AsyncGenerator { + try { + for await (const chunk of stream) { + if (chunk.text) { + yield chunk.text; + } + } + } catch (error) { + if (error.name === 'AbortError') { + return; // Exit generator cleanly + } + throw error; + } finally { + this.currentController = null; + } + } + + // Cancel current request + cancel(): void { + if (this.currentController) { + this.currentController.abort(); + } + } +} +``` + +## Error Handling + +### Error Types and Handling + +```typescript +import { ApiError } from '@google/genai'; + +interface GeminiErrorInfo { + type: 'rate_limit' | 'auth' | 'invalid_request' | 'network' | 'server' | 'unknown'; + message: string; + statusCode?: number; + retryable: boolean; +} + +function handleGeminiError(error: unknown): GeminiErrorInfo { + if (error instanceof ApiError) { + const statusCode = error.status; + + switch (statusCode) { + case 401: + case 403: + return { + type: 'auth', + message: 'Authentication failed - check API key', + statusCode, + retryable: false + }; + + case 429: + return { + type: 'rate_limit', + message: 'Rate limit exceeded', + statusCode, + retryable: true + }; + + case 400: + return { + type: 'invalid_request', + message: error.message || 'Invalid request parameters', + statusCode, + retryable: false + }; + + case 500: + case 502: + case 503: + case 504: + return { + type: 'server', + message: 'Server error - try again later', + statusCode, + retryable: true + }; + + default: + return { + type: 'unknown', + message: error.message || 'Unknown API error', + statusCode, + retryable: false + }; + } + } + + if (error instanceof Error) { + if (error.name === 'AbortError') { + return { + type: 'network', + message: 'Request was cancelled', + retryable: false + }; + } + + return { + type: 'network', + message: error.message, + retryable: true + }; + } + + return { + type: 'unknown', + message: 'Unknown error occurred', + retryable: false + }; +} + +// Usage with retry logic +async function generateWithRetry(prompt: string, maxRetries = 3): Promise { + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: prompt + }); + + return response.text || ''; + } catch (error) { + const errorInfo = handleGeminiError(error); + + if (!errorInfo.retryable || attempt === maxRetries) { + throw new Error(`${errorInfo.type}: ${errorInfo.message}`); + } + + // Exponential backoff for retryable errors + const delay = Math.pow(2, attempt - 1) * 1000; + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + + throw new Error('Max retries exceeded'); +} +``` + +## Stop Reasons + +Gemini provides finish reasons in the response candidates: + +```typescript +enum FinishReason { + FINISH_REASON_UNSPECIFIED = 'FINISH_REASON_UNSPECIFIED', + STOP = 'STOP', // Natural stop + MAX_TOKENS = 'MAX_TOKENS', // Hit token limit + SAFETY = 'SAFETY', // Safety filter triggered + RECITATION = 'RECITATION', // Recitation filter + LANGUAGE = 'LANGUAGE', // Language not supported + OTHER = 'OTHER' +} + +function extractStopReason(response: GenerateContentResponse): string | null { + const candidate = response.candidates?.[0]; + if (!candidate) return null; + + return candidate.finishReason || null; +} + +// Handle different stop reasons +function handleStopReason(response: GenerateContentResponse): void { + const reason = extractStopReason(response); 
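+  // Note: with generateContentStream, finishReason typically appears only on the final chunk's candidate.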
+ + switch (reason) { + case 'STOP': + console.log('Response completed naturally'); + break; + + case 'MAX_TOKENS': + console.log('Response truncated due to token limit'); + break; + + case 'SAFETY': + console.log('Response blocked by safety filters'); + // Check promptFeedback for details + if (response.promptFeedback?.blockReason) { + console.log('Block reason:', response.promptFeedback.blockReason); + } + break; + + case 'RECITATION': + console.log('Response blocked due to recitation concerns'); + break; + + default: + if (reason) { + console.log('Unexpected finish reason:', reason); + } + } +} +``` + +## Message History and Serialization + +### Managing Conversation History + +```typescript +interface SerializableMessage { + role: 'user' | 'model'; + content: string; + functionCalls?: FunctionCall[]; + functionResponses?: FunctionResponse[]; + thinking?: string; +} + +interface SerializableSession { + messages: SerializableMessage[]; + totalUsage: { + promptTokens: number; + candidatesTokens: number; + totalTokens: number; + thoughtsTokens?: number; + }; +} + +class GeminiConversation { + private messages: Content[] = []; + private totalUsage = { + promptTokens: 0, + candidatesTokens: 0, + totalTokens: 0, + thoughtsTokens: 0 + }; + + addUserMessage(text: string): void { + this.messages.push({ + role: 'user', + parts: [{ text }] + }); + } + + addAssistantMessage(response: GenerateContentResponse): void { + const candidate = response.candidates?.[0]; + if (!candidate?.content) return; + + this.messages.push(candidate.content); + + // Update usage + if (response.usageMetadata) { + this.totalUsage.promptTokens += response.usageMetadata.promptTokenCount || 0; + this.totalUsage.candidatesTokens += response.usageMetadata.candidatesTokenCount || 0; + this.totalUsage.totalTokens += response.usageMetadata.totalTokenCount || 0; + this.totalUsage.thoughtsTokens += response.usageMetadata.thoughtsTokenCount || 0; + } + } + + async sendMessage(text: string): Promise { + this.addUserMessage(text); + + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: this.messages + }); + + this.addAssistantMessage(response); + return response.text || ''; + } + + // Serialize for persistence + serialize(): SerializableSession { + const messages: SerializableMessage[] = []; + + for (const content of this.messages) { + const message: SerializableMessage = { + role: (content.role as 'user' | 'model') || 'user', + content: '', + functionCalls: [], + functionResponses: [], + thinking: '' + }; + + for (const part of content.parts || []) { + if (part.text) { + if (part.thought) { + message.thinking += part.text; + } else { + message.content += part.text; + } + } + if (part.functionCall) { + message.functionCalls!.push(part.functionCall); + } + if (part.functionResponse) { + message.functionResponses!.push(part.functionResponse); + } + } + + messages.push(message); + } + + return { + messages, + totalUsage: { ...this.totalUsage } + }; + } + + // Deserialize from storage + static fromSerialized(session: SerializableSession): GeminiConversation { + const conversation = new GeminiConversation(); + conversation.totalUsage = { ...session.totalUsage }; + + for (const msg of session.messages) { + const parts: Part[] = []; + + if (msg.content) { + parts.push({ text: msg.content }); + } + + if (msg.thinking) { + parts.push({ text: msg.thinking, thought: true }); + } + + for (const funcCall of msg.functionCalls || []) { + parts.push({ functionCall: funcCall }); + } + + for (const 
funcResp of msg.functionResponses || []) { + parts.push({ functionResponse: funcResp }); + } + + conversation.messages.push({ + role: msg.role, + parts + }); + } + + return conversation; + } +} +``` + +## Token Counting + +### Understanding Gemini Token Usage + +```typescript +interface TokenUsage { + promptTokens: number; + candidatesTokens: number; // Output tokens + totalTokens: number; + thoughtsTokens?: number; // Thinking tokens (reasoning models) + cachedContentTokens?: number; // Cache read tokens +} + +function extractTokenUsage(response: GenerateContentResponse): TokenUsage { + const usage = response.usageMetadata; + + return { + promptTokens: usage?.promptTokenCount || 0, + candidatesTokens: usage?.candidatesTokenCount || 0, + totalTokens: usage?.totalTokenCount || 0, + thoughtsTokens: usage?.thoughtsTokenCount || 0, + cachedContentTokens: usage?.cachedContentTokenCount || 0 + }; +} + +// Count tokens before sending (estimation) +async function countTokens(content: string | Content[]): Promise { + const response = await client.models.computeTokens({ + model: 'gemini-2.0-flash-exp', + contents: typeof content === 'string' + ? [{ parts: [{ text: content }] }] + : content + }); + + return response.totalTokens || 0; +} + +// Token usage accumulation +class TokenTracker { + private usage = { + totalPromptTokens: 0, + totalCandidatesTokens: 0, + totalThoughtsTokens: 0, + totalCachedTokens: 0, + totalRequests: 0 + }; + + addUsage(response: GenerateContentResponse): void { + const tokenUsage = extractTokenUsage(response); + + this.usage.totalPromptTokens += tokenUsage.promptTokens; + this.usage.totalCandidatesTokens += tokenUsage.candidatesTokens; + this.usage.totalThoughtsTokens += tokenUsage.thoughtsTokens || 0; + this.usage.totalCachedTokens += tokenUsage.cachedContentTokens || 0; + this.usage.totalRequests++; + } + + getStats() { + return { + ...this.usage, + totalTokens: this.usage.totalPromptTokens + this.usage.totalCandidatesTokens, + averageTokensPerRequest: this.usage.totalRequests > 0 + ? (this.usage.totalPromptTokens + this.usage.totalCandidatesTokens) / this.usage.totalRequests + : 0 + }; + } +} +``` + +## Context Caching + +Gemini supports context caching to reduce costs for repeated large prompts: + +```typescript +import { type CachedContent } from '@google/genai'; + +class GeminiCache { + async createCache( + systemInstruction: string, + contents: Content[], + ttlHours = 1 + ): Promise { + const cache = await client.caches.create({ + model: 'gemini-2.0-flash-exp', + systemInstruction: { parts: [{ text: systemInstruction }] }, + contents, + ttl: `${ttlHours * 3600}s` // Convert hours to seconds + }); + + return cache; + } + + async generateWithCache( + cachedContent: CachedContent, + userMessage: string + ): Promise { + return await client.models.generateContent({ + model: cachedContent.model || 'gemini-2.0-flash-exp', + cachedContent: cachedContent.name, + contents: [{ + role: 'user', + parts: [{ text: userMessage }] + }] + }); + } + + async listCaches(): Promise { + const caches = []; + for await (const cache of client.caches.list()) { + caches.push(cache); + } + return caches; + } + + async deleteCache(cacheName: string): Promise { + await client.caches.delete({ name: cacheName }); + } + + // Example: Cache a large document for repeated analysis + async createDocumentCache(document: string): Promise { + const systemInstruction = ` + You are a document analysis assistant. 
The user will provide a large document, + and you should be ready to answer questions about it, summarize it, or extract + information from it. + `; + + const contents = [{ + role: 'user' as const, + parts: [{ text: `Please analyze this document:\n\n${document}` }] + }]; + + return this.createCache(systemInstruction, contents, 24); // Cache for 24 hours + } +} + +// Usage example +async function demonstrateCache() { + const cache = new GeminiCache(); + + // Create cache with large document + const document = "... very large document content ..."; + const cachedContent = await cache.createDocumentCache(document); + + // Now ask questions using the cache (saves tokens!) + const response1 = await cache.generateWithCache( + cachedContent, + "What are the key points in this document?" + ); + + const response2 = await cache.generateWithCache( + cachedContent, + "Can you summarize the conclusions?" + ); + + // Clean up when done + await cache.deleteCache(cachedContent.name!); +} +``` + +## Function Calling (Tools) + +### Basic Function Calling Setup + +```typescript +interface ToolDefinition { + name: string; + description: string; + parameters: { + type: 'object'; + properties: Record; + required: string[]; + }; +} + +// Define tools +const tools: ToolDefinition[] = [{ + name: 'get_weather', + description: 'Get current weather for a location', + parameters: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'City name or location' + }, + units: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature units' + } + }, + required: ['location'] + } +}]; + +// Convert to Gemini format +function createGeminiTools(tools: ToolDefinition[]) { + return [{ + functionDeclarations: tools.map(tool => ({ + name: tool.name, + description: tool.description, + parametersJsonSchema: tool.parameters + })) + }]; +} + +// Function call handler +async function executeFunction(functionCall: FunctionCall): Promise { + const { name, args } = functionCall; + const params = typeof args === 'string' ? 
JSON.parse(args) : args; + + switch (name) { + case 'get_weather': + return await getWeatherData(params.location, params.units); + default: + throw new Error(`Unknown function: ${name}`); + } +} + +// Mock weather function +async function getWeatherData(location: string, units = 'celsius') { + return { + location, + temperature: 22, + conditions: 'sunny', + units + }; +} +``` + +### Complete Function Calling Flow + +```typescript +class GeminiFunctionCalling { + private tools: ToolDefinition[]; + + constructor(tools: ToolDefinition[]) { + this.tools = tools; + } + + async processWithTools(messages: Content[]): Promise { + let currentMessages = [...messages]; + let iterations = 0; + const maxIterations = 5; + + while (iterations < maxIterations) { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + contents: currentMessages, + tools: createGeminiTools(this.tools), + toolConfig: { + functionCallingConfig: { + mode: 'AUTO' // Let model decide when to call functions + } + } + }); + + const candidate = response.candidates?.[0]; + if (!candidate?.content) break; + + // Add assistant response to conversation + currentMessages.push(candidate.content); + + // Check for function calls + const functionCalls = this.extractFunctionCalls(candidate.content); + + if (functionCalls.length === 0) { + // No more function calls, return final response + return response.text || ''; + } + + // Execute function calls + for (const functionCall of functionCalls) { + try { + const result = await executeFunction(functionCall); + + // Add function response to conversation + currentMessages.push({ + role: 'user', + parts: [{ + functionResponse: { + name: functionCall.name, + id: functionCall.id, + response: { result } + } + }] + }); + } catch (error) { + // Add error response + currentMessages.push({ + role: 'user', + parts: [{ + functionResponse: { + name: functionCall.name, + id: functionCall.id, + response: { error: error.message } + } + }] + }); + } + } + + iterations++; + } + + throw new Error('Max function calling iterations exceeded'); + } + + private extractFunctionCalls(content: Content): FunctionCall[] { + const calls: FunctionCall[] = []; + + for (const part of content.parts || []) { + if (part.functionCall) { + calls.push(part.functionCall); + } + } + + return calls; + } + + // Streaming version with function calls + async *processStreamWithTools(messages: Content[]): AsyncGenerator<{ + type: 'content' | 'function_call' | 'function_result'; + content?: string; + functionCall?: FunctionCall; + functionResult?: any; + }> { + const stream = await client.models.generateContentStream({ + model: 'gemini-2.0-flash-exp', + contents: messages, + tools: createGeminiTools(this.tools), + toolConfig: { + functionCallingConfig: { mode: 'AUTO' } + } + }); + + let pendingFunctionCalls: FunctionCall[] = []; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content) continue; + + for (const part of candidate.content.parts || []) { + if (part.text && !part.thought) { + yield { type: 'content', content: part.text }; + } + + if (part.functionCall) { + pendingFunctionCalls.push(part.functionCall); + yield { type: 'function_call', functionCall: part.functionCall }; + } + } + } + + // Execute any pending function calls + for (const functionCall of pendingFunctionCalls) { + try { + const result = await executeFunction(functionCall); + yield { type: 'function_result', functionResult: result }; + } catch (error) { + yield { + type: 
'function_result', + functionResult: { error: error.message } + }; + } + } + } +} +``` + +## System Instructions + +Gemini handles system instructions differently from other providers: + +```typescript +// System instruction is a separate parameter, not part of messages +async function generateWithSystemInstruction( + systemPrompt: string, + userMessage: string +): Promise { + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + systemInstruction: { + parts: [{ text: systemPrompt }] + }, + contents: [{ + role: 'user', + parts: [{ text: userMessage }] + }] + }); + + return response.text || ''; +} + +// For conversation with system instruction +class GeminiConversationWithSystem { + private systemInstruction: Content; + private messages: Content[] = []; + + constructor(systemPrompt: string) { + this.systemInstruction = { + parts: [{ text: systemPrompt }] + }; + } + + async sendMessage(text: string): Promise { + this.messages.push({ + role: 'user', + parts: [{ text }] + }); + + const response = await client.models.generateContent({ + model: 'gemini-2.0-flash-exp', + systemInstruction: this.systemInstruction, + contents: this.messages + }); + + const candidate = response.candidates?.[0]; + if (candidate?.content) { + this.messages.push(candidate.content); + } + + return response.text || ''; + } + + updateSystemInstruction(newPrompt: string): void { + this.systemInstruction = { + parts: [{ text: newPrompt }] + }; + } +} +``` + +## Parts System for Content + +Understanding Gemini's parts-based content system: + +```typescript +// Text content +const textPart: Part = { + text: 'Hello, world!' +}; + +// Thinking content (for reasoning models) +const thinkingPart: Part = { + text: 'Let me think about this problem...', + thought: true +}; + +// Function call +const functionCallPart: Part = { + functionCall: { + name: 'get_weather', + args: { location: 'San Francisco' } + } +}; + +// Function response +const functionResponsePart: Part = { + functionResponse: { + name: 'get_weather', + response: { temperature: 72, conditions: 'sunny' } + } +}; + +// Image data (inline) +const imagePart: Part = { + inlineData: { + mimeType: 'image/jpeg', + data: 'base64-encoded-image-data' + } +}; + +// File reference +const filePart: Part = { + fileData: { + mimeType: 'image/jpeg', + fileUri: 'gs://bucket/image.jpg' + } +}; + +// Creating multi-part content +const multiPartContent: Content = { + role: 'user', + parts: [ + { text: 'What is in this image?' 
}, + { + inlineData: { + mimeType: 'image/jpeg', + data: await imageToBase64('path/to/image.jpg') + } + } + ] +}; + +// Utility functions for parts +function createTextPart(text: string): Part { + return { text }; +} + +function createThinkingPart(text: string): Part { + return { text, thought: true }; +} + +function createImagePart(imageData: string, mimeType: string): Part { + return { + inlineData: { + mimeType, + data: imageData + } + }; +} + +async function imageToBase64(filePath: string): Promise { + const fs = await import('fs/promises'); + const buffer = await fs.readFile(filePath); + return buffer.toString('base64'); +} +``` + +## Thinking Tokens + +Gemini thinking models (like `gemini-2.0-flash-thinking-exp-1219`) provide reasoning traces: + +```typescript +interface ThinkingExtractor { + thinking: string; + response: string; + thinkingTokens: number; + responseTokens: number; +} + +function extractThinking(response: GenerateContentResponse): ThinkingExtractor { + let thinking = ''; + let responseText = ''; + + const candidate = response.candidates?.[0]; + if (candidate?.content?.parts) { + for (const part of candidate.content.parts) { + if (part.text) { + if (part.thought) { + thinking += part.text; + } else { + responseText += part.text; + } + } + } + } + + const usage = response.usageMetadata; + + return { + thinking, + response: responseText, + thinkingTokens: usage?.thoughtsTokenCount || 0, + responseTokens: usage?.candidatesTokenCount || 0 + }; +} + +// Streaming thinking extraction +async function streamWithThinkingExtraction(prompt: string) { + const stream = await client.models.generateContentStream({ + model: 'gemini-2.0-flash-thinking-exp-1219', + contents: prompt + }); + + let thinkingContent = ''; + let responseContent = ''; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content?.parts) continue; + + for (const part of candidate.content.parts) { + if (part.text) { + if (part.thought) { + thinkingContent += part.text; + console.log('[THINKING DELTA]', part.text); + } else { + responseContent += part.text; + console.log('[RESPONSE DELTA]', part.text); + } + } + } + } + + return { + thinking: thinkingContent, + response: responseContent + }; +} + +// Enable thinking for models that support it +async function generateWithThinking(prompt: string, model = 'gemini-2.0-flash-thinking-exp-1219') { + const response = await client.models.generateContent({ + model, + contents: prompt + }); + + return extractThinking(response); +} +``` + +## Peculiarities and Gotchas + +### Key Differences from Other APIs + +1. **System Instructions**: Separate parameter, not part of message history +2. **Parts-based Content**: Content is split into parts, each with specific types +3. **Thinking Detection**: Must check `part.thought` flag to identify reasoning content +4. **Function Calls**: Embedded in parts, not separate message types +5. **Role Names**: Uses 'model' instead of 'assistant' for AI responses +6. 
**Streaming**: Returns full `GenerateContentResponse` objects, not deltas
+
+### Common Pitfalls
+
+```typescript
+// ❌ Wrong: Treating text as complete response
+const response = await client.models.generateContent({...});
+console.log(response.candidates[0].content.parts[0].text); // May miss other parts
+
+// ✅ Correct: Use convenience getter or process all parts
+console.log(response.text); // Concatenates all text parts automatically
+
+// ❌ Wrong: Mixing system instruction with messages
+const messages = [
+  { role: 'system', parts: [{ text: 'You are helpful' }] }, // Not supported
+  { role: 'user', parts: [{ text: 'Hello' }] }
+];
+
+// ✅ Correct: Separate system instruction
+const response = await client.models.generateContent({
+  systemInstruction: { parts: [{ text: 'You are helpful' }] },
+  contents: [{ role: 'user', parts: [{ text: 'Hello' }] }]
+});
+
+// ❌ Wrong: Assuming single part responses
+for await (const chunk of stream) {
+  console.log(chunk.text); // May miss function calls or thinking
+}
+
+// ✅ Correct: Process all parts
+for await (const chunk of stream) {
+  const candidate = chunk.candidates?.[0];
+  if (candidate?.content?.parts) {
+    for (const part of candidate.content.parts) {
+      if (part.text && !part.thought) {
+        console.log('[RESPONSE]', part.text);
+      } else if (part.text && part.thought) {
+        console.log('[THINKING]', part.text);
+      } else if (part.functionCall) {
+        console.log('[FUNCTION CALL]', part.functionCall);
+      }
+    }
+  }
+}
+```
+
+### Performance Tips
+
+1. **Use streaming** for better user experience with long responses
+2. **Cache large prompts** to reduce token costs
+3. **Batch token counting** when possible
+4. **Set appropriate `abortSignal` timeouts** for long-running requests
+5. **Handle function calls efficiently** to avoid timeout issues
+
+### Model-Specific Behaviors
+
+```typescript
+// Different models have different capabilities
+const modelCapabilities = {
+  'gemini-2.0-flash-exp': {
+    thinking: false,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 1000000
+  },
+  'gemini-2.0-flash-thinking-exp-1219': {
+    thinking: true,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 32768
+  },
+  'gemini-1.5-pro': {
+    thinking: false,
+    functionCalling: true,
+    vision: true,
+    maxTokens: 2000000
+  }
+};
+
+// Check model capabilities before using features
+function supportsThinking(model: string): boolean {
+  return model.includes('thinking');
+}
+
+function getMaxTokens(model: string): number {
+  return modelCapabilities[model]?.maxTokens || 32768;
+}
+```
+
+This comprehensive guide covers all the essential aspects of implementing Gemini API features. The key is understanding Gemini's parts-based content system and properly handling the different types of content (text, thinking, function calls) that can appear in responses.
\ No newline at end of file
diff --git a/packages/ai/openai-api.md b/packages/ai/openai-api.md
new file mode 100644
index 00000000..cefe9ac3
--- /dev/null
+++ b/packages/ai/openai-api.md
@@ -0,0 +1,2320 @@
+# OpenAI SDK Implementation Guide
+
+This document provides a comprehensive guide to implementing the required features using the OpenAI SDK v5.12.2. All examples are based on actual usage patterns from the pi-mono codebase and include real TypeScript types from the SDK.
+
+## Table of Contents
+
+1. [Basic Setup](#basic-setup)
+2. [Streaming Responses](#streaming-responses)
+3. [Aborting Requests](#aborting-requests)
+4. [Error Handling](#error-handling)
+5. [Stop Reasons](#stop-reasons)
+6. [Message History & Serialization](#message-history--serialization)
+7. [Token Counting](#token-counting)
+8. [Caching](#caching)
+9. [Chat Completions vs Responses API](#chat-completions-vs-responses-api)
+10. [Tool/Function Calling](#toolfunction-calling)
+11. [System Prompts](#system-prompts)
+12. [Provider-Specific Features](#provider-specific-features)
+13. [Complete Implementation Examples](#complete-implementation-examples)
+
+## Basic Setup
+
+```typescript
+import OpenAI from "openai";
+
+// Basic client setup
+const client = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+  baseURL: "https://api.openai.com/v1", // Optional, default shown
+});
+
+// For other providers (Groq, Anthropic OpenAI-compatible, etc.)
+const groqClient = new OpenAI({
+  apiKey: process.env.GROQ_API_KEY,
+  baseURL: "https://api.groq.com/openai/v1",
+});
+```
+
+### Client Configuration Options
+
+```typescript
+interface ClientOptions {
+  apiKey?: string;
+  baseURL?: string;
+  timeout?: number; // Request timeout in milliseconds
+  maxRetries?: number; // Number of retry attempts
+  defaultHeaders?: Record<string, string>;
+  defaultQuery?: Record<string, string>;
+}
+```
+
+## Streaming Responses
+
+### Chat Completions Streaming
+
+```typescript
+import type {
+  ChatCompletionChunk,
+  ChatCompletionCreateParamsStreaming
+} from "openai/resources/chat/completions";
+import { Stream } from "openai/core/streaming";
+
+async function streamChatCompletion() {
+  const params: ChatCompletionCreateParamsStreaming = {
+    model: "gpt-4o",
+    messages: [
+      { role: "user", content: "Tell me a story" }
+    ],
+    stream: true,
+    max_completion_tokens: 1000,
+  };
+
+  const stream: Stream<ChatCompletionChunk> = await client.chat.completions.create(params);
+
+  for await (const chunk of stream) {
+    const delta = chunk.choices[0]?.delta;
+
+    if (delta?.content) {
+      process.stdout.write(delta.content);
+    }
+
+    if (delta?.tool_calls) {
+      console.log("Tool call delta:", delta.tool_calls);
+    }
+
+    if (chunk.choices[0]?.finish_reason) {
+      console.log("\nFinish reason:", chunk.choices[0].finish_reason);
+    }
+  }
+}
+```
+
+### Responses API Streaming
+
+```typescript
+import type {
+  ResponseCreateParamsStreaming,
+  ResponseStreamEvent
+} from "openai/resources/responses";
+
+async function streamResponsesAPI() {
+  const params: ResponseCreateParamsStreaming = {
+    model: "o1-mini",
+    input: [
+      {
+        role: "user",
+        content: [{ type: "input_text", text: "Solve this math problem: 2x + 5 = 11" }]
+      }
+    ],
+    stream: true,
+    max_output_tokens: 2000,
+    reasoning: {
+      effort: "low",
+      summary: "detailed"
+    }
+  };
+
+  const stream: Stream<ResponseStreamEvent> = await client.responses.create(params);
+
+  for await (const event of stream) {
+    switch (event.type) {
+      case "response.reasoning.text.delta":
+        // Reasoning/thinking tokens (o1/o3)
+        process.stdout.write(`[thinking] ${event.delta}`);
+        break;
+
+      case "response.text.delta":
+        // Output content
+        process.stdout.write(event.delta);
+        break;
+
+      case "response.function_call.arguments.delta":
+        // Tool call arguments being built
+        console.log("Tool call delta:", event.delta);
+        break;
+
+      case "response.completed":
+        console.log("\nResponse completed");
+        break;
+    }
+  }
+}
+```
+
+### Streaming Patterns
+
+```typescript
+// Pattern 1: Simple content streaming
+async function simpleStream(messages: any[]) {
+  const stream = await client.chat.completions.create({
+    model: "gpt-4o",
+    messages,
+    stream: true,
+  });
+
+  let fullContent = "";
+  for await (const chunk of stream) {
+    const content = chunk.choices[0]?.delta?.content || "";
+    fullContent += content;
+
process.stdout.write(content); + } + + return fullContent; +} + +// Pattern 2: Event-driven streaming with handlers +interface StreamHandlers { + onContent?: (delta: string) => void; + onToolCall?: (toolCall: any) => void; + onFinish?: (reason: string) => void; +} + +async function eventDrivenStream(messages: any[], handlers: StreamHandlers) { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages, + stream: true, + }); + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + if (choice.delta?.content) { + handlers.onContent?.(choice.delta.content); + } + + if (choice.delta?.tool_calls) { + handlers.onToolCall?.(choice.delta.tool_calls); + } + + if (choice.finish_reason) { + handlers.onFinish?.(choice.finish_reason); + } + } +} +``` + +## Aborting Requests + +### Using AbortController + +```typescript +class AbortableClient { + private client: OpenAI; + private abortController: AbortController | null = null; + + constructor(config: { apiKey: string; baseURL?: string }) { + this.client = new OpenAI(config); + } + + async askWithAbort(message: string): Promise { + // Create new AbortController for this request + this.abortController = new AbortController(); + + try { + const response = await this.client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: message }], + max_completion_tokens: 1000, + }, { + signal: this.abortController.signal // Pass abort signal + }); + + return response.choices[0]?.message?.content || ""; + } catch (error) { + if (this.abortController.signal.aborted) { + throw new Error("Request was interrupted"); + } + throw error; + } finally { + this.abortController = null; + } + } + + // Call this to abort the current request + interrupt(): void { + this.abortController?.abort(); + } +} + +// Usage example +const abortableClient = new AbortableClient({ + apiKey: process.env.OPENAI_API_KEY! 
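+  // baseURL could be added here as well when targeting an OpenAI-compatible provider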
+}); + +// Start request +const responsePromise = abortableClient.askWithAbort("Write a long essay"); + +// Abort after 5 seconds +setTimeout(() => { + abortableClient.interrupt(); +}, 5000); + +try { + const response = await responsePromise; + console.log(response); +} catch (error) { + console.log("Request was aborted:", error.message); +} +``` + +### Aborting Streaming Requests + +```typescript +async function abortableStream(messages: any[]) { + const abortController = new AbortController(); + + // Abort after 10 seconds + const timeoutId = setTimeout(() => { + abortController.abort(); + }, 10000); + + try { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages, + stream: true, + }, { + signal: abortController.signal + }); + + for await (const chunk of stream) { + // Check if aborted before processing each chunk + if (abortController.signal.aborted) { + break; + } + + const content = chunk.choices[0]?.delta?.content; + if (content) { + process.stdout.write(content); + } + } + } catch (error) { + if (abortController.signal.aborted) { + console.log("\nStream was aborted"); + } else { + throw error; + } + } finally { + clearTimeout(timeoutId); + } +} +``` + +## Error Handling + +### Error Types from OpenAI SDK + +```typescript +import { + OpenAIError, + APIError, + APIConnectionError, + APIConnectionTimeoutError, + APIUserAbortError, + AuthenticationError, + BadRequestError, + RateLimitError, + InternalServerError, + NotFoundError, + PermissionDeniedError, + UnprocessableEntityError +} from "openai"; + +// Comprehensive error handler +async function handleAPICall(apiCall: () => Promise): Promise { + try { + return await apiCall(); + } catch (error) { + if (error instanceof APIUserAbortError) { + console.log("Request was aborted by user"); + throw new Error("Request interrupted"); + } + + if (error instanceof AuthenticationError) { + console.error("Authentication failed:", error.message); + throw new Error("Invalid API key"); + } + + if (error instanceof RateLimitError) { + console.error("Rate limit exceeded:", error.message); + // Could implement exponential backoff here + throw new Error("Rate limited - try again later"); + } + + if (error instanceof APIConnectionError) { + console.error("Connection error:", error.message); + throw new Error("Network connection failed"); + } + + if (error instanceof APIConnectionTimeoutError) { + console.error("Request timeout:", error.message); + throw new Error("Request timed out"); + } + + if (error instanceof BadRequestError) { + console.error("Bad request:", error.message); + console.error("Error details:", error.error); + throw new Error(`Invalid request: ${error.message}`); + } + + if (error instanceof UnprocessableEntityError) { + console.error("Unprocessable entity:", error.message); + throw new Error(`Validation error: ${error.message}`); + } + + if (error instanceof APIError) { + console.error(`API Error ${error.status}:`, error.message); + console.error("Error code:", error.code); + console.error("Error type:", error.type); + throw new Error(`API error: ${error.message}`); + } + + if (error instanceof OpenAIError) { + console.error("OpenAI SDK error:", error.message); + throw new Error(`SDK error: ${error.message}`); + } + + // Unknown error + console.error("Unexpected error:", error); + throw error; + } +} + +// Usage with retry logic +async function apiCallWithRetry( + apiCall: () => Promise, + maxRetries: number = 3 +): Promise { + let lastError: Error; + + for (let attempt = 0; attempt < maxRetries; 
attempt++) { + try { + return await handleAPICall(apiCall); + } catch (error) { + lastError = error as Error; + + // Don't retry on certain errors + if (error instanceof AuthenticationError || + error instanceof BadRequestError || + error instanceof APIUserAbortError) { + throw error; + } + + // Exponential backoff for retryable errors + if (attempt < maxRetries - 1) { + const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + throw lastError!; +} +``` + +### Error Context Extraction + +```typescript +function extractErrorDetails(error: unknown): { + message: string; + code?: string; + type?: string; + status?: number; + retryable: boolean; +} { + if (error instanceof APIError) { + return { + message: error.message, + code: error.code || undefined, + type: error.type, + status: error.status, + retryable: error instanceof RateLimitError || + error instanceof APIConnectionError || + error instanceof InternalServerError + }; + } + + if (error instanceof APIUserAbortError) { + return { + message: "Request was aborted", + retryable: false + }; + } + + if (error instanceof OpenAIError) { + return { + message: error.message, + retryable: false + }; + } + + return { + message: error instanceof Error ? error.message : "Unknown error", + retryable: false + }; +} +``` + +## Stop Reasons + +### Chat Completions Stop Reasons + +```typescript +type ChatCompletionFinishReason = + | "stop" // Natural stopping point or stop sequence + | "length" // Maximum token limit reached + | "content_filter" // Content filtered + | "tool_calls" // Model wants to call tools + | "function_call"; // Legacy function calling + +async function handleStopReasons() { + const response = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Hello" }], + max_completion_tokens: 10, // Low limit to trigger "length" stop + stop: ["END"], // Custom stop sequence + }); + + const choice = response.choices[0]; + const finishReason = choice.finish_reason; + + switch (finishReason) { + case "stop": + console.log("Completed naturally or hit stop sequence"); + break; + + case "length": + console.log("Hit token limit - response may be incomplete"); + // Could request more tokens or continue conversation + break; + + case "content_filter": + console.log("Content was filtered"); + break; + + case "tool_calls": + console.log("Model wants to call tools"); + // Handle tool calls (see Tool Calling section) + break; + + default: + console.log("Unknown finish reason:", finishReason); + } + + return { + content: choice.message.content, + finishReason, + complete: finishReason === "stop" + }; +} +``` + +### Responses API Stop Reasons + +```typescript +// Responses API uses different event types to indicate completion +async function handleResponsesStopReasons() { + const response = await client.responses.create({ + model: "o1-mini", + input: [{ role: "user", content: [{ type: "input_text", text: "Hello" }] }], + max_output_tokens: 100, + }); + + for (const item of response.output || []) { + switch (item.type) { + case "message": + // Check for refusal in content + for (const content of item.content || []) { + if (content.type === "refusal") { + console.log("Response was refused:", content.refusal); + } else if (content.type === "output_text") { + console.log("Response completed normally"); + } + } + break; + + case "function_call": + console.log("Tool call requested"); + break; + } + } +} +``` + +### Streaming Stop Reason Detection + 
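+When streaming, the finish reason is not known until the model stops, so accumulate the content deltas and record `finish_reason` from the chunk that carries it, as the example below does:
+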
+```typescript +async function streamWithStopReasonHandling() { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Count to 10" }], + stream: true, + max_completion_tokens: 50, + }); + + let content = ""; + let finishReason: string | null = null; + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + if (choice.delta?.content) { + content += choice.delta.content; + process.stdout.write(choice.delta.content); + } + + if (choice.finish_reason) { + finishReason = choice.finish_reason; + break; + } + } + + console.log(`\nStreaming finished. Reason: ${finishReason}`); + + if (finishReason === "length") { + console.log("Response was cut off due to token limit"); + // Could continue the conversation to get the rest + } + + return { content, finishReason }; +} +``` + +## Message History & Serialization + +### Message Types and Formats + +```typescript +// Chat Completions message format +interface ChatMessage { + role: "system" | "user" | "assistant" | "tool" | "developer"; + content: string | null; + name?: string; + tool_calls?: Array<{ + id: string; + type: "function"; + function: { + name: string; + arguments: string; + }; + }>; + tool_call_id?: string; // For tool response messages +} + +// Responses API message format +interface ResponseMessage { + role: "user" | "developer"; + content: Array<{ + type: "input_text" | "input_image" | "input_audio"; + text?: string; + image?: { url: string }; + audio?: { data: string }; + }>; +} + +// Unified conversation history +interface ConversationHistory { + api: "completions" | "responses"; + model: string; + systemPrompt?: string; + messages: any[]; // API-specific format + totalTokens: number; + metadata: { + created: number; + lastUpdated: number; + provider: string; + }; +} +``` + +### Serialization Implementation + +```typescript +class ConversationManager { + private messages: any[] = []; + private api: "completions" | "responses"; + private systemPrompt?: string; + private totalTokens = 0; + + constructor(api: "completions" | "responses", systemPrompt?: string) { + this.api = api; + this.systemPrompt = systemPrompt; + + if (systemPrompt) { + if (api === "completions") { + this.messages.push({ role: "system", content: systemPrompt }); + } else { + this.messages.push({ role: "developer", content: systemPrompt }); + } + } + } + + addUserMessage(content: string) { + if (this.api === "completions") { + this.messages.push({ role: "user", content }); + } else { + this.messages.push({ + role: "user", + content: [{ type: "input_text", text: content }] + }); + } + } + + addAssistantMessage(content: string) { + if (this.api === "completions") { + this.messages.push({ role: "assistant", content }); + } else { + this.messages.push({ + type: "message", + content: [{ type: "output_text", text: content }] + }); + } + } + + addToolCall(id: string, name: string, args: string) { + if (this.api === "completions") { + // Add assistant message with tool calls + this.messages.push({ + role: "assistant", + content: null, + tool_calls: [{ + id, + type: "function" as const, + function: { name, arguments: args } + }] + }); + } else { + // Add function call to responses format + this.messages.push({ + type: "function_call", + call_id: id, + name, + arguments: args + }); + } + } + + addToolResult(id: string, result: string) { + if (this.api === "completions") { + this.messages.push({ + role: "tool", + tool_call_id: id, + content: result + }); + } else { + 
this.messages.push({ + type: "function_call_output", + call_id: id, + output: result + }); + } + } + + // Serialize to JSON + serialize(): string { + const data: ConversationHistory = { + api: this.api, + model: "unknown", // Set externally + systemPrompt: this.systemPrompt, + messages: this.messages, + totalTokens: this.totalTokens, + metadata: { + created: Date.now(), + lastUpdated: Date.now(), + provider: "openai" + } + }; + return JSON.stringify(data, null, 2); + } + + // Deserialize from JSON + static deserialize(json: string): ConversationManager { + const data: ConversationHistory = JSON.parse(json); + const manager = new ConversationManager(data.api, data.systemPrompt); + manager.messages = data.messages; + manager.totalTokens = data.totalTokens; + return manager; + } + + getMessages() { + return this.messages; + } + + updateTokenUsage(tokens: number) { + this.totalTokens += tokens; + } +} + +// Usage example +const conversation = new ConversationManager("completions", "You are a helpful assistant"); +conversation.addUserMessage("Hello"); +conversation.addAssistantMessage("Hi there!"); +conversation.updateTokenUsage(25); + +// Save to file +const serialized = conversation.serialize(); +await fs.writeFile("conversation.json", serialized); + +// Load from file +const loaded = await fs.readFile("conversation.json", "utf-8"); +const restored = ConversationManager.deserialize(loaded); +``` + +### Event-Based History Reconstruction + +```typescript +// From pi-agent codebase - reconstruct messages from events +type AgentEvent = + | { type: "user_message"; text: string } + | { type: "assistant_message"; text: string } + | { type: "tool_call"; toolCallId: string; name: string; args: string } + | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } + | { type: "reasoning"; text: string } + | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number }; + +function reconstructMessagesFromEvents( + events: AgentEvent[], + api: "completions" | "responses", + systemPrompt?: string +): any[] { + const messages: any[] = []; + + // Add system prompt + if (systemPrompt) { + if (api === "completions") { + messages.push({ role: "system", content: systemPrompt }); + } else { + messages.push({ role: "developer", content: systemPrompt }); + } + } + + if (api === "responses") { + // Responses API format reconstruction + for (const event of events) { + switch (event.type) { + case "user_message": + messages.push({ + role: "user", + content: [{ type: "input_text", text: event.text }] + }); + break; + + case "reasoning": + messages.push({ + type: "reasoning", + content: [{ type: "reasoning_text", text: event.text }] + }); + break; + + case "tool_call": + messages.push({ + type: "function_call", + call_id: event.toolCallId, + name: event.name, + arguments: event.args + }); + break; + + case "tool_result": + messages.push({ + type: "function_call_output", + call_id: event.toolCallId, + output: event.result + }); + break; + + case "assistant_message": + messages.push({ + type: "message", + content: [{ type: "output_text", text: event.text }] + }); + break; + } + } + } else { + // Chat Completions format reconstruction + let pendingToolCalls: any[] = []; + + for (const event of events) { + switch (event.type) { + case "user_message": + messages.push({ role: "user", content: event.text }); + break; + + case "tool_call": + pendingToolCalls.push({ + id: event.toolCallId, + type: "function", + function: { + name: event.name, + arguments: event.args + } + }); + 
break; + + case "tool_result": + // Add assistant message with tool calls when we see first result + if (pendingToolCalls.length > 0) { + messages.push({ + role: "assistant", + content: null, + tool_calls: pendingToolCalls + }); + pendingToolCalls = []; + } + + messages.push({ + role: "tool", + tool_call_id: event.toolCallId, + content: event.result + }); + break; + + case "assistant_message": + messages.push({ role: "assistant", content: event.text }); + break; + } + } + } + + return messages; +} +``` + +## Token Counting + +### Usage Types from OpenAI SDK + +```typescript +// Chat Completions usage +interface CompletionUsage { + completion_tokens: number; + prompt_tokens: number; + total_tokens: number; + completion_tokens_details?: { + reasoning_tokens?: number; // o1/o3 reasoning tokens + cached_tokens?: number; + }; + prompt_tokens_details?: { + cached_tokens?: number; + }; +} + +// Responses API usage +interface ResponseUsage { + input_tokens: number; + output_tokens: number; + total_tokens: number; + input_tokens_details: { + cached_tokens?: number; + }; + output_tokens_details: { + reasoning_tokens?: number; // o1/o3 reasoning tokens + }; +} +``` + +### Token Counting Implementation + +```typescript +interface TokenUsage { + inputTokens: number; + outputTokens: number; + totalTokens: number; + reasoningTokens: number; + cacheReadTokens: number; + cacheWriteTokens: number; +} + +class TokenCounter { + private totalUsage: TokenUsage = { + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + reasoningTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0 + }; + + // Extract tokens from Chat Completions response + extractChatCompletionUsage(usage?: CompletionUsage): TokenUsage | null { + if (!usage) return null; + + const extracted: TokenUsage = { + inputTokens: usage.prompt_tokens || 0, + outputTokens: usage.completion_tokens || 0, + totalTokens: usage.total_tokens || 0, + reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0, + cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0, + cacheWriteTokens: 0 // Not available in this format + }; + + this.addUsage(extracted); + return extracted; + } + + // Extract tokens from Responses API response + extractResponseUsage(usage?: ResponseUsage): TokenUsage | null { + if (!usage) return null; + + const extracted: TokenUsage = { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + totalTokens: usage.total_tokens || 0, + reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0, + cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0, + cacheWriteTokens: 0 // Not available in current API + }; + + this.addUsage(extracted); + return extracted; + } + + private addUsage(usage: TokenUsage) { + this.totalUsage.inputTokens += usage.inputTokens; + this.totalUsage.outputTokens += usage.outputTokens; + this.totalUsage.totalTokens += usage.totalTokens; + this.totalUsage.reasoningTokens += usage.reasoningTokens; + this.totalUsage.cacheReadTokens += usage.cacheReadTokens; + this.totalUsage.cacheWriteTokens += usage.cacheWriteTokens; + } + + getTotalUsage(): TokenUsage { + return { ...this.totalUsage }; + } + + reset() { + this.totalUsage = { + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + reasoningTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0 + }; + } + + // Format for display + formatUsage(usage?: TokenUsage): string { + const u = usage || this.totalUsage; + let parts = [`↑${u.inputTokens}`, `↓${u.outputTokens}`]; + + if (u.reasoningTokens > 0) { + 
parts.push(`⚔${u.reasoningTokens}`); + } + + if (u.cacheReadTokens > 0) { + parts.push(`šŸ“–${u.cacheReadTokens}`); + } + + if (u.cacheWriteTokens > 0) { + parts.push(`šŸ“${u.cacheWriteTokens}`); + } + + return parts.join(" "); + } +} + +// Usage with streaming +async function countTokensInStream() { + const tokenCounter = new TokenCounter(); + + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Tell me about AI" }], + stream: true, + stream_options: { include_usage: true } // Important for token counts + }); + + for await (const chunk of stream) { + // Token usage comes in final chunk when stream_options.include_usage = true + if (chunk.usage) { + const usage = tokenCounter.extractChatCompletionUsage(chunk.usage); + console.log("Token usage:", tokenCounter.formatUsage(usage)); + } + } + + console.log("Total usage:", tokenCounter.formatUsage()); +} +``` + +### Token Estimation (for planning) + +```typescript +// Rough token estimation for planning purposes +function estimateTokens(text: string): number { + // Very rough approximation: ~4 characters per token for English + return Math.ceil(text.length / 4); +} + +function estimateMessageTokens(messages: any[]): number { + let total = 0; + + for (const message of messages) { + if (typeof message.content === "string") { + total += estimateTokens(message.content); + } else if (Array.isArray(message.content)) { + for (const content of message.content) { + if (content.text) { + total += estimateTokens(content.text); + } + } + } + + // Add overhead for message formatting + total += 10; + } + + return total; +} + +// Check if request will fit in context window +function checkContextLimit(messages: any[], maxTokens: number = 128000): boolean { + const estimated = estimateMessageTokens(messages); + const safetyMargin = 1000; // Reserve tokens for response + + return estimated + safetyMargin < maxTokens; +} +``` + +## Caching + +### Cache Headers and Configuration + +```typescript +// OpenAI supports prompt caching via special message formatting +// Cache is automatically used when messages are repeated + +async function demonstrateCaching() { + const longSystemPrompt = ` + You are an expert software engineer with deep knowledge of TypeScript, React, Node.js... 
+ [Very long system prompt - 1000+ tokens] + `; + + // First request - will cache the system prompt + const response1 = await client.chat.completions.create({ + model: "gpt-4o", + messages: [ + { role: "system", content: longSystemPrompt }, + { role: "user", content: "Explain TypeScript generics" } + ] + }); + + console.log("First request usage:", response1.usage); + + // Second request with same system prompt - will use cache + const response2 = await client.chat.completions.create({ + model: "gpt-4o", + messages: [ + { role: "system", content: longSystemPrompt }, // Cached + { role: "user", content: "Explain React hooks" } + ] + }); + + console.log("Second request usage:", response2.usage); + console.log("Cache read tokens:", response2.usage?.prompt_tokens_details?.cached_tokens); +} +``` + +### Manual Cache Control + +```typescript +// For providers that support explicit cache control +interface CacheConfig { + enabled: boolean; + ttl?: number; // Time to live in seconds +} + +class CachedClient { + private client: OpenAI; + private cache = new Map(); + + constructor(apiKey: string, baseURL?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + } + + private getCacheKey(messages: any[], model: string): string { + return JSON.stringify({ messages, model }); + } + + private isCacheValid(entry: { timestamp: number; ttl: number }): boolean { + return Date.now() - entry.timestamp < entry.ttl * 1000; + } + + async completionWithCache( + messages: any[], + model: string, + cacheConfig: CacheConfig = { enabled: true, ttl: 3600 } + ) { + if (cacheConfig.enabled) { + const cacheKey = this.getCacheKey(messages, model); + const cached = this.cache.get(cacheKey); + + if (cached && this.isCacheValid(cached)) { + console.log("Cache hit"); + return cached.response; + } + } + + const response = await this.client.chat.completions.create({ + model, + messages + }); + + if (cacheConfig.enabled) { + const cacheKey = this.getCacheKey(messages, model); + this.cache.set(cacheKey, { + response, + timestamp: Date.now(), + ttl: cacheConfig.ttl || 3600 + }); + } + + return response; + } + + clearCache() { + this.cache.clear(); + } +} +``` + +## Chat Completions vs Responses API + +### When to Use Each API + +```typescript +// Chat Completions API - Traditional conversational interface +// Use for: Most general chat/completion tasks +interface ChatCompletionsUseCase { + // āœ… Good for: + // - Regular conversations + // - Function/tool calling + // - Most models (gpt-4o, claude, gemini via compatibility) + // - Streaming text generation + // - File uploads and vision + + // āŒ Limitations: + // - No access to reasoning/thinking tokens for o1/o3 + // - Less structured for complex workflows +} + +// Responses API - Structured response interface +// Use for: Complex reasoning tasks, tool workflows +interface ResponsesAPIUseCase { + // āœ… Good for: + // - o1/o3 models with reasoning access + // - Complex tool calling workflows + // - Structured output requirements + // - Background processing + // - Access to reasoning tokens + + // āŒ Limitations: + // - Newer API with less ecosystem support + // - More complex message format + // - Not all models supported +} +``` + +### API Decision Logic + +```typescript +function selectAPI( + model: string, + requiresReasoning: boolean, + hasComplexTools: boolean +): "completions" | "responses" { + // Use Responses API for o1/o3 when reasoning is needed + if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { + return "responses"; + } + + // Use 
Responses API for complex tool workflows + if (hasComplexTools && model.includes("gpt-4")) { + return "responses"; + } + + // Default to Chat Completions for broader compatibility + return "completions"; +} + +// Usage example +const model = "o1-mini"; +const needsReasoning = true; +const api = selectAPI(model, needsReasoning, false); + +if (api === "responses") { + console.log("Using Responses API for reasoning access"); +} else { + console.log("Using Chat Completions API for compatibility"); +} +``` + +### Dual API Client + +```typescript +class DualAPIClient { + private client: OpenAI; + + constructor(apiKey: string, baseURL?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + } + + async complete(params: { + model: string; + messages: any[]; + tools?: any[]; + maxTokens?: number; + temperature?: number; + stream?: boolean; + reasoning?: boolean; + }) { + const api = this.selectAPI(params.model, params.reasoning || false); + + if (api === "responses") { + return this.callResponsesAPI(params); + } else { + return this.callChatCompletionsAPI(params); + } + } + + private selectAPI(model: string, requiresReasoning: boolean): "completions" | "responses" { + if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { + return "responses"; + } + return "completions"; + } + + private async callChatCompletionsAPI(params: any) { + const requestParams = { + model: params.model, + messages: params.messages, + max_completion_tokens: params.maxTokens, + temperature: params.temperature, + tools: params.tools, + stream: params.stream + }; + + if (params.stream) { + return this.client.chat.completions.create(requestParams); + } else { + return this.client.chat.completions.create(requestParams); + } + } + + private async callResponsesAPI(params: any) { + // Convert messages to Responses API format + const input = params.messages.map((msg: any) => { + if (msg.role === "user") { + return { + role: "user", + content: [{ type: "input_text", text: msg.content }] + }; + } else if (msg.role === "system") { + return { + role: "developer", + content: msg.content + }; + } + return msg; + }); + + const requestParams = { + model: params.model, + input, + max_output_tokens: params.maxTokens, + tools: params.tools, + stream: params.stream, + reasoning: params.reasoning ? 
{ effort: "low" } : undefined + }; + + return this.client.responses.create(requestParams); + } +} +``` + +## Tool/Function Calling + +### Tool Definition Format + +```typescript +// OpenAI tool definition format (JSON Schema) +interface ToolDefinition { + type: "function"; + function: { + name: string; + description: string; + parameters: { + type: "object"; + properties: Record; + required: string[]; + }; + }; +} + +// Example tool definitions +const tools: ToolDefinition[] = [ + { + type: "function", + function: { + name: "read_file", + description: "Read the contents of a file", + parameters: { + type: "object", + properties: { + path: { + type: "string", + description: "The file path to read" + } + }, + required: ["path"] + } + } + }, + { + type: "function", + function: { + name: "execute_command", + description: "Execute a shell command", + parameters: { + type: "object", + properties: { + command: { + type: "string", + description: "The command to execute" + }, + timeout: { + type: "number", + description: "Timeout in seconds", + default: 30 + } + }, + required: ["command"] + } + } + } +]; +``` + +### Tool Execution Engine + +```typescript +type ToolFunction = (args: any) => Promise; + +class ToolExecutor { + private tools = new Map(); + + register(name: string, fn: ToolFunction) { + this.tools.set(name, fn); + } + + async execute(name: string, argsJson: string): Promise { + const tool = this.tools.get(name); + if (!tool) { + throw new Error(`Unknown tool: ${name}`); + } + + try { + const args = JSON.parse(argsJson); + return await tool(args); + } catch (error) { + throw new Error(`Tool execution failed: ${error.message}`); + } + } + + getAvailableTools(): string[] { + return Array.from(this.tools.keys()); + } +} + +// Register tool implementations +const toolExecutor = new ToolExecutor(); + +toolExecutor.register("read_file", async (args: { path: string }) => { + const fs = await import("fs/promises"); + try { + const content = await fs.readFile(args.path, "utf-8"); + return content; + } catch (error) { + return `Error reading file: ${error.message}`; + } +}); + +toolExecutor.register("execute_command", async (args: { command: string; timeout?: number }) => { + const { exec } = await import("child_process"); + const { promisify } = await import("util"); + const execAsync = promisify(exec); + + try { + const { stdout, stderr } = await execAsync(args.command, { + timeout: (args.timeout || 30) * 1000 + }); + return stdout + (stderr ? 
`\nSTDERR: ${stderr}` : ""); + } catch (error) { + return `Command failed: ${error.message}`; + } +}); +``` + +### Complete Tool Calling Flow + +```typescript +async function completeChatWithTools(userMessage: string) { + const conversation = new ConversationManager("completions", "You are a helpful assistant with file system access."); + const tokenCounter = new TokenCounter(); + + conversation.addUserMessage(userMessage); + + while (true) { + const response = await client.chat.completions.create({ + model: "gpt-4o", + messages: conversation.getMessages(), + tools, + tool_choice: "auto", + max_completion_tokens: 1000 + }); + + // Track token usage + if (response.usage) { + tokenCounter.extractChatCompletionUsage(response.usage); + } + + const message = response.choices[0].message; + + if (message.tool_calls && message.tool_calls.length > 0) { + // Add assistant message with tool calls to conversation + conversation.getMessages().push({ + role: "assistant", + content: message.content, + tool_calls: message.tool_calls + }); + + // Execute each tool call + for (const toolCall of message.tool_calls) { + console.log(`šŸ”§ Calling ${toolCall.function.name}...`); + + try { + const result = await toolExecutor.execute( + toolCall.function.name, + toolCall.function.arguments + ); + + console.log(`āœ… Tool result: ${result.substring(0, 100)}...`); + conversation.addToolResult(toolCall.id, result); + + } catch (error) { + console.log(`āŒ Tool error: ${error.message}`); + conversation.addToolResult(toolCall.id, `Error: ${error.message}`); + } + } + + // Continue conversation with tool results + continue; + } else { + // Final response + const content = message.content || ""; + conversation.addAssistantMessage(content); + + console.log("šŸ¤– Assistant:", content); + console.log("šŸ“Š Token usage:", tokenCounter.formatUsage()); + + return content; + } + } +} + +// Usage +await completeChatWithTools("Read the package.json file and tell me about this project"); +``` + +### Streaming Tool Calls + +```typescript +async function streamingToolCalls(userMessage: string) { + const stream = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: userMessage }], + tools, + tool_choice: "auto", + stream: true + }); + + let currentToolCalls: Map<string, { name: string; args: string }> = new Map(); + let assistantMessage = ""; + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + const delta = choice.delta; + + // Regular content + if (delta.content) { + assistantMessage += delta.content; + process.stdout.write(delta.content); + } + + // Tool call deltas + if (delta.tool_calls) { + for (const toolCallDelta of delta.tool_calls) { + const id = toolCallDelta.id; + if (!id) continue; + + if (!currentToolCalls.has(id)) { + currentToolCalls.set(id, { name: "", args: "" }); + } + + const toolCall = currentToolCalls.get(id)!; + + if (toolCallDelta.function?.name) { + toolCall.name += toolCallDelta.function.name; + } + + if (toolCallDelta.function?.arguments) { + toolCall.args += toolCallDelta.function.arguments; + } + } + } + + // When finished, execute accumulated tool calls + if (choice.finish_reason === "tool_calls") { + console.log("\nšŸ”§ Executing tools..."); + + for (const [id, toolCall] of currentToolCalls) { + try { + const result = await toolExecutor.execute(toolCall.name, toolCall.args); + console.log(`āœ… ${toolCall.name}: ${result.substring(0, 100)}...`); + } catch (error) { + console.log(`āŒ ${toolCall.name}: ${error.message}`); + } + } + + break; + } + } +}
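+ +// Usage sketch (hypothetical prompt; assumes the `tools` array and `toolExecutor` defined above) +await streamingToolCalls("List the files in the current directory and summarize them");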
+``` + +### Responses API Tool Calling + +```typescript +async function responsesAPIToolCalling() { + const response = await client.responses.create({ + model: "gpt-4o", + input: [ + { + role: "user", + content: [{ type: "input_text", text: "List files in current directory" }] + } + ], + tools: [ + { + type: "function", + function: { + name: "list_directory", + description: "List files in a directory", + parameters: { + type: "object", + properties: { + path: { type: "string", description: "Directory path" } + }, + required: ["path"] + } + } + } + ] + }); + + for (const item of response.output || []) { + switch (item.type) { + case "function_call": + console.log(`šŸ”§ Tool call: ${item.name}`); + console.log(`šŸ“ Arguments: ${item.arguments}`); + + try { + const result = await toolExecutor.execute(item.name, item.arguments); + console.log(`āœ… Result: ${result}`); + + // In a real implementation, you'd add this result back to the conversation + // and continue the response + } catch (error) { + console.log(`āŒ Error: ${error.message}`); + } + break; + + case "message": + for (const content of item.content || []) { + if (content.type === "output_text") { + console.log("šŸ¤– Response:", content.text); + } + } + break; + } + } +} +``` + +## System Prompts + +### System Prompt Handling by Model Type + +```typescript +interface SystemPromptConfig { + content: string; + role: "system" | "developer"; // Different models use different roles +} + +function formatSystemPrompt(prompt: string, model: string, api: "completions" | "responses"): any { + // Chat Completions API + if (api === "completions") { + // Most models use "system" role + if (model.includes("claude") || model.includes("gemini")) { + // Some providers via OpenAI compatibility might expect "system" + return { role: "system", content: prompt }; + } + + // OpenAI native models + return { role: "system", content: prompt }; + } + + // Responses API uses "developer" role for system messages + return { role: "developer", content: prompt }; +} + +// System prompt best practices +const systemPrompts = { + // General assistant + assistant: "You are a helpful, accurate, and reliable AI assistant. Provide clear, concise, and helpful responses.", + + // Code assistant + coder: `You are an expert software engineer with deep knowledge of multiple programming languages, frameworks, and best practices. + +Key principles: +- Write clean, maintainable, and well-documented code +- Follow language-specific conventions and best practices +- Explain your reasoning and trade-offs +- Suggest improvements and alternatives when appropriate +- Always test your code mentally before providing it + +When helping with code: +1. Understand the requirements fully +2. Choose appropriate tools and patterns +3. Provide working, tested solutions +4. Explain key concepts and decisions`, + + // Research assistant + researcher: `You are a thorough research assistant. When answering questions: + +1. Provide accurate, well-sourced information +2. Acknowledge limitations in your knowledge +3. Structure responses clearly with headings and bullet points +4. Cite sources when possible +5. Distinguish between facts, analysis, and opinions +6. Ask clarifying questions when the request is ambiguous`, + + // Tool-enabled assistant + toolEnabled: `You are an AI assistant with access to various tools for file operations, web searches, and code execution. 
+ +Guidelines for tool use: +- Use tools when they would be helpful to answer the user's question +- Always explain what you're doing before calling a tool +- Interpret and summarize tool results for the user +- If a tool fails, try alternative approaches +- Be transparent about what information comes from tools vs your training + +Available capabilities: +- Read and write files +- Execute shell commands +- Search the web +- Analyze code and data` +}; +``` + +### Dynamic System Prompt Building + +```typescript +class SystemPromptBuilder { + private sections: string[] = []; + + addRole(role: string): this { + this.sections.push(`You are ${role}.`); + return this; + } + + addCapabilities(capabilities: string[]): this { + if (capabilities.length > 0) { + this.sections.push(`You have access to: ${capabilities.join(", ")}.`); + } + return this; + } + + addGuidelines(guidelines: string[]): this { + if (guidelines.length > 0) { + this.sections.push("Guidelines:\n" + guidelines.map(g => `- ${g}`).join("\n")); + } + return this; + } + + addContext(context: string): this { + if (context.trim()) { + this.sections.push(`Context: ${context}`); + } + return this; + } + + build(): string { + return this.sections.join("\n\n"); + } + + reset(): this { + this.sections = []; + return this; + } +} + +// Usage examples +const codeAssistantPrompt = new SystemPromptBuilder() + .addRole("an expert TypeScript developer") + .addCapabilities(["file system access", "code execution", "documentation lookup"]) + .addGuidelines([ + "Write clean, type-safe code", + "Explain complex concepts clearly", + "Suggest best practices", + "Test code before providing it" + ]) + .build(); + +const customerServicePrompt = new SystemPromptBuilder() + .addRole("a helpful customer service representative") + .addGuidelines([ + "Be polite and professional", + "Listen carefully to customer concerns", + "Provide accurate information", + "Escalate complex issues when needed" + ]) + .addContext("You work for TechCorp, a software company that makes productivity tools.") + .build(); +``` + +### Model-Specific System Prompt Optimization + +```typescript +function optimizeSystemPromptForModel(basePrompt: string, model: string): string { + // OpenAI models (especially o1/o3) work well with detailed, structured prompts + if (model.includes("gpt") || model.includes("o1") || model.includes("o3")) { + return `${basePrompt} + +Think step by step when solving complex problems. Show your reasoning process clearly.`; + } + + // Claude models prefer more conversational, principle-based prompts + if (model.includes("claude")) { + return `${basePrompt} + +I value helpful, harmless, and honest responses. 
Please be thoughtful and thorough in your analysis.`; + } + + // Gemini models work well with structured instructions + if (model.includes("gemini")) { + return `${basePrompt} + +Please structure your responses clearly and provide specific, actionable advice.`; + } + + // Default: return as-is + return basePrompt; +} + +// Provider-specific prompt injection handling +function detectAndMitigatePromptInjection(userInput: string): { safe: boolean; cleaned?: string } { + const injectionPatterns = [ + /ignore.*previous.*instruction/i, + /forget.*system.*prompt/i, + /act.*as.*different/i, + /pretend.*you.*are/i, + /new.*role.*now/i + ]; + + for (const pattern of injectionPatterns) { + if (pattern.test(userInput)) { + return { + safe: false, + cleaned: userInput.replace(pattern, "[FILTERED]") + }; + } + } + + return { safe: true }; +} +``` + +## Provider-Specific Features + +### Reasoning Support Detection + +```typescript +// From pi-agent codebase - detect and handle reasoning support per provider +type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other"; + +function detectProvider(baseURL?: string): Provider { + if (!baseURL) return "openai"; + if (baseURL.includes("api.openai.com")) return "openai"; + if (baseURL.includes("generativelanguage.googleapis.com")) return "gemini"; + if (baseURL.includes("api.groq.com")) return "groq"; + if (baseURL.includes("api.anthropic.com")) return "anthropic"; + if (baseURL.includes("openrouter.ai")) return "openrouter"; + return "other"; +} + +// Provider-specific reasoning parameter handling +function adjustRequestForReasoning( + requestOptions: any, + api: "completions" | "responses", + provider: Provider, + supportsReasoning: boolean +): any { + if (!supportsReasoning) return requestOptions; + + switch (provider) { + case "openai": + // OpenAI standard format + if (api === "responses") { + requestOptions.reasoning = { + effort: "low", + summary: "detailed" + }; + } else { + requestOptions.reasoning_effort = "low"; + } + break; + + case "gemini": + // Gemini uses extra_body for thinking configuration + if (api === "completions") { + requestOptions.extra_body = { + google: { + thinking_config: { + thinking_budget: 1024, + include_thoughts: true + } + } + }; + // Remove reasoning_effort when using thinking_config + delete requestOptions.reasoning_effort; + } + break; + + case "groq": + // Groq uses reasoning_format for Chat Completions + if (api === "completions") { + requestOptions.reasoning_format = "parsed"; + requestOptions.reasoning_effort = "low"; + } else { + // Groq Responses API doesn't support reasoning.summary + requestOptions.reasoning = { effort: "low" }; + } + break; + + case "openrouter": + // OpenRouter unified reasoning format + if (api === "completions") { + requestOptions.reasoning = { effort: "low" }; + delete requestOptions.reasoning_effort; + } + break; + + default: + // Standard OpenAI format for others + if (api === "responses") { + requestOptions.reasoning = { effort: "low" }; + } else { + requestOptions.reasoning_effort = "low"; + } + } + + return requestOptions; +} +``` + +### Provider-Specific Response Parsing + +```typescript +// Extract reasoning content from provider-specific response formats +function parseReasoningFromMessage(message: any, provider: Provider): { + cleanContent: string; + reasoningTexts: string[]; +} { + const reasoningTexts: string[] = []; + let cleanContent = message.content || ""; + + switch (provider) { + case "gemini": + // Gemini returns thinking in tags + if 
(cleanContent.includes("<thought>")) { + const thoughtMatches = cleanContent.matchAll(/<thought>([\s\S]*?)<\/thought>/g); + for (const match of thoughtMatches) { + reasoningTexts.push(match[1].trim()); + } + // Remove thought tags from response + cleanContent = cleanContent.replace(/<thought>[\s\S]*?<\/thought>/g, "").trim(); + } + break; + + case "groq": + // Groq returns reasoning in separate field + if (message.reasoning) { + reasoningTexts.push(message.reasoning); + } + break; + + case "openrouter": + // OpenRouter uses message.reasoning field + if (message.reasoning) { + reasoningTexts.push(message.reasoning); + } + break; + + default: + // OpenAI and others handle reasoning via events + break; + } + + return { cleanContent, reasoningTexts }; +} +``` + +### Provider-Specific Error Handling + +```typescript +function handleProviderSpecificErrors(error: any, provider: Provider): Error { + switch (provider) { + case "groq": + if (error.message?.includes("reasoning_format")) { + return new Error("Reasoning not supported by this Groq model"); + } + break; + + case "gemini": + if (error.message?.includes("thinking_config")) { + return new Error("Thinking mode not supported by this Gemini model"); + } + break; + + case "anthropic": + if (error.message?.includes("reasoning")) { + return new Error("Reasoning not available via Anthropic's OpenAI compatibility layer"); + } + break; + + case "openrouter": + // OpenRouter passes through underlying provider errors + if (error.message?.includes("not supported")) { + return new Error("Feature not supported by the selected model on OpenRouter"); + } + break; + } + + return error; +} +``` + +## Complete Implementation Examples + +### Basic Chat Client + +```typescript +import OpenAI from "openai"; +import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; + +class BasicChatClient { + private client: OpenAI; + private messages: ChatCompletionMessageParam[] = []; + + constructor(apiKey: string, baseURL?: string, systemPrompt?: string) { + this.client = new OpenAI({ apiKey, baseURL }); + + if (systemPrompt) { + this.messages.push({ role: "system", content: systemPrompt }); + } + } + + async chat(userMessage: string): Promise<string> { + this.messages.push({ role: "user", content: userMessage }); + + try { + const response = await this.client.chat.completions.create({ + model: "gpt-4o", + messages: this.messages, + max_completion_tokens: 1000, + temperature: 0.7 + }); + + const assistantMessage = response.choices[0]?.message?.content || ""; + this.messages.push({ role: "assistant", content: assistantMessage }); + + return assistantMessage; + } catch (error) { + console.error("Chat error:", error); + throw error; + } + } + + getHistory(): ChatCompletionMessageParam[] { + return [...this.messages]; + } + + clearHistory(): void { + this.messages = this.messages.filter(m => m.role === "system"); + } +} +``` + +### Advanced Streaming Client with All Features + +```typescript +import OpenAI from "openai"; +import type { + ChatCompletionCreateParamsStreaming, + ChatCompletionChunk +} from "openai/resources/chat/completions"; + +interface StreamingClientConfig { + apiKey: string; + baseURL?: string; + model: string; + systemPrompt?: string; + tools?: any[]; + maxTokens?: number; + temperature?: number; +} + +interface StreamEvent { + type: "content" | "tool_call" | "reasoning" | "usage" | "error" | "complete"; + data: any; +} + +class AdvancedStreamingClient { + private client: OpenAI; + private config: StreamingClientConfig; + private messages: any[] = []; + private
abortController: AbortController | null = null; + private tokenCounter = new TokenCounter(); + + constructor(config: StreamingClientConfig) { + this.config = config; + this.client = new OpenAI({ + apiKey: config.apiKey, + baseURL: config.baseURL + }); + + if (config.systemPrompt) { + this.messages.push({ role: "system", content: config.systemPrompt }); + } + } + + async *streamChat(userMessage: string): AsyncGenerator { + this.messages.push({ role: "user", content: userMessage }); + this.abortController = new AbortController(); + + try { + const params: ChatCompletionCreateParamsStreaming = { + model: this.config.model, + messages: this.messages, + stream: true, + max_completion_tokens: this.config.maxTokens || 1000, + temperature: this.config.temperature || 0.7, + tools: this.config.tools, + tool_choice: this.config.tools ? "auto" : undefined, + stream_options: { include_usage: true } + }; + + const stream = await this.client.chat.completions.create(params, { + signal: this.abortController.signal + }); + + let assistantContent = ""; + let currentToolCalls = new Map(); + + for await (const chunk of stream) { + if (this.abortController.signal.aborted) break; + + const choice = chunk.choices[0]; + if (!choice) continue; + + // Handle content + if (choice.delta?.content) { + assistantContent += choice.delta.content; + yield { + type: "content", + data: { delta: choice.delta.content, content: assistantContent } + }; + } + + // Handle tool calls + if (choice.delta?.tool_calls) { + for (const toolCall of choice.delta.tool_calls) { + if (!toolCall.id) continue; + + if (!currentToolCalls.has(toolCall.id)) { + currentToolCalls.set(toolCall.id, { + id: toolCall.id, + name: "", + arguments: "" + }); + } + + const call = currentToolCalls.get(toolCall.id); + if (toolCall.function?.name) { + call.name += toolCall.function.name; + } + if (toolCall.function?.arguments) { + call.arguments += toolCall.function.arguments; + } + + yield { + type: "tool_call", + data: { id: toolCall.id, delta: toolCall, current: call } + }; + } + } + + // Handle usage + if (chunk.usage) { + const usage = this.tokenCounter.extractChatCompletionUsage(chunk.usage); + yield { + type: "usage", + data: usage + }; + } + + // Handle completion + if (choice.finish_reason) { + if (choice.finish_reason === "tool_calls") { + // Execute tool calls + const toolResults = await this.executeToolCalls(Array.from(currentToolCalls.values())); + + // Add messages and continue + this.messages.push({ + role: "assistant", + content: assistantContent || null, + tool_calls: Array.from(currentToolCalls.values()).map(call => ({ + id: call.id, + type: "function", + function: { + name: call.name, + arguments: call.arguments + } + })) + }); + + for (const result of toolResults) { + this.messages.push({ + role: "tool", + tool_call_id: result.id, + content: result.content + }); + } + + // Continue stream for final response + yield* this.streamChat(""); + return; + } else { + // Regular completion + if (assistantContent) { + this.messages.push({ role: "assistant", content: assistantContent }); + } + + yield { + type: "complete", + data: { reason: choice.finish_reason, content: assistantContent } + }; + } + } + } + } catch (error) { + yield { + type: "error", + data: { error: error.message } + }; + } finally { + this.abortController = null; + } + } + + private async executeToolCalls(toolCalls: any[]): Promise> { + const results = []; + + for (const call of toolCalls) { + try { + // Tool execution would be implemented here + const result = await 
this.executeTool(call.name, call.arguments); + results.push({ id: call.id, content: result }); + } catch (error) { + results.push({ id: call.id, content: `Error: ${error.message}` }); + } + } + + return results; + } + + private async executeTool(name: string, argsJson: string): Promise<string> { + // Implement tool execution logic + return `Tool ${name} executed with args: ${argsJson}`; + } + + interrupt(): void { + this.abortController?.abort(); + } + + getUsage() { + return this.tokenCounter.getTotalUsage(); + } +} + +// Usage example +const client = new AdvancedStreamingClient({ + apiKey: process.env.OPENAI_API_KEY!, + model: "gpt-4o", + systemPrompt: "You are a helpful assistant.", + tools: [/* tool definitions */] +}); + +for await (const event of client.streamChat("Help me write a TypeScript function")) { + switch (event.type) { + case "content": + process.stdout.write(event.data.delta); + break; + case "tool_call": + console.log(`\nšŸ”§ Tool: ${event.data.current.name}`); + break; + case "usage": + console.log(`\nšŸ“Š Tokens: ${event.data.totalTokens}`); + break; + case "complete": + console.log(`\nāœ… Complete (${event.data.reason})`); + break; + case "error": + console.log(`\nāŒ Error: ${event.data.error}`); + break; + } +} +``` + +This comprehensive guide covers all the essential features needed to implement a robust OpenAI SDK integration. Each section provides working code examples, actual types from the SDK, and real-world patterns from the pi-mono codebase. + +## Key Takeaways + +1. **Always use AbortController** for request cancellation +2. **Handle both Chat Completions and Responses APIs** depending on model capabilities +3. **Implement comprehensive error handling** with proper error types +4. **Track token usage** for cost management and optimization +5. **Support streaming** for better user experience +6. **Handle provider-specific features** like reasoning and caching +7. **Implement proper tool calling workflows** for agentic applications +8. **Serialize conversation state** for session persistence +9. **Use appropriate system prompts** for different model types +10.
**Test reasoning support** dynamically for each provider/model combination \ No newline at end of file diff --git a/packages/ai/package.json b/packages/ai/package.json new file mode 100644 index 00000000..ce3f2273 --- /dev/null +++ b/packages/ai/package.json @@ -0,0 +1,32 @@ +{ + "name": "@mariozechner/ai", + "version": "0.5.8", + "description": "Unified API for OpenAI, Anthropic, and Google Gemini LLM providers", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist", "README.md"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": { + "openai": "5.12.2", + "@anthropic-ai/sdk": "0.60.0", + "@google/genai": "1.14.0" + }, + "devDependencies": {}, + "keywords": ["ai", "llm", "openai", "anthropic", "gemini", "unified", "api"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/ai" + }, + "engines": { + "node": ">=20.0.0" + } +} \ No newline at end of file diff --git a/packages/ai/plan.md b/packages/ai/plan.md new file mode 100644 index 00000000..d09177ac --- /dev/null +++ b/packages/ai/plan.md @@ -0,0 +1,950 @@ +# Unified AI API Design Plan + +Based on comprehensive investigation of OpenAI, Anthropic, and Gemini SDKs with actual implementation examples. + +## Key API Differences Summary + +### OpenAI +- **Dual APIs**: Chat Completions (broad support) vs Responses API (o1/o3 thinking content) +- **Thinking**: Only Responses API gives actual content, Chat Completions only gives counts +- **Roles**: `system`, `user`, `assistant`, `tool` (o1/o3 use `developer` instead of `system`) +- **Streaming**: Deltas in chunks with `stream_options.include_usage` for token usage + +### Anthropic +- **Single API**: Messages API with comprehensive streaming +- **Content Blocks**: Always arrays, even for simple text +- **System**: Separate parameter, not in messages array +- **Tool Use**: Content blocks, not separate message role +- **Thinking**: Explicit budget allocation, appears as content blocks +- **Caching**: Per-block cache control with TTL options + +### Gemini +- **Parts System**: All content split into typed parts +- **System**: Separate `systemInstruction` parameter +- **Roles**: Uses `model` instead of `assistant` +- **Thinking**: `part.thought: true` flag identifies reasoning +- **Streaming**: Returns complete responses, not deltas +- **Function Calls**: Embedded in parts array + +## Unified API Design + +### Core Client + +```typescript +interface AIConfig { + provider: 'openai' | 'anthropic' | 'gemini'; + apiKey: string; + model: string; + baseURL?: string; // For OpenAI-compatible endpoints +} + +interface ModelInfo { + id: string; + name: string; + provider: string; + capabilities: { + reasoning: boolean; + toolCall: boolean; + vision: boolean; + audio?: boolean; + }; + cost: { + input: number; // per million tokens + output: number; // per million tokens + cacheRead?: number; + cacheWrite?: number; + }; + limits: { + context: number; + output: number; + }; + knowledge?: string; // Knowledge cutoff date +} + +class AI { + constructor(config: AIConfig); + + // Main streaming interface - everything else builds on this + async *stream(request: Request): AsyncGenerator; + + // Convenience method for non-streaming + async complete(request: Request): Promise; + + // Get model information + getModelInfo(): 
ModelInfo; + + // Abort current request + abort(): void; +} +``` + +### Message Format + +```typescript +type Message = + | { + role: 'user'; + content: string | Content[]; + } + | { + role: 'assistant'; + content: string | Content[]; + model: string; + usage: TokenUsage; + toolCalls?: { + id: string; + name: string; + arguments: Record; + }[]; + } + | { + role: 'tool'; + content: string | Content[]; + toolCallId: string; + }; + +interface Content { + type: 'text' | 'image'; + text?: string; + image?: { + data: string; // base64 + mimeType: string; + }; +} +``` + +### Request Format + +```typescript +interface Request { + messages: Message[]; + + // System prompt (separated for Anthropic/Gemini compatibility) + systemPrompt?: string; + + // Common parameters + temperature?: number; + maxTokens?: number; + stopSequences?: string[]; + + // Tools + tools?: { + name: string; + description: string; + parameters: Record; // JSON Schema + }[]; + toolChoice?: 'auto' | 'none' | 'required' | { name: string }; + + // Thinking/reasoning + reasoning?: { + enabled: boolean; + effort?: 'low' | 'medium' | 'high'; // OpenAI reasoning_effort + maxTokens?: number; // Anthropic thinking budget + }; + + // Abort signal + signal?: AbortSignal; +} +``` + +### Event Stream + +```typescript +type Event = + | { type: 'start'; model: string; provider: string } + | { type: 'text'; content: string; delta: string } + | { type: 'thinking'; content: string; delta: string } + | { type: 'toolCall'; toolCall: ToolCall } + | { type: 'usage'; usage: TokenUsage } + | { type: 'done'; reason: StopReason; message: Message } // message includes model and usage + | { type: 'error'; error: Error }; + +interface TokenUsage { + input: number; + output: number; + total: number; + thinking?: number; + cacheRead?: number; + cacheWrite?: number; + cost?: { + input: number; + output: number; + cache?: number; + total: number; + }; +} + +type StopReason = 'stop' | 'length' | 'toolUse' | 'safety' | 'error'; +``` + +## Caching Strategy + +Caching is handled automatically by each provider adapter: + +- **OpenAI**: Automatic prompt caching (no configuration needed) +- **Gemini**: Automatic context caching (no configuration needed) +- **Anthropic**: We automatically add cache_control to the system prompt and older messages + +```typescript +class AnthropicAdapter { + private addCaching(messages: Message[]): any[] { + const anthropicMessages = []; + + // Automatically cache older messages (assuming incremental context) + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + const isOld = i < messages.length - 2; // Cache all but last 2 messages + + // Convert to Anthropic format with automatic caching + const blocks = this.toContentBlocks(msg); + if (isOld && blocks.length > 0) { + blocks[0].cache_control = { type: 'ephemeral' }; + } + + anthropicMessages.push({ + role: msg.role === 'assistant' ? 
'assistant' : 'user', + content: blocks + }); + } + + return anthropicMessages; + } +} +``` + +## Provider Adapter Implementation + +### OpenAI Adapter + +```typescript +class OpenAIAdapter { + private client: OpenAI; + private useResponsesAPI: boolean = false; + + async *stream(request: Request): AsyncGenerator { + // Determine which API to use + if (request.reasoning?.enabled && this.isReasoningModel()) { + yield* this.streamResponsesAPI(request); + } else { + yield* this.streamChatCompletions(request); + } + } + + private async *streamChatCompletions(request: Request) { + const stream = await this.client.chat.completions.create({ + model: this.model, + messages: this.toOpenAIMessages(request), + tools: this.toOpenAITools(request.tools), + reasoning_effort: request.reasoning?.effort, + stream: true, + stream_options: { include_usage: true } + }); + + let content = ''; + let toolCalls: any[] = []; + + for await (const chunk of stream) { + if (chunk.choices[0]?.delta?.content) { + const delta = chunk.choices[0].delta.content; + content += delta; + yield { type: 'text', content, delta }; + } + + if (chunk.choices[0]?.delta?.tool_calls) { + // Accumulate tool calls + this.mergeToolCalls(toolCalls, chunk.choices[0].delta.tool_calls); + for (const tc of toolCalls) { + yield { type: 'toolCall', toolCall: tc, partial: true }; + } + } + + if (chunk.usage) { + yield { + type: 'usage', + usage: { + input: chunk.usage.prompt_tokens, + output: chunk.usage.completion_tokens, + total: chunk.usage.total_tokens, + thinking: chunk.usage.completion_tokens_details?.reasoning_tokens + } + }; + } + } + } + + private async *streamResponsesAPI(request: Request) { + // Use Responses API for actual thinking content + const response = await this.client.responses.create({ + model: this.model, + input: this.toResponsesInput(request), + tools: this.toResponsesTools(request.tools), + stream: true + }); + + for await (const event of response) { + if (event.type === 'response.reasoning_text.delta') { + yield { + type: 'thinking', + content: event.text, + delta: event.delta + }; + } + // Handle other event types... + } + } + + private toOpenAIMessages(request: Request): any[] { + const messages: any[] = []; + + // Handle system prompt + if (request.systemPrompt) { + const role = this.isReasoningModel() ? 'developer' : 'system'; + messages.push({ role, content: request.systemPrompt }); + } + + // Convert unified messages + for (const msg of request.messages) { + if (msg.role === 'tool') { + messages.push({ + role: 'tool', + content: msg.content, + tool_call_id: msg.toolCallId + }); + } else { + messages.push({ + role: msg.role, + content: this.contentToString(msg.content), + tool_calls: msg.toolCalls + }); + } + } + + return messages; + } +} +``` + +### Anthropic Adapter + +```typescript +class AnthropicAdapter { + private client: Anthropic; + + async *stream(request: Request): AsyncGenerator { + const stream = this.client.messages.stream({ + model: this.model, + max_tokens: request.maxTokens || 1024, + messages: this.addCaching(request.messages), + system: request.systemPrompt, + tools: this.toAnthropicTools(request.tools), + thinking: request.reasoning?.enabled ? 
{ + type: 'enabled', + budget_tokens: request.reasoning.maxTokens || 2000 + } : undefined + }); + + let content = ''; + let thinking = ''; + + stream.on('text', (delta, snapshot) => { + content = snapshot; + // Note: Can't yield from callback, need different approach + }); + + stream.on('thinking', (delta, snapshot) => { + thinking = snapshot; + }); + + // Use raw streaming instead for proper async generator + const rawStream = await this.client.messages.create({ + ...params, + stream: true + }); + + for await (const chunk of rawStream) { + switch (chunk.type) { + case 'content_block_delta': + if (chunk.delta.type === 'text_delta') { + content += chunk.delta.text; + yield { + type: 'text', + content, + delta: chunk.delta.text + }; + } + break; + + case 'message_delta': + if (chunk.usage) { + yield { + type: 'usage', + usage: { + input: chunk.usage.input_tokens, + output: chunk.usage.output_tokens, + total: chunk.usage.input_tokens + chunk.usage.output_tokens, + cacheRead: chunk.usage.cache_read_input_tokens, + cacheWrite: chunk.usage.cache_creation_input_tokens + } + }; + } + break; + } + } + } + + private toAnthropicMessages(request: Request): any[] { + return request.messages.map(msg => { + if (msg.role === 'tool') { + // Tool results go as user messages with tool_result blocks + return { + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: msg.toolCallId, + content: msg.content + }] + }; + } + + // Always use content blocks + const blocks: any[] = []; + + if (typeof msg.content === 'string') { + blocks.push({ + type: 'text', + text: msg.content, + cache_control: msg.cacheControl + }); + } else { + // Convert unified content to blocks + for (const part of msg.content) { + if (part.type === 'text') { + blocks.push({ type: 'text', text: part.text }); + } else if (part.type === 'image') { + blocks.push({ + type: 'image', + source: { + type: 'base64', + media_type: part.image.mimeType, + data: part.image.data + } + }); + } + } + } + + // Add tool calls as blocks + if (msg.toolCalls) { + for (const tc of msg.toolCalls) { + blocks.push({ + type: 'tool_use', + id: tc.id, + name: tc.name, + input: tc.arguments + }); + } + } + + return { + role: msg.role === 'assistant' ? 'assistant' : 'user', + content: blocks + }; + }); + } +} +``` + +### Gemini Adapter + +```typescript +class GeminiAdapter { + private client: GoogleGenAI; + + async *stream(request: Request): AsyncGenerator { + const stream = await this.client.models.generateContentStream({ + model: this.model, + systemInstruction: request.systemPrompt ? 
{ + parts: [{ text: request.systemPrompt }] + } : undefined, + contents: this.toGeminiContents(request), + tools: this.toGeminiTools(request.tools), + abortSignal: request.signal + }); + + let content = ''; + let thinking = ''; + + for await (const chunk of stream) { + const candidate = chunk.candidates?.[0]; + if (!candidate?.content?.parts) continue; + + for (const part of candidate.content.parts) { + if (part.text && !part.thought) { + content += part.text; + yield { + type: 'text', + content, + delta: part.text + }; + } else if (part.text && part.thought) { + thinking += part.text; + yield { + type: 'thinking', + content: thinking, + delta: part.text + }; + } else if (part.functionCall) { + yield { + type: 'toolCall', + toolCall: { + id: part.functionCall.id || crypto.randomUUID(), + name: part.functionCall.name, + arguments: part.functionCall.args + } + }; + } + } + + if (chunk.usageMetadata) { + yield { + type: 'usage', + usage: { + input: chunk.usageMetadata.promptTokenCount || 0, + output: chunk.usageMetadata.candidatesTokenCount || 0, + total: chunk.usageMetadata.totalTokenCount || 0, + thinking: chunk.usageMetadata.thoughtsTokenCount, + cacheRead: chunk.usageMetadata.cachedContentTokenCount + } + }; + } + } + } + + private toGeminiContents(request: Request): any[] { + return request.messages.map(msg => { + const parts: any[] = []; + + if (typeof msg.content === 'string') { + parts.push({ text: msg.content }); + } else { + for (const part of msg.content) { + if (part.type === 'text') { + parts.push({ text: part.text }); + } else if (part.type === 'image') { + parts.push({ + inlineData: { + mimeType: part.image.mimeType, + data: part.image.data + } + }); + } + } + } + + // Add function calls as parts + if (msg.toolCalls) { + for (const tc of msg.toolCalls) { + parts.push({ + functionCall: { + name: tc.name, + args: tc.arguments + } + }); + } + } + + // Add tool results as function responses + if (msg.role === 'tool') { + parts.push({ + functionResponse: { + name: msg.toolCallId, + response: { result: msg.content } + } + }); + } + + return { + role: msg.role === 'assistant' ? 'model' : msg.role === 'tool' ? 'user' : msg.role, + parts + }; + }); + } +} +``` + +## Usage Examples + +### Basic Streaming + +```typescript +const ai = new AI({ + provider: 'openai', + apiKey: process.env.OPENAI_API_KEY, + model: 'gpt-4' +}); + +const stream = ai.stream({ + messages: [ + { role: 'user', content: 'Write a haiku about coding' } + ], + systemPrompt: 'You are a poetic programmer' +}); + +for await (const event of stream) { + switch (event.type) { + case 'text': + process.stdout.write(event.delta); + break; + case 'usage': + console.log(`\nTokens: ${event.usage.total}`); + break; + case 'done': + console.log(`\nFinished: ${event.reason}`); + break; + } +} +``` + +### Cross-Provider Tool Calling + +```typescript +async function callWithTools(provider: 'openai' | 'anthropic' | 'gemini') { + const ai = new AI({ + provider, + apiKey: process.env[`${provider.toUpperCase()}_API_KEY`], + model: getDefaultModel(provider) + }); + + const messages: Message[] = [{ + role: 'user', + content: 'What is the weather in Paris and calculate 15 * 23?' 
+ }]; + + const stream = ai.stream({ + messages, + tools: [ + { + name: 'weather', + description: 'Get weather for a location', + parameters: { + type: 'object', + properties: { + location: { type: 'string' } + }, + required: ['location'] + } + }, + { + name: 'calculator', + description: 'Calculate math expressions', + parameters: { + type: 'object', + properties: { + expression: { type: 'string' } + }, + required: ['expression'] + } + } + ] + }); + + const toolCalls: any[] = []; + + for await (const event of stream) { + if (event.type === 'toolCall') { + toolCalls.push(event.toolCall); + + // Execute tool + const result = await executeToolCall(event.toolCall); + + // Add tool result to conversation + messages.push({ + role: 'assistant', + toolCalls: [event.toolCall] + }); + + messages.push({ + role: 'tool', + content: JSON.stringify(result), + toolCallId: event.toolCall.id + }); + } + } + + // Continue conversation with tool results + if (toolCalls.length > 0) { + const finalStream = ai.stream({ messages }); + + for await (const event of finalStream) { + if (event.type === 'text') { + process.stdout.write(event.delta); + } + } + } +} +``` + +### Thinking/Reasoning + +```typescript +async function withThinking() { + // OpenAI o1 + const openai = new AI({ + provider: 'openai', + model: 'o1-preview' + }); + + // Anthropic Claude + const anthropic = new AI({ + provider: 'anthropic', + model: 'claude-3-opus-20240229' + }); + + // Gemini thinking model + const gemini = new AI({ + provider: 'gemini', + model: 'gemini-2.0-flash-thinking-exp-1219' + }); + + for (const ai of [openai, anthropic, gemini]) { + const stream = ai.stream({ + messages: [{ + role: 'user', + content: 'Solve this step by step: If a tree falls in a forest...' + }], + reasoning: { + enabled: true, + effort: 'high', // OpenAI reasoning_effort + maxTokens: 2000 // Anthropic budget + } + }); + + for await (const event of stream) { + if (event.type === 'thinking') { + console.log('[THINKING]', event.delta); + } else if (event.type === 'text') { + console.log('[RESPONSE]', event.delta); + } else if (event.type === 'done') { + // Final message includes model and usage with cost + console.log('Model:', event.message.model); + console.log('Tokens:', event.message.usage?.total); + console.log('Cost: $', event.message.usage?.cost?.total); + } + } + } +} +``` + +## Implementation Notes + +### Critical Decisions + +1. **Streaming First**: All providers support streaming, non-streaming is just collected events +2. **Unified Events**: Same event types across all providers for consistent handling +3. **Separate System Prompt**: Required for Anthropic/Gemini compatibility +4. **Tool Role**: Unified way to handle tool responses across providers +5. **Content Arrays**: Support both string and structured content +6. 
**Thinking Extraction**: Normalize reasoning across different provider formats + +### Provider-Specific Handling + +**OpenAI**: +- Choose between Chat Completions and Responses API based on model and thinking needs +- Map `developer` role for o1/o3 models +- Handle streaming tool call deltas + +**Anthropic**: +- Convert to content blocks (always arrays) +- Tool results as user messages with tool_result blocks +- Handle MessageStream events or raw streaming + +**Gemini**: +- Convert to parts system +- Extract thinking from `part.thought` flag +- Map `assistant` to `model` role +- Handle function calls/responses in parts + +### Error Handling + +```typescript +class AIError extends Error { + constructor( + message: string, + public code: string, + public provider: string, + public retryable: boolean, + public statusCode?: number + ) { + super(message); + } +} + +// In adapters +try { + // API call +} catch (error) { + if (error instanceof RateLimitError) { + throw new AIError( + 'Rate limit exceeded', + 'rate_limit', + this.provider, + true, + 429 + ); + } + // Map other errors... +} +``` + +## Model Information & Cost Tracking + +### Models Database + +We cache the models.dev API data at build time for fast, offline access: + +```typescript +// scripts/update-models.ts - Run during build or manually +async function updateModels() { + const response = await fetch('https://models.dev/api.json'); + const data = await response.json(); + + // Transform to our format + const models: ModelsDatabase = transformModelsData(data); + + // Generate TypeScript file + const content = `// Auto-generated from models.dev API +// Last updated: ${new Date().toISOString()} +// Run 'npm run update-models' to refresh + +export const MODELS_DATABASE: ModelsDatabase = ${JSON.stringify(models, null, 2)}; +`; + + await fs.writeFile('src/models-data.ts', content); +} + +// src/models.ts - Runtime model lookup +import { MODELS_DATABASE } from './models-data.js'; + +// Simple lookup with fallback +export function getModelInfo(provider: string, model: string): ModelInfo { + const info = MODELS_DATABASE.providers[provider]?.models[model]; + + if (!info) { + // Fallback for unknown models + return { + id: model, + name: model, + provider, + capabilities: { + reasoning: false, + toolCall: true, + vision: false + }, + cost: { input: 0, output: 0 }, + limits: { context: 128000, output: 4096 } + }; + } + + return info; +} + +// Optional: Runtime override for testing new models +const runtimeOverrides = new Map(); + +export function registerModel(provider: string, model: string, info: ModelInfo) { + runtimeOverrides.set(`${provider}:${model}`, info); +} +``` + +### Cost Calculation + +```typescript +class CostTracker { + private usage: TokenUsage = { + input: 0, + output: 0, + total: 0, + cacheRead: 0, + cacheWrite: 0 + }; + + private modelInfo: ModelInfo; + + constructor(modelInfo: ModelInfo) { + this.modelInfo = modelInfo; + } + + addUsage(tokens: Partial): TokenUsage { + this.usage.input += tokens.input || 0; + this.usage.output += tokens.output || 0; + this.usage.thinking += tokens.thinking || 0; + this.usage.cacheRead += tokens.cacheRead || 0; + this.usage.cacheWrite += tokens.cacheWrite || 0; + this.usage.total = this.usage.input + this.usage.output + (this.usage.thinking || 0); + + // Calculate costs (per million tokens) + const cost = this.modelInfo.cost; + this.usage.cost = { + input: (this.usage.input / 1_000_000) * cost.input, + output: (this.usage.output / 1_000_000) * cost.output, + cache: + ((this.usage.cacheRead 
|| 0) / 1_000_000) * (cost.cacheRead || 0) + + ((this.usage.cacheWrite || 0) / 1_000_000) * (cost.cacheWrite || 0), + total: 0 + }; + + this.usage.cost.total = + this.usage.cost.input + + this.usage.cost.output + + this.usage.cost.cache; + + return { ...this.usage }; + } + + getTotalCost(): number { + return this.usage.cost?.total || 0; + } + + getUsageSummary(): string { + return `Tokens: ${this.usage.total} (${this.usage.input}→${this.usage.output}) | Cost: $${this.getTotalCost().toFixed(4)}`; + } +} +``` + +### Integration in Adapters + +```typescript +class OpenAIAdapter { + private costTracker: CostTracker; + + constructor(config: AIConfig) { + const modelInfo = getModelInfo('openai', config.model); + this.costTracker = new CostTracker(modelInfo); + } + + async *stream(request: Request): AsyncGenerator { + // ... streaming logic ... + + if (chunk.usage) { + const usage = this.costTracker.addUsage({ + input: chunk.usage.prompt_tokens, + output: chunk.usage.completion_tokens, + thinking: chunk.usage.completion_tokens_details?.reasoning_tokens, + cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens + }); + + yield { type: 'usage', usage }; + } + } +} +``` + +## Next Steps + +1. Create models.ts with models.dev integration +2. Implement base `AI` class with adapter pattern +3. Create three provider adapters with full streaming support +4. Add comprehensive error mapping +5. Implement token counting and cost tracking +6. Add test suite for each provider +7. Create migration guide from native SDKs \ No newline at end of file diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts new file mode 100644 index 00000000..46691ff0 --- /dev/null +++ b/packages/ai/src/index.ts @@ -0,0 +1,5 @@ +// @mariozechner/ai - Unified API for OpenAI, Anthropic, and Google Gemini +// This package provides a common interface for working with multiple LLM providers + +// TODO: Export types and implementations once defined +export const version = "0.5.8"; diff --git a/packages/ai/tsconfig.build.json b/packages/ai/tsconfig.build.json new file mode 100644 index 00000000..5ce43029 --- /dev/null +++ b/packages/ai/tsconfig.build.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} \ No newline at end of file diff --git a/todos/done/20250817-183528-ai-unified-api-package-analysis.md b/todos/done/20250817-183528-ai-unified-api-package-analysis.md new file mode 100644 index 00000000..750c96cb --- /dev/null +++ b/todos/done/20250817-183528-ai-unified-api-package-analysis.md @@ -0,0 +1,606 @@ +# Analysis: Creating Unified AI Package + +## Package Structure Analysis for Pi Monorepo + +Based on my examination of the existing packages (`tui`, `agent`, and `pods`), here are the comprehensive patterns and conventions used in this monorepo: + +### 1. Package Naming Conventions + +**Scoped NPM packages with consistent naming:** +- All packages use the `@mariozechner/` scope +- Package names follow the pattern: `@mariozechner/pi-` +- Special case: the main CLI package is simply `@mariozechner/pi` (not `pi-pods`) + +**Directory structure:** +- Packages are located in `/packages//` +- Directory names match the suffix of the npm package name (e.g., `tui`, `agent`, `pods`) + +### 2. 
Package.json Structure Patterns + +**Common fields across all packages:** +```json +{ + "name": "@mariozechner/pi-", + "version": "0.5.8", // Lockstep versioning - all packages share same version + "description": "...", + "type": "module", // All packages use ES modules + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/" + }, + "engines": { + "node": ">=20.0.0" // Consistent Node.js requirement + } +} +``` + +**Binary packages (agent, pods):** +- Include `"bin"` field with CLI command mapping +- Examples: `"pi-agent": "dist/cli.js"` and `"pi": "dist/cli.js"` + +**Library packages (tui):** +- Include `"main"` field pointing to built entry point +- Include `"types"` field for TypeScript definitions + +### 3. Scripts Configuration + +**Universal scripts across all packages:** +- `"clean": "rm -rf dist"` - Removes build artifacts +- `"build": "tsc -p tsconfig.build.json"` - Builds with dedicated build config +- `"check": "biome check --write ."` - Linting and formatting +- `"prepublishOnly": "npm run clean && npm run build"` - Pre-publish cleanup + +**CLI-specific build scripts:** +- Add `&& chmod +x dist/cli.js` for executable permissions +- Copy additional assets (e.g., `&& cp src/models.json dist/` for pods package) + +### 4. Dependencies Structure + +**Dependency hierarchy follows a clear pattern:** +``` +pi-tui (foundation) -> pi-agent (uses tui) -> pi (uses agent) +``` + +**Internal dependencies:** +- Use exact version matching for internal packages (e.g., `"^0.5.8"`) +- Agent depends on TUI: `"@mariozechner/pi-tui": "^0.5.8"` +- Pods depends on Agent: `"@mariozechner/pi-agent": "^0.5.8"` + +**External dependencies:** +- Common dependencies like `chalk` are used across multiple packages +- Specialized dependencies are package-specific (e.g., `marked` for tui, `openai` for agent) + +### 5. TypeScript Configuration + +**Dual TypeScript configuration approach:** + +**`tsconfig.build.json` (for production builds):** +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +**Root `tsconfig.json` (for development and type checking):** +- Contains path mappings for cross-package imports during development +- Includes all source and test files +- Uses `"noEmit": true` for type checking without building + +### 6. Source Directory Structure + +**Standard structure across all packages:** +``` +src/ +ā”œā”€ā”€ index.ts # Main export file +ā”œā”€ā”€ cli.ts # CLI entry point (if applicable) +ā”œā”€ā”€ .ts # Core functionality +ā”œā”€ā”€ components/ # Components (for tui) +ā”œā”€ā”€ tools/ # Tool implementations (for agent) +ā”œā”€ā”€ commands/ # Command implementations (for pods) +└── renderers/ # Output renderers (for agent) +``` + +### 7. Export Patterns (index.ts) + +**Comprehensive type and function exports:** +- Export both types and implementation classes +- Use `export type` for type-only exports +- Group exports logically with comments +- Example from tui: exports components, interfaces, and utilities +- Example from agent: exports core classes, types, and utilities + +### 8. Files Configuration + +**Files included in NPM packages:** +- `"files": ["dist"]` or `"files": ["dist/**/*", "README.md"]` +- All packages include built `dist/` directory +- Some include additional files like README.md or scripts + +### 9. 
README.md Structure + +**Comprehensive documentation pattern:** +- Feature overview with key capabilities +- Quick start section with code examples +- Detailed API documentation +- Installation instructions +- Development setup +- Testing information (especially for tui) +- Examples and usage patterns + +### 10. Testing Structure (TUI package) + +**Dedicated test directory:** +- `test/` directory with `.test.ts` files for unit tests +- Example applications (e.g., `chat-app.ts`, `file-browser.ts`) +- Custom testing infrastructure (e.g., `virtual-terminal.ts`) +- Test script: `"test": "node --test --import tsx test/*.test.ts"` + +### 11. Version Management + +**Lockstep versioning:** +- All packages share the same version number +- Root package.json scripts handle version bumping across all packages +- Version sync script ensures internal dependency versions match + +### 12. Build Order + +**Dependency-aware build order:** +- Root build script builds packages in dependency order +- `"build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi"` + +### 13. Common Configuration Files + +**Shared across monorepo:** +- `biome.json` - Unified linting and formatting configuration +- `tsconfig.base.json` - Base TypeScript configuration +- `.gitignore` - Ignores `dist/`, `node_modules/`, and other build artifacts +- Husky pre-commit hooks for formatting and type checking + +### 14. Keywords and Metadata + +**Descriptive keywords for NPM discovery:** +- Each package includes relevant keywords (e.g., "tui", "terminal", "agent", "ai", "llm") +- Keywords help with package discoverability + +This analysis shows a well-structured monorepo with consistent patterns that would make adding new packages straightforward by following these established conventions. + +## Monorepo Configuration Analysis + +Based on my analysis of the pi-mono monorepo configuration, here's a comprehensive guide on how to properly integrate a new package: + +### 1. Root Package.json Configuration + +**Workspace Configuration:** +- Uses npm workspaces with `"workspaces": ["packages/*"]` +- All packages are located under `/packages/` directory +- Private monorepo (`"private": true`) with ESM modules (`"type": "module"`) + +**Build System:** +- **Sequential Build Order**: The build script explicitly defines dependency order: + ```json + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/pi" + ``` +- **Dependency Chain**: `pi-tui` → `pi-agent` → `pi` (pods) +- **Important**: New packages must be inserted in the correct dependency order in the build script + +**Scripts Available:** +- `clean`: Cleans all package dist folders +- `build`: Sequential build respecting dependencies +- `check`: Runs Biome formatting, package checks, and TypeScript checking +- `test`: Runs tests across all packages +- Version management scripts (lockstep versioning) +- Publishing scripts with dry-run capability + +### 2. 
Root TypeScript Configuration + +**Dual Configuration System:** +- **`tsconfig.base.json`**: Base TypeScript settings for all packages +- **`tsconfig.json`**: Development configuration with path mappings for cross-package imports +- **Package `tsconfig.build.json`**: Clean build configs per package + +**Path Mappings** (in `/Users/badlogic/workspaces/pi-mono/tsconfig.json`): +```json +"paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], + "@mariozechner/pi": ["./packages/pods/src/index.ts"] +} +``` + +### 3. Package Dependencies and Structure + +**Dependency Structure:** +- `pi-tui` (base library) - no internal dependencies +- `pi-agent` depends on `pi-tui` +- `pi` (pods) depends on `pi-agent` + +**Standard Package Structure:** +``` +packages/new-package/ +ā”œā”€ā”€ src/ +│ ā”œā”€ā”€ index.ts # Main export file +│ └── ... # Implementation files +ā”œā”€ā”€ package.json # Package configuration +ā”œā”€ā”€ tsconfig.build.json # Build-specific TypeScript config +ā”œā”€ā”€ README.md # Package documentation +└── dist/ # Build output (gitignored) +``` + +### 4. Version Management + +**Lockstep Versioning:** +- All packages share the same version number (currently 0.5.8) +- Automated version sync script: `/Users/badlogic/workspaces/pi-mono/scripts/sync-versions.js` +- Inter-package dependencies are automatically updated to match current versions + +**Version Scripts:** +- `npm run version:patch/minor/major` - Updates all package versions and syncs dependencies +- Automatic dependency version synchronization + +### 5. GitIgnore Patterns + +**Package-Level Ignores:** +``` +packages/*/node_modules/ +packages/*/dist/ +``` +Plus standard ignores for logs, IDE files, environment files, etc. 
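The lockstep versioning and dependency sync described in the Version Management section above can be hard to picture from prose alone. Below is a minimal, illustrative sketch of that step — not the repository's actual `scripts/sync-versions.js` — assuming only Node's built-in `fs`/`path` modules and the `packages/*` layout:

```typescript
// sync-versions-sketch.ts — illustrative only, not the real scripts/sync-versions.js.
// Pins every packages/*/package.json to one shared version and rewrites
// internal @mariozechner/* dependency ranges to match (lockstep versioning).
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";

const version = process.argv[2]; // e.g. "0.5.8"
if (!version) throw new Error("usage: tsx sync-versions-sketch.ts <version>");

for (const dir of readdirSync("packages")) {
	const file = join("packages", dir, "package.json");
	if (!existsSync(file)) continue;

	const pkg = JSON.parse(readFileSync(file, "utf8"));
	pkg.version = version;

	for (const field of ["dependencies", "devDependencies"]) {
		for (const name of Object.keys(pkg[field] ?? {})) {
			if (name.startsWith("@mariozechner/")) {
				pkg[field][name] = `^${version}`;
			}
		}
	}

	writeFileSync(file, `${JSON.stringify(pkg, null, "\t")}\n`);
}
```

The real script may differ in its details (error handling, which dependency fields it touches), but the overall shape — read every workspace manifest, stamp the shared version, realign internal ranges — is what the version scripts above rely on.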
+ +## How to Integrate a New Package + +### Step 1: Create Package Structure +```bash +mkdir packages/your-new-package +cd packages/your-new-package +``` + +### Step 2: Create package.json +```json +{ + "name": "@mariozechner/your-new-package", + "version": "0.5.8", + "description": "Your package description", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": { + // Add dependencies on other packages in the monorepo if needed + // "@mariozechner/pi-tui": "^0.5.8" + }, + "devDependencies": {}, + "keywords": ["relevant", "keywords"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/your-new-package" + }, + "engines": { + "node": ">=20.0.0" + } +} +``` + +### Step 3: Create tsconfig.build.json +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +### Step 4: Create src/index.ts +```typescript +// Main exports for your package +export * from './your-main-module.js'; +``` + +### Step 5: Update Root Configuration + +**Add to `/Users/badlogic/workspaces/pi-mono/tsconfig.json` paths:** +```json +"paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], + "@mariozechner/pi": ["./packages/pods/src/index.ts"], + "@mariozechner/your-new-package": ["./packages/your-new-package/src/index.ts"] +} +``` + +**Update build script in root `/Users/badlogic/workspaces/pi-mono/package.json`:** +```json +"build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/pi-agent && npm run build -w @mariozechner/your-new-package && npm run build -w @mariozechner/pi" +``` +(Insert in correct dependency order) + +### Step 6: Update sync-versions.js +If your package depends on other monorepo packages, add synchronization logic to `/Users/badlogic/workspaces/pi-mono/scripts/sync-versions.js`. + +### Step 7: Install and Test +```bash +# From monorepo root +npm install +npm run build +npm run check +``` + +## Key Requirements for New Packages + +1. **Must use ESM modules** (`"type": "module"`) +2. **Must follow lockstep versioning** (same version as other packages) +3. **Must be placed in correct build order** based on dependencies +4. **Must use tab indentation** (Biome config: `"indentStyle": "tab"`) +5. **Must avoid `any` types** unless absolutely necessary (project instruction) +6. **Must include proper TypeScript declarations** (`"declaration": true`) +7. **Must use Node.js >= 20.0.0** (engine requirement) +8. **Must follow the standard package structure** with src/, dist/, proper exports + +## Development Workflow + +1. **Development**: Use `tsx` to run source files directly (no build needed) +2. **Type Checking**: `npm run check` works across all packages +3. **Building**: Sequential builds respect dependency order +4. **Publishing**: Automatic version sync and cross-package dependency updates +5. **Testing**: Each package can have its own test suite + +This monorepo is well-structured for maintaining multiple related packages with clean dependency management and automated version synchronization. 
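To make workflow item 5 concrete, here is a hypothetical package-level test using the Node.js built-in runner, in the same style as the tui package's `node --test --import tsx test/*.test.ts` setup; it assumes only the `version` constant that this patch exports from `packages/ai/src/index.ts`:

```typescript
// packages/ai/test/index.test.ts — hypothetical example; run from packages/ai with:
//   node --test --import tsx test/*.test.ts
import assert from "node:assert/strict";
import { test } from "node:test";

import { version } from "../src/index.js";

test("exports the lockstep package version", () => {
	assert.equal(version, "0.5.8");
});
```

A test along these lines would also catch a package whose version drifts out of lockstep after a release bump.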
+ +## Detailed Findings: Unified AI API Requirements Based on Current pi-agent Usage + +After thoroughly analyzing the existing agent package (`/Users/badlogic/workspaces/pi-mono/packages/agent`), here are the comprehensive requirements for a unified AI API based on current usage patterns: + +### **1. Core API Structure & Event System** + +**Current Pattern:** +- Event-driven architecture using `AgentEvent` types +- Single `AgentEventReceiver` interface for all output handling +- Support for both single-shot and interactive modes + +**Required API Features:** +```typescript +type AgentEvent = + | { type: "session_start"; sessionId: string; model: string; api: string; baseURL: string; systemPrompt: string } + | { type: "assistant_start" } + | { type: "reasoning"; text: string } + | { type: "tool_call"; toolCallId: string; name: string; args: string } + | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } + | { type: "assistant_message"; text: string } + | { type: "error"; message: string } + | { type: "user_message"; text: string } + | { type: "interrupted" } + | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number; cacheReadTokens: number; cacheWriteTokens: number; reasoningTokens: number } +``` + +### **2. OpenAI API Integration Patterns** + +**Current Implementation:** +- Uses OpenAI SDK v5.12.2 (`import OpenAI from "openai"`) +- Supports both Chat Completions (`/v1/chat/completions`) and Responses API (`/v1/responses`) +- Provider detection based on base URL patterns + +**Provider Support Required:** +```typescript +// Detected providers based on baseURL patterns +type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other" + +// Provider-specific configurations +interface ProviderConfig { + openai: { reasoning_effort: "minimal" | "low" | "medium" | "high" } + gemini: { extra_body: { google: { thinking_config: { thinking_budget: number, include_thoughts: boolean } } } } + groq: { reasoning_format: "parsed", reasoning_effort: string } + openrouter: { reasoning: { effort: "low" | "medium" | "high" } } +} +``` + +### **3. Streaming vs Non-Streaming** + +**Current Status:** +- **No streaming currently implemented** - uses standard request/response +- All API calls are non-streaming: `await client.chat.completions.create()` and `await client.responses.create()` +- Events are emitted synchronously after full response + +**Streaming Requirements for Unified API:** +- Support for streaming responses with partial content updates +- Event-driven streaming with `assistant_message_delta` events +- Proper handling of tool call streaming +- Reasoning token streaming for supported models + +### **4. Tool Calling Architecture** + +**Current Implementation:** +```typescript +// Tool definitions for both APIs +toolsForResponses: Array<{type: "function", name: string, description: string, parameters: object}> +toolsForChat: ChatCompletionTool[] + +// Tool execution with abort support +async function executeTool(name: string, args: string, signal?: AbortSignal): Promise + +// Built-in tools: read, list, bash, glob, rg (ripgrep) +``` + +**Unified API Requirements:** +- Automatic tool format conversion between Chat Completions and Responses API +- Built-in tools with filesystem and shell access +- Custom tool registration capability +- Tool execution with proper abort/interrupt handling +- Tool result streaming for long-running operations + +### **5. 
Message Structure Handling** + +**Current Pattern:** +- Dual message format support based on API type +- Automatic conversion between formats in `setEvents()` method + +**Chat Completions Format:** +```typescript +{ role: "system" | "user" | "assistant" | "tool", content: string, tool_calls?: any[] } +``` + +**Responses API Format:** +```typescript +{ type: "message" | "function_call" | "function_call_output", content: any[] } +``` + +### **6. Session Persistence System** + +**Current Implementation:** +```typescript +interface SessionData { + config: AgentConfig + events: SessionEvent[] + totalUsage: TokenUsage +} + +// File-based persistence in ~/.pi/sessions/ +// JSONL format with session headers and event entries +// Automatic session continuation support +``` + +**Requirements:** +- Directory-based session organization +- Event replay capability for session restoration +- Cumulative token usage tracking +- Session metadata (config, timestamps, working directory) + +### **7. Token Counting & Usage Tracking** + +**Current Implementation:** +```typescript +interface TokenUsage { + inputTokens: number + outputTokens: number + totalTokens: number + cacheReadTokens: number + cacheWriteTokens: number + reasoningTokens: number // For o1/o3 and reasoning models +} +``` + +**Provider-Specific Token Mapping:** +- OpenAI: `prompt_tokens`, `completion_tokens`, `cached_tokens`, `reasoning_tokens` +- Responses API: `input_tokens`, `output_tokens`, `cached_tokens`, `reasoning_tokens` +- Cumulative tracking across conversations + +### **8. Abort/Interrupt Handling** + +**Current Pattern:** +```typescript +class Agent { + private abortController: AbortController | null = null + + async ask(message: string) { + this.abortController = new AbortController() + // Pass signal to all API calls and tool executions + } + + interrupt(): void { + this.abortController?.abort() + } +} +``` + +**Requirements:** +- AbortController integration for all async operations +- Graceful interruption of API calls, tool execution, and streaming +- Proper cleanup and "interrupted" event emission +- Signal propagation to nested operations + +### **9. Reasoning/Thinking Support** + +**Current Implementation:** +```typescript +// Provider-specific reasoning extraction +function parseReasoningFromMessage(message: any, baseURL?: string): { + cleanContent: string + reasoningTexts: string[] +} + +// Automatic reasoning support detection +async function checkReasoningSupport(client, model, api, baseURL, signal): Promise +``` + +**Provider Support:** +- **OpenAI o1/o3**: Full thinking content via Responses API +- **Groq GPT-OSS**: Reasoning via `reasoning_format: "parsed"` +- **Gemini 2.5**: Thinking content via `` tags +- **OpenRouter**: Model-dependent reasoning support + +### **10. Error Handling Patterns** + +**Current Approach:** +- Try/catch blocks around all API calls +- Error events emitted through event system +- Specific error handling for reasoning model failures +- Provider-specific error interpretation + +### **11. Configuration Management** + +**Current Structure:** +```typescript +interface AgentConfig { + apiKey: string + baseURL: string + model: string + api: "completions" | "responses" + systemPrompt: string +} +``` + +**Provider Detection:** +```typescript +function detectProvider(baseURL?: string): Provider { + // URL pattern matching for automatic provider configuration +} +``` + +### **12. 
Output Rendering System** + +**Current Renderers:** +- **ConsoleRenderer**: Terminal output with animations, token display +- **TuiRenderer**: Full interactive TUI with pi-tui integration +- **JsonRenderer**: JSONL event stream output + +**Requirements:** +- Event-based rendering architecture +- Real-time token usage display +- Loading animations for async operations +- Markdown rendering support +- Tool execution progress indication + +### **Summary: Key Unified API Requirements** + +1. **Event-driven architecture** with standardized event types +2. **Dual API support** (Chat Completions + Responses API) with automatic format conversion +3. **Provider abstraction** with automatic detection and configuration +4. **Comprehensive tool system** with abort support and built-in tools +5. **Session persistence** with event replay and token tracking +6. **Reasoning/thinking support** across multiple providers +7. **Interrupt handling** with AbortController integration +8. **Token usage tracking** with provider-specific mapping +9. **Flexible rendering** through event receiver pattern +10. **Configuration management** with provider-specific settings + +The unified API should maintain this event-driven, provider-agnostic approach while adding streaming capabilities and enhanced tool execution features that the current implementation lacks. \ No newline at end of file diff --git a/todos/done/20250817-183528-ai-unified-api-package.md b/todos/done/20250817-183528-ai-unified-api-package.md new file mode 100644 index 00000000..eca2bbc1 --- /dev/null +++ b/todos/done/20250817-183528-ai-unified-api-package.md @@ -0,0 +1,46 @@ +# Create AI Package with Unified API + +**Status:** Done +**Agent PID:** 10965 + +## Original Todo +ai: create a new package ai (package name @mariozechner/ai) which implements a common api for the openai, anthropic, and google gemini apis + - look at the other packages and how they are set up, mirror that setup for ai + - install the latest version of each dependency via npm in the ai package + - openai@5.12.2 + - @anthropic-ai/sdk@0.60.0 + - @google/genai@1.14.0 + - investigate the APIs in their respective node_modules folder so you understand how to use them. specifically, we need to understand how to: + - stream responses, including reasoning/thinking tokens and tool calls + - abort requests + - handle errors + - handle stop reasons + - maintain the context (message history) such that it can be serialized in a uniform format to disk, and deserialized again later and used with the other api + - count tokens (input, output, cached read, cached write) + - enable caching + - Create a plan.md in the ai package that details how the unified API on top of all three could look like. we want the most minimal api possible, which allows serialization/deserialization, turning on/off reasoning/thinking, and handle system prompt and tool specifications + +## Description +Create the initial package scaffold for @mariozechner/ai following the established monorepo patterns, install the required dependencies (openai, anthropic, google genai SDKs), and create a plan.md file that details the unified API design for all three providers. 
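Before the implementation plan below, it helps to sketch what the "most minimal API possible" from the original todo could look like. This is a hypothetical illustration — the names and shapes are not the design actually written to plan.md — covering the required pieces: streaming with reasoning and tool calls, abort support, stop reasons, serializable message history, and the four token counters:

```typescript
// Hypothetical sketch only — not the API defined in plan.md.
type Role = "system" | "user" | "assistant" | "tool";

interface Message {
	role: Role;
	content: string; // plain data, so history serializes to JSON and back
}

interface Usage {
	input: number;
	output: number;
	cacheRead: number;
	cacheWrite: number;
	reasoning: number;
}

type StopReason = "stop" | "length" | "toolCall" | "aborted";

type StreamEvent =
	| { type: "text"; delta: string }
	| { type: "reasoning"; delta: string }
	| { type: "toolCall"; id: string; name: string; args: string }
	| { type: "usage"; usage: Usage }
	| { type: "stop"; reason: StopReason };

interface StreamOptions {
	reasoning?: boolean; // toggle thinking/reasoning where the provider supports it
	tools?: unknown[]; // provider-agnostic tool specifications
	signal?: AbortSignal; // abort an in-flight request
}

interface UnifiedClient {
	stream(messages: Message[], options?: StreamOptions): AsyncIterable<StreamEvent>;
}
```

Because `Message` and `Usage` are plain data, a conversation could be written to disk as JSON and later replayed against any of the three providers, which is the serialization requirement in the todo.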
+ +*Read [analysis.md](./analysis.md) in full for detailed codebase research and context* + +## Implementation Plan +- [x] Create package directory structure at packages/ai/ +- [x] Create package.json with proper configuration following monorepo patterns +- [x] Create tsconfig.build.json for build configuration +- [x] Create initial src/index.ts file +- [x] Add package to root tsconfig.json path mappings +- [x] Update root package.json build script to include ai package +- [x] Install dependencies: openai@5.12.2, @anthropic-ai/sdk@0.60.0, @google/genai@1.14.0 +- [x] Create README.md with package description +- [x] Create plan.md detailing the unified API design +- [x] Investigate OpenAI, Anthropic, and Gemini APIs in detail +- [x] Document implementation details for each API +- [x] Update todos/project-description.md with "How to Create a New Package" section +- [x] Update todos/project-description.md Testing section to reflect that tui has Node.js built-in tests +- [x] Run npm install from root to link everything +- [x] Verify package builds correctly with npm run build + +## Notes +[Implementation notes] \ No newline at end of file diff --git a/todos/project-description.md b/todos/project-description.md index c75c7237..b888a76b 100644 --- a/todos/project-description.md +++ b/todos/project-description.md @@ -39,4 +39,98 @@ A comprehensive toolkit for managing Large Language Model (LLM) deployments and - Publish: `npm run publish` ## Testing -Currently no formal testing framework is configured. Test infrastructure exists but no actual test files or framework dependencies are present. \ No newline at end of file +The TUI package includes comprehensive tests using Node.js built-in test framework: +- Unit tests in `packages/tui/test/*.test.ts` +- Test runner: `node --test --import tsx test/*.test.ts` +- Virtual terminal for TUI testing via `@xterm/headless` +- Example applications for manual testing + +## How to Create a New Package + +Follow these steps to add a new package to the monorepo: + +1. **Create package directory structure:** + ```bash + mkdir -p packages/your-package/src + ``` + +2. **Create package.json:** + ```json + { + "name": "@mariozechner/your-package", + "version": "0.5.8", + "description": "Package description", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": ["dist", "README.md"], + "scripts": { + "clean": "rm -rf dist", + "build": "tsc -p tsconfig.build.json", + "check": "biome check --write .", + "prepublishOnly": "npm run clean && npm run build" + }, + "dependencies": {}, + "devDependencies": {}, + "keywords": ["relevant", "keywords"], + "author": "Mario Zechner", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/badlogic/pi-mono.git", + "directory": "packages/your-package" + }, + "engines": { + "node": ">=20.0.0" + } + } + ``` + +3. **Create tsconfig.build.json:** + ```json + { + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] + } + ``` + +4. **Create src/index.ts:** + ```typescript + // Main exports for your package + export const version = "0.5.8"; + ``` + +5. **Update root tsconfig.json paths:** + Add your package to the `paths` mapping in the correct dependency order: + ```json + "paths": { + "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/your-package": ["./packages/your-package/src/index.ts"], + // ... 
other packages + } + ``` + +6. **Update root package.json build script:** + Insert your package in the correct dependency order: + ```json + "build": "npm run build -w @mariozechner/pi-tui && npm run build -w @mariozechner/your-package && ..." + ``` + +7. **Install and verify:** + ```bash + npm install + npm run build + npm run check + ``` + +**Important Notes:** +- All packages use lockstep versioning (same version number) +- Follow dependency order: foundational packages build first +- Use ESM modules (`"type": "module"`) +- No `any` types unless absolutely necessary +- Include README.md with package documentation \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index 311ac672..464072cb 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,6 +4,7 @@ "noEmit": true, "paths": { "@mariozechner/pi-tui": ["./packages/tui/src/index.ts"], + "@mariozechner/pi-ai": ["./packages/ai/src/index.ts"], "@mariozechner/pi-agent": ["./packages/agent/src/index.ts"], "@mariozechner/pi": ["./packages/pods/src/index.ts"] }
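The final hunk above adds the development path mapping for the new package. As a small illustration (hypothetical scratch file, using the specifier exactly as the hunk maps it), this is what the mapping buys you: during development, another file in the repo can import the package by its mapped specifier and type checking resolves it straight to `packages/ai/src/index.ts` — no build step — picking up the `version` constant this patch adds:

```typescript
// scratch.ts — hypothetical; relies on the root tsconfig "paths" entry so the
// specifier resolves to packages/ai/src/index.ts during development/type checking.
import { version } from "@mariozechner/pi-ai";

console.log(`@mariozechner/pi-ai scaffold, version ${version}`);
```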