diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts
index ac97f082..99ed76f6 100644
--- a/packages/ai/src/providers/anthropic.ts
+++ b/packages/ai/src/providers/anthropic.ts
@@ -149,7 +149,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
 		const output: AssistantMessage = {
 			role: "assistant",
 			content: [],
-			api: "anthropic-messages" as Api,
+			api: model.api as Api,
 			provider: model.provider,
 			model: model.id,
 			usage: {
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index 4da13b5a..ba93e834 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -40,7 +40,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses", OpenAIRes
 		const output: AssistantMessage = {
 			role: "assistant",
 			content: [],
-			api: "openai-responses" as Api,
+			api: model.api as Api,
 			provider: model.provider,
 			model: model.id,
 			usage: {
diff --git a/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/index.ts b/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/index.ts
index 5d60c1cc..73138ec1 100644
--- a/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/index.ts
+++ b/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/index.ts
@@ -5,16 +5,8 @@
  * Delegates to pi-ai's built-in Anthropic and OpenAI streaming implementations.
  *
  * Usage:
- *   # First install dependencies
- *   cd packages/coding-agent/examples/extensions/gitlab-duo && npm install
- *
- *   # With OAuth (run /login gitlab-duo first)
- *   pi -e ./packages/coding-agent/examples/extensions/gitlab-duo
- *
- *   # With PAT
- *   GITLAB_TOKEN=glpat-... pi -e ./packages/coding-agent/examples/extensions/gitlab-duo
- *
- * Then use /model to select gitlab-duo/duo-chat-sonnet-4-5
+ *   pi -e ./packages/coding-agent/examples/extensions/custom-provider-gitlab-duo
+ *   # Then run /login gitlab-duo, or launch pi with GITLAB_TOKEN=glpat-... set in the environment
  */
 
 import {
@@ -26,7 +18,8 @@ import {
 	type OAuthCredentials,
 	type OAuthLoginCallbacks,
 	type SimpleStreamOptions,
-	streamSimple,
+	streamSimpleAnthropic,
+	streamSimpleOpenAIResponses,
 } from "@mariozechner/pi-ai";
 import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
 
@@ -39,42 +32,101 @@ const AI_GATEWAY_URL = "https://cloud.gitlab.com";
 const ANTHROPIC_PROXY_URL = `${AI_GATEWAY_URL}/ai/v1/proxy/anthropic/`;
 const OPENAI_PROXY_URL = `${AI_GATEWAY_URL}/ai/v1/proxy/openai/v1`;
 
-// Bundled OAuth client ID for gitlab.com (from opencode-gitlab-auth, registered with localhost redirect)
 const BUNDLED_CLIENT_ID = "1d89f9fdb23ee96d4e603201f6861dab6e143c5c3c00469a018a2d94bdc03d4e";
 const OAUTH_SCOPES = ["api"];
 const REDIRECT_URI = "http://127.0.0.1:8080/callback";
-
-// Direct access token cache (25 min, tokens expire after 30 min)
 const DIRECT_ACCESS_TTL = 25 * 60 * 1000;
 
-// Model mappings: duo model ID -> backend config
-type OpenAIApi = "openai-completions" | "openai-responses";
-const MODEL_MAPPINGS: Record<string, { api: "anthropic-messages" | OpenAIApi; backendModel: string; baseUrl: string }> =
+// =============================================================================
+// Models - exported for use by tests
+// =============================================================================
+
+type Backend = "anthropic" | "openai";
+
+interface GitLabModel { // One GitLab Duo model entry; feeds both the provider's `models` list and stream routing.
+	id: string; // Key for the MODEL_MAP lookup against model.id; also the id listed in the provider's models.
+	name: string;
+	backend: Backend; // Selects the delegate: streamSimpleAnthropic or streamSimpleOpenAIResponses.
+	baseUrl: string; // Proxy URL that replaces model.baseUrl before delegating.
+	reasoning: boolean;
+	input: ("text" | "image")[];
+	cost: { input: number; output: number; cacheRead: number; cacheWrite: number }; // NOTE(review): units are not stated here — confirm against pi-ai's Model type.
+	contextWindow: number;
+	maxTokens: number;
+}
+
+export const MODELS: GitLabModel[] = [
+	// Anthropic
 	{
-		"duo-chat-opus-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-opus-4-5-20251101",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		"duo-chat-sonnet-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-sonnet-4-5-20250929",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		"duo-chat-haiku-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-haiku-4-5-20251001",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		// All GPT models use Responses API for consistent tool call ID format across model switches
-		"duo-chat-gpt-5-1": { api: "openai-responses", backendModel: "gpt-5.1-2025-11-13", baseUrl: OPENAI_PROXY_URL },
-		"duo-chat-gpt-5-mini": {
-			api: "openai-responses",
-			backendModel: "gpt-5-mini-2025-08-07",
-			baseUrl: OPENAI_PROXY_URL,
-		},
-		"duo-chat-gpt-5-codex": { api: "openai-responses", backendModel: "gpt-5-codex", baseUrl: OPENAI_PROXY_URL },
-	};
+		id: "claude-opus-4-5-20251101",
+		name: "Claude Opus 4.5",
+		backend: "anthropic",
+		baseUrl: ANTHROPIC_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 },
+		contextWindow: 200000,
+		maxTokens: 32000,
+	},
+	{
+		id: "claude-sonnet-4-5-20250929",
+		name: "Claude Sonnet 4.5",
+		backend: "anthropic",
+		baseUrl: ANTHROPIC_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
+		contextWindow: 200000,
+		maxTokens: 16384,
+	},
+	{
+		id: "claude-haiku-4-5-20251001",
+		name: "Claude Haiku 4.5",
+		backend: "anthropic",
+		baseUrl: ANTHROPIC_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 },
+		contextWindow: 200000,
+		maxTokens: 8192,
+	},
+	// OpenAI (all use the Responses API for a consistent tool call ID format across model switches)
+	{
+		id: "gpt-5.1-2025-11-13",
+		name: "GPT-5.1",
+		backend: "openai",
+		baseUrl: OPENAI_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
+		contextWindow: 128000,
+		maxTokens: 16384,
+	},
+	{
+		id: "gpt-5-mini-2025-08-07",
+		name: "GPT-5 Mini",
+		backend: "openai",
+		baseUrl: OPENAI_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 0.15, output: 0.6, cacheRead: 0, cacheWrite: 0 },
+		contextWindow: 128000,
+		maxTokens: 16384,
+	},
+	{
+		id: "gpt-5-codex",
+		name: "GPT-5 Codex",
+		backend: "openai",
+		baseUrl: OPENAI_PROXY_URL,
+		reasoning: true,
+		input: ["text", "image"],
+		cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
+		contextWindow: 128000,
+		maxTokens: 16384,
+	},
+];
+
+const MODEL_MAP = new Map(MODELS.map((m) => [m.id, m])); // id -> model config; streamGitLabDuo resolves model.id through this
 
 // =============================================================================
 // Direct Access Token Cache
@@ -94,13 +146,9 @@ async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAc
 		return cachedDirectAccess;
 	}
 
-	const url = `${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`;
-	const response = await fetch(url, {
+	const response = await fetch(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
 		method: "POST",
-		headers: {
-			Authorization: `Bearer ${gitlabAccessToken}`,
-			"Content-Type": "application/json",
-		},
+		headers: { Authorization: `Bearer ${gitlabAccessToken}`, "Content-Type": "application/json" },
 		body: JSON.stringify({ feature_flags: { DuoAgentPlatformNext: true } }),
 	});
 
@@ -115,11 +163,7 @@ async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAc
 	}
 
 	const data = (await response.json()) as { token: string; headers: Record<string, string> };
-	cachedDirectAccess = {
-		token: data.token,
-		headers: data.headers,
-		expiresAt: now + DIRECT_ACCESS_TTL,
-	};
+	cachedDirectAccess = { token: data.token, headers: data.headers, expiresAt: now + DIRECT_ACCESS_TTL };
 	return cachedDirectAccess;
 }
 
@@ -128,7 +172,7 @@ function invalidateDirectAccessToken() {
 }
 
 // =============================================================================
-// OAuth Implementation
+// OAuth
 // =============================================================================
 
 async function generatePKCE(): Promise<{ verifier: string; challenge: string }> {
@@ -138,21 +182,16 @@ async function generatePKCE(): Promise<{ verifier: string; challenge: string }>
 		.replace(/\+/g, "-")
 		.replace(/\//g, "_")
 		.replace(/=+$/, "");
-
-	const encoder = new TextEncoder();
-	const data = encoder.encode(verifier);
-	const hash = await crypto.subtle.digest("SHA-256", data);
+	const hash = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(verifier));
 	const challenge = btoa(String.fromCharCode(...new Uint8Array(hash)))
 		.replace(/\+/g, "-")
 		.replace(/\//g, "_")
 		.replace(/=+$/, "");
-
 	return { verifier, challenge };
 }
 
 async function loginGitLab(callbacks: OAuthLoginCallbacks): Promise<OAuthCredentials> {
 	const { verifier, challenge } = await generatePKCE();
-
 	const authParams = new URLSearchParams({
 		client_id: BUNDLED_CLIENT_ID,
 		redirect_uri: REDIRECT_URI,
@@ -165,9 +204,7 @@ async function loginGitLab(callbacks: OAuthLoginCallbacks): Promise<OAuthCredent
 
 	callbacks.onAuth({ url: `${GITLAB_COM_URL}/oauth/authorize?${authParams.toString()}` });
 	const callbackUrl = await callbacks.onPrompt({ message: "Paste the callback URL:" });
-
-	const urlObj = new URL(callbackUrl);
-	const code = urlObj.searchParams.get("code");
+	const code = new URL(callbackUrl).searchParams.get("code");
 	if (!code) throw new Error("No authorization code found in callback URL");
 
 	const tokenResponse = await fetch(`${GITLAB_COM_URL}/oauth/token`, {
@@ -183,14 +220,12 @@ async function loginGitLab(callbacks: OAuthLoginCallbacks): Promise<OAuthCredent
 	});
 
 	if (!tokenResponse.ok) throw new Error(`Token exchange failed: ${await tokenResponse.text()}`);
-
 	const data = (await tokenResponse.json()) as {
 		access_token: string;
 		refresh_token: string;
 		expires_in: number;
 		created_at: number;
 	};
-
 	invalidateDirectAccessToken();
 	return {
 		refresh: data.refresh_token,
@@ -209,16 +244,13 @@ async function refreshGitLabToken(credentials: OAuthCredentials): Promise<OAuthC
 			refresh_token: credentials.refresh,
 		}).toString(),
 	});
-
 	if (!response.ok) throw new Error(`Token refresh failed: ${await response.text()}`);
-
 	const data = (await response.json()) as {
 		access_token: string;
 		refresh_token: string;
 		expires_in: number;
 		created_at: number;
 	};
-
 	invalidateDirectAccessToken();
 	return {
 		refresh: data.refresh_token,
@@ -228,10 +260,10 @@ async function refreshGitLabToken(credentials: OAuthCredentials): Promise<OAuthC
 }
 
 // =============================================================================
-// Main Stream Function - Delegates to pi-ai's built-in implementations
+// Stream Function
 // =============================================================================
 
-function streamGitLabDuo(
+export function streamGitLabDuo(
 	model: Model<Api>,
 	context: Context,
 	options?: SimpleStreamOptions,
@@ -241,57 +273,22 @@ function streamGitLabDuo(
 	(async () => {
 		try {
 			const gitlabAccessToken = options?.apiKey;
-			if (!gitlabAccessToken) {
-				throw new Error("No GitLab access token. Run /login gitlab-duo or set GITLAB_TOKEN");
-			}
+			if (!gitlabAccessToken) throw new Error("No GitLab access token. Run /login gitlab-duo or set GITLAB_TOKEN");
 
-			const mapping = MODEL_MAPPINGS[model.id];
-			if (!mapping) throw new Error(`Unknown model: ${model.id}`);
+			const cfg = MODEL_MAP.get(model.id);
+			if (!cfg) throw new Error(`Unknown model: ${model.id}`);
 
-			// Get direct access token (cached)
 			const directAccess = await getDirectAccessToken(gitlabAccessToken);
+			const modelWithBaseUrl = { ...model, baseUrl: cfg.baseUrl };
+			const headers = { ...directAccess.headers, Authorization: `Bearer ${directAccess.token}` };
+			const streamOptions = { ...options, apiKey: "gitlab-duo", headers };
 
-			// Create a proxy model that uses the backend API
-			const proxyModel: Model<typeof mapping.api> = {
-				...model,
-				id: mapping.backendModel,
-				api: mapping.api,
-				baseUrl: mapping.baseUrl,
-			};
+			const innerStream =
+				cfg.backend === "anthropic"
+					? streamSimpleAnthropic(modelWithBaseUrl as Model<"anthropic-messages">, context, streamOptions)
+					: streamSimpleOpenAIResponses(modelWithBaseUrl as Model<"openai-responses">, context, streamOptions);
 
-			// Merge GitLab headers with Authorization bearer token
-			const headers = {
-				...directAccess.headers,
-				Authorization: `Bearer ${directAccess.token}`,
-			};
-
-			// Delegate to pi-ai's built-in streaming
-			const innerStream = streamSimple(proxyModel, context, {
-				...options,
-				apiKey: "gitlab-duo", // Dummy value to pass validation
-				headers,
-			});
-
-			// Forward all events
-			for await (const event of innerStream) {
-				// Patch the model info back to gitlab-duo
-				if ("partial" in event && event.partial) {
-					event.partial.api = model.api;
-					event.partial.provider = model.provider;
-					event.partial.model = model.id;
-				}
-				if ("message" in event && event.message) {
-					event.message.api = model.api;
-					event.message.provider = model.provider;
-					event.message.model = model.id;
-				}
-				if ("error" in event && event.error) {
-					event.error.api = model.api;
-					event.error.provider = model.provider;
-					event.error.model = model.id;
-				}
-				stream.push(event);
-			}
+			for await (const event of innerStream) stream.push(event);
 			stream.end();
 		} catch (error) {
 			stream.push({
@@ -332,73 +329,21 @@ export default function (pi: ExtensionAPI) {
 		baseUrl: AI_GATEWAY_URL,
 		apiKey: "GITLAB_TOKEN",
 		api: "gitlab-duo-api",
-
-		models: [
-			// Anthropic models
-			{
-				id: "duo-chat-opus-4-5",
-				name: "GitLab Duo Claude Opus 4.5",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 },
-				contextWindow: 200000,
-				maxTokens: 32000,
-			},
-			{
-				id: "duo-chat-sonnet-4-5",
-				name: "GitLab Duo Claude Sonnet 4.5",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
-				contextWindow: 200000,
-				maxTokens: 16384,
-			},
-			{
-				id: "duo-chat-haiku-4-5",
-				name: "GitLab Duo Claude Haiku 4.5",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 },
-				contextWindow: 200000,
-				maxTokens: 8192,
-			},
-			// OpenAI models
-			{
-				id: "duo-chat-gpt-5-1",
-				name: "GitLab Duo GPT-5.1",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
-				contextWindow: 128000,
-				maxTokens: 16384,
-			},
-			{
-				id: "duo-chat-gpt-5-mini",
-				name: "GitLab Duo GPT-5 Mini",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 0.15, output: 0.6, cacheRead: 0, cacheWrite: 0 },
-				contextWindow: 128000,
-				maxTokens: 16384,
-			},
-			{
-				id: "duo-chat-gpt-5-codex",
-				name: "GitLab Duo GPT-5 Codex",
-				reasoning: false,
-				input: ["text"],
-				cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
-				contextWindow: 128000,
-				maxTokens: 16384,
-			},
-		],
-
+		models: MODELS.map(({ id, name, reasoning, input, cost, contextWindow, maxTokens }) => ({
+			id,
+			name,
+			reasoning,
+			input,
+			cost,
+			contextWindow,
+			maxTokens,
+		})),
 		oauth: {
 			name: "GitLab Duo",
 			login: loginGitLab,
 			refreshToken: refreshGitLabToken,
 			getApiKey: (cred) => cred.access,
 		},
-
 		streamSimple: streamGitLabDuo,
 	});
 }
diff --git a/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/test.ts b/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/test.ts
index 8ed48545..ec1f60ba 100644
--- a/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/test.ts
+++ b/packages/coding-agent/examples/extensions/custom-provider-gitlab-duo/test.ts
@@ -1,379 +1,81 @@
 /**
- * Test script for GitLab Duo - full streaming flow
- * Run: npx tsx test.ts [model-id]
+ * Test script for GitLab Duo extension
+ * Run: npx tsx test.ts [model-id] [--thinking]
  *
  * Examples:
- *   npx tsx test.ts                      # Test default (duo-chat-sonnet-4-5)
- *   npx tsx test.ts duo-chat-gpt-5-codex # Test GPT-5 Codex (Responses API)
- *   npx tsx test.ts duo-chat-gpt-5-1     # Test GPT-5.1 (Chat Completions API)
+ *   npx tsx test.ts                              # Test default (claude-sonnet-4-5-20250929)
+ *   npx tsx test.ts gpt-5-codex                  # Test GPT-5 Codex
+ *   npx tsx test.ts claude-sonnet-4-5-20250929 --thinking
  */
 
-import {
-	type Api,
-	type AssistantMessageEventStream,
-	type Context,
-	createAssistantMessageEventStream,
-	type Model,
-	registerApiProvider,
-	type SimpleStreamOptions,
-	streamSimple,
-} from "@mariozechner/pi-ai";
+import { type Api, type Context, type Model, registerApiProvider, streamSimple } from "@mariozechner/pi-ai";
 import { readFileSync } from "fs";
 import { homedir } from "os";
 import { join } from "path";
+import { MODELS, streamGitLabDuo } from "./index.js";
 
-// =============================================================================
-// Constants (copied from index.ts)
-// =============================================================================
-
-const GITLAB_COM_URL = "https://gitlab.com";
-const AI_GATEWAY_URL = "https://cloud.gitlab.com";
-const ANTHROPIC_PROXY_URL = `${AI_GATEWAY_URL}/ai/v1/proxy/anthropic/`;
-const OPENAI_PROXY_URL = `${AI_GATEWAY_URL}/ai/v1/proxy/openai/v1`;
-const DIRECT_ACCESS_TTL = 25 * 60 * 1000;
-
-type OpenAIApi = "openai-completions" | "openai-responses";
-const MODEL_MAPPINGS: Record<string, { api: "anthropic-messages" | OpenAIApi; backendModel: string; baseUrl: string }> =
-	{
-		"duo-chat-opus-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-opus-4-5-20251101",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		"duo-chat-sonnet-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-sonnet-4-5-20250929",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		"duo-chat-haiku-4-5": {
-			api: "anthropic-messages",
-			backendModel: "claude-haiku-4-5-20251001",
-			baseUrl: ANTHROPIC_PROXY_URL,
-		},
-		// All GPT models use Responses API for consistent tool call ID format across model switches
-		"duo-chat-gpt-5-1": { api: "openai-responses", backendModel: "gpt-5.1-2025-11-13", baseUrl: OPENAI_PROXY_URL },
-		"duo-chat-gpt-5-mini": {
-			api: "openai-responses",
-			backendModel: "gpt-5-mini-2025-08-07",
-			baseUrl: OPENAI_PROXY_URL,
-		},
-		"duo-chat-gpt-5-codex": { api: "openai-responses", backendModel: "gpt-5-codex", baseUrl: OPENAI_PROXY_URL },
-	};
-
-// Model definitions for cost tracking
-const MODEL_DEFS: Record<
-	string,
-	{
-		name: string;
-		cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
-		contextWindow: number;
-		maxTokens: number;
-	}
-> = {
-	"duo-chat-opus-4-5": {
-		name: "GitLab Duo Claude Opus 4.5",
-		cost: { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 },
-		contextWindow: 200000,
-		maxTokens: 32000,
-	},
-	"duo-chat-sonnet-4-5": {
-		name: "GitLab Duo Claude Sonnet 4.5",
-		cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
-		contextWindow: 200000,
-		maxTokens: 16384,
-	},
-	"duo-chat-haiku-4-5": {
-		name: "GitLab Duo Claude Haiku 4.5",
-		cost: { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 },
-		contextWindow: 200000,
-		maxTokens: 8192,
-	},
-	"duo-chat-gpt-5-1": {
-		name: "GitLab Duo GPT-5.1",
-		cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
-		contextWindow: 128000,
-		maxTokens: 16384,
-	},
-	"duo-chat-gpt-5-mini": {
-		name: "GitLab Duo GPT-5 Mini",
-		cost: { input: 0.15, output: 0.6, cacheRead: 0, cacheWrite: 0 },
-		contextWindow: 128000,
-		maxTokens: 16384,
-	},
-	"duo-chat-gpt-5-codex": {
-		name: "GitLab Duo GPT-5 Codex",
-		cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
-		contextWindow: 128000,
-		maxTokens: 16384,
-	},
-};
-
-// =============================================================================
-// Direct Access Token
-// =============================================================================
-
-interface DirectAccessToken {
-	token: string;
-	headers: Record<string, string>;
-	expiresAt: number;
-}
-
-let cachedDirectAccess: DirectAccessToken | null = null;
-
-async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAccessToken> {
-	const now = Date.now();
-	if (cachedDirectAccess && cachedDirectAccess.expiresAt > now) {
-		return cachedDirectAccess;
-	}
-
-	const url = `${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`;
-	console.log("Fetching direct access token from:", url);
-
-	const response = await fetch(url, {
-		method: "POST",
-		headers: {
-			Authorization: `Bearer ${gitlabAccessToken}`,
-			"Content-Type": "application/json",
-		},
-		body: JSON.stringify({ feature_flags: { DuoAgentPlatformNext: true } }),
-	});
-
-	if (!response.ok) {
-		const errorText = await response.text();
-		throw new Error(`Failed to get direct access token: ${response.status} ${errorText}`);
-	}
-
-	const data = (await response.json()) as { token: string; headers: Record<string, string> };
-	console.log("Got direct access token");
-	cachedDirectAccess = {
-		token: data.token,
-		headers: data.headers,
-		expiresAt: now + DIRECT_ACCESS_TTL,
-	};
-	return cachedDirectAccess;
-}
-
-// =============================================================================
-// Stream Function (copied from index.ts)
-// =============================================================================
-
-function streamGitLabDuo(
-	model: Model<Api>,
-	context: Context,
-	options?: SimpleStreamOptions,
-): AssistantMessageEventStream {
-	const stream = createAssistantMessageEventStream();
-
-	(async () => {
-		try {
-			const gitlabAccessToken = options?.apiKey;
-			console.log("streamGitLabDuo called");
-			console.log("  model.id:", model.id);
-			console.log("  options.apiKey present:", !!gitlabAccessToken);
-
-			if (!gitlabAccessToken) {
-				throw new Error("No GitLab access token provided in options.apiKey");
-			}
-
-			const mapping = MODEL_MAPPINGS[model.id];
-			if (!mapping) throw new Error(`Unknown model: ${model.id}`);
-
-			// Get direct access token (cached)
-			const directAccess = await getDirectAccessToken(gitlabAccessToken);
-
-			// Create a proxy model that uses the backend API
-			const proxyModel: Model<typeof mapping.api> = {
-				...model,
-				id: mapping.backendModel,
-				api: mapping.api,
-				baseUrl: mapping.baseUrl,
-			};
-
-			// Merge GitLab headers with Authorization bearer token
-			const headers = {
-				...directAccess.headers,
-				Authorization: `Bearer ${directAccess.token}`,
-			};
-
-			console.log("Calling streamSimple with proxy model:");
-			console.log("  proxyModel.id:", proxyModel.id);
-			console.log("  proxyModel.api:", proxyModel.api);
-			console.log("  proxyModel.baseUrl:", proxyModel.baseUrl);
-
-			// Delegate to pi-ai's built-in streaming
-			const innerStream = streamSimple(proxyModel, context, {
-				...options,
-				apiKey: "gitlab-duo", // Dummy value to pass validation
-				headers,
-			});
-
-			// Forward all events
-			for await (const event of innerStream) {
-				// Patch the model info back to gitlab-duo
-				if ("partial" in event && event.partial) {
-					event.partial.api = model.api;
-					event.partial.provider = model.provider;
-					event.partial.model = model.id;
-				}
-				if ("message" in event && event.message) {
-					event.message.api = model.api;
-					event.message.provider = model.provider;
-					event.message.model = model.id;
-				}
-				if ("error" in event && event.error) {
-					event.error.api = model.api;
-					event.error.provider = model.provider;
-					event.error.model = model.id;
-				}
-				stream.push(event);
-			}
-			stream.end();
-		} catch (error) {
-			console.error("Stream error:", error);
-			stream.push({
-				type: "error",
-				reason: "error",
-				error: {
-					role: "assistant",
-					content: [],
-					api: model.api,
-					provider: model.provider,
-					model: model.id,
-					usage: {
-						input: 0,
-						output: 0,
-						cacheRead: 0,
-						cacheWrite: 0,
-						totalTokens: 0,
-						cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-					},
-					stopReason: "error",
-					errorMessage: error instanceof Error ? error.message : String(error),
-					timestamp: Date.now(),
-				},
-			});
-			stream.end();
-		}
-	})();
-
-	return stream;
-}
-
-// =============================================================================
-// Main Test
-// =============================================================================
-
-interface AuthData {
-	[provider: string]: {
-		type: "oauth" | "api_key";
-		refresh?: string;
-		access?: string;
-		expires?: number;
-		key?: string;
-	};
-}
+const MODEL_MAP = new Map(MODELS.map((m) => [m.id, m])); // id -> model config; used to validate the model id given on the command line
 
 async function main() {
-	const modelId = process.argv[2] || "duo-chat-sonnet-4-5";
+	const modelId = process.argv[2] || "claude-sonnet-4-5-20250929";
+	const useThinking = process.argv.includes("--thinking");
 
-	if (!MODEL_MAPPINGS[modelId]) {
+	const cfg = MODEL_MAP.get(modelId);
+	if (!cfg) {
 		console.error(`Unknown model: ${modelId}`);
-		console.error("Available models:", Object.keys(MODEL_MAPPINGS).join(", "));
+		console.error("Available:", MODELS.map((m) => m.id).join(", "));
 		process.exit(1);
 	}
 
-	// Read auth.json
+	// Read auth
 	const authPath = join(homedir(), ".pi", "agent", "auth.json");
-	console.log("Reading auth from:", authPath);
-
-	let authData: AuthData;
-	try {
-		authData = JSON.parse(readFileSync(authPath, "utf-8"));
-	} catch (e) {
-		console.error("Failed to read auth.json:", e);
-		process.exit(1);
-	}
-
+	const authData = JSON.parse(readFileSync(authPath, "utf-8"));
 	const gitlabCred = authData["gitlab-duo"];
-	if (!gitlabCred || gitlabCred.type !== "oauth" || !gitlabCred.access) {
-		console.error("No gitlab-duo OAuth credentials found. Run /login gitlab-duo first.");
+	if (!gitlabCred?.access) {
+		console.error("No gitlab-duo credentials. Run /login gitlab-duo first.");
 		process.exit(1);
 	}
 
-	console.log("Found gitlab-duo OAuth credentials");
-	const gitlabAccessToken = gitlabCred.access;
-
-	// Register our custom API provider
-	console.log("\nRegistering gitlab-duo-api provider...");
+	// Register provider
 	registerApiProvider({
 		api: "gitlab-duo-api" as Api,
-		stream: (model, context, options) => streamGitLabDuo(model, context, options as SimpleStreamOptions),
+		stream: streamGitLabDuo,
 		streamSimple: streamGitLabDuo,
 	});
 
-	// Create a test model
-	const modelDef = MODEL_DEFS[modelId];
-	const mapping = MODEL_MAPPINGS[modelId];
-	// Enable reasoning for Anthropic models
-	const supportsReasoning = mapping.api === "anthropic-messages";
-	const testModel: Model<Api> = {
-		id: modelId,
-		name: modelDef.name,
+	// Create model
+	const model: Model<Api> = {
+		id: cfg.id,
+		name: cfg.name,
 		api: "gitlab-duo-api" as Api,
 		provider: "gitlab-duo",
-		baseUrl: AI_GATEWAY_URL,
-		reasoning: supportsReasoning,
-		input: ["text"],
-		cost: modelDef.cost,
-		contextWindow: modelDef.contextWindow,
-		maxTokens: modelDef.maxTokens,
+		baseUrl: cfg.baseUrl,
+		reasoning: cfg.reasoning,
+		input: cfg.input,
+		cost: cfg.cost,
+		contextWindow: cfg.contextWindow,
+		maxTokens: cfg.maxTokens,
 	};
 
-	// Create test context
 	const context: Context = {
 		messages: [{ role: "user", content: "Say hello in exactly 3 words.", timestamp: Date.now() }],
 	};
 
-	// Check for --thinking flag
-	const useThinking = process.argv.includes("--thinking");
+	console.log(`Model: ${model.id}, Backend: ${cfg.backend}, Thinking: ${useThinking}`);
 
-	console.log("\nStarting stream test...");
-	console.log("Model:", testModel.id);
-	console.log("Backend:", MODEL_MAPPINGS[modelId].backendModel);
-	console.log("API:", MODEL_MAPPINGS[modelId].api);
-	console.log(
-		"Reasoning:",
-		supportsReasoning ? (useThinking ? "enabled" : "supported but not enabled") : "not supported",
-	);
-	console.log("Prompt:", context.messages[0].content);
-	console.log("");
-
-	// Call streamSimple
-	const stream = streamSimple(testModel, context, {
-		apiKey: gitlabAccessToken,
+	const stream = streamSimple(model, context, {
+		apiKey: gitlabCred.access,
 		maxTokens: 100,
-		reasoning: useThinking && supportsReasoning ? "low" : undefined,
+		reasoning: useThinking ? "low" : undefined,
 	});
 
-	// Consume the stream
-	let inThinking = false;
 	for await (const event of stream) {
-		if (event.type === "thinking_start") {
-			inThinking = true;
-			console.log("[Thinking]");
-		} else if (event.type === "thinking_delta") {
-			process.stdout.write(event.delta);
-		} else if (event.type === "thinking_end") {
-			inThinking = false;
-			console.log("\n[/Thinking]\n");
-		} else if (event.type === "text_delta") {
-			process.stdout.write(event.delta);
-		} else if (event.type === "error") {
-			console.error("\nError:", event.error.errorMessage);
-		} else if (event.type === "done") {
-			console.log("\n\nDone! Stop reason:", event.reason);
-			console.log("Usage:", event.message.usage);
-		}
+		if (event.type === "thinking_start") console.log("[Thinking]");
+		else if (event.type === "thinking_delta") process.stdout.write(event.delta);
+		else if (event.type === "thinking_end") console.log("\n[/Thinking]\n");
+		else if (event.type === "text_delta") process.stdout.write(event.delta);
+		else if (event.type === "error") console.error("\nError:", event.error.errorMessage);
+		else if (event.type === "done") console.log("\n\nDone!", event.reason, event.message.usage);
 	}
 }