Add Vertex AI provider with ADC support

- Implement google-vertex provider in packages/ai
- Support ADC (Application Default Credentials) via @google/generative-ai
- Add Gemini model catalog for Vertex AI
- Update packages/coding-agent to handle google-vertex provider
This commit is contained in:
Anton Kuzmenko 2025-12-23 23:03:19 -08:00 committed by Mario Zechner
parent d747ec6e23
commit 214e7dae15
11 changed files with 788 additions and 4 deletions

View file

@@ -2,6 +2,7 @@ export * from "./models.js";
export * from "./providers/anthropic.js";
export * from "./providers/google.js";
export * from "./providers/google-gemini-cli.js";
export * from "./providers/google-vertex.js";
export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./stream.js";

View file

@@ -7102,4 +7102,193 @@ export const MODELS = {
maxTokens: 131072,
} satisfies Model<"openai-completions">,
},
"google-vertex": {
"gemini-3-pro-preview": {
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 12,
cacheRead: 0.2,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"google-vertex">,
"gemini-3-flash-preview": {
id: "gemini-3-flash-preview",
name: "Gemini 3 Flash Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.5,
output: 3,
cacheRead: 0.05,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-2.0-flash": {
id: "gemini-2.0-flash",
name: "Gemini 2.0 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 8192,
} satisfies Model<"google-vertex">,
"gemini-2.0-flash-lite": {
id: "gemini-2.0-flash-lite",
name: "Gemini 2.0 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-2.5-pro": {
id: "gemini-2.5-pro",
name: "Gemini 2.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.31,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-2.5-flash": {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.3,
output: 2.5,
cacheRead: 0.075,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-2.5-flash-lite-preview-09-2025": {
id: "gemini-2.5-flash-lite-preview-09-2025",
name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-2.5-flash-lite": {
id: "gemini-2.5-flash-lite",
name: "Gemini 2.5 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-vertex">,
"gemini-1.5-pro": {
id: "gemini-1.5-pro",
name: "Gemini 1.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 5,
cacheRead: 0.3125,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 8192,
} satisfies Model<"google-vertex">,
"gemini-1.5-flash": {
id: "gemini-1.5-flash",
name: "Gemini 1.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.075,
output: 0.3,
cacheRead: 0.01875,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 8192,
} satisfies Model<"google-vertex">,
"gemini-1.5-flash-8b": {
id: "gemini-1.5-flash-8b",
name: "Gemini 1.5 Flash-8B (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: "https://{location}-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.0375,
output: 0.15,
cacheRead: 0.01,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 8192,
} satisfies Model<"google-vertex">,
},
} as const;

View file

@@ -7,7 +7,7 @@ import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transorm-messages.js";
type GoogleApiType = "google-generative-ai" | "google-gemini-cli";
type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
/**
* Convert internal messages to Gemini Content[] format.
@@ -73,6 +73,9 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
args: block.arguments,
},
};
if (model.provider === "google-vertex" && part?.functionCall?.id) {
delete part.functionCall.id; // Vertex AI does not support 'id' in functionCall
}
if (block.thoughtSignature) {
part.thoughtSignature = block.thoughtSignature;
}
@@ -121,6 +124,10 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
},
};
if (model.provider === "google-vertex" && functionResponsePart.functionResponse?.id) {
delete functionResponsePart.functionResponse.id; // Vertex AI does not support 'id' in functionResponse
}
// Cloud Code Assist API requires all function responses to be in a single user turn.
// Check if the last content is already a user turn with function responses and merge.
const lastContent = contents[contents.length - 1];

View file

@@ -0,0 +1,346 @@
import {
type GenerateContentConfig,
type GenerateContentParameters,
GoogleGenAI,
type ThinkingConfig,
type ThinkingLevel,
} from "@google/genai";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
Model,
StreamFunction,
StreamOptions,
TextContent,
ThinkingContent,
ToolCall,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { convertMessages, convertTools, mapStopReason, mapToolChoice } from "./google-shared.js";
/**
 * Streaming options for the Google Vertex AI provider.
 *
 * Extends the shared StreamOptions with Vertex-specific settings: tool-choice
 * mode, thinking (reasoning) configuration, and the GCP project/location used
 * to address the regional Vertex endpoint.
 */
export interface GoogleVertexOptions extends StreamOptions {
	// How the model may use tools: "auto" (model decides), "none", or "any" (forced).
	toolChoice?: "auto" | "none" | "any";
	// Reasoning ("thinking") configuration; only applied when the model's
	// catalog entry has reasoning enabled (see buildParams).
	thinking?: {
		enabled: boolean;
		budgetTokens?: number; // -1 for dynamic, 0 to disable
		level?: ThinkingLevel; // takes precedence over budgetTokens when set
	};
	// GCP project ID; falls back to GOOGLE_CLOUD_PROJECT / GCLOUD_PROJECT env vars.
	project?: string;
	// GCP region (e.g. "us-central1"); falls back to GOOGLE_CLOUD_LOCATION env var.
	location?: string;
}
// Vertex AI REST API version used for every request issued by this provider.
const API_VERSION = "v1";
// Counter for generating unique tool call IDs when the API omits an id or
// returns one that duplicates an earlier call in the same response.
let toolCallCounter = 0;
/**
 * Stream a completion from Google Vertex AI via the @google/genai SDK.
 *
 * Authentication is delegated to the SDK (Application Default Credentials);
 * this function resolves the GCP project/location, converts the internal
 * Context into Vertex request parameters, and translates the SDK's chunk
 * stream into AssistantMessageEvents: start, text/thinking deltas, tool
 * calls, then a final "done" (or "error") carrying the accumulated message.
 */
export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
	model: Model<"google-vertex">,
	context: Context,
	options?: GoogleVertexOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();
	(async () => {
		// Accumulates the full assistant message while streaming; attached as
		// `partial` to every event and emitted whole on "done"/"error".
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "google-vertex" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};
		try {
			// Project/location throw early when unset, before any request is made.
			const project = resolveProject(options);
			const location = resolveLocation(options);
			const client = createClient(model, project, location);
			const params = buildParams(model, context, options);
			const googleStream = await client.models.generateContentStream(params);
			stream.push({ type: "start", partial: output });
			// The currently open text or thinking block, if any. It is closed
			// (with a *_end event) when the part type changes or a function
			// call arrives.
			let currentBlock: TextContent | ThinkingContent | null = null;
			const blocks = output.content;
			const blockIndex = () => blocks.length - 1;
			for await (const chunk of googleStream) {
				// Only the first candidate is consumed.
				const candidate = chunk.candidates?.[0];
				if (candidate?.content?.parts) {
					for (const part of candidate.content.parts) {
						if (part.text !== undefined) {
							// Vertex marks reasoning output with `thought: true`.
							const isThinking = part.thought === true;
							if (
								!currentBlock ||
								(isThinking && currentBlock.type !== "thinking") ||
								(!isThinking && currentBlock.type !== "text")
							) {
								// Part kind changed: close the previous block first.
								if (currentBlock) {
									if (currentBlock.type === "text") {
										stream.push({
											type: "text_end",
											contentIndex: blocks.length - 1,
											content: currentBlock.text,
											partial: output,
										});
									} else {
										stream.push({
											type: "thinking_end",
											contentIndex: blockIndex(),
											content: currentBlock.thinking,
											partial: output,
										});
									}
								}
								// Open a new block of the appropriate kind.
								if (isThinking) {
									currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
									output.content.push(currentBlock);
									stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
								} else {
									currentBlock = { type: "text", text: "" };
									output.content.push(currentBlock);
									stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
								}
							}
							if (currentBlock.type === "thinking") {
								currentBlock.thinking += part.text;
								// NOTE(review): overwritten on every delta — assumes the
								// signature arrives on (at least) the last thinking part.
								currentBlock.thinkingSignature = part.thoughtSignature;
								stream.push({
									type: "thinking_delta",
									contentIndex: blockIndex(),
									delta: part.text,
									partial: output,
								});
							} else {
								currentBlock.text += part.text;
								stream.push({
									type: "text_delta",
									contentIndex: blockIndex(),
									delta: part.text,
									partial: output,
								});
							}
						}
						if (part.functionCall) {
							// A function call terminates any open text/thinking block.
							if (currentBlock) {
								if (currentBlock.type === "text") {
									stream.push({
										type: "text_end",
										contentIndex: blockIndex(),
										content: currentBlock.text,
										partial: output,
									});
								} else {
									stream.push({
										type: "thinking_end",
										contentIndex: blockIndex(),
										content: currentBlock.thinking,
										partial: output,
									});
								}
								currentBlock = null;
							}
							// Vertex may omit the id or repeat one within a response;
							// synthesize a unique id in either case so tool results
							// can be correlated unambiguously.
							const providedId = part.functionCall.id;
							const needsNewId =
								!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
							const toolCallId = needsNewId
								? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
								: providedId;
							const toolCall: ToolCall = {
								type: "toolCall",
								id: toolCallId,
								name: part.functionCall.name || "",
								arguments: part.functionCall.args as Record<string, any>,
								...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
							};
							// Tool call arguments arrive complete, so start/delta/end
							// are emitted back-to-back for the same content index.
							output.content.push(toolCall);
							stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
							stream.push({
								type: "toolcall_delta",
								contentIndex: blockIndex(),
								delta: JSON.stringify(toolCall.arguments),
								partial: output,
							});
							stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
						}
					}
				}
				if (candidate?.finishReason) {
					output.stopReason = mapStopReason(candidate.finishReason);
					// Any tool call in the output forces stopReason to "toolUse",
					// regardless of what the API reported.
					if (output.content.some((b) => b.type === "toolCall")) {
						output.stopReason = "toolUse";
					}
				}
				if (chunk.usageMetadata) {
					// Thought tokens are billed as output, so they are folded into
					// the output count. Vertex reports no cache-write tokens here.
					output.usage = {
						input: chunk.usageMetadata.promptTokenCount || 0,
						output:
							(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
						cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
						cacheWrite: 0,
						totalTokens: chunk.usageMetadata.totalTokenCount || 0,
						cost: {
							input: 0,
							output: 0,
							cacheRead: 0,
							cacheWrite: 0,
							total: 0,
						},
					};
					// Fills in usage.cost from the model's per-token pricing.
					calculateCost(model, output.usage);
				}
			}
			// Flush the final open block once the stream is exhausted.
			if (currentBlock) {
				if (currentBlock.type === "text") {
					stream.push({
						type: "text_end",
						contentIndex: blockIndex(),
						content: currentBlock.text,
						partial: output,
					});
				} else {
					stream.push({
						type: "thinking_end",
						contentIndex: blockIndex(),
						content: currentBlock.thinking,
						partial: output,
					});
				}
			}
			if (options?.signal?.aborted) {
				throw new Error("Request was aborted");
			}
			if (output.stopReason === "aborted" || output.stopReason === "error") {
				throw new Error("An unknown error occurred");
			}
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Remove internal index property used during streaming
			// NOTE(review): nothing in this function sets an `index` property on
			// content blocks; this cleanup looks like defensive code carried over
			// from another provider — confirm whether it is needed here.
			for (const block of output.content) {
				if ("index" in block) {
					delete (block as { index?: number }).index;
				}
			}
			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();
	return stream;
};
/**
 * Build a GoogleGenAI client bound to a Vertex AI project and region.
 *
 * Authentication is left entirely to the SDK (Application Default
 * Credentials). Custom headers from the model definition, when present,
 * are forwarded via httpOptions; otherwise httpOptions is omitted.
 */
function createClient(model: Model<"google-vertex">, project: string, location: string): GoogleGenAI {
	const httpOptions = model.headers ? { headers: { ...model.headers } } : undefined;
	return new GoogleGenAI({
		vertexai: true,
		project,
		location,
		apiVersion: API_VERSION,
		httpOptions,
	});
}
/**
 * Determine the GCP project ID for Vertex AI requests.
 *
 * Precedence: explicit option, then GOOGLE_CLOUD_PROJECT, then
 * GCLOUD_PROJECT (empty strings are treated as unset).
 *
 * @throws Error when no project ID can be resolved.
 */
function resolveProject(options?: GoogleVertexOptions): string {
	const candidates = [options?.project, process.env.GOOGLE_CLOUD_PROJECT, process.env.GCLOUD_PROJECT];
	const project = candidates.find((value) => !!value);
	if (project) {
		return project;
	}
	throw new Error(
		"Vertex AI requires a project ID. Set GOOGLE_CLOUD_PROJECT/GCLOUD_PROJECT or pass project in options.",
	);
}
/**
 * Determine the GCP region for Vertex AI requests.
 *
 * An explicit option wins; otherwise GOOGLE_CLOUD_LOCATION is used.
 * Empty strings are treated as unset.
 *
 * @throws Error when no location can be resolved.
 */
function resolveLocation(options?: GoogleVertexOptions): string {
	const fromOptions = options?.location;
	if (fromOptions) {
		return fromOptions;
	}
	const fromEnv = process.env.GOOGLE_CLOUD_LOCATION;
	if (fromEnv) {
		return fromEnv;
	}
	throw new Error("Vertex AI requires a location. Set GOOGLE_CLOUD_LOCATION or pass location in options.");
}
/**
 * Translate the internal model/context/options into Vertex AI
 * GenerateContentParameters.
 *
 * Converts messages and tools via the shared Google helpers, applies
 * temperature/maxTokens only when explicitly set, configures tool choice
 * and thinking, and wires up the abort signal.
 *
 * @throws Error when the provided signal is already aborted.
 */
function buildParams(
	model: Model<"google-vertex">,
	context: Context,
	options: GoogleVertexOptions = {},
): GenerateContentParameters {
	const contents = convertMessages(model, context);
	const config: GenerateContentConfig = {};
	// Generation knobs are only present on the config when explicitly requested.
	if (options.temperature !== undefined) {
		config.temperature = options.temperature;
	}
	if (options.maxTokens !== undefined) {
		config.maxOutputTokens = options.maxTokens;
	}
	if (context.systemPrompt) {
		config.systemInstruction = sanitizeSurrogates(context.systemPrompt);
	}
	const tools = context.tools;
	const hasTools = !!tools && tools.length > 0;
	if (hasTools) {
		config.tools = convertTools(tools);
	}
	// toolConfig is always assigned (explicitly undefined when unused).
	config.toolConfig =
		hasTools && options.toolChoice
			? { functionCallingConfig: { mode: mapToolChoice(options.toolChoice) } }
			: undefined;
	// Thinking is only configured when requested AND the model supports it.
	if (options.thinking?.enabled && model.reasoning) {
		const thinkingConfig: ThinkingConfig = { includeThoughts: true };
		// An explicit level takes precedence over a token budget
		// (-1 = dynamic budget, 0 = disabled).
		if (options.thinking.level !== undefined) {
			thinkingConfig.thinkingLevel = options.thinking.level;
		} else if (options.thinking.budgetTokens !== undefined) {
			thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
		}
		config.thinkingConfig = thinkingConfig;
	}
	if (options.signal) {
		// Fail fast rather than issuing a request that was already cancelled.
		if (options.signal.aborted) {
			throw new Error("Request aborted");
		}
		config.abortSignal = options.signal;
	}
	return {
		model: model.id,
		contents,
		config,
	};
}

View file

@@ -6,6 +6,7 @@ import {
type GoogleThinkingLevel,
streamGoogleGeminiCli,
} from "./providers/google-gemini-cli.js";
import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex.js";
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions.js";
import { type OpenAIResponsesOptions, streamOpenAIResponses } from "./providers/openai-responses.js";
import type {
@@ -38,6 +39,14 @@ export function getEnvApiKey(provider: any): string | undefined {
return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
}
// Vertex AI doesn't use API keys.
// It relies on Google Cloud auth: `gcloud auth application-default login`.
// @google/genai library picks up and manages the auth automatically.
// Return a dummy value to maintain consistency.
if (provider === "google-vertex") {
return "vertex-ai-authenticated";
}
const envMap: Record<string, string> = {
openai: "OPENAI_API_KEY",
google: "GEMINI_API_KEY",
@@ -85,6 +94,9 @@ export function stream<TApi extends Api>(
providerOptions as GoogleGeminiCliOptions,
);
case "google-vertex":
return streamGoogleVertex(model as Model<"google-vertex">, context, providerOptions as GoogleVertexOptions);
default: {
// This should never be reached if all Api cases are handled
const _exhaustive: never = api;
@@ -239,6 +251,44 @@ function mapOptionsForApi<TApi extends Api>(
} satisfies GoogleGeminiCliOptions;
}
case "google-vertex": {
// Explicitly disable thinking when reasoning is not specified
if (!options?.reasoning) {
return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
}
const vertexModel = model as Model<"google-vertex">;
const effort = clampReasoning(options.reasoning)!;
if (isGemini3ProModel(vertexModel as unknown as Model<"google-generative-ai">)) {
return {
...base,
thinking: {
enabled: true,
level: getGemini3ThinkingLevel(effort, vertexModel as unknown as Model<"google-generative-ai">),
},
} satisfies GoogleVertexOptions;
}
if (isGemini3FlashModel(vertexModel as unknown as Model<"google-generative-ai">)) {
return {
...base,
thinking: {
enabled: true,
level: getGemini3ThinkingLevel(effort, vertexModel as unknown as Model<"google-generative-ai">),
},
} satisfies GoogleVertexOptions;
}
return {
...base,
thinking: {
enabled: true,
budgetTokens: getGoogleBudget(vertexModel as unknown as Model<"google-generative-ai">, effort),
},
} satisfies GoogleVertexOptions;
}
default: {
// Exhaustiveness check
const _exhaustive: never = model.api;

View file

@@ -1,6 +1,7 @@
import type { AnthropicOptions } from "./providers/anthropic.js";
import type { GoogleOptions } from "./providers/google.js";
import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js";
import type { GoogleVertexOptions } from "./providers/google-vertex.js";
import type { OpenAICompletionsOptions } from "./providers/openai-completions.js";
import type { OpenAIResponsesOptions } from "./providers/openai-responses.js";
import type { AssistantMessageEventStream } from "./utils/event-stream.js";
@@ -12,7 +13,8 @@ export type Api =
| "openai-responses"
| "anthropic-messages"
| "google-generative-ai"
| "google-gemini-cli";
| "google-gemini-cli"
| "google-vertex";
export interface ApiOptionsMap {
"anthropic-messages": AnthropicOptions;
@@ -20,6 +22,7 @@ export interface ApiOptionsMap {
"openai-responses": OpenAIResponsesOptions;
"google-generative-ai": GoogleOptions;
"google-gemini-cli": GoogleGeminiCliOptions;
"google-vertex": GoogleVertexOptions;
}
// Compile-time exhaustiveness check - this will fail if ApiOptionsMap doesn't have all KnownApi keys
@ -38,6 +41,7 @@ export type KnownProvider =
| "google"
| "google-gemini-cli"
| "google-antigravity"
| "google-vertex"
| "openai"
| "github-copilot"
| "xai"