Limit default max output tokens to 32k (applies only when the caller does not set maxTokens)

This commit is contained in:
Mario Zechner 2025-10-30 15:47:36 +01:00
parent 9e50bb2c37
commit cac353b3fe
2 changed files with 100 additions and 83 deletions

View file

@ -1991,6 +1991,23 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"mistralai/voxtral-small-24b-2507": {
id: "mistralai/voxtral-small-24b-2507",
name: "Mistral: Voxtral Small 24B 2507",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.3,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"inclusionai/ring-1t": {
id: "inclusionai/ring-1t",
name: "inclusionAI: Ring 1T",
@ -4915,23 +4932,6 @@ export const MODELS = {
contextWindow: 16384,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
@ -4949,6 +4949,23 @@ export const MODELS = {
contextWindow: 32768,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
name: "Mistral: Mistral Nemo",
@ -4966,9 +4983,9 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -4983,9 +5000,9 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -5119,23 +5136,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o": {
id: "openai/gpt-4o",
name: "OpenAI: GPT-4o",
@ -5170,22 +5170,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.3,
output: 0.39999999999999997,
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-8b-instruct": {
id: "meta-llama/llama-3-8b-instruct",
@ -5204,6 +5204,23 @@ export const MODELS = {
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.3,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mixtral-8x22b-instruct": {
id: "mistralai/mixtral-8x22b-instruct",
name: "Mistral: Mixtral 8x22B Instruct",
@ -5289,23 +5306,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 10,
output: 30,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
@ -5323,6 +5323,23 @@ export const MODELS = {
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 10,
output: 30,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
@ -5425,23 +5442,6 @@ export const MODELS = {
contextWindow: 16385,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-0314": {
id: "openai/gpt-4-0314",
name: "OpenAI: GPT-4 (older v0314)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo": {
id: "openai/gpt-3.5-turbo",
name: "OpenAI: GPT-3.5 Turbo",
@ -5476,5 +5476,22 @@ export const MODELS = {
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-0314": {
id: "openai/gpt-4-0314",
name: "OpenAI: GPT-4 (older v0314)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
},
} as const;

View file

@ -117,7 +117,7 @@ function mapOptionsForApi<TApi extends Api>(
): OptionsForApi<TApi> {
const base = {
temperature: options?.temperature,
maxTokens: options?.maxTokens || model.maxTokens,
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
signal: options?.signal,
apiKey: apiKey || options?.apiKey,
};