Added automatic image resizing

This commit is contained in:
Armin Ronacher 2026-01-02 11:55:44 +01:00
parent fd35d9188c
commit 4a32af2532
10 changed files with 247 additions and 29 deletions

View file

@ -286,6 +286,8 @@ You: What's in this screenshot? /path/to/image.png
Supported formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp` Supported formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`
**Auto-resize:** Images whose width or height exceeds 2000 pixels are automatically scaled down, preserving aspect ratio, to fit within 2000x2000 for better compatibility with Anthropic models. When an image is resized, its original dimensions are noted in the context so the model can map coordinates back if needed. Disable via `images.autoResize: false` in settings.
**Inline rendering:** On terminals that support the Kitty graphics protocol (Kitty, Ghostty, WezTerm) or iTerm2 inline images, images in tool output are rendered inline. On unsupported terminals, a text placeholder is shown instead. **Inline rendering:** On terminals that support the Kitty graphics protocol (Kitty, Ghostty, WezTerm) or iTerm2 inline images, images in tool output are rendered inline. On unsupported terminals, a text placeholder is shown instead.
Toggle inline images via `/settings` or set `terminal.showImages: false` in settings. Toggle inline images via `/settings` or set `terminal.showImages: false` in settings.
@ -519,6 +521,9 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
"terminal": { "terminal": {
"showImages": true "showImages": true
}, },
"images": {
"autoResize": true
},
"hooks": ["/path/to/hook.ts"], "hooks": ["/path/to/hook.ts"],
"customTools": ["/path/to/tool.ts"] "customTools": ["/path/to/tool.ts"]
} }
@ -543,6 +548,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
| `retry.maxRetries` | Maximum retry attempts | `3` | | `retry.maxRetries` | Maximum retry attempts | `3` |
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` | | `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
| `terminal.showImages` | Render images inline (supported terminals) | `true` | | `terminal.showImages` | Render images inline (supported terminals) | `true` |
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
| `hooks` | Additional hook file paths | `[]` | | `hooks` | Additional hook file paths | `[]` |
| `customTools` | Additional custom tool file paths | `[]` | | `customTools` | Additional custom tool file paths | `[]` |

View file

@ -47,7 +47,8 @@
"file-type": "^21.1.1", "file-type": "^21.1.1",
"glob": "^11.0.3", "glob": "^11.0.3",
"jiti": "^2.6.1", "jiti": "^2.6.1",
"marked": "^15.0.12" "marked": "^15.0.12",
"sharp": "^0.34.2"
}, },
"devDependencies": { "devDependencies": {
"@types/diff": "^7.0.2", "@types/diff": "^7.0.2",

View file

@ -7,6 +7,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
import chalk from "chalk"; import chalk from "chalk";
import { resolve } from "path"; import { resolve } from "path";
import { resolveReadPath } from "../core/tools/path-utils.js"; import { resolveReadPath } from "../core/tools/path-utils.js";
import { formatDimensionNote, resizeImage } from "../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js"; import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
export interface ProcessedFiles { export interface ProcessedFiles {
@ -14,8 +15,14 @@ export interface ProcessedFiles {
images: ImageContent[]; images: ImageContent[];
} }
/** Optional behavior switches for processFileArguments. */
export interface ProcessFileOptions {
/**
 * Whether image attachments are passed through resizeImage (2000x2000 max)
 * before being attached. Treated as true when omitted.
 */
autoResizeImages?: boolean;
}
/** Process @file arguments into text content and image attachments */ /** Process @file arguments into text content and image attachments */
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> { export async function processFileArguments(fileArgs: string[], options?: ProcessFileOptions): Promise<ProcessedFiles> {
const autoResizeImages = options?.autoResizeImages ?? true;
let text = ""; let text = "";
const images: ImageContent[] = []; const images: ImageContent[] = [];
@ -45,16 +52,33 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
const content = await readFile(absolutePath); const content = await readFile(absolutePath);
const base64Content = content.toString("base64"); const base64Content = content.toString("base64");
const attachment: ImageContent = { let attachment: ImageContent;
type: "image", let dimensionNote: string | undefined;
mimeType,
data: base64Content, if (autoResizeImages) {
}; const resized = await resizeImage({ type: "image", data: base64Content, mimeType });
dimensionNote = formatDimensionNote(resized);
attachment = {
type: "image",
mimeType: resized.mimeType,
data: resized.data,
};
} else {
attachment = {
type: "image",
mimeType,
data: base64Content,
};
}
images.push(attachment); images.push(attachment);
// Add text reference to image // Add text reference to image with optional dimension note
text += `<file name="${absolutePath}"></file>\n`; if (dimensionNote) {
text += `<file name="${absolutePath}">${dimensionNote}</file>\n`;
} else {
text += `<file name="${absolutePath}"></file>\n`;
}
} else { } else {
// Handle text file // Handle text file
try { try {

View file

@ -522,7 +522,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir); const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir);
time("discoverContextFiles"); time("discoverContextFiles");
const builtInTools = options.tools ?? createCodingTools(cwd); const autoResizeImages = settingsManager.getImageAutoResize();
const builtInTools = options.tools ?? createCodingTools(cwd, { read: { autoResizeImages } });
time("createCodingTools"); time("createCodingTools");
let customToolsResult: CustomToolsLoadResult; let customToolsResult: CustomToolsLoadResult;

View file

@ -34,6 +34,10 @@ export interface TerminalSettings {
showImages?: boolean; // default: true (only relevant if terminal supports images) showImages?: boolean; // default: true (only relevant if terminal supports images)
} }
/** Image handling preferences, stored under the `images` key of Settings. */
export interface ImageSettings {
// Read through SettingsManager.getImageAutoResize(), which falls back to true when unset.
autoResize?: boolean; // default: true (resize images to 2000x2000 max for better model compatibility)
}
export interface Settings { export interface Settings {
lastChangelogVersion?: string; lastChangelogVersion?: string;
defaultProvider?: string; defaultProvider?: string;
@ -51,6 +55,7 @@ export interface Settings {
customTools?: string[]; // Array of custom tool file paths customTools?: string[]; // Array of custom tool file paths
skills?: SkillsSettings; skills?: SkillsSettings;
terminal?: TerminalSettings; terminal?: TerminalSettings;
images?: ImageSettings;
enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag) enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
} }
@ -368,6 +373,18 @@ export class SettingsManager {
this.save(); this.save();
} }
/** Returns the `images.autoResize` setting, treating a missing value as enabled. */
getImageAutoResize(): boolean {
	const configured = this.settings.images?.autoResize;
	return configured ?? true;
}
/** Stores `images.autoResize` in the global settings (creating the `images` section if absent) and saves. */
setImageAutoResize(enabled: boolean): void {
	let imageSettings = this.globalSettings.images;
	if (!imageSettings) {
		imageSettings = {};
		this.globalSettings.images = imageSettings;
	}
	imageSettings.autoResize = enabled;
	this.save();
}
getEnabledModels(): string[] | undefined { getEnabledModels(): string[] | undefined {
return this.settings.enabledModels; return this.settings.enabledModels;
} }

View file

@ -3,7 +3,7 @@ export { createEditTool, editTool } from "./edit.js";
export { createFindTool, type FindToolDetails, findTool } from "./find.js"; export { createFindTool, type FindToolDetails, findTool } from "./find.js";
export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js"; export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js";
export { createLsTool, type LsToolDetails, lsTool } from "./ls.js"; export { createLsTool, type LsToolDetails, lsTool } from "./ls.js";
export { createReadTool, type ReadToolDetails, readTool } from "./read.js"; export { createReadTool, type ReadToolDetails, type ReadToolOptions, readTool } from "./read.js";
export type { TruncationResult } from "./truncate.js"; export type { TruncationResult } from "./truncate.js";
export { createWriteTool, writeTool } from "./write.js"; export { createWriteTool, writeTool } from "./write.js";
@ -13,7 +13,7 @@ import { createEditTool, editTool } from "./edit.js";
import { createFindTool, findTool } from "./find.js"; import { createFindTool, findTool } from "./find.js";
import { createGrepTool, grepTool } from "./grep.js"; import { createGrepTool, grepTool } from "./grep.js";
import { createLsTool, lsTool } from "./ls.js"; import { createLsTool, lsTool } from "./ls.js";
import { createReadTool, readTool } from "./read.js"; import { createReadTool, type ReadToolOptions, readTool } from "./read.js";
import { createWriteTool, writeTool } from "./write.js"; import { createWriteTool, writeTool } from "./write.js";
/** Tool type (AgentTool from pi-ai) */ /** Tool type (AgentTool from pi-ai) */
@ -38,26 +38,31 @@ export const allTools = {
export type ToolName = keyof typeof allTools; export type ToolName = keyof typeof allTools;
/**
 * Per-tool configuration accepted by createCodingTools, createReadOnlyTools
 * and createAllTools. Only the read tool is configurable here.
 */
export interface ToolsOptions {
/** Options for the read tool; forwarded unchanged to createReadTool. */
read?: ReadToolOptions;
}
/** /**
* Create coding tools configured for a specific working directory. * Create coding tools configured for a specific working directory.
*/ */
export function createCodingTools(cwd: string): Tool[] { export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)]; return [createReadTool(cwd, options?.read), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
} }
/** /**
* Create read-only tools configured for a specific working directory. * Create read-only tools configured for a specific working directory.
*/ */
export function createReadOnlyTools(cwd: string): Tool[] { export function createReadOnlyTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)]; return [createReadTool(cwd, options?.read), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
} }
/** /**
* Create all tools configured for a specific working directory. * Create all tools configured for a specific working directory.
*/ */
export function createAllTools(cwd: string): Record<ToolName, Tool> { export function createAllTools(cwd: string, options?: ToolsOptions): Record<ToolName, Tool> {
return { return {
read: createReadTool(cwd), read: createReadTool(cwd, options?.read),
bash: createBashTool(cwd), bash: createBashTool(cwd),
edit: createEditTool(cwd), edit: createEditTool(cwd),
write: createWriteTool(cwd), write: createWriteTool(cwd),

View file

@ -3,6 +3,7 @@ import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox"; import { Type } from "@sinclair/typebox";
import { constants } from "fs"; import { constants } from "fs";
import { access, readFile } from "fs/promises"; import { access, readFile } from "fs/promises";
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js"; import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
import { resolveReadPath } from "./path-utils.js"; import { resolveReadPath } from "./path-utils.js";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js"; import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
@ -17,7 +18,13 @@ export interface ReadToolDetails {
truncation?: TruncationResult; truncation?: TruncationResult;
} }
export function createReadTool(cwd: string): AgentTool<typeof readSchema> { export interface ReadToolOptions {
/** Whether to auto-resize images to 2000x2000 max. Default: true */
autoResizeImages?: boolean;
}
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
const autoResizeImages = options?.autoResizeImages ?? true;
return { return {
name: "read", name: "read",
label: "read", label: "read",
@ -72,10 +79,26 @@ export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
const buffer = await readFile(absolutePath); const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64"); const base64 = buffer.toString("base64");
content = [ if (autoResizeImages) {
{ type: "text", text: `Read image file [${mimeType}]` }, // Resize image if needed
{ type: "image", data: base64, mimeType }, const resized = await resizeImage({ type: "image", data: base64, mimeType });
]; const dimensionNote = formatDimensionNote(resized);
let textNote = `Read image file [${resized.mimeType}]`;
if (dimensionNote) {
textNote += `\n${dimensionNote}`;
}
content = [
{ type: "text", text: textNote },
{ type: "image", data: resized.data, mimeType: resized.mimeType },
];
} else {
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
}
} else { } else {
// Read as text // Read as text
const textContent = await readFile(absolutePath, "utf-8"); const textContent = await readFile(absolutePath, "utf-8");

View file

@ -120,6 +120,7 @@ export {
} from "./core/session-manager.js"; } from "./core/session-manager.js";
export { export {
type CompactionSettings, type CompactionSettings,
type ImageSettings,
type RetrySettings, type RetrySettings,
type Settings, type Settings,
SettingsManager, SettingsManager,
@ -149,7 +150,9 @@ export {
type LsToolDetails, type LsToolDetails,
lsTool, lsTool,
type ReadToolDetails, type ReadToolDetails,
type ReadToolOptions,
readTool, readTool,
type ToolsOptions,
type TruncationResult, type TruncationResult,
writeTool, writeTool,
} from "./core/tools/index.js"; } from "./core/tools/index.js";

View file

@ -119,7 +119,10 @@ async function runInteractiveMode(
} }
} }
async function prepareInitialMessage(parsed: Args): Promise<{ async function prepareInitialMessage(
parsed: Args,
autoResizeImages: boolean,
): Promise<{
initialMessage?: string; initialMessage?: string;
initialImages?: ImageContent[]; initialImages?: ImageContent[];
}> { }> {
@ -127,7 +130,7 @@ async function prepareInitialMessage(parsed: Args): Promise<{
return {}; return {};
} }
const { text, images } = await processFileArguments(parsed.fileArgs); const { text, images } = await processFileArguments(parsed.fileArgs, { autoResizeImages });
let initialMessage: string; let initialMessage: string;
if (parsed.messages.length > 0) { if (parsed.messages.length > 0) {
@ -329,13 +332,12 @@ export async function main(args: string[]) {
} }
const cwd = process.cwd(); const cwd = process.cwd();
const { initialMessage, initialImages } = await prepareInitialMessage(parsed); const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
const { initialMessage, initialImages } = await prepareInitialMessage(parsed, settingsManager.getImageAutoResize());
time("prepareInitialMessage"); time("prepareInitialMessage");
const isInteractive = !parsed.print && parsed.mode === undefined; const isInteractive = !parsed.print && parsed.mode === undefined;
const mode = parsed.mode || "text"; const mode = parsed.mode || "text";
const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
initTheme(settingsManager.getTheme(), isInteractive); initTheme(settingsManager.getTheme(), isInteractive);
time("initTheme"); time("initTheme");

View file

@ -0,0 +1,136 @@
import type { ImageContent } from "@mariozechner/pi-ai";
/** Limits and encoder settings for resizeImage. Omitted fields use the module defaults. */
export interface ImageResizeOptions {
/** Maximum output width in pixels. */
maxWidth?: number; // Default: 2000
/** Maximum output height in pixels. */
maxHeight?: number; // Default: 2000
/** Quality passed to the JPEG encoder when the output is JPEG. */
jpegQuality?: number; // Default: 80
}
/** Result of resizeImage: the image to send plus its dimensions before and after processing. */
export interface ResizedImage {
/** Image bytes; the input data is returned as-is when wasResized is false. */
data: string; // base64
/** MIME type describing `data`. */
mimeType: string;
/** Width of the input image in pixels; 0 when sharp could not be loaded to measure it. */
originalWidth: number;
/** Height of the input image in pixels; 0 when sharp could not be loaded to measure it. */
originalHeight: number;
/** Width of the returned image; equals originalWidth when wasResized is false. */
width: number;
/** Height of the returned image; equals originalHeight when wasResized is false. */
height: number;
/** True only when `data` holds a scaled-down re-encoding of the input. */
wasResized: boolean;
}
/** Limits applied when the caller does not override them. */
const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
	maxWidth: 2000,
	maxHeight: 2000,
	jpegQuality: 80,
};

/**
 * Scale an image down so it fits within the configured maximum dimensions,
 * preserving aspect ratio.
 *
 * This is best-effort: the original image is returned untouched (with
 * `wasResized: false`) when it already fits, when sharp cannot be loaded, or
 * when sharp fails to decode or process the data. In the latter two cases the
 * reported dimensions are 0 because they could not be measured.
 *
 * Output format: PNG and WebP are preserved; GIF is converted to PNG (only a
 * single frame is kept); JPEG and any other format are encoded as JPEG.
 *
 * @param img - Base64-encoded image and its MIME type.
 * @param options - Optional overrides for the size limits and JPEG quality.
 * @returns The image to use, its MIME type, and original/resulting dimensions.
 */
export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
	// Resolve with `??` instead of object spread so an explicit `undefined`
	// in `options` cannot overwrite a default.
	const maxWidth = options?.maxWidth ?? DEFAULT_OPTIONS.maxWidth;
	const maxHeight = options?.maxHeight ?? DEFAULT_OPTIONS.maxHeight;
	const jpegQuality = options?.jpegQuality ?? DEFAULT_OPTIONS.jpegQuality;

	const unchanged = (width: number, height: number, mimeType: string = img.mimeType): ResizedImage => ({
		data: img.data,
		mimeType,
		originalWidth: width,
		originalHeight: height,
		width,
		height,
		wasResized: false,
	});

	let sharp: typeof import("sharp") | undefined;
	try {
		sharp = (await import("sharp")).default;
	} catch {
		// Sharp not available - we can't measure or resize, so pass the image through.
		return unchanged(0, 0);
	}

	try {
		const buffer = Buffer.from(img.data, "base64");
		const metadata = await sharp(buffer).metadata();
		const width = metadata.width ?? 0;
		const height = metadata.height ?? 0;
		const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";

		// Unknown dimensions or already within limits: nothing to do.
		if (width <= 0 || height <= 0 || (width <= maxWidth && height <= maxHeight)) {
			return unchanged(width, height, img.mimeType ?? `image/${format}`);
		}

		// One uniform scale factor keeps the aspect ratio. Clamp to 1px so very
		// elongated images never round down to a 0-pixel side, which sharp rejects.
		const scale = Math.min(maxWidth / width, maxHeight / height);
		const targetWidth = Math.max(1, Math.round(width * scale));
		const targetHeight = Math.max(1, Math.round(height * scale));

		let pipeline = sharp(buffer).resize(targetWidth, targetHeight, {
			fit: "inside",
			withoutEnlargement: true,
		});

		// Set the encoder explicitly so the bytes always match the MIME type we
		// report, and so lossy formats are encoded exactly once.
		let outputMimeType: string;
		switch (format) {
			case "png":
				pipeline = pipeline.png();
				outputMimeType = "image/png";
				break;
			case "gif":
				// GIF resize might not preserve animation; convert to PNG for quality
				pipeline = pipeline.png();
				outputMimeType = "image/png";
				break;
			case "webp":
				pipeline = pipeline.webp();
				outputMimeType = "image/webp";
				break;
			default:
				// "jpeg"/"jpg" and any unrecognized format are emitted as JPEG.
				pipeline = pipeline.jpeg({ quality: jpegQuality });
				outputMimeType = "image/jpeg";
				break;
		}

		// Report the dimensions sharp actually produced rather than the requested box.
		const { data, info } = await pipeline.toBuffer({ resolveWithObject: true });

		return {
			data: data.toString("base64"),
			mimeType: outputMimeType,
			originalWidth: width,
			originalHeight: height,
			width: info.width,
			height: info.height,
			wasResized: true,
		};
	} catch {
		// Decoding or encoding failed (corrupt or unsupported data). Resizing is an
		// optimization, so fall back to the caller's image instead of failing the read.
		return unchanged(0, 0);
	}
}
/**
 * Build the bracketed note that tells the model how a resized image relates
 * to its source, including the factor for mapping coordinates back.
 *
 * @param result - Outcome of a resize attempt.
 * @returns The note text, or `undefined` when the image was not resized.
 */
export function formatDimensionNote(result: ResizedImage): string | undefined {
	const { wasResized, originalWidth, originalHeight, width, height } = result;

	if (wasResized) {
		// The factor is derived from the width ratio and shown with two decimals.
		const factor = (originalWidth / width).toFixed(2);
		return [
			`[Image: original ${originalWidth}x${originalHeight}, `,
			`displayed at ${width}x${height}. `,
			`Multiply coordinates by ${factor} to map to original image.]`,
		].join("");
	}

	return undefined;
}