Merge pull request #402 from mitsuhiko/image-resize

Added automatic image resizing
This commit is contained in:
Mario Zechner 2026-01-03 00:39:26 +01:00 committed by GitHub
commit e82af9da47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 247 additions and 29 deletions

View file

@ -290,6 +290,8 @@ You: What's in this screenshot? /path/to/image.png
Supported formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`
**Auto-resize:** Images wider or taller than 2000 pixels are automatically scaled down, preserving aspect ratio, to fit within 2000x2000 for better compatibility with Anthropic models. When an image is resized, its original dimensions are noted in the context so the model can map coordinates back if needed. Disable via `images.autoResize: false` in settings.
**Inline rendering:** On terminals that support the Kitty graphics protocol (Kitty, Ghostty, WezTerm) or iTerm2 inline images, images in tool output are rendered inline. On unsupported terminals, a text placeholder is shown instead.
Toggle inline images via `/settings` or set `terminal.showImages: false` in settings.
@ -524,6 +526,9 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
"terminal": {
"showImages": true
},
"images": {
"autoResize": true
},
"hooks": ["/path/to/hook.ts"],
"customTools": ["/path/to/tool.ts"]
}
@ -549,6 +554,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
| `retry.maxRetries` | Maximum retry attempts | `3` |
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
| `terminal.showImages` | Render images inline (supported terminals) | `true` |
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
| `hooks` | Additional hook file paths | `[]` |
| `customTools` | Additional custom tool file paths | `[]` |

View file

@ -47,7 +47,8 @@
"file-type": "^21.1.1",
"glob": "^11.0.3",
"jiti": "^2.6.1",
"marked": "^15.0.12"
"marked": "^15.0.12",
"sharp": "^0.34.2"
},
"devDependencies": {
"@types/diff": "^7.0.2",

View file

@ -7,6 +7,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
import chalk from "chalk";
import { resolve } from "path";
import { resolveReadPath } from "../core/tools/path-utils.js";
import { formatDimensionNote, resizeImage } from "../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
export interface ProcessedFiles {
@ -14,8 +15,14 @@ export interface ProcessedFiles {
images: ImageContent[];
}
/** Options accepted by processFileArguments(). */
export interface ProcessFileOptions {
/**
 * Whether image attachments are passed through resizeImage() (2000x2000 max)
 * before being added to the result. Default: true
 */
autoResizeImages?: boolean;
}
/** Process @file arguments into text content and image attachments */
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> {
export async function processFileArguments(fileArgs: string[], options?: ProcessFileOptions): Promise<ProcessedFiles> {
const autoResizeImages = options?.autoResizeImages ?? true;
let text = "";
const images: ImageContent[] = [];
@ -45,16 +52,33 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
const content = await readFile(absolutePath);
const base64Content = content.toString("base64");
const attachment: ImageContent = {
type: "image",
mimeType,
data: base64Content,
};
let attachment: ImageContent;
let dimensionNote: string | undefined;
if (autoResizeImages) {
const resized = await resizeImage({ type: "image", data: base64Content, mimeType });
dimensionNote = formatDimensionNote(resized);
attachment = {
type: "image",
mimeType: resized.mimeType,
data: resized.data,
};
} else {
attachment = {
type: "image",
mimeType,
data: base64Content,
};
}
images.push(attachment);
// Add text reference to image
text += `<file name="${absolutePath}"></file>\n`;
// Add text reference to image with optional dimension note
if (dimensionNote) {
text += `<file name="${absolutePath}">${dimensionNote}</file>\n`;
} else {
text += `<file name="${absolutePath}"></file>\n`;
}
} else {
// Handle text file
try {

View file

@ -528,7 +528,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir);
time("discoverContextFiles");
const builtInTools = options.tools ?? createCodingTools(cwd);
const autoResizeImages = settingsManager.getImageAutoResize();
const builtInTools = options.tools ?? createCodingTools(cwd, { read: { autoResizeImages } });
time("createCodingTools");
let customToolsResult: CustomToolsLoadResult;

View file

@ -34,6 +34,10 @@ export interface TerminalSettings {
showImages?: boolean; // default: true (only relevant if terminal supports images)
}
/** Image-handling preferences, stored under the `images` key of Settings. */
export interface ImageSettings {
autoResize?: boolean; // default: true (resize images to 2000x2000 max for better model compatibility)
}
export interface Settings {
lastChangelogVersion?: string;
defaultProvider?: string;
@ -52,6 +56,7 @@ export interface Settings {
customTools?: string[]; // Array of custom tool file paths
skills?: SkillsSettings;
terminal?: TerminalSettings;
images?: ImageSettings;
enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
}
@ -390,6 +395,18 @@ export class SettingsManager {
this.save();
}
/** Returns the `images.autoResize` setting, treating an unset value as enabled. */
getImageAutoResize(): boolean {
	const { images } = this.settings;
	return images?.autoResize ?? true;
}
/** Stores `images.autoResize` in the global settings and saves them. */
setImageAutoResize(enabled: boolean): void {
	// Create the `images` section on first use, then update it in place.
	this.globalSettings.images ||= {};
	this.globalSettings.images.autoResize = enabled;
	this.save();
}
getEnabledModels(): string[] | undefined {
return this.settings.enabledModels;
}

View file

@ -3,7 +3,7 @@ export { createEditTool, editTool } from "./edit.js";
export { createFindTool, type FindToolDetails, findTool } from "./find.js";
export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js";
export { createLsTool, type LsToolDetails, lsTool } from "./ls.js";
export { createReadTool, type ReadToolDetails, readTool } from "./read.js";
export { createReadTool, type ReadToolDetails, type ReadToolOptions, readTool } from "./read.js";
export type { TruncationResult } from "./truncate.js";
export { createWriteTool, writeTool } from "./write.js";
@ -13,7 +13,7 @@ import { createEditTool, editTool } from "./edit.js";
import { createFindTool, findTool } from "./find.js";
import { createGrepTool, grepTool } from "./grep.js";
import { createLsTool, lsTool } from "./ls.js";
import { createReadTool, readTool } from "./read.js";
import { createReadTool, type ReadToolOptions, readTool } from "./read.js";
import { createWriteTool, writeTool } from "./write.js";
/** Tool type (AgentTool from pi-ai) */
@ -38,26 +38,31 @@ export const allTools = {
export type ToolName = keyof typeof allTools;
/** Per-tool configuration accepted by the tool factory functions in this module. */
export interface ToolsOptions {
/** Options for the read tool */
read?: ReadToolOptions;
}
/**
* Create coding tools configured for a specific working directory.
*/
export function createCodingTools(cwd: string): Tool[] {
return [createReadTool(cwd), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd, options?.read), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
}
/**
* Create read-only tools configured for a specific working directory.
*/
export function createReadOnlyTools(cwd: string): Tool[] {
return [createReadTool(cwd), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
export function createReadOnlyTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd, options?.read), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
}
/**
* Create all tools configured for a specific working directory.
*/
export function createAllTools(cwd: string): Record<ToolName, Tool> {
export function createAllTools(cwd: string, options?: ToolsOptions): Record<ToolName, Tool> {
return {
read: createReadTool(cwd),
read: createReadTool(cwd, options?.read),
bash: createBashTool(cwd),
edit: createEditTool(cwd),
write: createWriteTool(cwd),

View file

@ -3,6 +3,7 @@ import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
import { resolveReadPath } from "./path-utils.js";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
@ -17,7 +18,13 @@ export interface ReadToolDetails {
truncation?: TruncationResult;
}
export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
/** Options accepted by createReadTool(). */
export interface ReadToolOptions {
/**
 * Whether image files are passed through resizeImage() (2000x2000 max)
 * before being returned as tool content. Default: true
 */
autoResizeImages?: boolean;
}
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
const autoResizeImages = options?.autoResizeImages ?? true;
return {
name: "read",
label: "read",
@ -72,10 +79,26 @@ export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64");
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
if (autoResizeImages) {
// Resize image if needed
const resized = await resizeImage({ type: "image", data: base64, mimeType });
const dimensionNote = formatDimensionNote(resized);
let textNote = `Read image file [${resized.mimeType}]`;
if (dimensionNote) {
textNote += `\n${dimensionNote}`;
}
content = [
{ type: "text", text: textNote },
{ type: "image", data: resized.data, mimeType: resized.mimeType },
];
} else {
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
}
} else {
// Read as text
const textContent = await readFile(absolutePath, "utf-8");

View file

@ -120,6 +120,7 @@ export {
} from "./core/session-manager.js";
export {
type CompactionSettings,
type ImageSettings,
type RetrySettings,
type Settings,
SettingsManager,
@ -149,7 +150,9 @@ export {
type LsToolDetails,
lsTool,
type ReadToolDetails,
type ReadToolOptions,
readTool,
type ToolsOptions,
type TruncationResult,
writeTool,
} from "./core/tools/index.js";

View file

@ -119,7 +119,10 @@ async function runInteractiveMode(
}
}
async function prepareInitialMessage(parsed: Args): Promise<{
async function prepareInitialMessage(
parsed: Args,
autoResizeImages: boolean,
): Promise<{
initialMessage?: string;
initialImages?: ImageContent[];
}> {
@ -127,7 +130,7 @@ async function prepareInitialMessage(parsed: Args): Promise<{
return {};
}
const { text, images } = await processFileArguments(parsed.fileArgs);
const { text, images } = await processFileArguments(parsed.fileArgs, { autoResizeImages });
let initialMessage: string;
if (parsed.messages.length > 0) {
@ -329,13 +332,12 @@ export async function main(args: string[]) {
}
const cwd = process.cwd();
const { initialMessage, initialImages } = await prepareInitialMessage(parsed);
const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
const { initialMessage, initialImages } = await prepareInitialMessage(parsed, settingsManager.getImageAutoResize());
time("prepareInitialMessage");
const isInteractive = !parsed.print && parsed.mode === undefined;
const mode = parsed.mode || "text";
const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
initTheme(settingsManager.getTheme(), isInteractive);
time("initTheme");

View file

@ -0,0 +1,136 @@
import type { ImageContent } from "@mariozechner/pi-ai";
/**
 * Optional limits and encoder settings for resizeImage().
 * Any field left unset falls back to the default noted beside it.
 */
export interface ImageResizeOptions {
maxWidth?: number; // Maximum output width in pixels. Default: 2000
maxHeight?: number; // Maximum output height in pixels. Default: 2000
jpegQuality?: number; // Quality passed to the JPEG encoder. Default: 80
}
/**
 * Result of resizeImage(): the image payload to send plus the dimensions
 * before and after processing.
 */
export interface ResizedImage {
data: string; // base64
mimeType: string; // MIME type describing `data`
originalWidth: number; // width read from the source image; 0 when it could not be determined
originalHeight: number; // height read from the source image; 0 when it could not be determined
width: number; // width of the image in `data`; 0 when it could not be determined
height: number; // height of the image in `data`; 0 when it could not be determined
wasResized: boolean; // true only when `data` holds a newly scaled image
}
/** Values used by resizeImage() for any option the caller does not supply. */
const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
maxWidth: 2000,
maxHeight: 2000,
jpegQuality: 80,
};
/**
 * Resize an image so that it fits within the configured maximum dimensions,
 * preserving its aspect ratio.
 *
 * Uses sharp for image processing. sharp is loaded lazily; if it cannot be
 * imported (e.g., in some environments), the original image is returned
 * unchanged with all dimensions reported as 0.
 *
 * Output encoding after a resize:
 * - JPEG stays JPEG (encoded once, at `jpegQuality`)
 * - PNG stays PNG, WebP stays WebP
 * - GIF is re-encoded as PNG (resizing may not preserve animation)
 * - anything else is encoded as JPEG
 *
 * @param img - Image content whose `data` is base64-encoded.
 * @param options - Optional overrides for the size limits and JPEG quality.
 * @returns The (possibly resized) image together with its original and final
 *   dimensions. `wasResized` is false when the input is returned as-is.
 * @throws Whatever sharp throws when the image data cannot be decoded or encoded.
 */
export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
	const opts = { ...DEFAULT_OPTIONS, ...options };
	const buffer = Buffer.from(img.data, "base64");

	let sharp: typeof import("sharp") | undefined;
	try {
		sharp = (await import("sharp")).default;
	} catch {
		// Sharp not available - return original image.
		// We can't get dimensions without sharp, so report 0s.
		return {
			data: img.data,
			mimeType: img.mimeType,
			originalWidth: 0,
			originalHeight: 0,
			width: 0,
			height: 0,
			wasResized: false,
		};
	}

	const metadata = await sharp(buffer).metadata();
	const width = metadata.width ?? 0;
	const height = metadata.height ?? 0;
	const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";

	// Already within limits: hand back the untouched payload.
	if (width <= opts.maxWidth && height <= opts.maxHeight) {
		return {
			data: img.data,
			mimeType: img.mimeType ?? `image/${format}`,
			originalWidth: width,
			originalHeight: height,
			width,
			height,
			wasResized: false,
		};
	}

	// Calculate new dimensions maintaining aspect ratio. Each side is clamped
	// to at least 1px: for extreme aspect ratios rounding would otherwise give
	// 0, which sharp rejects.
	let newWidth = width;
	let newHeight = height;
	if (newWidth > opts.maxWidth) {
		newHeight = Math.max(1, Math.round((newHeight * opts.maxWidth) / newWidth));
		newWidth = opts.maxWidth;
	}
	if (newHeight > opts.maxHeight) {
		newWidth = Math.max(1, Math.round((newWidth * opts.maxHeight) / newHeight));
		newHeight = opts.maxHeight;
	}

	const scaled = sharp(buffer).resize(newWidth, newHeight, { fit: "inside", withoutEnlargement: true });

	// Pick the encoder explicitly so the bytes always match the reported MIME
	// type, and so lossy formats are encoded only once.
	let encoded: typeof scaled;
	let outputMimeType: string;
	if (format === "png") {
		encoded = scaled.png();
		outputMimeType = "image/png";
	} else if (format === "gif") {
		// GIF resize might not preserve animation; convert to PNG for quality
		encoded = scaled.png();
		outputMimeType = "image/png";
	} else if (format === "webp") {
		encoded = scaled.webp();
		outputMimeType = "image/webp";
	} else {
		// JPEG input, and the fallback for unknown formats
		encoded = scaled.jpeg({ quality: opts.jpegQuality });
		outputMimeType = "image/jpeg";
	}

	// `info` carries the dimensions sharp actually produced, which can differ
	// by a pixel from the requested box under `fit: "inside"`.
	const { data, info } = await encoded.toBuffer({ resolveWithObject: true });

	return {
		data: data.toString("base64"),
		mimeType: outputMimeType,
		originalWidth: width,
		originalHeight: height,
		width: info.width,
		height: info.height,
		wasResized: true,
	};
}
/**
 * Build the note that accompanies a resized image so the model can translate
 * coordinates in the displayed image back to the original.
 *
 * @param result - Outcome of a resize attempt.
 * @returns The note text, or `undefined` when the image was not resized.
 */
export function formatDimensionNote(result: ResizedImage): string | undefined {
	const { wasResized, originalWidth, originalHeight, width, height } = result;
	if (wasResized) {
		// The factor is derived from the width ratio only.
		const factor = (originalWidth / width).toFixed(2);
		const original = `${originalWidth}x${originalHeight}`;
		const displayed = `${width}x${height}`;
		return `[Image: original ${original}, displayed at ${displayed}. Multiply coordinates by ${factor} to map to original image.]`;
	}
	return undefined;
}