Merge pull request #402 from mitsuhiko/image-resize

Added automatic image resizing
This commit is contained in:
Mario Zechner 2026-01-03 00:39:26 +01:00 committed by GitHub
commit e82af9da47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 247 additions and 29 deletions

View file

@ -7,6 +7,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
import chalk from "chalk";
import { resolve } from "path";
import { resolveReadPath } from "../core/tools/path-utils.js";
import { formatDimensionNote, resizeImage } from "../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
export interface ProcessedFiles {
@ -14,8 +15,14 @@ export interface ProcessedFiles {
images: ImageContent[];
}
/** Options accepted by processFileArguments. */
export interface ProcessFileOptions {
/**
 * Whether image attachments are passed through resizeImage before being attached.
 * resizeImage is called without explicit limits, so its 2000x2000 defaults apply.
 * Default: true
 */
autoResizeImages?: boolean;
}
/** Process @file arguments into text content and image attachments */
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> {
export async function processFileArguments(fileArgs: string[], options?: ProcessFileOptions): Promise<ProcessedFiles> {
const autoResizeImages = options?.autoResizeImages ?? true;
let text = "";
const images: ImageContent[] = [];
@ -45,16 +52,33 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
const content = await readFile(absolutePath);
const base64Content = content.toString("base64");
const attachment: ImageContent = {
type: "image",
mimeType,
data: base64Content,
};
let attachment: ImageContent;
let dimensionNote: string | undefined;
if (autoResizeImages) {
const resized = await resizeImage({ type: "image", data: base64Content, mimeType });
dimensionNote = formatDimensionNote(resized);
attachment = {
type: "image",
mimeType: resized.mimeType,
data: resized.data,
};
} else {
attachment = {
type: "image",
mimeType,
data: base64Content,
};
}
images.push(attachment);
// Add text reference to image
text += `<file name="${absolutePath}"></file>\n`;
// Add text reference to image with optional dimension note
if (dimensionNote) {
text += `<file name="${absolutePath}">${dimensionNote}</file>\n`;
} else {
text += `<file name="${absolutePath}"></file>\n`;
}
} else {
// Handle text file
try {

View file

@ -528,7 +528,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir);
time("discoverContextFiles");
const builtInTools = options.tools ?? createCodingTools(cwd);
const autoResizeImages = settingsManager.getImageAutoResize();
const builtInTools = options.tools ?? createCodingTools(cwd, { read: { autoResizeImages } });
time("createCodingTools");
let customToolsResult: CustomToolsLoadResult;

View file

@ -34,6 +34,10 @@ export interface TerminalSettings {
showImages?: boolean; // default: true (only relevant if terminal supports images)
}
/** Image handling settings, stored under the `images` key of Settings. */
export interface ImageSettings {
/**
 * Resize images to 2000x2000 max for better model compatibility.
 * Default: true (applied by SettingsManager.getImageAutoResize when unset).
 */
autoResize?: boolean;
}
export interface Settings {
lastChangelogVersion?: string;
defaultProvider?: string;
@ -52,6 +56,7 @@ export interface Settings {
customTools?: string[]; // Array of custom tool file paths
skills?: SkillsSettings;
terminal?: TerminalSettings;
images?: ImageSettings;
enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
}
@ -390,6 +395,18 @@ export class SettingsManager {
this.save();
}
/** Whether images should be auto-resized; an unset `images.autoResize` counts as true. */
getImageAutoResize(): boolean {
	const configured = this.settings.images?.autoResize;
	return configured ?? true;
}
/**
 * Store the image auto-resize flag in the global settings and save.
 * Creates the `images` section first when it does not exist yet.
 */
setImageAutoResize(enabled: boolean): void {
	const imageSettings = this.globalSettings.images || {};
	imageSettings.autoResize = enabled;
	this.globalSettings.images = imageSettings;
	this.save();
}
getEnabledModels(): string[] | undefined {
return this.settings.enabledModels;
}

View file

@ -3,7 +3,7 @@ export { createEditTool, editTool } from "./edit.js";
export { createFindTool, type FindToolDetails, findTool } from "./find.js";
export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js";
export { createLsTool, type LsToolDetails, lsTool } from "./ls.js";
export { createReadTool, type ReadToolDetails, readTool } from "./read.js";
export { createReadTool, type ReadToolDetails, type ReadToolOptions, readTool } from "./read.js";
export type { TruncationResult } from "./truncate.js";
export { createWriteTool, writeTool } from "./write.js";
@ -13,7 +13,7 @@ import { createEditTool, editTool } from "./edit.js";
import { createFindTool, findTool } from "./find.js";
import { createGrepTool, grepTool } from "./grep.js";
import { createLsTool, lsTool } from "./ls.js";
import { createReadTool, readTool } from "./read.js";
import { createReadTool, type ReadToolOptions, readTool } from "./read.js";
import { createWriteTool, writeTool } from "./write.js";
/** Tool type (AgentTool from pi-ai) */
@ -38,26 +38,31 @@ export const allTools = {
export type ToolName = keyof typeof allTools;
/** Per-tool options accepted by the tool factory functions in this module. */
export interface ToolsOptions {
/** Options forwarded to createReadTool. */
read?: ReadToolOptions;
}
/**
* Create coding tools configured for a specific working directory.
*/
export function createCodingTools(cwd: string): Tool[] {
return [createReadTool(cwd), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd, options?.read), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
}
/**
* Create read-only tools configured for a specific working directory.
*/
export function createReadOnlyTools(cwd: string): Tool[] {
return [createReadTool(cwd), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
export function createReadOnlyTools(cwd: string, options?: ToolsOptions): Tool[] {
return [createReadTool(cwd, options?.read), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
}
/**
* Create all tools configured for a specific working directory.
*/
export function createAllTools(cwd: string): Record<ToolName, Tool> {
export function createAllTools(cwd: string, options?: ToolsOptions): Record<ToolName, Tool> {
return {
read: createReadTool(cwd),
read: createReadTool(cwd, options?.read),
bash: createBashTool(cwd),
edit: createEditTool(cwd),
write: createWriteTool(cwd),

View file

@ -3,6 +3,7 @@ import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
import { resolveReadPath } from "./path-utils.js";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
@ -17,7 +18,13 @@ export interface ReadToolDetails {
truncation?: TruncationResult;
}
export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
/** Options for createReadTool. */
export interface ReadToolOptions {
/**
 * Whether image files are passed through resizeImage before being returned.
 * resizeImage is called without explicit limits, so its 2000x2000 defaults apply.
 * Default: true
 */
autoResizeImages?: boolean;
}
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
const autoResizeImages = options?.autoResizeImages ?? true;
return {
name: "read",
label: "read",
@ -72,10 +79,26 @@ export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64");
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
if (autoResizeImages) {
// Resize image if needed
const resized = await resizeImage({ type: "image", data: base64, mimeType });
const dimensionNote = formatDimensionNote(resized);
let textNote = `Read image file [${resized.mimeType}]`;
if (dimensionNote) {
textNote += `\n${dimensionNote}`;
}
content = [
{ type: "text", text: textNote },
{ type: "image", data: resized.data, mimeType: resized.mimeType },
];
} else {
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
}
} else {
// Read as text
const textContent = await readFile(absolutePath, "utf-8");

View file

@ -120,6 +120,7 @@ export {
} from "./core/session-manager.js";
export {
type CompactionSettings,
type ImageSettings,
type RetrySettings,
type Settings,
SettingsManager,
@ -149,7 +150,9 @@ export {
type LsToolDetails,
lsTool,
type ReadToolDetails,
type ReadToolOptions,
readTool,
type ToolsOptions,
type TruncationResult,
writeTool,
} from "./core/tools/index.js";

View file

@ -119,7 +119,10 @@ async function runInteractiveMode(
}
}
async function prepareInitialMessage(parsed: Args): Promise<{
async function prepareInitialMessage(
parsed: Args,
autoResizeImages: boolean,
): Promise<{
initialMessage?: string;
initialImages?: ImageContent[];
}> {
@ -127,7 +130,7 @@ async function prepareInitialMessage(parsed: Args): Promise<{
return {};
}
const { text, images } = await processFileArguments(parsed.fileArgs);
const { text, images } = await processFileArguments(parsed.fileArgs, { autoResizeImages });
let initialMessage: string;
if (parsed.messages.length > 0) {
@ -329,13 +332,12 @@ export async function main(args: string[]) {
}
const cwd = process.cwd();
const { initialMessage, initialImages } = await prepareInitialMessage(parsed);
const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
const { initialMessage, initialImages } = await prepareInitialMessage(parsed, settingsManager.getImageAutoResize());
time("prepareInitialMessage");
const isInteractive = !parsed.print && parsed.mode === undefined;
const mode = parsed.mode || "text";
const settingsManager = SettingsManager.create(cwd);
time("SettingsManager.create");
initTheme(settingsManager.getTheme(), isInteractive);
time("initTheme");

View file

@ -0,0 +1,136 @@
import type { ImageContent } from "@mariozechner/pi-ai";
/** Optional limits and encoder settings for resizeImage. */
export interface ImageResizeOptions {
/** Maximum width in pixels before the image is scaled down. Default: 2000 */
maxWidth?: number;
/** Maximum height in pixels before the image is scaled down. Default: 2000 */
maxHeight?: number;
/** Quality passed to sharp's JPEG encoder when the output is JPEG. Default: 80 */
jpegQuality?: number;
}
/** Result of resizeImage: the image payload plus its dimensions before and after. */
export interface ResizedImage {
/** Image bytes, base64-encoded. */
data: string;
/** MIME type reported for `data`. */
mimeType: string;
/** Width of the input image in pixels; 0 when it could not be measured (e.g. sharp unavailable). */
originalWidth: number;
/** Height of the input image in pixels; 0 when it could not be measured (e.g. sharp unavailable). */
originalHeight: number;
/** Width of the returned image in pixels; equals originalWidth when not resized. */
width: number;
/** Height of the returned image in pixels; equals originalHeight when not resized. */
height: number;
/** True only when the image was scaled down and re-encoded. */
wasResized: boolean;
}
/** Fallback values resizeImage uses for any option the caller does not supply. */
const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
maxWidth: 2000,
maxHeight: 2000,
jpegQuality: 80,
};
/**
 * Resize an image to fit within the specified max dimensions.
 *
 * Returns the original data untouched (`wasResized: false`) when the image
 * already fits, or when sharp cannot be loaded (e.g., in some environments).
 * In the latter case all dimensions are reported as 0 because they cannot be
 * measured without sharp.
 *
 * When a resize happens the image is encoded exactly once: PNG and GIF
 * sources become PNG, WebP stays WebP, and everything else becomes JPEG at
 * `jpegQuality`. The returned `mimeType` always matches the encoded bytes.
 *
 * @param img - Base64-encoded image and its MIME type.
 * @param options - Optional limits; missing or `undefined` fields fall back to DEFAULT_OPTIONS.
 * @returns The (possibly re-encoded) image plus its original and final dimensions.
 * @throws Errors raised by sharp while decoding or encoding propagate to the caller.
 */
export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
	// Resolve per field so an explicit `undefined` cannot erase a default.
	const maxWidth = options?.maxWidth ?? DEFAULT_OPTIONS.maxWidth;
	const maxHeight = options?.maxHeight ?? DEFAULT_OPTIONS.maxHeight;
	const jpegQuality = options?.jpegQuality ?? DEFAULT_OPTIONS.jpegQuality;

	let sharp: typeof import("sharp") | undefined;
	try {
		sharp = (await import("sharp")).default;
	} catch {
		// Sharp not available - return original image.
		// We can't get dimensions without sharp, so return 0s.
		return {
			data: img.data,
			mimeType: img.mimeType,
			originalWidth: 0,
			originalHeight: 0,
			width: 0,
			height: 0,
			wasResized: false,
		};
	}

	const buffer = Buffer.from(img.data, "base64");
	const metadata = await sharp(buffer).metadata();
	const width = metadata.width ?? 0;
	const height = metadata.height ?? 0;
	const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";

	// Already within limits: hand back the caller's bytes unchanged.
	if (width <= maxWidth && height <= maxHeight) {
		return {
			data: img.data,
			mimeType: img.mimeType ?? `image/${format}`,
			originalWidth: width,
			originalHeight: height,
			width,
			height,
			wasResized: false,
		};
	}

	// Calculate new dimensions maintaining aspect ratio. Clamp to 1px so that
	// extreme aspect ratios never round down to 0, which sharp rejects.
	let newWidth = width;
	let newHeight = height;
	if (newWidth > maxWidth) {
		newHeight = Math.max(1, Math.round((newHeight * maxWidth) / newWidth));
		newWidth = maxWidth;
	}
	if (newHeight > maxHeight) {
		newWidth = Math.max(1, Math.round((newWidth * maxHeight) / newHeight));
		newHeight = maxHeight;
	}

	// Resize and encode in a single pipeline. Setting the output format
	// explicitly keeps the bytes in sync with the reported MIME type and
	// avoids a second lossy pass for JPEG.
	const pipeline = sharp(buffer).resize(newWidth, newHeight, { fit: "inside", withoutEnlargement: true });
	let outputMimeType: string;
	if (format === "png" || format === "gif") {
		// GIF resize might not preserve animation; convert to PNG for quality.
		pipeline.png();
		outputMimeType = "image/png";
	} else if (format === "webp") {
		pipeline.webp();
		outputMimeType = "image/webp";
	} else {
		// JPEG sources, and JPEG as the default for unknown formats.
		pipeline.jpeg({ quality: jpegQuality });
		outputMimeType = "image/jpeg";
	}
	const outputBuffer = await pipeline.toBuffer();

	return {
		data: outputBuffer.toString("base64"),
		mimeType: outputMimeType,
		originalWidth: width,
		originalHeight: height,
		width: newWidth,
		height: newHeight,
		wasResized: true,
	};
}
/**
 * Build the note that accompanies a resized image so the model can map
 * coordinates in the displayed image back to the original.
 *
 * @param result - Outcome of resizeImage.
 * @returns The note text, or undefined when the image was not resized.
 */
export function formatDimensionNote(result: ResizedImage): string | undefined {
	const { wasResized, originalWidth, originalHeight, width, height } = result;
	if (wasResized) {
		// The factor is derived from the widths only.
		const factor = (originalWidth / width).toFixed(2);
		return `[Image: original ${originalWidth}x${originalHeight}, displayed at ${width}x${height}. Multiply coordinates by ${factor} to map to original image.]`;
	}
	return undefined;
}