mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 19:05:11 +00:00
Merge pull request #402 from mitsuhiko/image-resize
Added automatic image resizing
This commit is contained in:
commit
e82af9da47
10 changed files with 247 additions and 29 deletions
|
|
@ -290,6 +290,8 @@ You: What's in this screenshot? /path/to/image.png
|
|||
|
||||
Supported formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`
|
||||
|
||||
**Auto-resize:** Images whose width or height exceeds 2000 pixels are automatically scaled down, preserving aspect ratio, to fit within 2000x2000 for better compatibility with Anthropic models. When an image is resized, its original dimensions are noted in the context so the model can map coordinates back if needed. Disable via `images.autoResize: false` in settings.
|
||||
|
||||
**Inline rendering:** On terminals that support the Kitty graphics protocol (Kitty, Ghostty, WezTerm) or iTerm2 inline images, images in tool output are rendered inline. On unsupported terminals, a text placeholder is shown instead.
|
||||
|
||||
Toggle inline images via `/settings` or set `terminal.showImages: false` in settings.
|
||||
|
|
@ -524,6 +526,9 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
|
|||
"terminal": {
|
||||
"showImages": true
|
||||
},
|
||||
"images": {
|
||||
"autoResize": true
|
||||
},
|
||||
"hooks": ["/path/to/hook.ts"],
|
||||
"customTools": ["/path/to/tool.ts"]
|
||||
}
|
||||
|
|
@ -549,6 +554,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
|
|||
| `retry.maxRetries` | Maximum retry attempts | `3` |
|
||||
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
|
||||
| `terminal.showImages` | Render images inline (supported terminals) | `true` |
|
||||
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
|
||||
| `hooks` | Additional hook file paths | `[]` |
|
||||
| `customTools` | Additional custom tool file paths | `[]` |
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,8 @@
|
|||
"file-type": "^21.1.1",
|
||||
"glob": "^11.0.3",
|
||||
"jiti": "^2.6.1",
|
||||
"marked": "^15.0.12"
|
||||
"marked": "^15.0.12",
|
||||
"sharp": "^0.34.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/diff": "^7.0.2",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
|
|||
import chalk from "chalk";
|
||||
import { resolve } from "path";
|
||||
import { resolveReadPath } from "../core/tools/path-utils.js";
|
||||
import { formatDimensionNote, resizeImage } from "../utils/image-resize.js";
|
||||
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
|
||||
|
||||
export interface ProcessedFiles {
|
||||
|
|
@ -14,8 +15,14 @@ export interface ProcessedFiles {
|
|||
images: ImageContent[];
|
||||
}
|
||||
|
||||
/** Options accepted by `processFileArguments`. */
export interface ProcessFileOptions {
	/**
	 * When true, image attachments are passed through the auto-resizer
	 * (2000x2000 max) before being attached. Treated as true when omitted.
	 */
	autoResizeImages?: boolean;
}
|
||||
|
||||
/** Process @file arguments into text content and image attachments */
|
||||
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> {
|
||||
export async function processFileArguments(fileArgs: string[], options?: ProcessFileOptions): Promise<ProcessedFiles> {
|
||||
const autoResizeImages = options?.autoResizeImages ?? true;
|
||||
let text = "";
|
||||
const images: ImageContent[] = [];
|
||||
|
||||
|
|
@ -45,16 +52,33 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
|
|||
const content = await readFile(absolutePath);
|
||||
const base64Content = content.toString("base64");
|
||||
|
||||
const attachment: ImageContent = {
|
||||
type: "image",
|
||||
mimeType,
|
||||
data: base64Content,
|
||||
};
|
||||
let attachment: ImageContent;
|
||||
let dimensionNote: string | undefined;
|
||||
|
||||
if (autoResizeImages) {
|
||||
const resized = await resizeImage({ type: "image", data: base64Content, mimeType });
|
||||
dimensionNote = formatDimensionNote(resized);
|
||||
attachment = {
|
||||
type: "image",
|
||||
mimeType: resized.mimeType,
|
||||
data: resized.data,
|
||||
};
|
||||
} else {
|
||||
attachment = {
|
||||
type: "image",
|
||||
mimeType,
|
||||
data: base64Content,
|
||||
};
|
||||
}
|
||||
|
||||
images.push(attachment);
|
||||
|
||||
// Add text reference to image
|
||||
text += `<file name="${absolutePath}"></file>\n`;
|
||||
// Add text reference to image with optional dimension note
|
||||
if (dimensionNote) {
|
||||
text += `<file name="${absolutePath}">${dimensionNote}</file>\n`;
|
||||
} else {
|
||||
text += `<file name="${absolutePath}"></file>\n`;
|
||||
}
|
||||
} else {
|
||||
// Handle text file
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -528,7 +528,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir);
|
||||
time("discoverContextFiles");
|
||||
|
||||
const builtInTools = options.tools ?? createCodingTools(cwd);
|
||||
const autoResizeImages = settingsManager.getImageAutoResize();
|
||||
const builtInTools = options.tools ?? createCodingTools(cwd, { read: { autoResizeImages } });
|
||||
time("createCodingTools");
|
||||
|
||||
let customToolsResult: CustomToolsLoadResult;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,10 @@ export interface TerminalSettings {
|
|||
showImages?: boolean; // default: true (only relevant if terminal supports images)
|
||||
}
|
||||
|
||||
/** Image-related preferences stored under the `images` key of the settings file. */
export interface ImageSettings {
	/**
	 * Resize images to 2000x2000 max for better model compatibility.
	 * Default: true.
	 */
	autoResize?: boolean;
}
|
||||
|
||||
export interface Settings {
|
||||
lastChangelogVersion?: string;
|
||||
defaultProvider?: string;
|
||||
|
|
@ -52,6 +56,7 @@ export interface Settings {
|
|||
customTools?: string[]; // Array of custom tool file paths
|
||||
skills?: SkillsSettings;
|
||||
terminal?: TerminalSettings;
|
||||
images?: ImageSettings;
|
||||
enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
|
||||
}
|
||||
|
||||
|
|
@ -390,6 +395,18 @@ export class SettingsManager {
|
|||
this.save();
|
||||
}
|
||||
|
||||
/** Returns the effective `images.autoResize` setting, falling back to true when it is not configured. */
getImageAutoResize(): boolean {
	const configured = this.settings.images?.autoResize;
	return configured ?? true;
}
|
||||
|
||||
/**
 * Stores the `images.autoResize` preference in the global settings and saves them.
 * The `images` section is created on first use.
 */
setImageAutoResize(enabled: boolean): void {
	const images = (this.globalSettings.images ??= {});
	images.autoResize = enabled;
	this.save();
}
|
||||
|
||||
getEnabledModels(): string[] | undefined {
|
||||
return this.settings.enabledModels;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ export { createEditTool, editTool } from "./edit.js";
|
|||
export { createFindTool, type FindToolDetails, findTool } from "./find.js";
|
||||
export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js";
|
||||
export { createLsTool, type LsToolDetails, lsTool } from "./ls.js";
|
||||
export { createReadTool, type ReadToolDetails, readTool } from "./read.js";
|
||||
export { createReadTool, type ReadToolDetails, type ReadToolOptions, readTool } from "./read.js";
|
||||
export type { TruncationResult } from "./truncate.js";
|
||||
export { createWriteTool, writeTool } from "./write.js";
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ import { createEditTool, editTool } from "./edit.js";
|
|||
import { createFindTool, findTool } from "./find.js";
|
||||
import { createGrepTool, grepTool } from "./grep.js";
|
||||
import { createLsTool, lsTool } from "./ls.js";
|
||||
import { createReadTool, readTool } from "./read.js";
|
||||
import { createReadTool, type ReadToolOptions, readTool } from "./read.js";
|
||||
import { createWriteTool, writeTool } from "./write.js";
|
||||
|
||||
/** Tool type (AgentTool from pi-ai) */
|
||||
|
|
@ -38,26 +38,31 @@ export const allTools = {
|
|||
|
||||
export type ToolName = keyof typeof allTools;
|
||||
|
||||
/** Per-tool configuration accepted by the tool factory functions in this module. */
export interface ToolsOptions {
	/** Forwarded to `createReadTool` as its options argument. */
	read?: ReadToolOptions;
}
|
||||
|
||||
/**
|
||||
* Create coding tools configured for a specific working directory.
|
||||
*/
|
||||
export function createCodingTools(cwd: string): Tool[] {
|
||||
return [createReadTool(cwd), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
|
||||
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
|
||||
return [createReadTool(cwd, options?.read), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create read-only tools configured for a specific working directory.
|
||||
*/
|
||||
export function createReadOnlyTools(cwd: string): Tool[] {
|
||||
return [createReadTool(cwd), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
|
||||
export function createReadOnlyTools(cwd: string, options?: ToolsOptions): Tool[] {
|
||||
return [createReadTool(cwd, options?.read), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create all tools configured for a specific working directory.
|
||||
*/
|
||||
export function createAllTools(cwd: string): Record<ToolName, Tool> {
|
||||
export function createAllTools(cwd: string, options?: ToolsOptions): Record<ToolName, Tool> {
|
||||
return {
|
||||
read: createReadTool(cwd),
|
||||
read: createReadTool(cwd, options?.read),
|
||||
bash: createBashTool(cwd),
|
||||
edit: createEditTool(cwd),
|
||||
write: createWriteTool(cwd),
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { constants } from "fs";
|
||||
import { access, readFile } from "fs/promises";
|
||||
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
|
||||
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
|
||||
import { resolveReadPath } from "./path-utils.js";
|
||||
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
|
||||
|
|
@ -17,7 +18,13 @@ export interface ReadToolDetails {
|
|||
truncation?: TruncationResult;
|
||||
}
|
||||
|
||||
export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
|
||||
/** Options accepted by `createReadTool`. */
export interface ReadToolOptions {
	/**
	 * When true, images returned by the read tool are passed through the
	 * auto-resizer (2000x2000 max). Treated as true when omitted.
	 */
	autoResizeImages?: boolean;
}
|
||||
|
||||
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
|
||||
const autoResizeImages = options?.autoResizeImages ?? true;
|
||||
return {
|
||||
name: "read",
|
||||
label: "read",
|
||||
|
|
@ -72,10 +79,26 @@ export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
|
|||
const buffer = await readFile(absolutePath);
|
||||
const base64 = buffer.toString("base64");
|
||||
|
||||
content = [
|
||||
{ type: "text", text: `Read image file [${mimeType}]` },
|
||||
{ type: "image", data: base64, mimeType },
|
||||
];
|
||||
if (autoResizeImages) {
|
||||
// Resize image if needed
|
||||
const resized = await resizeImage({ type: "image", data: base64, mimeType });
|
||||
const dimensionNote = formatDimensionNote(resized);
|
||||
|
||||
let textNote = `Read image file [${resized.mimeType}]`;
|
||||
if (dimensionNote) {
|
||||
textNote += `\n${dimensionNote}`;
|
||||
}
|
||||
|
||||
content = [
|
||||
{ type: "text", text: textNote },
|
||||
{ type: "image", data: resized.data, mimeType: resized.mimeType },
|
||||
];
|
||||
} else {
|
||||
content = [
|
||||
{ type: "text", text: `Read image file [${mimeType}]` },
|
||||
{ type: "image", data: base64, mimeType },
|
||||
];
|
||||
}
|
||||
} else {
|
||||
// Read as text
|
||||
const textContent = await readFile(absolutePath, "utf-8");
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ export {
|
|||
} from "./core/session-manager.js";
|
||||
export {
|
||||
type CompactionSettings,
|
||||
type ImageSettings,
|
||||
type RetrySettings,
|
||||
type Settings,
|
||||
SettingsManager,
|
||||
|
|
@ -149,7 +150,9 @@ export {
|
|||
type LsToolDetails,
|
||||
lsTool,
|
||||
type ReadToolDetails,
|
||||
type ReadToolOptions,
|
||||
readTool,
|
||||
type ToolsOptions,
|
||||
type TruncationResult,
|
||||
writeTool,
|
||||
} from "./core/tools/index.js";
|
||||
|
|
|
|||
|
|
@ -119,7 +119,10 @@ async function runInteractiveMode(
|
|||
}
|
||||
}
|
||||
|
||||
async function prepareInitialMessage(parsed: Args): Promise<{
|
||||
async function prepareInitialMessage(
|
||||
parsed: Args,
|
||||
autoResizeImages: boolean,
|
||||
): Promise<{
|
||||
initialMessage?: string;
|
||||
initialImages?: ImageContent[];
|
||||
}> {
|
||||
|
|
@ -127,7 +130,7 @@ async function prepareInitialMessage(parsed: Args): Promise<{
|
|||
return {};
|
||||
}
|
||||
|
||||
const { text, images } = await processFileArguments(parsed.fileArgs);
|
||||
const { text, images } = await processFileArguments(parsed.fileArgs, { autoResizeImages });
|
||||
|
||||
let initialMessage: string;
|
||||
if (parsed.messages.length > 0) {
|
||||
|
|
@ -329,13 +332,12 @@ export async function main(args: string[]) {
|
|||
}
|
||||
|
||||
const cwd = process.cwd();
|
||||
const { initialMessage, initialImages } = await prepareInitialMessage(parsed);
|
||||
const settingsManager = SettingsManager.create(cwd);
|
||||
time("SettingsManager.create");
|
||||
const { initialMessage, initialImages } = await prepareInitialMessage(parsed, settingsManager.getImageAutoResize());
|
||||
time("prepareInitialMessage");
|
||||
const isInteractive = !parsed.print && parsed.mode === undefined;
|
||||
const mode = parsed.mode || "text";
|
||||
|
||||
const settingsManager = SettingsManager.create(cwd);
|
||||
time("SettingsManager.create");
|
||||
initTheme(settingsManager.getTheme(), isInteractive);
|
||||
time("initTheme");
|
||||
|
||||
|
|
|
|||
136
packages/coding-agent/src/utils/image-resize.ts
Normal file
136
packages/coding-agent/src/utils/image-resize.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
|
||||
/** Tunables for `resizeImage`. Every field is optional and falls back to a default. */
export interface ImageResizeOptions {
	/** Maximum output width in pixels. Default: 2000 */
	maxWidth?: number;
	/** Maximum output height in pixels. Default: 2000 */
	maxHeight?: number;
	/** Quality passed to the JPEG encoder when the output is JPEG. Default: 80 */
	jpegQuality?: number;
}
|
||||
|
||||
/** Outcome of `resizeImage`: the (possibly re-encoded) image plus size bookkeeping. */
export interface ResizedImage {
	/** Image bytes, base64-encoded. */
	data: string;
	/** MIME type describing `data`. */
	mimeType: string;
	/** Width of the input image in pixels (0 when it could not be determined). */
	originalWidth: number;
	/** Height of the input image in pixels (0 when it could not be determined). */
	originalHeight: number;
	/** Width of the returned image in pixels (0 when it could not be determined). */
	width: number;
	/** Height of the returned image in pixels (0 when it could not be determined). */
	height: number;
	/** True only when the image was actually scaled down. */
	wasResized: boolean;
}
|
||||
|
||||
/** Fallback values used by `resizeImage` for any option the caller leaves out. */
const DEFAULT_OPTIONS: Required<ImageResizeOptions> = { maxWidth: 2000, maxHeight: 2000, jpegQuality: 80 };
|
||||
|
||||
/**
 * Resize an image so that it fits within the given maximum dimensions,
 * preserving aspect ratio.
 *
 * - If the image already fits, the original data is returned untouched.
 * - If `sharp` cannot be loaded, the original data is returned untouched and
 *   all dimensions are reported as 0 (they cannot be measured without sharp).
 * - JPEG, PNG and WebP inputs keep their format. GIF inputs are converted to
 *   PNG (animation is not preserved). Any other format is converted to JPEG.
 *
 * The returned `mimeType` always matches the encoding of the returned `data`,
 * and `width`/`height` are the dimensions sharp actually produced.
 *
 * NOTE(review): EXIF orientation is not applied here; the measured dimensions
 * are those stored in the file. Confirm whether auto-rotation is wanted.
 *
 * @param img - Image to resize; `data` must be base64-encoded.
 * @param options - Optional limits and JPEG quality. Fields that are omitted
 *   or explicitly `undefined` fall back to the defaults.
 * @returns The image (resized or original) with size bookkeeping.
 * @throws Whatever sharp throws when the data cannot be decoded or encoded.
 */
export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
	// Resolve field by field: spreading `options` would let an explicit
	// `undefined` overwrite a default and break the comparisons below.
	const opts: Required<ImageResizeOptions> = {
		maxWidth: options?.maxWidth ?? DEFAULT_OPTIONS.maxWidth,
		maxHeight: options?.maxHeight ?? DEFAULT_OPTIONS.maxHeight,
		jpegQuality: options?.jpegQuality ?? DEFAULT_OPTIONS.jpegQuality,
	};

	let sharp: typeof import("sharp") | undefined;
	try {
		sharp = (await import("sharp")).default;
	} catch {
		// Sharp not available - return original image.
		// We can't get dimensions without sharp, so report 0s.
		return {
			data: img.data,
			mimeType: img.mimeType,
			originalWidth: 0,
			originalHeight: 0,
			width: 0,
			height: 0,
			wasResized: false,
		};
	}

	const buffer = Buffer.from(img.data, "base64");
	const metadata = await sharp(buffer).metadata();

	const width = metadata.width ?? 0;
	const height = metadata.height ?? 0;
	const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";

	// Already within limits: hand back the original bytes.
	if (width <= opts.maxWidth && height <= opts.maxHeight) {
		return {
			data: img.data,
			mimeType: img.mimeType ?? `image/${format}`,
			originalWidth: width,
			originalHeight: height,
			width,
			height,
			wasResized: false,
		};
	}

	// Calculate the bounding box, maintaining aspect ratio. Each dimension is
	// clamped to at least 1px: extreme aspect ratios would otherwise round to
	// 0, which sharp rejects.
	const toPixels = (value: number): number => Math.max(1, Math.round(value));
	let newWidth = width;
	let newHeight = height;

	if (newWidth > opts.maxWidth) {
		newHeight = toPixels((newHeight * opts.maxWidth) / newWidth);
		newWidth = opts.maxWidth;
	}
	if (newHeight > opts.maxHeight) {
		newWidth = toPixels((newWidth * opts.maxHeight) / newHeight);
		newHeight = opts.maxHeight;
	}

	const pipeline = sharp(buffer).resize(newWidth, newHeight, { fit: "inside", withoutEnlargement: true });

	// Pick the encoder explicitly and encode exactly once, so the bytes always
	// match the reported MIME type and JPEGs are not compressed twice.
	let outputMimeType: string;
	let encoder: typeof pipeline;

	switch (format) {
		case "jpeg":
		case "jpg":
			encoder = pipeline.jpeg({ quality: opts.jpegQuality });
			outputMimeType = "image/jpeg";
			break;
		case "png":
			encoder = pipeline.png();
			outputMimeType = "image/png";
			break;
		case "gif":
			// GIF resize might not preserve animation; convert to PNG for quality.
			encoder = pipeline.png();
			outputMimeType = "image/png";
			break;
		case "webp":
			encoder = pipeline.webp();
			outputMimeType = "image/webp";
			break;
		default:
			// Default to JPEG for unknown formats.
			encoder = pipeline.jpeg({ quality: opts.jpegQuality });
			outputMimeType = "image/jpeg";
			break;
	}

	// `info` carries the real output size, which can differ from the requested
	// box by a pixel because of rounding inside sharp.
	const { data, info } = await encoder.toBuffer({ resolveWithObject: true });

	return {
		data: data.toString("base64"),
		mimeType: outputMimeType,
		originalWidth: width,
		originalHeight: height,
		width: info.width,
		height: info.height,
		wasResized: true,
	};
}
|
||||
|
||||
/**
 * Format a dimension note for resized images.
 * This helps the model understand the coordinate mapping back to the original.
 *
 * @param result - Outcome of a resize operation.
 * @returns A bracketed note with original size, displayed size and scale
 *   factor, or `undefined` when the image was not resized or when no finite
 *   scale factor can be computed (missing or zero widths).
 */
export function formatDimensionNote(result: ResizedImage): string | undefined {
	if (!result.wasResized) {
		return undefined;
	}

	// Without positive widths the ratio would be Infinity or NaN; emitting
	// that would mislead the model, so emit no note at all.
	if (!(result.width > 0) || !(result.originalWidth > 0)) {
		return undefined;
	}

	const scale = result.originalWidth / result.width;
	return `[Image: original ${result.originalWidth}x${result.originalHeight}, displayed at ${result.width}x${result.height}. Multiply coordinates by ${scale.toFixed(2)} to map to original image.]`;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue