mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 22:03:45 +00:00
Merge pull request #402 from mitsuhiko/image-resize
Added automatic image resizing
This commit is contained in:
commit
e82af9da47
10 changed files with 247 additions and 29 deletions
|
|
@ -7,6 +7,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
|
|||
import chalk from "chalk";
|
||||
import { resolve } from "path";
|
||||
import { resolveReadPath } from "../core/tools/path-utils.js";
|
||||
import { formatDimensionNote, resizeImage } from "../utils/image-resize.js";
|
||||
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
|
||||
|
||||
export interface ProcessedFiles {
|
||||
|
|
@ -14,8 +15,14 @@ export interface ProcessedFiles {
|
|||
images: ImageContent[];
|
||||
}
|
||||
|
||||
/** Options accepted by `processFileArguments`. */
export interface ProcessFileOptions {
	/**
	 * When enabled, image attachments are passed through the auto-resizer
	 * (2000x2000 max) before being attached. Treated as `true` when omitted.
	 */
	autoResizeImages?: boolean;
}
|
||||
|
||||
/** Process @file arguments into text content and image attachments */
|
||||
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> {
|
||||
export async function processFileArguments(fileArgs: string[], options?: ProcessFileOptions): Promise<ProcessedFiles> {
|
||||
const autoResizeImages = options?.autoResizeImages ?? true;
|
||||
let text = "";
|
||||
const images: ImageContent[] = [];
|
||||
|
||||
|
|
@ -45,16 +52,33 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
|
|||
const content = await readFile(absolutePath);
|
||||
const base64Content = content.toString("base64");
|
||||
|
||||
const attachment: ImageContent = {
|
||||
type: "image",
|
||||
mimeType,
|
||||
data: base64Content,
|
||||
};
|
||||
let attachment: ImageContent;
|
||||
let dimensionNote: string | undefined;
|
||||
|
||||
if (autoResizeImages) {
|
||||
const resized = await resizeImage({ type: "image", data: base64Content, mimeType });
|
||||
dimensionNote = formatDimensionNote(resized);
|
||||
attachment = {
|
||||
type: "image",
|
||||
mimeType: resized.mimeType,
|
||||
data: resized.data,
|
||||
};
|
||||
} else {
|
||||
attachment = {
|
||||
type: "image",
|
||||
mimeType,
|
||||
data: base64Content,
|
||||
};
|
||||
}
|
||||
|
||||
images.push(attachment);
|
||||
|
||||
// Add text reference to image
|
||||
text += `<file name="${absolutePath}"></file>\n`;
|
||||
// Add text reference to image with optional dimension note
|
||||
if (dimensionNote) {
|
||||
text += `<file name="${absolutePath}">${dimensionNote}</file>\n`;
|
||||
} else {
|
||||
text += `<file name="${absolutePath}"></file>\n`;
|
||||
}
|
||||
} else {
|
||||
// Handle text file
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -528,7 +528,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
const contextFiles = options.contextFiles ?? discoverContextFiles(cwd, agentDir);
|
||||
time("discoverContextFiles");
|
||||
|
||||
const builtInTools = options.tools ?? createCodingTools(cwd);
|
||||
const autoResizeImages = settingsManager.getImageAutoResize();
|
||||
const builtInTools = options.tools ?? createCodingTools(cwd, { read: { autoResizeImages } });
|
||||
time("createCodingTools");
|
||||
|
||||
let customToolsResult: CustomToolsLoadResult;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,10 @@ export interface TerminalSettings {
|
|||
showImages?: boolean; // default: true (only relevant if terminal supports images)
|
||||
}
|
||||
|
||||
/** Settings controlling how image inputs are handled. */
export interface ImageSettings {
	/**
	 * Resize images to 2000x2000 max for better model compatibility.
	 * Default: true.
	 */
	autoResize?: boolean;
}
|
||||
|
||||
export interface Settings {
|
||||
lastChangelogVersion?: string;
|
||||
defaultProvider?: string;
|
||||
|
|
@ -52,6 +56,7 @@ export interface Settings {
|
|||
customTools?: string[]; // Array of custom tool file paths
|
||||
skills?: SkillsSettings;
|
||||
terminal?: TerminalSettings;
|
||||
images?: ImageSettings;
|
||||
enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
|
||||
}
|
||||
|
||||
|
|
@ -390,6 +395,18 @@ export class SettingsManager {
|
|||
this.save();
|
||||
}
|
||||
|
||||
/** Returns the `images.autoResize` setting, falling back to `true` when it is not set. */
getImageAutoResize(): boolean {
	const configured = this.settings.images?.autoResize;
	return configured ?? true;
}
|
||||
|
||||
/**
 * Writes `enabled` to `globalSettings.images.autoResize`, creating the
 * `images` section first if it is missing, then calls `save()`.
 */
setImageAutoResize(enabled: boolean): void {
	const imageSettings = this.globalSettings.images || {};
	this.globalSettings.images = imageSettings;
	imageSettings.autoResize = enabled;
	this.save();
}
|
||||
|
||||
getEnabledModels(): string[] | undefined {
|
||||
return this.settings.enabledModels;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ export { createEditTool, editTool } from "./edit.js";
|
|||
export { createFindTool, type FindToolDetails, findTool } from "./find.js";
|
||||
export { createGrepTool, type GrepToolDetails, grepTool } from "./grep.js";
|
||||
export { createLsTool, type LsToolDetails, lsTool } from "./ls.js";
|
||||
export { createReadTool, type ReadToolDetails, readTool } from "./read.js";
|
||||
export { createReadTool, type ReadToolDetails, type ReadToolOptions, readTool } from "./read.js";
|
||||
export type { TruncationResult } from "./truncate.js";
|
||||
export { createWriteTool, writeTool } from "./write.js";
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ import { createEditTool, editTool } from "./edit.js";
|
|||
import { createFindTool, findTool } from "./find.js";
|
||||
import { createGrepTool, grepTool } from "./grep.js";
|
||||
import { createLsTool, lsTool } from "./ls.js";
|
||||
import { createReadTool, readTool } from "./read.js";
|
||||
import { createReadTool, type ReadToolOptions, readTool } from "./read.js";
|
||||
import { createWriteTool, writeTool } from "./write.js";
|
||||
|
||||
/** Tool type (AgentTool from pi-ai) */
|
||||
|
|
@ -38,26 +38,31 @@ export const allTools = {
|
|||
|
||||
export type ToolName = keyof typeof allTools;
|
||||
|
||||
/** Per-tool configuration accepted by the tool factory functions. */
export interface ToolsOptions {
	/** Forwarded to `createReadTool` as its options argument. */
	read?: ReadToolOptions;
}
|
||||
|
||||
/**
|
||||
* Create coding tools configured for a specific working directory.
|
||||
*/
|
||||
export function createCodingTools(cwd: string): Tool[] {
|
||||
return [createReadTool(cwd), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
|
||||
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
|
||||
return [createReadTool(cwd, options?.read), createBashTool(cwd), createEditTool(cwd), createWriteTool(cwd)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create read-only tools configured for a specific working directory.
|
||||
*/
|
||||
export function createReadOnlyTools(cwd: string): Tool[] {
|
||||
return [createReadTool(cwd), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
|
||||
export function createReadOnlyTools(cwd: string, options?: ToolsOptions): Tool[] {
|
||||
return [createReadTool(cwd, options?.read), createGrepTool(cwd), createFindTool(cwd), createLsTool(cwd)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create all tools configured for a specific working directory.
|
||||
*/
|
||||
export function createAllTools(cwd: string): Record<ToolName, Tool> {
|
||||
export function createAllTools(cwd: string, options?: ToolsOptions): Record<ToolName, Tool> {
|
||||
return {
|
||||
read: createReadTool(cwd),
|
||||
read: createReadTool(cwd, options?.read),
|
||||
bash: createBashTool(cwd),
|
||||
edit: createEditTool(cwd),
|
||||
write: createWriteTool(cwd),
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { constants } from "fs";
|
||||
import { access, readFile } from "fs/promises";
|
||||
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
|
||||
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
|
||||
import { resolveReadPath } from "./path-utils.js";
|
||||
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
|
||||
|
|
@ -17,7 +18,13 @@ export interface ReadToolDetails {
|
|||
truncation?: TruncationResult;
|
||||
}
|
||||
|
||||
export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
|
||||
/** Options accepted by `createReadTool`. */
export interface ReadToolOptions {
	/**
	 * When enabled, images read by the tool are passed through the
	 * auto-resizer (2000x2000 max). Treated as `true` when omitted.
	 */
	autoResizeImages?: boolean;
}
|
||||
|
||||
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
|
||||
const autoResizeImages = options?.autoResizeImages ?? true;
|
||||
return {
|
||||
name: "read",
|
||||
label: "read",
|
||||
|
|
@ -72,10 +79,26 @@ export function createReadTool(cwd: string): AgentTool<typeof readSchema> {
|
|||
const buffer = await readFile(absolutePath);
|
||||
const base64 = buffer.toString("base64");
|
||||
|
||||
content = [
|
||||
{ type: "text", text: `Read image file [${mimeType}]` },
|
||||
{ type: "image", data: base64, mimeType },
|
||||
];
|
||||
if (autoResizeImages) {
|
||||
// Resize image if needed
|
||||
const resized = await resizeImage({ type: "image", data: base64, mimeType });
|
||||
const dimensionNote = formatDimensionNote(resized);
|
||||
|
||||
let textNote = `Read image file [${resized.mimeType}]`;
|
||||
if (dimensionNote) {
|
||||
textNote += `\n${dimensionNote}`;
|
||||
}
|
||||
|
||||
content = [
|
||||
{ type: "text", text: textNote },
|
||||
{ type: "image", data: resized.data, mimeType: resized.mimeType },
|
||||
];
|
||||
} else {
|
||||
content = [
|
||||
{ type: "text", text: `Read image file [${mimeType}]` },
|
||||
{ type: "image", data: base64, mimeType },
|
||||
];
|
||||
}
|
||||
} else {
|
||||
// Read as text
|
||||
const textContent = await readFile(absolutePath, "utf-8");
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ export {
|
|||
} from "./core/session-manager.js";
|
||||
export {
|
||||
type CompactionSettings,
|
||||
type ImageSettings,
|
||||
type RetrySettings,
|
||||
type Settings,
|
||||
SettingsManager,
|
||||
|
|
@ -149,7 +150,9 @@ export {
|
|||
type LsToolDetails,
|
||||
lsTool,
|
||||
type ReadToolDetails,
|
||||
type ReadToolOptions,
|
||||
readTool,
|
||||
type ToolsOptions,
|
||||
type TruncationResult,
|
||||
writeTool,
|
||||
} from "./core/tools/index.js";
|
||||
|
|
|
|||
|
|
@ -119,7 +119,10 @@ async function runInteractiveMode(
|
|||
}
|
||||
}
|
||||
|
||||
async function prepareInitialMessage(parsed: Args): Promise<{
|
||||
async function prepareInitialMessage(
|
||||
parsed: Args,
|
||||
autoResizeImages: boolean,
|
||||
): Promise<{
|
||||
initialMessage?: string;
|
||||
initialImages?: ImageContent[];
|
||||
}> {
|
||||
|
|
@ -127,7 +130,7 @@ async function prepareInitialMessage(parsed: Args): Promise<{
|
|||
return {};
|
||||
}
|
||||
|
||||
const { text, images } = await processFileArguments(parsed.fileArgs);
|
||||
const { text, images } = await processFileArguments(parsed.fileArgs, { autoResizeImages });
|
||||
|
||||
let initialMessage: string;
|
||||
if (parsed.messages.length > 0) {
|
||||
|
|
@ -329,13 +332,12 @@ export async function main(args: string[]) {
|
|||
}
|
||||
|
||||
const cwd = process.cwd();
|
||||
const { initialMessage, initialImages } = await prepareInitialMessage(parsed);
|
||||
const settingsManager = SettingsManager.create(cwd);
|
||||
time("SettingsManager.create");
|
||||
const { initialMessage, initialImages } = await prepareInitialMessage(parsed, settingsManager.getImageAutoResize());
|
||||
time("prepareInitialMessage");
|
||||
const isInteractive = !parsed.print && parsed.mode === undefined;
|
||||
const mode = parsed.mode || "text";
|
||||
|
||||
const settingsManager = SettingsManager.create(cwd);
|
||||
time("SettingsManager.create");
|
||||
initTheme(settingsManager.getTheme(), isInteractive);
|
||||
time("initTheme");
|
||||
|
||||
|
|
|
|||
136
packages/coding-agent/src/utils/image-resize.ts
Normal file
136
packages/coding-agent/src/utils/image-resize.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
|
||||
/** Tunables for `resizeImage`; every field is optional. */
export interface ImageResizeOptions {
	/** Maximum output width in pixels. Default: 2000 */
	maxWidth?: number;
	/** Maximum output height in pixels. Default: 2000 */
	maxHeight?: number;
	/** Quality passed to the JPEG encoder. Default: 80 */
	jpegQuality?: number;
}
|
||||
|
||||
/** Result of `resizeImage`: the (possibly re-encoded) image plus size bookkeeping. */
export interface ResizedImage {
	/** Image bytes, base64-encoded. */
	data: string;
	/** MIME type describing `data`. */
	mimeType: string;
	/** Width of the input image in pixels (0 when it could not be determined). */
	originalWidth: number;
	/** Height of the input image in pixels (0 when it could not be determined). */
	originalHeight: number;
	/** Width of the returned image in pixels (0 when it could not be determined). */
	width: number;
	/** Height of the returned image in pixels (0 when it could not be determined). */
	height: number;
	/** True only when the image was actually scaled down. */
	wasResized: boolean;
}
|
||||
|
||||
/** Fallback values merged underneath caller-supplied `ImageResizeOptions`. */
const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
	jpegQuality: 80,
	maxHeight: 2000,
	maxWidth: 2000,
};
|
||||
|
||||
/**
 * Resize an image so it fits within the configured maximum dimensions.
 *
 * The image is returned untouched (same base64 data, `wasResized: false`) when
 * it already fits, or when the optional `sharp` dependency cannot be imported.
 * In the latter case all dimensions are reported as 0 because they cannot be
 * measured without sharp.
 *
 * When scaling is needed the aspect ratio is preserved and the image is
 * encoded exactly once in the output format:
 * - JPEG stays JPEG (encoded with `jpegQuality`)
 * - PNG stays PNG
 * - WebP stays WebP
 * - GIF is converted to PNG (resizing may not preserve animation)
 * - anything else is converted to JPEG
 *
 * @param img - Image to process; `data` must be base64-encoded.
 * @param options - Overrides for the default limits (2000x2000, JPEG quality 80).
 * @returns The resulting image data, its MIME type, and original/output dimensions.
 *
 * Errors raised by sharp while decoding or encoding are not caught here and
 * propagate to the caller.
 */
export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
	const opts = { ...DEFAULT_OPTIONS, ...options };
	const buffer = Buffer.from(img.data, "base64");

	let sharp: typeof import("sharp") | undefined;
	try {
		sharp = (await import("sharp")).default;
	} catch {
		// Sharp not available - return original image.
		// We can't get dimensions without sharp, so report 0s.
		return {
			data: img.data,
			mimeType: img.mimeType,
			originalWidth: 0,
			originalHeight: 0,
			width: 0,
			height: 0,
			wasResized: false,
		};
	}

	const metadata = await sharp(buffer).metadata();

	const width = metadata.width ?? 0;
	const height = metadata.height ?? 0;
	const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";

	// Already within limits: hand back the original bytes unchanged.
	if (width <= opts.maxWidth && height <= opts.maxHeight) {
		return {
			data: img.data,
			mimeType: img.mimeType ?? `image/${format}`,
			originalWidth: width,
			originalHeight: height,
			width,
			height,
			wasResized: false,
		};
	}

	// Calculate the target box while maintaining aspect ratio. Each computed
	// side is clamped to at least 1px: for extreme aspect ratios the rounded
	// value can reach 0, which sharp rejects as an invalid dimension.
	let newWidth = width;
	let newHeight = height;

	if (newWidth > opts.maxWidth) {
		newHeight = Math.max(1, Math.round((newHeight * opts.maxWidth) / newWidth));
		newWidth = opts.maxWidth;
	}
	if (newHeight > opts.maxHeight) {
		newWidth = Math.max(1, Math.round((newWidth * opts.maxHeight) / newHeight));
		newHeight = opts.maxHeight;
	}

	const pipeline = sharp(buffer).resize(newWidth, newHeight, { fit: "inside", withoutEnlargement: true });

	// Select the encoder explicitly so the bytes always match the reported MIME
	// type. Without an explicit encoder sharp keeps the input format, which
	// would leave GIF data labelled as PNG. Encoding in the same pipeline as
	// the resize also avoids compressing JPEGs twice.
	let outputMimeType: string;
	if (format === "png" || format === "gif") {
		// GIF resize might not preserve animation; convert to PNG for quality.
		pipeline.png();
		outputMimeType = "image/png";
	} else if (format === "webp") {
		pipeline.webp();
		outputMimeType = "image/webp";
	} else {
		// JPEG input, and the default for unknown formats.
		pipeline.jpeg({ quality: opts.jpegQuality });
		outputMimeType = "image/jpeg";
	}

	// Use the dimensions sharp actually produced rather than our estimate, so
	// downstream coordinate mapping matches the real output.
	const { data, info } = await pipeline.toBuffer({ resolveWithObject: true });

	return {
		data: data.toString("base64"),
		mimeType: outputMimeType,
		originalWidth: width,
		originalHeight: height,
		width: info.width,
		height: info.height,
		wasResized: true,
	};
}
|
||||
|
||||
/**
 * Format a dimension note for resized images.
 * This helps the model understand the coordinate mapping.
 *
 * The scale factor is derived from the longer original side, where rounding of
 * the resized dimension has the least influence. If the displayed size on that
 * side is not a positive number, the multiplier sentence is omitted instead of
 * printing `Infinity` or `NaN`.
 *
 * @param result - Outcome of a resize operation.
 * @returns The note text, or `undefined` when the image was not resized.
 */
export function formatDimensionNote(result: ResizedImage): string | undefined {
	if (!result.wasResized) {
		return undefined;
	}

	const sizes = `original ${result.originalWidth}x${result.originalHeight}, displayed at ${result.width}x${result.height}`;

	const widthIsDominant = result.originalWidth >= result.originalHeight;
	const originalSide = widthIsDominant ? result.originalWidth : result.originalHeight;
	const displayedSide = widthIsDominant ? result.width : result.height;

	const scale = originalSide / displayedSide;
	if (!(displayedSide > 0) || !Number.isFinite(scale)) {
		// No meaningful multiplier can be computed; still report the sizes.
		return `[Image: ${sizes}.]`;
	}

	return `[Image: ${sizes}. Multiply coordinates by ${scale.toFixed(2)} to map to original image.]`;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue