diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 528d1011..e50f0f28 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -5,6 +5,7 @@ ### Added - Session ID is now forwarded to LLM providers for session-based caching (used by OpenAI Codex for prompt caching). +- Added `blockImages` setting to prevent images from being sent to LLM providers ([#492](https://github.com/badlogic/pi-mono/pull/492) by [@jsinge97](https://github.com/jsinge97)) ### Fixed diff --git a/packages/coding-agent/src/core/sdk.ts b/packages/coding-agent/src/core/sdk.ts index ce11b71d..a95ba544 100644 --- a/packages/coding-agent/src/core/sdk.ts +++ b/packages/coding-agent/src/core/sdk.ts @@ -20,8 +20,8 @@ * ``` */ -import { Agent, type AgentTool, type ThinkingLevel } from "@mariozechner/pi-agent-core"; -import type { Model } from "@mariozechner/pi-ai"; +import { Agent, type AgentMessage, type AgentTool, type ThinkingLevel } from "@mariozechner/pi-agent-core"; +import type { Message, Model } from "@mariozechner/pi-ai"; import { join } from "path"; import { getAgentDir } from "../config.js"; import { AgentSession } from "./agent-session.js"; @@ -300,6 +300,7 @@ export function loadSettings(cwd?: string, agentDir?: string): Settings { extensions: manager.getExtensionPaths(), skills: manager.getSkillsSettings(), terminal: { showImages: manager.getShowImages() }, + images: { autoResize: manager.getImageAutoResize(), blockImages: manager.getBlockImages() }, }; } @@ -605,6 +606,43 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} const promptTemplates = options.promptTemplates ?? discoverPromptTemplates(cwd, agentDir); time("discoverPromptTemplates"); + // Create convertToLlm wrapper that filters images if blockImages is enabled (defense-in-depth) + const convertToLlmWithBlockImages = (messages: AgentMessage[]): Message[] => { + const converted = convertToLlm(messages); + // Check setting dynamically so mid-session changes take effect + if (!settingsManager.getBlockImages()) { + return converted; + } + // Filter out ImageContent from all messages, replacing with text placeholder + return converted.map((msg) => { + if (msg.role === "user" || msg.role === "toolResult") { + const content = msg.content; + if (Array.isArray(content)) { + const hasImages = content.some((c) => c.type === "image"); + if (hasImages) { + const filteredContent = content + .map((c) => + c.type === "image" ? { type: "text" as const, text: "Image reading is disabled." } : c, + ) + .filter( + (c, i, arr) => + // Dedupe consecutive "Image reading is disabled." texts + !( + c.type === "text" && + c.text === "Image reading is disabled." && + i > 0 && + arr[i - 1].type === "text" && + (arr[i - 1] as { type: "text"; text: string }).text === "Image reading is disabled." + ), + ); + return { ...msg, content: filteredContent }; + } + } + } + return msg; + }); + }; + agent = new Agent({ initialState: { systemPrompt, @@ -612,7 +650,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} thinkingLevel, tools: activeToolsArray, }, - convertToLlm, + convertToLlm: convertToLlmWithBlockImages, sessionId: sessionManager.getSessionId(), transformContext: extensionRunner ? async (messages) => { diff --git a/packages/coding-agent/src/core/settings-manager.ts b/packages/coding-agent/src/core/settings-manager.ts index 996a804e..e9f29b5d 100644 --- a/packages/coding-agent/src/core/settings-manager.ts +++ b/packages/coding-agent/src/core/settings-manager.ts @@ -36,6 +36,7 @@ export interface TerminalSettings { export interface ImageSettings { autoResize?: boolean; // default: true (resize images to 2000x2000 max for better model compatibility) + blockImages?: boolean; // default: false - when true, prevents all images from being sent to LLM providers } export interface Settings { @@ -170,23 +171,23 @@ export class SettingsManager { } private save(): void { - if (!this.persist || !this.settingsPath) return; + if (this.persist && this.settingsPath) { + try { + const dir = dirname(this.settingsPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } - try { - const dir = dirname(this.settingsPath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); + // Save only global settings (project settings are read-only) + writeFileSync(this.settingsPath, JSON.stringify(this.globalSettings, null, 2), "utf-8"); + } catch (error) { + console.error(`Warning: Could not save settings file: ${error}`); } - - // Save only global settings (project settings are read-only) - writeFileSync(this.settingsPath, JSON.stringify(this.globalSettings, null, 2), "utf-8"); - - // Re-merge project settings into active settings - const projectSettings = this.loadProjectSettings(); - this.settings = deepMergeSettings(this.globalSettings, projectSettings); - } catch (error) { - console.error(`Warning: Could not save settings file: ${error}`); } + + // Always re-merge to update active settings (needed for both file and inMemory modes) + const projectSettings = this.loadProjectSettings(); + this.settings = deepMergeSettings(this.globalSettings, projectSettings); } getLastChangelogVersion(): string | undefined { @@ -398,6 +399,18 @@ export class SettingsManager { this.save(); } + getBlockImages(): boolean { + return this.settings.images?.blockImages ?? false; + } + + setBlockImages(blocked: boolean): void { + if (!this.globalSettings.images) { + this.globalSettings.images = {}; + } + this.globalSettings.images.blockImages = blocked; + this.save(); + } + getEnabledModels(): string[] | undefined { return this.settings.enabledModels; } diff --git a/packages/coding-agent/src/modes/interactive/components/settings-selector.ts b/packages/coding-agent/src/modes/interactive/components/settings-selector.ts index a04f63cc..681742a4 100644 --- a/packages/coding-agent/src/modes/interactive/components/settings-selector.ts +++ b/packages/coding-agent/src/modes/interactive/components/settings-selector.ts @@ -25,6 +25,7 @@ export interface SettingsConfig { autoCompact: boolean; showImages: boolean; autoResizeImages: boolean; + blockImages: boolean; steeringMode: "all" | "one-at-a-time"; followUpMode: "all" | "one-at-a-time"; thinkingLevel: ThinkingLevel; @@ -40,6 +41,7 @@ export interface SettingsCallbacks { onAutoCompactChange: (enabled: boolean) => void; onShowImagesChange: (enabled: boolean) => void; onAutoResizeImagesChange: (enabled: boolean) => void; + onBlockImagesChange: (blocked: boolean) => void; onSteeringModeChange: (mode: "all" | "one-at-a-time") => void; onFollowUpModeChange: (mode: "all" | "one-at-a-time") => void; onThinkingLevelChange: (level: ThinkingLevel) => void; @@ -243,6 +245,16 @@ export class SettingsSelectorComponent extends Container { values: ["true", "false"], }); + // Block images toggle (always available, insert after auto-resize-images) + const autoResizeIndex = items.findIndex((item) => item.id === "auto-resize-images"); + items.splice(autoResizeIndex + 1, 0, { + id: "block-images", + label: "Block images", + description: "Prevent images from being sent to LLM providers", + currentValue: config.blockImages ? "true" : "false", + values: ["true", "false"], + }); + // Add borders this.addChild(new DynamicBorder()); @@ -261,6 +273,9 @@ export class SettingsSelectorComponent extends Container { case "auto-resize-images": callbacks.onAutoResizeImagesChange(newValue === "true"); break; + case "block-images": + callbacks.onBlockImagesChange(newValue === "true"); + break; case "steering-mode": callbacks.onSteeringModeChange(newValue as "all" | "one-at-a-time"); break; diff --git a/packages/coding-agent/src/modes/interactive/interactive-mode.ts b/packages/coding-agent/src/modes/interactive/interactive-mode.ts index 26d30573..a799ef6f 100644 --- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts @@ -1936,6 +1936,7 @@ export class InteractiveMode { autoCompact: this.session.autoCompactionEnabled, showImages: this.settingsManager.getShowImages(), autoResizeImages: this.settingsManager.getImageAutoResize(), + blockImages: this.settingsManager.getBlockImages(), steeringMode: this.session.steeringMode, followUpMode: this.session.followUpMode, thinkingLevel: this.session.thinkingLevel, @@ -1962,6 +1963,9 @@ export class InteractiveMode { onAutoResizeImagesChange: (enabled) => { this.settingsManager.setImageAutoResize(enabled); }, + onBlockImagesChange: (blocked) => { + this.settingsManager.setBlockImages(blocked); + }, onSteeringModeChange: (mode) => { this.session.setSteeringMode(mode); }, diff --git a/packages/coding-agent/test/block-images.test.ts b/packages/coding-agent/test/block-images.test.ts new file mode 100644 index 00000000..2c599276 --- /dev/null +++ b/packages/coding-agent/test/block-images.test.ts @@ -0,0 +1,120 @@ +import { mkdirSync, rmSync, writeFileSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { processFileArguments } from "../src/cli/file-processor.js"; +import { SettingsManager } from "../src/core/settings-manager.js"; +import { createReadTool } from "../src/core/tools/read.js"; + +// 1x1 red PNG image as base64 (smallest valid PNG) +const TINY_PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="; + +describe("blockImages setting", () => { + describe("SettingsManager", () => { + it("should default blockImages to false", () => { + const manager = SettingsManager.inMemory({}); + expect(manager.getBlockImages()).toBe(false); + }); + + it("should return true when blockImages is set to true", () => { + const manager = SettingsManager.inMemory({ images: { blockImages: true } }); + expect(manager.getBlockImages()).toBe(true); + }); + + it("should persist blockImages setting via setBlockImages", () => { + const manager = SettingsManager.inMemory({}); + expect(manager.getBlockImages()).toBe(false); + + manager.setBlockImages(true); + expect(manager.getBlockImages()).toBe(true); + + manager.setBlockImages(false); + expect(manager.getBlockImages()).toBe(false); + }); + + it("should handle blockImages alongside autoResize", () => { + const manager = SettingsManager.inMemory({ + images: { autoResize: true, blockImages: true }, + }); + expect(manager.getImageAutoResize()).toBe(true); + expect(manager.getBlockImages()).toBe(true); + }); + }); + + describe("Read tool", () => { + let testDir: string; + + beforeEach(() => { + testDir = join(tmpdir(), `block-images-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("should always read images (filtering happens at convertToLlm layer)", async () => { + // Create test image + const imagePath = join(testDir, "test.png"); + writeFileSync(imagePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + const tool = createReadTool(testDir); + const result = await tool.execute("test-1", { path: imagePath }); + + // Should have text note + image content + expect(result.content.length).toBeGreaterThanOrEqual(1); + const hasImage = result.content.some((c) => c.type === "image"); + expect(hasImage).toBe(true); + }); + + it("should read text files normally", async () => { + // Create test text file + const textPath = join(testDir, "test.txt"); + writeFileSync(textPath, "Hello, world!"); + + const tool = createReadTool(testDir); + const result = await tool.execute("test-2", { path: textPath }); + + expect(result.content).toHaveLength(1); + expect(result.content[0].type).toBe("text"); + const textContent = result.content[0] as { type: "text"; text: string }; + expect(textContent.text).toContain("Hello, world!"); + }); + }); + + describe("processFileArguments", () => { + let testDir: string; + + beforeEach(() => { + testDir = join(tmpdir(), `block-images-process-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("should always process images (filtering happens at convertToLlm layer)", async () => { + // Create test image + const imagePath = join(testDir, "test.png"); + writeFileSync(imagePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + const result = await processFileArguments([imagePath]); + + expect(result.images).toHaveLength(1); + expect(result.images[0].type).toBe("image"); + }); + + it("should process text files normally", async () => { + // Create test text file + const textPath = join(testDir, "test.txt"); + writeFileSync(textPath, "Hello, world!"); + + const result = await processFileArguments([textPath]); + + expect(result.images).toHaveLength(0); + expect(result.text).toContain("Hello, world!"); + }); + }); +});