WIP: Major cleanup - move Attachment to consumers, simplify agent API

- Removed Attachment from agent package (now in web-ui/coding-agent)
- Agent.prompt now takes (text, images?: ImageContent[])
- Removed transports from web-ui (duplicate of agent package)
- Updated coding-agent to use local message types
- Updated mom package for new agent API

Remaining: Fix AgentInterface.ts to compose UserMessageWithAttachments
This commit is contained in:
Mario Zechner 2025-12-28 10:55:12 +01:00
parent f86dea2e4f
commit 6ddc7418da
57 changed files with 167 additions and 1061 deletions

View file

@ -3,21 +3,21 @@
*/
import { access, readFile, stat } from "node:fs/promises";
import type { Attachment } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import chalk from "chalk";
import { resolve } from "path";
import { resolveReadPath } from "../core/tools/path-utils.js";
import { detectSupportedImageMimeTypeFromFile } from "../utils/mime.js";
export interface ProcessedFiles {
textContent: string;
imageAttachments: Attachment[];
text: string;
images: ImageContent[];
}
/** Process @file arguments into text content and image attachments */
export async function processFileArguments(fileArgs: string[]): Promise<ProcessedFiles> {
let textContent = "";
const imageAttachments: Attachment[] = [];
let text = "";
const images: ImageContent[] = [];
for (const fileArg of fileArgs) {
// Expand and resolve path (handles ~ expansion and macOS screenshot Unicode spaces)
@ -45,24 +45,21 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
const content = await readFile(absolutePath);
const base64Content = content.toString("base64");
const attachment: Attachment = {
id: `file-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`,
const attachment: ImageContent = {
type: "image",
fileName: absolutePath.split("/").pop() || absolutePath,
mimeType,
size: stats.size,
content: base64Content,
data: base64Content,
};
imageAttachments.push(attachment);
images.push(attachment);
// Add text reference to image
textContent += `<file name="${absolutePath}"></file>\n`;
text += `<file name="${absolutePath}"></file>\n`;
} else {
// Handle text file
try {
const content = await readFile(absolutePath, "utf-8");
textContent += `<file name="${absolutePath}">\n${content}\n</file>\n`;
text += `<file name="${absolutePath}">\n${content}\n</file>\n`;
} catch (error: unknown) {
const message = error instanceof Error ? error.message : String(error);
console.error(chalk.red(`Error: Could not read file ${absolutePath}: ${message}`));
@ -71,5 +68,5 @@ export async function processFileArguments(fileArgs: string[]): Promise<Processe
}
}
return { textContent, imageAttachments };
return { text, images };
}

View file

@ -13,15 +13,8 @@
* Modes use this class and add their own I/O layer on top.
*/
import type {
Agent,
AgentEvent,
AgentMessage,
AgentState,
Attachment,
ThinkingLevel,
} from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Message, Model, TextContent } from "@mariozechner/pi-ai";
import type { Agent, AgentEvent, AgentMessage, AgentState, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, ImageContent, Message, Model, TextContent } from "@mariozechner/pi-ai";
import { isContextOverflow, modelsAreEqual, supportsXhigh } from "@mariozechner/pi-ai";
import { getAuthPath } from "../config.js";
import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js";
@ -83,8 +76,8 @@ export interface AgentSessionConfig {
export interface PromptOptions {
/** Whether to expand file-based slash commands (default: true) */
expandSlashCommands?: boolean;
/** Image/file attachments */
attachments?: Attachment[];
/** Image attachments */
images?: ImageContent[];
}
/** Result from cycleModel() */
@ -492,7 +485,7 @@ export class AgentSession {
// Expand file-based slash commands if requested
const expandedText = expandCommands ? expandSlashCommand(text, [...this._fileCommands]) : text;
await this.agent.prompt(expandedText, options?.attachments);
await this.agent.prompt(expandedText, options?.images);
await this.waitForRetry();
}

View file

@ -8,7 +8,7 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Model, Usage } from "@mariozechner/pi-ai";
import { complete } from "@mariozechner/pi-ai";
import { messageTransformer } from "./messages.js";
import { convertToLlm } from "./messages.js";
import { type CompactionEntry, createSummaryMessage, type SessionEntry } from "./session-manager.js";
/**
@ -337,7 +337,7 @@ export async function generateSummary(
: SUMMARIZATION_PROMPT;
// Transform custom messages (like bashExecution) to LLM-compatible messages
const transformedMessages = messageTransformer(currentMessages);
const transformedMessages = convertToLlm(currentMessages);
const summarizationMessages = [
...transformedMessages,
@ -558,7 +558,7 @@ async function generateTurnPrefixSummary(
): Promise<string> {
const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
const transformedMessages = messageTransformer(messages);
const transformedMessages = convertToLlm(messages);
const summarizationMessages = [
...transformedMessages,
{

View file

@ -5,7 +5,7 @@
* They can provide custom rendering for tool calls and results in the TUI.
*/
import type { AgentTool, AgentToolResult, AgentToolUpdateCallback } from "@mariozechner/pi-ai";
import type { AgentTool, AgentToolResult, AgentToolUpdateCallback } from "@mariozechner/pi-agent-core";
import type { Component } from "@mariozechner/pi-tui";
import type { Static, TSchema } from "@sinclair/typebox";
import type { Theme } from "../../modes/interactive/theme/theme.js";

View file

@ -2,7 +2,7 @@
* Hook runner - executes hooks and manages their lifecycle.
*/
import type { Message } from "@mariozechner/pi-ai";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ModelRegistry } from "../model-registry.js";
import type { SessionManager } from "../session-manager.js";
import type { AppendEntryHandler, LoadedHook, SendMessageHandler } from "./loader.js";
@ -315,7 +315,7 @@ export class HookRunner {
*
* Note: Messages are already deep-copied by the caller (pi-ai preprocessor).
*/
async emitContext(messages: Message[]): Promise<Message[]> {
async emitContext(messages: AgentMessage[]): Promise<AgentMessage[]> {
const ctx = this.createContext();
let currentMessages = messages;

View file

@ -2,7 +2,7 @@
* Tool wrapper - wraps tools with hook callbacks for interception.
*/
import type { AgentTool, AgentToolUpdateCallback } from "@mariozechner/pi-ai";
import type { AgentTool, AgentToolUpdateCallback } from "@mariozechner/pi-agent-core";
import type { HookRunner } from "./runner.js";
import type { ToolCallEventResult, ToolResultEventResult } from "./types.js";

View file

@ -151,12 +151,11 @@ export type SessionEvent =
* Event data for context event.
* Fired before each LLM call, allowing hooks to modify context non-destructively.
* Original session messages are NOT modified - only the messages sent to the LLM are affected.
* Messages are already in LLM format (Message[], not AgentMessage[]).
*/
export interface ContextEvent {
type: "context";
/** Messages about to be sent to the LLM (deep copy, safe to modify) */
messages: Message[];
messages: AgentMessage[];
}
/**

View file

@ -6,11 +6,7 @@
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Message } from "@mariozechner/pi-ai";
// ============================================================================
// Custom Message Types
// ============================================================================
import type { ImageContent, Message, TextContent } from "@mariozechner/pi-ai";
/**
* Message type for bash executions via the ! command.
@ -26,8 +22,6 @@ export interface BashExecutionMessage {
timestamp: number;
}
import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
/**
* Message type for hook-injected messages via sendMessage().
* These are custom messages that hooks can inject into the conversation.
@ -41,36 +35,28 @@ export interface HookMessage<T = unknown> {
timestamp: number;
}
// Extend CustomMessages via declaration merging
// Extend CustomAgentMessages via declaration merging
declare module "@mariozechner/pi-agent-core" {
interface CustomMessages {
interface CustomAgentMessages {
bashExecution: BashExecutionMessage;
hookMessage: HookMessage;
}
}
// ============================================================================
// Type Guards
// ============================================================================
/**
* Type guard for BashExecutionMessage.
*/
export function isBashExecutionMessage(msg: AgentMessage | Message): msg is BashExecutionMessage {
return (msg as BashExecutionMessage).role === "bashExecution";
return msg.role === "bashExecution";
}
/**
* Type guard for HookAgentMessage.
* Type guard for HookMessage.
*/
export function isHookMessage(msg: AgentMessage | Message): msg is HookMessage {
return (msg as HookMessage).role === "hookMessage";
return msg.role === "hookMessage";
}
// ============================================================================
// Message Formatting
// ============================================================================
/**
* Convert a BashExecutionMessage to user message text for LLM context.
*/
@ -92,18 +78,15 @@ export function bashExecutionToText(msg: BashExecutionMessage): string {
return text;
}
// ============================================================================
// Message Transformer
// ============================================================================
/**
* Transform AgentMessages (including custom types) to LLM-compatible Messages.
*
* This is used by:
* - Agent's messageTransformer option (for prompt calls)
* - Agent's convertToLlm option (for prompt calls and queued messages)
* - Compaction's generateSummary (for summarization)
* - Custom hooks and tools
*/
export function messageTransformer(messages: AgentMessage[]): Message[] {
export function convertToLlm(messages: AgentMessage[]): Message[] {
return messages
.map((m): Message | null => {
if (isBashExecutionMessage(m)) {
@ -131,5 +114,5 @@ export function messageTransformer(messages: AgentMessage[]): Message[] {
// Filter out unknown message types
return null;
})
.filter((m): m is Message => m !== null);
.filter((m) => m !== null);
}

View file

@ -29,7 +29,7 @@
* ```
*/
import { Agent, ProviderTransport, type ThinkingLevel } from "@mariozechner/pi-agent-core";
import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import { join } from "path";
import { getAgentDir } from "../config.js";
@ -39,7 +39,7 @@ import { discoverAndLoadCustomTools, type LoadedCustomTool } from "./custom-tool
import type { CustomAgentTool } from "./custom-tools/types.js";
import { discoverAndLoadHooks, HookRunner, type LoadedHook, wrapToolsWithHooks } from "./hooks/index.js";
import type { HookFactory } from "./hooks/types.js";
import { messageTransformer } from "./messages.js";
import { convertToLlm } from "./messages.js";
import { ModelRegistry } from "./model-registry.js";
import { SessionManager } from "./session-manager.js";
import { type Settings, SettingsManager, type SkillsSettings } from "./settings-manager.js";
@ -588,26 +588,24 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
thinkingLevel,
tools: allToolsArray,
},
messageTransformer,
preprocessor: hookRunner
convertToLlm,
transformContext: hookRunner
? async (messages) => {
return hookRunner.emitContext(messages);
}
: undefined,
queueMode: settingsManager.getQueueMode(),
transport: new ProviderTransport({
getApiKey: async () => {
const currentModel = agent.state.model;
if (!currentModel) {
throw new Error("No model selected");
}
const key = await modelRegistry.getApiKey(currentModel);
if (!key) {
throw new Error(`No API key found for provider "${currentModel.provider}"`);
}
return key;
},
}),
getApiKey: async () => {
const currentModel = agent.state.model;
if (!currentModel) {
throw new Error("No model selected");
}
const key = await modelRegistry.getApiKey(currentModel);
if (!key) {
throw new Error(`No API key found for provider "${currentModel.provider}"`);
}
return key;
},
});
time("createAgent");

View file

@ -2,7 +2,7 @@ import { randomBytes } from "node:crypto";
import { createWriteStream } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { spawn } from "child_process";
import { getShellConfig, killProcessTree } from "../../utils/shell.js";

View file

@ -1,4 +1,4 @@
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import * as Diff from "diff";
import { constants } from "fs";

View file

@ -1,4 +1,4 @@
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { spawnSync } from "child_process";
import { existsSync } from "fs";

View file

@ -1,5 +1,5 @@
import { createInterface } from "node:readline";
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { spawn } from "child_process";
import { readFileSync, type Stats, statSync } from "fs";

View file

@ -1,5 +1,3 @@
import type { AgentTool } from "@mariozechner/pi-ai";
export { type BashToolDetails, bashTool, createBashTool } from "./bash.js";
export { createEditTool, editTool } from "./edit.js";
export { createFindTool, type FindToolDetails, findTool } from "./find.js";
@ -9,6 +7,7 @@ export { createReadTool, type ReadToolDetails, readTool } from "./read.js";
export type { TruncationResult } from "./truncate.js";
export { createWriteTool, writeTool } from "./write.js";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { bashTool, createBashTool } from "./bash.js";
import { createEditTool, editTool } from "./edit.js";
import { createFindTool, findTool } from "./find.js";

View file

@ -1,4 +1,4 @@
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { existsSync, readdirSync, statSync } from "fs";
import nodePath from "path";

View file

@ -1,4 +1,5 @@
import type { AgentTool, ImageContent, TextContent } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import type { ImageContent, TextContent } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access, readFile } from "fs/promises";

View file

@ -1,4 +1,4 @@
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { mkdir, writeFile } from "fs/promises";
import { dirname } from "path";

View file

@ -73,7 +73,7 @@ export {
isReadToolResult,
isWriteToolResult,
} from "./core/hooks/index.js";
export { messageTransformer } from "./core/messages.js";
export { convertToLlm } from "./core/messages.js";
export { ModelRegistry } from "./core/model-registry.js";
// SDK for programmatic usage
export {

View file

@ -5,8 +5,7 @@
* createAgentSession() options. The SDK does the heavy lifting.
*/
import type { Attachment } from "@mariozechner/pi-agent-core";
import { supportsXhigh } from "@mariozechner/pi-ai";
import { type ImageContent, supportsXhigh } from "@mariozechner/pi-ai";
import chalk from "chalk";
import { existsSync } from "fs";
import { join } from "path";
@ -64,7 +63,7 @@ async function runInteractiveMode(
customTools: LoadedCustomTool[],
setToolUIContext: (uiContext: HookUIContext, hasUI: boolean) => void,
initialMessage?: string,
initialAttachments?: Attachment[],
initialImages?: ImageContent[],
fdPath: string | null = null,
): Promise<void> {
const mode = new InteractiveMode(session, version, changelogMarkdown, customTools, setToolUIContext, fdPath);
@ -93,7 +92,7 @@ async function runInteractiveMode(
if (initialMessage) {
try {
await session.prompt(initialMessage, { attachments: initialAttachments });
await session.prompt(initialMessage, { images: initialImages });
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
mode.showError(errorMessage);
@ -122,25 +121,25 @@ async function runInteractiveMode(
async function prepareInitialMessage(parsed: Args): Promise<{
initialMessage?: string;
initialAttachments?: Attachment[];
initialImages?: ImageContent[];
}> {
if (parsed.fileArgs.length === 0) {
return {};
}
const { textContent, imageAttachments } = await processFileArguments(parsed.fileArgs);
const { text, images } = await processFileArguments(parsed.fileArgs);
let initialMessage: string;
if (parsed.messages.length > 0) {
initialMessage = textContent + parsed.messages[0];
initialMessage = text + parsed.messages[0];
parsed.messages.shift();
} else {
initialMessage = textContent;
initialMessage = text;
}
return {
initialMessage,
initialAttachments: imageAttachments.length > 0 ? imageAttachments : undefined,
initialImages: images.length > 0 ? images : undefined,
};
}
@ -330,7 +329,7 @@ export async function main(args: string[]) {
}
const cwd = process.cwd();
const { initialMessage, initialAttachments } = await prepareInitialMessage(parsed);
const { initialMessage, initialImages } = await prepareInitialMessage(parsed);
time("prepareInitialMessage");
const isInteractive = !parsed.print && parsed.mode === undefined;
const mode = parsed.mode || "text";
@ -438,11 +437,11 @@ export async function main(args: string[]) {
customToolsResult.tools,
customToolsResult.setUIContext,
initialMessage,
initialAttachments,
initialImages,
fdPath,
);
} else {
await runPrintMode(session, mode, parsed.messages, initialMessage, initialAttachments);
await runPrintMode(session, mode, parsed.messages, initialMessage, initialImages);
stopThemeWatcher();
if (process.stdout.writableLength > 0) {
await new Promise<void>((resolve) => process.stdout.once("drain", resolve));

View file

@ -6,8 +6,7 @@
* - `pi --mode json "prompt"` - JSON event stream
*/
import type { Attachment } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai";
import type { AssistantMessage, ImageContent } from "@mariozechner/pi-ai";
import type { AgentSession } from "../core/agent-session.js";
/**
@ -18,14 +17,14 @@ import type { AgentSession } from "../core/agent-session.js";
* @param mode Output mode: "text" for final response only, "json" for all events
* @param messages Array of prompts to send
* @param initialMessage Optional first message (may contain @file content)
* @param initialAttachments Optional attachments for the initial message
* @param initialImages Optional images for the initial message
*/
export async function runPrintMode(
session: AgentSession,
mode: "text" | "json",
messages: string[],
initialMessage?: string,
initialAttachments?: Attachment[],
initialImages?: ImageContent[],
): Promise<void> {
// Load entries once for session start events
const entries = session.sessionManager.getEntries();
@ -79,7 +78,7 @@ export async function runPrintMode(
// Send initial message with images
if (initialMessage) {
await session.prompt(initialMessage, { attachments: initialAttachments });
await session.prompt(initialMessage, { images: initialImages });
}
// Send remaining messages

View file

@ -6,7 +6,8 @@
import { type ChildProcess, spawn } from "node:child_process";
import * as readline from "node:readline";
import type { AgentEvent, AgentMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { AgentEvent, AgentMessage, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import type { SessionStats } from "../../core/agent-session.js";
import type { BashResult } from "../../core/bash-executor.js";
import type { CompactionResult } from "../../core/compaction.js";
@ -167,8 +168,8 @@ export class RpcClient {
* Returns immediately after sending; use onEvent() to receive streaming events.
* Use waitForIdle() to wait for completion.
*/
async prompt(message: string, attachments?: Attachment[]): Promise<void> {
await this.send({ type: "prompt", message, attachments });
async prompt(message: string, images?: ImageContent[]): Promise<void> {
await this.send({ type: "prompt", message, images });
}
/**
@ -404,9 +405,9 @@ export class RpcClient {
/**
* Send prompt and wait for completion, returning all events.
*/
async promptAndWait(message: string, attachments?: Attachment[], timeout = 60000): Promise<AgentEvent[]> {
async promptAndWait(message: string, images?: ImageContent[], timeout = 60000): Promise<AgentEvent[]> {
const eventsPromise = this.collectEvents(timeout);
await this.prompt(message, attachments);
await this.prompt(message, images);
return eventsPromise;
}

View file

@ -187,7 +187,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
// Hook commands and file slash commands are handled in session.prompt()
session
.prompt(command.message, {
attachments: command.attachments,
images: command.images,
})
.catch((e) => output(error(id, "prompt", e.message)));
return success(id, "prompt");

View file

@ -5,8 +5,8 @@
* Responses and events are emitted as JSON lines on stdout.
*/
import type { AgentMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import type { AgentMessage, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { ImageContent, Model } from "@mariozechner/pi-ai";
import type { SessionStats } from "../../core/agent-session.js";
import type { BashResult } from "../../core/bash-executor.js";
import type { CompactionResult } from "../../core/compaction.js";
@ -17,7 +17,7 @@ import type { CompactionResult } from "../../core/compaction.js";
export type RpcCommand =
// Prompting
| { id?: string; type: "prompt"; message: string; attachments?: Attachment[] }
| { id?: string; type: "prompt"; message: string; images?: ImageContent[] }
| { id?: string; type: "queue_message"; message: string }
| { id?: string; type: "abort" }
| { id?: string; type: "reset" }