fix(coding-agent): paste clipboard images on Wayland via wl-paste/xclip

This commit is contained in:
Zhou Rui 2026-01-06 09:37:01 +08:00 committed by Mario Zechner
parent 9063a71fe6
commit 538ae6a083
4 changed files with 303 additions and 9 deletions

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Fixed
- Interactive mode: Ctrl+V clipboard image paste now works on Wayland sessions by using `wl-paste` with `xclip` fallback ([#488](https://github.com/badlogic/pi-mono/pull/488) by [@ghoulr](https://github.com/ghoulr))
## [0.37.2] - 2026-01-05
### Fixed

View file

@ -7,7 +7,6 @@ import * as crypto from "node:crypto";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import Clipboard from "@crosscopy/clipboard";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { type AssistantMessage, getOAuthProviders, type Message, type OAuthProvider } from "@mariozechner/pi-ai";
import type { KeyId, SlashCommand } from "@mariozechner/pi-tui";
@ -43,6 +42,7 @@ import { loadProjectContextFiles } from "../../core/system-prompt.js";
import type { TruncationResult } from "../../core/tools/truncate.js";
import { getChangelogPath, parseChangelog } from "../../utils/changelog.js";
import { copyToClipboard } from "../../utils/clipboard.js";
import { extensionForImageMimeType, readClipboardImage } from "../../utils/clipboard-image.js";
import { ArminComponent } from "./components/armin.js";
import { AssistantMessageComponent } from "./components/assistant-message.js";
import { BashExecutionComponent } from "./components/bash-execution.js";
@ -924,20 +924,17 @@ export class InteractiveMode {
private async handleClipboardImagePaste(): Promise<void> {
try {
if (!Clipboard.hasImage()) {
return;
}
const imageData = await Clipboard.getImageBinary();
if (!imageData || imageData.length === 0) {
const image = await readClipboardImage();
if (!image) {
return;
}
// Write to temp file
const tmpDir = os.tmpdir();
const fileName = `pi-clipboard-${crypto.randomUUID()}.png`;
const ext = extensionForImageMimeType(image.mimeType) ?? "png";
const fileName = `pi-clipboard-${crypto.randomUUID()}.${ext}`;
const filePath = path.join(tmpDir, fileName);
fs.writeFileSync(filePath, Buffer.from(imageData));
fs.writeFileSync(filePath, Buffer.from(image.bytes));
// Insert file path directly
this.editor.insertTextAtCursor(filePath);

View file

@ -0,0 +1,157 @@
import Clipboard from "@crosscopy/clipboard";
import { spawnSync } from "child_process";
/**
 * An image obtained from the system clipboard, together with the MIME type
 * that describes how its bytes are encoded.
 */
export type ClipboardImage = {
	/** MIME type of `bytes`, e.g. "image/png". */
	mimeType: string;
	/** Encoded image data as returned by the clipboard source. */
	bytes: Uint8Array;
};
/** Image MIME types looked for first, in descending order of preference. */
const PREFERRED_IMAGE_MIME_TYPES = ["image/png", "image/jpeg", "image/webp", "image/gif"] as const;

/** Timeout (ms) used for the quick "which types are on the clipboard?" queries. */
const DEFAULT_LIST_TIMEOUT_MS = 1_000;

/** Timeout (ms) applied to a command when the caller does not specify one. */
const DEFAULT_READ_TIMEOUT_MS = 3_000;

/** Upper bound on captured command output when the caller does not specify one (50 MiB). */
const DEFAULT_MAX_BUFFER_BYTES = 50 * 1024 ** 2;
/**
 * Reports whether the given environment describes a Wayland session.
 *
 * A session counts as Wayland when `WAYLAND_DISPLAY` is set to a non-empty
 * value or when `XDG_SESSION_TYPE` is exactly "wayland".
 *
 * @param env Environment variables to inspect; defaults to `process.env`.
 * @returns `true` for a Wayland session, `false` otherwise.
 */
export function isWaylandSession(env: NodeJS.ProcessEnv = process.env): boolean {
	if (env.WAYLAND_DISPLAY) {
		return true;
	}
	return env.XDG_SESSION_TYPE === "wayland";
}
/**
 * Reduces a MIME type to its bare, lower-cased `type/subtype` form by
 * dropping everything from the first ";" onwards (parameters such as
 * `charset`) and trimming surrounding whitespace.
 *
 * @param mimeType MIME type string, optionally carrying parameters.
 * @returns The normalized type without parameters.
 */
function baseMimeType(mimeType: string): string {
	const [essence] = mimeType.split(";");
	if (essence === undefined) {
		// Only reachable for the type checker: split() always yields at least one element.
		return mimeType.toLowerCase();
	}
	return essence.trim().toLowerCase();
}
/**
 * Maps an image MIME type to the file extension (without a leading dot) to
 * use when saving that image.
 *
 * The MIME type is normalized with `baseMimeType` first, so parameters and
 * letter case are ignored.
 *
 * @param mimeType MIME type of the image.
 * @returns "png", "jpg", "webp" or "gif", or `null` for any other type.
 */
export function extensionForImageMimeType(mimeType: string): string | null {
	// A Map (rather than a plain object) so keys like "constructor" can never match.
	const extensionsByType = new Map<string, string>([
		["image/png", "png"],
		["image/jpeg", "jpg"],
		["image/webp", "webp"],
		["image/gif", "gif"],
	]);
	return extensionsByType.get(baseMimeType(mimeType)) ?? null;
}
/**
 * Chooses which of the offered MIME types to read an image as.
 *
 * Blank entries are ignored. Types are compared by their normalized base
 * form, but the value returned is the trimmed entry as it was offered.
 *
 * @param mimeTypes MIME types offered by the clipboard.
 * @returns The first offered type matching `PREFERRED_IMAGE_MIME_TYPES`
 *   (checked in preference order); failing that, the first offered type
 *   whose base starts with "image/"; otherwise `null`.
 */
function selectPreferredImageMimeType(mimeTypes: string[]): string | null {
	const candidates: { raw: string; base: string }[] = [];
	for (const entry of mimeTypes) {
		const raw = entry.trim();
		if (raw) {
			candidates.push({ raw, base: baseMimeType(raw) });
		}
	}

	for (const wanted of PREFERRED_IMAGE_MIME_TYPES) {
		const hit = candidates.find((candidate) => candidate.base === wanted);
		if (hit) {
			return hit.raw;
		}
	}

	// Nothing preferred is on offer: settle for any image type at all.
	for (const candidate of candidates) {
		if (candidate.base.startsWith("image/")) {
			return candidate.raw;
		}
	}
	return null;
}
/**
 * Runs a command synchronously and captures its standard output.
 *
 * @param command Executable to run.
 * @param args Arguments passed to the executable.
 * @param options Optional limits: `timeoutMs` (defaults to
 *   `DEFAULT_READ_TIMEOUT_MS`) and `maxBufferBytes` (defaults to
 *   `DEFAULT_MAX_BUFFER_BYTES`).
 * @returns `{ ok: true, stdout }` when the process ran and exited with
 *   status 0; `{ ok: false, stdout: <empty buffer> }` when spawning reported
 *   an error or the exit status was anything other than 0. Never throws for
 *   those failures.
 */
function runCommand(
	command: string,
	args: string[],
	options?: { timeoutMs?: number; maxBufferBytes?: number },
): { stdout: Buffer; ok: boolean } {
	const result = spawnSync(command, args, {
		timeout: options?.timeoutMs ?? DEFAULT_READ_TIMEOUT_MS,
		maxBuffer: options?.maxBufferBytes ?? DEFAULT_MAX_BUFFER_BYTES,
	});

	const failed = Boolean(result.error) || result.status !== 0;
	if (failed) {
		return { ok: false, stdout: Buffer.alloc(0) };
	}

	if (Buffer.isBuffer(result.stdout)) {
		return { ok: true, stdout: result.stdout };
	}

	// Defensive: normalize a string (or missing) stdout into a Buffer.
	const encoding = typeof result.stdout === "string" ? "utf-8" : undefined;
	return { ok: true, stdout: Buffer.from(result.stdout ?? "", encoding) };
}
/**
 * Reads an image from the clipboard using the `wl-paste` command.
 *
 * First asks `wl-paste --list-types` which MIME types are on offer, picks one
 * with `selectPreferredImageMimeType`, then fetches the data for that type.
 *
 * @returns The image bytes and normalized MIME type, or `null` when either
 *   `wl-paste` invocation fails, no image type is offered, or the fetched
 *   data is empty.
 */
function readClipboardImageViaWlPaste(): ClipboardImage | null {
	const listing = runCommand("wl-paste", ["--list-types"], { timeoutMs: DEFAULT_LIST_TIMEOUT_MS });
	if (!listing.ok) {
		return null;
	}

	// One MIME type per line; skip blank lines.
	const offered: string[] = [];
	for (const line of listing.stdout.toString("utf-8").split(/\r?\n/)) {
		const entry = line.trim();
		if (entry) {
			offered.push(entry);
		}
	}

	const chosen = selectPreferredImageMimeType(offered);
	if (!chosen) {
		return null;
	}

	const payload = runCommand("wl-paste", ["--type", chosen, "--no-newline"]);
	const usable = payload.ok && payload.stdout.length > 0;
	return usable ? { bytes: payload.stdout, mimeType: baseMimeType(chosen) } : null;
}
/**
 * Reads an image from the clipboard using the `xclip` command.
 *
 * Queries the clipboard's `TARGETS` to learn which types are on offer and
 * tries the best match first. The default preferred image types are then
 * tried as well, so a failed or empty `TARGETS` query does not prevent a read.
 *
 * Each MIME type is requested at most once: every attempt is a synchronous
 * process spawn that can block for the full read timeout, so repeating an
 * attempt that already failed would only add latency.
 *
 * @returns The image bytes and normalized MIME type of the first successful,
 *   non-empty read, or `null` when every attempt fails.
 */
function readClipboardImageViaXclip(): ClipboardImage | null {
	const targets = runCommand("xclip", ["-selection", "clipboard", "-t", "TARGETS", "-o"], {
		timeoutMs: DEFAULT_LIST_TIMEOUT_MS,
	});

	const candidateTypes = targets.ok
		? targets.stdout
				.toString("utf-8")
				.split(/\r?\n/)
				.map((t) => t.trim())
				.filter(Boolean)
		: [];

	// Yields null for an empty list, so no separate length check is needed.
	const preferred = selectPreferredImageMimeType(candidateTypes);

	// Advertised type first, then the defaults minus the one already queued.
	const tryTypes: string[] = preferred
		? [preferred, ...PREFERRED_IMAGE_MIME_TYPES.filter((t) => t !== preferred)]
		: [...PREFERRED_IMAGE_MIME_TYPES];

	for (const mimeType of tryTypes) {
		const data = runCommand("xclip", ["-selection", "clipboard", "-t", mimeType, "-o"]);
		if (data.ok && data.stdout.length > 0) {
			return { bytes: data.stdout, mimeType: baseMimeType(mimeType) };
		}
	}
	return null;
}
/**
 * Reads an image from the system clipboard.
 *
 * On Linux under a Wayland session the image is read with `wl-paste`,
 * falling back to `xclip` when that yields nothing. Everywhere else the
 * `@crosscopy/clipboard` module is used and the result is reported as
 * "image/png".
 *
 * @param options Optional overrides for the environment variables and the
 *   platform used to decide which strategy applies; they default to
 *   `process.env` and `process.platform`.
 * @returns The clipboard image, or `null` when no image could be read.
 */
export async function readClipboardImage(options?: {
	env?: NodeJS.ProcessEnv;
	platform?: NodeJS.Platform;
}): Promise<ClipboardImage | null> {
	const onLinux = (options?.platform ?? process.platform) === "linux";
	if (onLinux && isWaylandSession(options?.env ?? process.env)) {
		const viaWlPaste = readClipboardImageViaWlPaste();
		if (viaWlPaste) {
			return viaWlPaste;
		}
		return readClipboardImageViaXclip();
	}

	if (!Clipboard.hasImage()) {
		return null;
	}

	const raw = await Clipboard.getImageBinary();
	if (!raw || raw.length === 0) {
		return null;
	}

	return {
		// Copy into a Uint8Array only when the module returned some other array-like.
		bytes: raw instanceof Uint8Array ? raw : Uint8Array.from(raw),
		mimeType: "image/png",
	};
}

View file

@ -0,0 +1,136 @@
import type { SpawnSyncReturns } from "child_process";
import { beforeEach, describe, expect, test, vi } from "vitest";
// Shared mock functions. Created through vi.hoisted() so they are available
// to the vi.mock() factories in this file.
const mocks = vi.hoisted(() => ({
	// Stand-in for child_process.spawnSync.
	spawnSync: vi.fn<(command: string, args: string[], options: unknown) => SpawnSyncReturns<Buffer>>(),
	// Stand-in for the default export of @crosscopy/clipboard.
	clipboard: {
		hasImage: vi.fn<() => boolean>(),
		getImageBinary: vi.fn<() => Promise<Uint8Array | null>>(),
	},
}));
// Replace child_process with a module exposing only the mocked spawnSync.
vi.mock("child_process", () => ({ spawnSync: mocks.spawnSync }));
// Replace @crosscopy/clipboard's default export with the mocked clipboard object.
vi.mock("@crosscopy/clipboard", () => ({ default: mocks.clipboard }));
/**
 * Builds a fake `spawnSync` result for a process that exited with status 0.
 *
 * @param stdout Bytes the fake process wrote to standard output.
 * @returns A result with `status` 0, no signal, empty stderr and the given stdout.
 */
function spawnOk(stdout: Buffer): SpawnSyncReturns<Buffer> {
	const result: SpawnSyncReturns<Buffer> = {
		pid: 123,
		status: 0,
		signal: null,
		stdout,
		stderr: Buffer.alloc(0),
		// Slots follow the stdio order: [stdin, stdout, stderr].
		output: [Buffer.alloc(0), stdout, Buffer.alloc(0)],
	};
	return result;
}
/**
 * Builds a fake `spawnSync` result for a process that could not be run.
 *
 * @param error The error to report on the result's `error` field.
 * @returns A result with a `null` status, no signal, empty output and the given error.
 */
function spawnError(error: Error): SpawnSyncReturns<Buffer> {
	const result: SpawnSyncReturns<Buffer> = {
		pid: 123,
		status: null,
		signal: null,
		error,
		stdout: Buffer.alloc(0),
		stderr: Buffer.alloc(0),
		output: [Buffer.alloc(0), Buffer.alloc(0), Buffer.alloc(0)],
	};
	return result;
}
// Exercises readClipboardImage's strategy selection: external commands on
// Wayland, @crosscopy/clipboard everywhere else.
describe("readClipboardImage", () => {
	// Makes any use of the clipboard module fail the test (Wayland paths must not touch it).
	const forbidClipboardModule = (): void => {
		mocks.clipboard.hasImage.mockImplementation(() => {
			throw new Error("clipboard.hasImage should not be called on Wayland");
		});
	};

	// Makes any process spawn fail the test (non-Wayland paths must not shell out).
	const forbidSpawn = (): void => {
		mocks.spawnSync.mockImplementation(() => {
			throw new Error("spawnSync should not be called for non-Wayland sessions");
		});
	};

	// Imports the module under test after the mocks for this test have been arranged.
	const loadReadClipboardImage = async () => {
		const subject = await import("../src/utils/clipboard-image.js");
		return subject.readClipboardImage;
	};

	beforeEach(() => {
		vi.resetModules();
		mocks.spawnSync.mockReset();
		mocks.clipboard.hasImage.mockReset();
		mocks.clipboard.getImageBinary.mockReset();
	});

	test("Wayland: uses wl-paste and never calls @crosscopy/clipboard", async () => {
		forbidClipboardModule();
		mocks.spawnSync.mockImplementation((command, args) => {
			if (command === "wl-paste") {
				if (args[0] === "--list-types") {
					return spawnOk(Buffer.from("text/plain\nimage/png\n", "utf-8"));
				}
				if (args[0] === "--type") {
					return spawnOk(Buffer.from([1, 2, 3]));
				}
			}
			throw new Error(`Unexpected spawnSync call: ${command} ${args.join(" ")}`);
		});

		const readClipboardImage = await loadReadClipboardImage();
		const image = await readClipboardImage({ platform: "linux", env: { WAYLAND_DISPLAY: "1" } });

		expect(image).not.toBeNull();
		expect(image?.mimeType).toBe("image/png");
		expect(Array.from(image?.bytes ?? [])).toEqual([1, 2, 3]);
	});

	test("Wayland: falls back to xclip when wl-paste is missing", async () => {
		forbidClipboardModule();
		// Mimics the error spawnSync reports when the executable does not exist.
		const enoent = Object.assign(new Error("spawn ENOENT"), { code: "ENOENT" });
		mocks.spawnSync.mockImplementation((command, args) => {
			if (command === "wl-paste") {
				return spawnError(enoent);
			}
			if (command !== "xclip") {
				return spawnOk(Buffer.alloc(0));
			}
			if (args.includes("TARGETS")) {
				return spawnOk(Buffer.from("image/png\n", "utf-8"));
			}
			if (args.includes("image/png")) {
				return spawnOk(Buffer.from([9, 8]));
			}
			return spawnOk(Buffer.alloc(0));
		});

		const readClipboardImage = await loadReadClipboardImage();
		const image = await readClipboardImage({ platform: "linux", env: { XDG_SESSION_TYPE: "wayland" } });

		expect(image).not.toBeNull();
		expect(image?.mimeType).toBe("image/png");
		expect(Array.from(image?.bytes ?? [])).toEqual([9, 8]);
	});

	test("Non-Wayland: uses @crosscopy/clipboard", async () => {
		forbidSpawn();
		mocks.clipboard.hasImage.mockReturnValue(true);
		mocks.clipboard.getImageBinary.mockResolvedValue(new Uint8Array([7]));

		const readClipboardImage = await loadReadClipboardImage();
		const image = await readClipboardImage({ platform: "linux", env: {} });

		expect(image).not.toBeNull();
		expect(image?.mimeType).toBe("image/png");
		expect(Array.from(image?.bytes ?? [])).toEqual([7]);
	});

	test("Non-Wayland: returns null when clipboard has no image", async () => {
		forbidSpawn();
		mocks.clipboard.hasImage.mockReturnValue(false);

		const readClipboardImage = await loadReadClipboardImage();
		const image = await readClipboardImage({ platform: "linux", env: {} });

		expect(image).toBeNull();
	});
});