feat(edit): add fuzzy matching for trailing whitespace, quotes, dashes, and spaces (#713)

2026-04-15 13:03:42 +00:00 · 2026-01-14 12:22:00 +03:00 · 2026-01-14 12:22:00 +03:00 · 0c135d0141
commit 0c135d0141
parent c85a5720f2
4 changed files with 279 additions and 25 deletions
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@ -2,6 +2,10 @@

 ## [Unreleased]

+### Added
+
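+- Edit tool now falls back to fuzzy matching when the exact match fails, tolerating differences in trailing whitespace, smart quotes, Unicode dashes, and special spaces. When the fallback is used, the edited file is written back with these characters normalized (trailing whitespace stripped; quotes, dashes, and spaces converted to ASCII)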
+
 ## [0.45.7] - 2026-01-13

 ### Added
--- a/packages/coding-agent/src/core/tools/edit-diff.ts
+++ b/packages/coding-agent/src/core/tools/edit-diff.ts
@ -24,6 +24,97 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
 	return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text;
 }

+/**
+ * Normalize text for fuzzy matching: strip trailing whitespace from each
+ * "\n"-separated line, then map smart quotes, Unicode dashes/hyphens and
+ * special Unicode spaces (including U+2000 en quad / U+2001 em quad) to ASCII.
+ * @param text - Text to normalize; lines are split on "\n" only.
+ * @returns The normalized text. Its length may differ from the input's.
+ */
+export function normalizeForFuzzyMatch(text: string): string {
+	return (
+		text
+			// Strip trailing whitespace per line
+			.split("\n")
+			.map((line) => line.trimEnd())
+			.join("\n")
+			// Smart single quotes → '
+			.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
+			// Smart double quotes → "
+			.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
+			// Various dashes/hyphens → -
+			// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
+			// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
+			.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
+			// Special spaces → regular space
+			// U+00A0 NBSP, U+2000-U+200A en quad through hair space, U+202F narrow NBSP,
+			// U+205F medium math space, U+3000 ideographic space
+			.replace(/[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g, " ")
+	);
+}
+
+export interface FuzzyMatchResult {
+	/** True when oldText was located, either exactly or after normalization. */
+	found: boolean;
+	/** True only when the match came from the normalized fallback, not the exact search. */
+	usedFuzzyMatch: boolean;
+	/** Start offset of the match within contentForReplacement; -1 when not found. */
+	index: number;
+	/** Number of characters the match spans in contentForReplacement; 0 when not found. */
+	matchLength: number;
+	/**
+	 * Text that index and matchLength refer to, and that the replacement must be applied to:
+	 * the original content for an exact match (or no match), the normalized content otherwise.
+	 */
+	contentForReplacement: string;
+}
+
+/**
+ * Locate oldText in content: exact search first, then a search after both are
+ * passed through normalizeForFuzzyMatch. On a fuzzy hit, index, matchLength and
+ * contentForReplacement all refer to the normalized content, not the original.
+ * Without an exact match, an oldText that normalizes to "" is reported as not found.
+ */
+export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
+	// Try exact match first
+	const exactIndex = content.indexOf(oldText);
+	if (exactIndex !== -1) {
+		return {
+			found: true,
+			index: exactIndex,
+			matchLength: oldText.length,
+			usedFuzzyMatch: false,
+			contentForReplacement: content,
+		};
+	}
+
+	// Try fuzzy match - work entirely in normalized space
+	const fuzzyContent = normalizeForFuzzyMatch(content);
+	const fuzzyOldText = normalizeForFuzzyMatch(oldText);
+	// indexOf("") is always 0, so a needle that normalizes to "" must not count as a match
+	const fuzzyIndex = fuzzyOldText.length === 0 ? -1 : fuzzyContent.indexOf(fuzzyOldText);
+	if (fuzzyIndex === -1) {
+		return {
+			found: false,
+			index: -1,
+			matchLength: 0,
+			usedFuzzyMatch: false,
+			contentForReplacement: content,
+		};
+	}
+
+	// When fuzzy matching, we work in the normalized space for replacement.
+	// This means the output will have normalized whitespace/quotes/dashes,
+	// which is acceptable since we're fixing minor formatting differences anyway.
+	return {
+		found: true,
+		index: fuzzyIndex,
+		matchLength: fuzzyOldText.length,
+		usedFuzzyMatch: true,
+		contentForReplacement: fuzzyContent,
+	};
+}
+
 /** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
 export function stripBom(content: string): { bom: string; text: string } {
 	return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content };
@ -174,37 +265,43 @@ export async function computeEditDiff(
 		const normalizedOldText = normalizeToLF(oldText);
 		const normalizedNewText = normalizeToLF(newText);

-		// Check if old text exists
-		if (!normalizedContent.includes(normalizedOldText)) {
+		// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
+		const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
+
+		if (!matchResult.found) {
 			return {
 				error: `Could not find the exact text in ${path}. The old text must match exactly including all whitespace and newlines.`,
 			};
 		}

-		// Count occurrences
-		const occurrences = normalizedContent.split(normalizedOldText).length - 1;
+		// Count occurrences using fuzzy-normalized content for consistency
+		const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
+		const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
+		const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
+
 		if (occurrences > 1) {
 			return {
 				error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. Please provide more context to make it unique.`,
 			};
 		}

-		// Compute the new content
-		const index = normalizedContent.indexOf(normalizedOldText);
-		const normalizedNewContent =
-			normalizedContent.substring(0, index) +
+		// Compute the new content using the matched position
+		// When fuzzy matching was used, contentForReplacement is the normalized version
+		const baseContent = matchResult.contentForReplacement;
+		const newContent =
+			baseContent.substring(0, matchResult.index) +
 			normalizedNewText +
-			normalizedContent.substring(index + normalizedOldText.length);
+			baseContent.substring(matchResult.index + matchResult.matchLength);

 		// Check if it would actually change anything
-		if (normalizedContent === normalizedNewContent) {
+		if (baseContent === newContent) {
 			return {
 				error: `No changes would be made to ${path}. The replacement produces identical content.`,
 			};
 		}

 		// Generate the diff
-		return generateDiffString(normalizedContent, normalizedNewContent);
+		return generateDiffString(baseContent, newContent);
 	} catch (err) {
 		return { error: err instanceof Error ? err.message : String(err) };
 	}
--- a/packages/coding-agent/src/core/tools/edit.ts
+++ b/packages/coding-agent/src/core/tools/edit.ts
@ -2,7 +2,15 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
 import { Type } from "@sinclair/typebox";
 import { constants } from "fs";
 import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises";
-import { detectLineEnding, generateDiffString, normalizeToLF, restoreLineEndings, stripBom } from "./edit-diff.js";
+import {
+	detectLineEnding,
+	fuzzyFindText,
+	generateDiffString,
+	normalizeForFuzzyMatch,
+	normalizeToLF,
+	restoreLineEndings,
+	stripBom,
+} from "./edit-diff.js";
 import { resolveToCwd } from "./path-utils.js";

 const editSchema = Type.Object({
@ -116,8 +124,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
 						const normalizedOldText = normalizeToLF(oldText);
 						const normalizedNewText = normalizeToLF(newText);

-						// Check if old text exists
-						if (!normalizedContent.includes(normalizedOldText)) {
+						// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
+						const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
+
+						if (!matchResult.found) {
 							if (signal) {
 								signal.removeEventListener("abort", onAbort);
 							}
@ -129,8 +139,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
 							return;
 						}

-						// Count occurrences
-						const occurrences = normalizedContent.split(normalizedOldText).length - 1;
+						// Count occurrences using fuzzy-normalized content for consistency
+						const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
+						const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
+						const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;

 						if (occurrences > 1) {
 							if (signal) {
@ -149,16 +161,16 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
 							return;
 						}

-						// Perform replacement using indexOf + substring (raw string replace, no special character interpretation)
-						// String.replace() interprets $ in the replacement string, so we do manual replacement
-						const index = normalizedContent.indexOf(normalizedOldText);
-						const normalizedNewContent =
-							normalizedContent.substring(0, index) +
+						// Perform replacement using the matched text position
+						// When fuzzy matching was used, contentForReplacement is the normalized version
+						const baseContent = matchResult.contentForReplacement;
+						const newContent =
+							baseContent.substring(0, matchResult.index) +
 							normalizedNewText +
-							normalizedContent.substring(index + normalizedOldText.length);
+							baseContent.substring(matchResult.index + matchResult.matchLength);

 						// Verify the replacement actually changed something
-						if (normalizedContent === normalizedNewContent) {
+						if (baseContent === newContent) {
 							if (signal) {
 								signal.removeEventListener("abort", onAbort);
 							}
@ -170,7 +182,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
 							return;
 						}

-						const finalContent = bom + restoreLineEndings(normalizedNewContent, originalEnding);
+						const finalContent = bom + restoreLineEndings(newContent, originalEnding);
 						await ops.writeFile(absolutePath, finalContent);

 						// Check if aborted after writing
@ -183,7 +195,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
 							signal.removeEventListener("abort", onAbort);
 						}

-						const diffResult = generateDiffString(normalizedContent, normalizedNewContent);
+						const diffResult = generateDiffString(baseContent, newContent);
 						resolve({
 							content: [
 								{
--- a/packages/coding-agent/test/tools.test.ts
+++ b/packages/coding-agent/test/tools.test.ts
@ -388,6 +388,147 @@ describe("Coding Agent Tools", () => {
 	});
 });

+describe("edit tool fuzzy matching", () => {
+	let testDir: string;
+
+	beforeEach(() => {
+		testDir = join(tmpdir(), `coding-agent-fuzzy-test-${Date.now()}`);
+		mkdirSync(testDir, { recursive: true });
+	});
+
+	afterEach(() => {
+		rmSync(testDir, { recursive: true, force: true });
+	});
+
+	it("should match text with trailing whitespace stripped", async () => {
+		const testFile = join(testDir, "trailing-ws.txt");
+		// File has trailing spaces on lines
+		writeFileSync(testFile, "line one   \nline two  \nline three\n");
+
+		// oldText without trailing whitespace should still match
+		const result = await editTool.execute("test-fuzzy-1", {
+			path: testFile,
+			oldText: "line one\nline two\n",
+			newText: "replaced\n",
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe("replaced\nline three\n");
+	});
+
+	it("should match smart single quotes to ASCII quotes", async () => {
+		const testFile = join(testDir, "smart-quotes.txt");
+		// File has smart/curly single quotes (U+2018, U+2019)
+		writeFileSync(testFile, "console.log(\u2018hello\u2019);\n");
+
+		// oldText with ASCII quotes should match
+		const result = await editTool.execute("test-fuzzy-2", {
+			path: testFile,
+			oldText: "console.log('hello');",
+			newText: "console.log('world');",
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe("console.log('world');\n");
+	});
+
+	it("should match smart double quotes to ASCII quotes", async () => {
+		const testFile = join(testDir, "smart-double-quotes.txt");
+		// File has smart/curly double quotes (U+201C, U+201D)
+		writeFileSync(testFile, "const msg = \u201CHello World\u201D;\n");
+
+		// oldText with ASCII quotes should match
+		const result = await editTool.execute("test-fuzzy-3", {
+			path: testFile,
+			oldText: 'const msg = "Hello World";',
+			newText: 'const msg = "Goodbye";',
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe('const msg = "Goodbye";\n');
+	});
+
+	it("should match Unicode dashes to ASCII hyphen", async () => {
+		const testFile = join(testDir, "unicode-dashes.txt");
+		// File has en-dash (U+2013) and em-dash (U+2014)
+		writeFileSync(testFile, "range: 1\u20135\nbreak\u2014here\n");
+
+		// oldText with ASCII hyphens should match
+		const result = await editTool.execute("test-fuzzy-4", {
+			path: testFile,
+			oldText: "range: 1-5\nbreak-here",
+			newText: "range: 10-50\nbreak--here",
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe("range: 10-50\nbreak--here\n");
+	});
+
+	it("should match non-breaking space to regular space", async () => {
+		const testFile = join(testDir, "nbsp.txt");
+		// File has non-breaking space (U+00A0)
+		writeFileSync(testFile, "hello\u00A0world\n");
+
+		// oldText with regular space should match
+		const result = await editTool.execute("test-fuzzy-5", {
+			path: testFile,
+			oldText: "hello world",
+			newText: "hello universe",
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe("hello universe\n");
+	});
+
+	it("should prefer exact match over fuzzy match", async () => {
+		const testFile = join(testDir, "exact-preferred.txt");
+		// File has both exact and fuzzy-matchable content
+		writeFileSync(testFile, "const x = 'exact';\nconst y = 'other';\n");
+
+		const result = await editTool.execute("test-fuzzy-6", {
+			path: testFile,
+			oldText: "const x = 'exact';",
+			newText: "const x = 'changed';",
+		});
+
+		expect(getTextOutput(result)).toContain("Successfully replaced");
+		const content = readFileSync(testFile, "utf-8");
+		expect(content).toBe("const x = 'changed';\nconst y = 'other';\n");
+	});
+
+	it("should still fail when text is not found even with fuzzy matching", async () => {
+		const testFile = join(testDir, "no-match.txt");
+		writeFileSync(testFile, "completely different content\n");
+
+		await expect(
+			editTool.execute("test-fuzzy-7", {
+				path: testFile,
+				oldText: "this does not exist",
+				newText: "replacement",
+			}),
+		).rejects.toThrow(/Could not find the exact text/);
+	});
+
+	it("should detect duplicates after fuzzy normalization", async () => {
+		const testFile = join(testDir, "fuzzy-dups.txt");
+		// Neither line matches oldText verbatim; both match once trailing whitespace is stripped
+		writeFileSync(testFile, "hello world   \nhello world \n");
+
+		await expect(
+			editTool.execute("test-fuzzy-8", {
+				path: testFile,
+				oldText: "hello world\n",
+				newText: "replaced",
+			}),
+		).rejects.toThrow(/Found 2 occurrences/);
+	});
+});
+
 describe("edit tool CRLF handling", () => {
 	let testDir: string;