From 0c135d01410e549e32a4ff89e6730cf5e27f271b Mon Sep 17 00:00:00 2001 From: Danila Poyarkov Date: Wed, 14 Jan 2026 12:22:00 +0300 Subject: [PATCH] feat(edit): add fuzzy matching for trailing whitespace, quotes, dashes, and spaces (#713) --- packages/coding-agent/CHANGELOG.md | 4 + .../coding-agent/src/core/tools/edit-diff.ts | 119 +++++++++++++-- packages/coding-agent/src/core/tools/edit.ts | 40 +++-- packages/coding-agent/test/tools.test.ts | 141 ++++++++++++++++++ 4 files changed, 279 insertions(+), 25 deletions(-) diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 76b62efd..e76228bf 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Edit tool now uses fuzzy matching as fallback when exact match fails, tolerating trailing whitespace, smart quotes, Unicode dashes, and special spaces + ## [0.45.7] - 2026-01-13 ### Added diff --git a/packages/coding-agent/src/core/tools/edit-diff.ts b/packages/coding-agent/src/core/tools/edit-diff.ts index a29710c7..17f017bf 100644 --- a/packages/coding-agent/src/core/tools/edit-diff.ts +++ b/packages/coding-agent/src/core/tools/edit-diff.ts @@ -24,6 +24,97 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text; } +/** + * Normalize text for fuzzy matching. 
/**
 * Normalize text for fuzzy matching.
 *
 * Transformations, applied in this order:
 * - strip trailing whitespace from each line
 * - normalize smart quotes to their ASCII equivalents
 * - normalize Unicode dashes/hyphens to the ASCII hyphen
 * - normalize special Unicode spaces to a regular space
 *
 * Every character substitution replaces exactly one UTF-16 code unit with one
 * code unit, so only the trailing-whitespace step can shift offsets.
 *
 * @param text - Text to normalize; lines are expected to be separated by "\n".
 * @returns The normalized text.
 */
export function normalizeForFuzzyMatch(text: string): string {
	return (
		text
			// Strip trailing whitespace per line
			.split("\n")
			.map((line) => line.trimEnd())
			.join("\n")
			// Smart single quotes → '
			.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
			// Smart double quotes → "
			.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
			// Various dashes/hyphens → -
			// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
			// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
			.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
			// Special spaces → regular space
			// U+00A0 NBSP, U+2000-U+200A fixed-width spaces (the range starts at
			// U+2000 so EN QUAD / EM QUAD are covered too), U+202F narrow NBSP,
			// U+205F medium math space, U+3000 ideographic space
			.replace(/[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g, " ")
	);
}

export interface FuzzyMatchResult {
	/** Whether a match was found */
	found: boolean;
	/** Index in `contentForReplacement` where the match starts (-1 when not found) */
	index: number;
	/** Number of characters of `contentForReplacement` covered by the match */
	matchLength: number;
	/** Whether fuzzy matching was used (false = exact match, or no match at all) */
	usedFuzzyMatch: boolean;
	/**
	 * The content to slice when performing the replacement. This is always the
	 * original, un-normalized content; `index` and `matchLength` are expressed
	 * in its coordinates, so text outside the matched region is left untouched.
	 */
	contentForReplacement: string;
}

/**
 * Build a lookup from positions in `normalizeForFuzzyMatch(text)` to positions
 * in `text`.
 *
 * Entry `i` is the index in `text` of the character that ends up at index `i`
 * of the normalized string. One extra trailing entry equal to `text.length`
 * lets an exclusive end position be translated as well. Uses the same
 * split("\n") + trimEnd() steps as `normalizeForFuzzyMatch`, which is the only
 * part of the normalization that removes characters.
 */
function mapNormalizedOffsets(text: string): number[] {
	const offsets: number[] = [];
	let lineStart = 0;
	for (const line of text.split("\n")) {
		const keptLength = line.trimEnd().length;
		for (let i = 0; i < keptLength; i++) {
			offsets.push(lineStart + i);
		}
		// Position of the line's "\n"; for the last line this is text.length,
		// which doubles as the end-of-text sentinel.
		offsets.push(lineStart + line.length);
		lineStart += line.length + 1;
	}
	return offsets;
}

/**
 * Find `oldText` in `content`, trying an exact match first and falling back to
 * a fuzzy match (trailing whitespace, smart quotes, Unicode dashes and special
 * spaces are ignored as differences).
 *
 * The fuzzy comparison runs on normalized copies of both strings, but the
 * reported `index` / `matchLength` are translated back to the original
 * `content`. Replacing that range therefore only rewrites the matched region
 * instead of normalizing the whole file. Trailing whitespace that was stripped
 * from lines inside the match (including the line the match ends on, when the
 * match reaches the end of that line) is part of the reported range.
 *
 * @param content - Text to search in (LF line endings expected).
 * @param oldText - Text to look for.
 * @returns Match details; `found` is false when neither strategy matches.
 */
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
	// Try exact match first
	const exactIndex = content.indexOf(oldText);
	if (exactIndex !== -1) {
		return {
			found: true,
			index: exactIndex,
			matchLength: oldText.length,
			usedFuzzyMatch: false,
			contentForReplacement: content,
		};
	}

	const notFound: FuzzyMatchResult = {
		found: false,
		index: -1,
		matchLength: 0,
		usedFuzzyMatch: false,
		contentForReplacement: content,
	};

	const fuzzyOldText = normalizeForFuzzyMatch(oldText);
	// A whitespace-only oldText normalizes to "", which indexOf would "find" at
	// position 0 of any content. That is not a real match.
	if (fuzzyOldText.length === 0) {
		return notFound;
	}

	const fuzzyIndex = normalizeForFuzzyMatch(content).indexOf(fuzzyOldText);
	if (fuzzyIndex === -1) {
		return notFound;
	}

	// Translate the normalized-space range back to the original content.
	// Both lookups are in range by construction; the fallback only satisfies
	// strict indexed-access checking.
	const offsets = mapNormalizedOffsets(content);
	const start = offsets[fuzzyIndex] ?? content.length;
	const end = offsets[fuzzyIndex + fuzzyOldText.length] ?? content.length;

	return {
		found: true,
		index: start,
		matchLength: end - start,
		usedFuzzyMatch: true,
		contentForReplacement: content,
	};
}

/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
export function stripBom(content: string): { bom: string; text: string } {
	return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content };
}
The old text must match exactly including all whitespace and newlines.`, }; } - // Count occurrences - const occurrences = normalizedContent.split(normalizedOldText).length - 1; + // Count occurrences using fuzzy-normalized content for consistency + const fuzzyContent = normalizeForFuzzyMatch(normalizedContent); + const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText); + const occurrences = fuzzyContent.split(fuzzyOldText).length - 1; + if (occurrences > 1) { return { error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. Please provide more context to make it unique.`, }; } - // Compute the new content - const index = normalizedContent.indexOf(normalizedOldText); - const normalizedNewContent = - normalizedContent.substring(0, index) + + // Compute the new content using the matched position + // When fuzzy matching was used, contentForReplacement is the normalized version + const baseContent = matchResult.contentForReplacement; + const newContent = + baseContent.substring(0, matchResult.index) + normalizedNewText + - normalizedContent.substring(index + normalizedOldText.length); + baseContent.substring(matchResult.index + matchResult.matchLength); // Check if it would actually change anything - if (normalizedContent === normalizedNewContent) { + if (baseContent === newContent) { return { error: `No changes would be made to ${path}. The replacement produces identical content.`, }; } // Generate the diff - return generateDiffString(normalizedContent, normalizedNewContent); + return generateDiffString(baseContent, newContent); } catch (err) { return { error: err instanceof Error ? 
err.message : String(err) }; } diff --git a/packages/coding-agent/src/core/tools/edit.ts b/packages/coding-agent/src/core/tools/edit.ts index 0658117a..ecdc0ae6 100644 --- a/packages/coding-agent/src/core/tools/edit.ts +++ b/packages/coding-agent/src/core/tools/edit.ts @@ -2,7 +2,15 @@ import type { AgentTool } from "@mariozechner/pi-agent-core"; import { Type } from "@sinclair/typebox"; import { constants } from "fs"; import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises"; -import { detectLineEnding, generateDiffString, normalizeToLF, restoreLineEndings, stripBom } from "./edit-diff.js"; +import { + detectLineEnding, + fuzzyFindText, + generateDiffString, + normalizeForFuzzyMatch, + normalizeToLF, + restoreLineEndings, + stripBom, +} from "./edit-diff.js"; import { resolveToCwd } from "./path-utils.js"; const editSchema = Type.Object({ @@ -116,8 +124,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo const normalizedOldText = normalizeToLF(oldText); const normalizedNewText = normalizeToLF(newText); - // Check if old text exists - if (!normalizedContent.includes(normalizedOldText)) { + // Find the old text using fuzzy matching (tries exact match first, then fuzzy) + const matchResult = fuzzyFindText(normalizedContent, normalizedOldText); + + if (!matchResult.found) { if (signal) { signal.removeEventListener("abort", onAbort); } @@ -129,8 +139,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo return; } - // Count occurrences - const occurrences = normalizedContent.split(normalizedOldText).length - 1; + // Count occurrences using fuzzy-normalized content for consistency + const fuzzyContent = normalizeForFuzzyMatch(normalizedContent); + const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText); + const occurrences = fuzzyContent.split(fuzzyOldText).length - 1; if (occurrences > 1) { if (signal) { @@ -149,16 +161,16 @@ export function 
createEditTool(cwd: string, options?: EditToolOptions): AgentToo return; } - // Perform replacement using indexOf + substring (raw string replace, no special character interpretation) - // String.replace() interprets $ in the replacement string, so we do manual replacement - const index = normalizedContent.indexOf(normalizedOldText); - const normalizedNewContent = - normalizedContent.substring(0, index) + + // Perform replacement using the matched text position + // When fuzzy matching was used, contentForReplacement is the normalized version + const baseContent = matchResult.contentForReplacement; + const newContent = + baseContent.substring(0, matchResult.index) + normalizedNewText + - normalizedContent.substring(index + normalizedOldText.length); + baseContent.substring(matchResult.index + matchResult.matchLength); // Verify the replacement actually changed something - if (normalizedContent === normalizedNewContent) { + if (baseContent === newContent) { if (signal) { signal.removeEventListener("abort", onAbort); } @@ -170,7 +182,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo return; } - const finalContent = bom + restoreLineEndings(normalizedNewContent, originalEnding); + const finalContent = bom + restoreLineEndings(newContent, originalEnding); await ops.writeFile(absolutePath, finalContent); // Check if aborted after writing @@ -183,7 +195,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo signal.removeEventListener("abort", onAbort); } - const diffResult = generateDiffString(normalizedContent, normalizedNewContent); + const diffResult = generateDiffString(baseContent, newContent); resolve({ content: [ { diff --git a/packages/coding-agent/test/tools.test.ts b/packages/coding-agent/test/tools.test.ts index fac3f61a..63d4d93f 100644 --- a/packages/coding-agent/test/tools.test.ts +++ b/packages/coding-agent/test/tools.test.ts @@ -388,6 +388,147 @@ describe("Coding Agent Tools", () => { }); 
// Fuzzy-matching fallback of the edit tool: when oldText is not found
// verbatim, the tool should still locate it if it differs only in trailing
// whitespace, smart quotes, Unicode dashes or special spaces.
// Successful edits assert the complete file content so that any collateral
// change to the written file makes the test fail.
describe("edit tool fuzzy matching", () => {
	let testDir: string;

	beforeEach(() => {
		testDir = join(tmpdir(), `coding-agent-fuzzy-test-${Date.now()}`);
		mkdirSync(testDir, { recursive: true });
	});

	afterEach(() => {
		rmSync(testDir, { recursive: true, force: true });
	});

	/** Write `body` to a file called `name` in the per-test directory and return its path. */
	const writeFixture = (name: string, body: string): string => {
		const file = join(testDir, name);
		writeFileSync(file, body);
		return file;
	};

	it("should match text with trailing whitespace stripped", async () => {
		// File has trailing spaces on the first two lines
		const testFile = writeFixture("trailing-ws.txt", "line one \nline two \nline three\n");

		// oldText without trailing whitespace should still match
		const result = await editTool.execute("test-fuzzy-1", {
			path: testFile,
			oldText: "line one\nline two\n",
			newText: "replaced\n",
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe("replaced\nline three\n");
	});

	it("should match smart single quotes to ASCII quotes", async () => {
		// File has smart/curly single quotes (U+2018, U+2019)
		const testFile = writeFixture("smart-quotes.txt", "console.log(\u2018hello\u2019);\n");

		// oldText with ASCII quotes should match
		const result = await editTool.execute("test-fuzzy-2", {
			path: testFile,
			oldText: "console.log('hello');",
			newText: "console.log('world');",
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe("console.log('world');\n");
	});

	it("should match smart double quotes to ASCII quotes", async () => {
		// File has smart/curly double quotes (U+201C, U+201D)
		const testFile = writeFixture("smart-double-quotes.txt", "const msg = \u201CHello World\u201D;\n");

		// oldText with ASCII quotes should match
		const result = await editTool.execute("test-fuzzy-3", {
			path: testFile,
			oldText: 'const msg = "Hello World";',
			newText: 'const msg = "Goodbye";',
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe('const msg = "Goodbye";\n');
	});

	it("should match Unicode dashes to ASCII hyphen", async () => {
		// File has en-dash (U+2013) and em-dash (U+2014)
		const testFile = writeFixture("unicode-dashes.txt", "range: 1\u20135\nbreak\u2014here\n");

		// oldText with ASCII hyphens should match
		const result = await editTool.execute("test-fuzzy-4", {
			path: testFile,
			oldText: "range: 1-5\nbreak-here",
			newText: "range: 10-50\nbreak--here",
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe("range: 10-50\nbreak--here\n");
	});

	it("should match non-breaking space to regular space", async () => {
		// File has non-breaking space (U+00A0)
		const testFile = writeFixture("nbsp.txt", "hello\u00A0world\n");

		// oldText with regular space should match
		const result = await editTool.execute("test-fuzzy-5", {
			path: testFile,
			oldText: "hello world",
			newText: "hello universe",
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe("hello universe\n");
	});

	it("should prefer exact match over fuzzy match", async () => {
		// oldText occurs verbatim, so the exact-match path handles the edit
		const testFile = writeFixture("exact-preferred.txt", "const x = 'exact';\nconst y = 'other';\n");

		const result = await editTool.execute("test-fuzzy-6", {
			path: testFile,
			oldText: "const x = 'exact';",
			newText: "const x = 'changed';",
		});

		expect(getTextOutput(result)).toContain("Successfully replaced");
		expect(readFileSync(testFile, "utf-8")).toBe("const x = 'changed';\nconst y = 'other';\n");
	});

	it("should still fail when text is not found even with fuzzy matching", async () => {
		const testFile = writeFixture("no-match.txt", "completely different content\n");

		await expect(
			editTool.execute("test-fuzzy-7", {
				path: testFile,
				oldText: "this does not exist",
				newText: "replacement",
			}),
		).rejects.toThrow(/Could not find the exact text/);
	});

	it("should detect duplicates after fuzzy normalization", async () => {
		// Two lines that are identical after trailing whitespace is stripped
		const testFile = writeFixture("fuzzy-dups.txt", "hello world \nhello world\n");

		await expect(
			editTool.execute("test-fuzzy-8", {
				path: testFile,
				oldText: "hello world",
				newText: "replaced",
			}),
		).rejects.toThrow(/Found 2 occurrences/);
	});
});