feat(edit): add fuzzy matching for trailing whitespace, quotes, dashes, and spaces (#713)

This commit is contained in:
Danila Poyarkov 2026-01-14 12:22:00 +03:00 committed by GitHub
parent c85a5720f2
commit 0c135d0141
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 279 additions and 25 deletions

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- Edit tool now uses fuzzy matching as a fallback when an exact match fails, tolerating trailing whitespace, smart quotes, Unicode dashes, and special spaces
## [0.45.7] - 2026-01-13
### Added

View file

@ -24,6 +24,97 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text;
}
/**
 * Normalize text so that cosmetically different strings compare equal during
 * fuzzy matching.
 *
 * Transformations, applied in this order:
 * 1. Trailing whitespace is removed from every "\n"-separated line.
 * 2. Typographic single quotes (U+2018, U+2019, U+201A, U+201B) become `'`.
 * 3. Typographic double quotes (U+201C, U+201D, U+201E, U+201F) become `"`.
 * 4. Unicode hyphens/dashes (U+2010-U+2015) and the minus sign (U+2212) become `-`.
 * 5. Non-ASCII space characters (U+00A0, U+2000-U+200A, U+202F, U+205F, U+3000)
 *    become a regular space.
 *
 * Steps 2-5 replace one UTF-16 code unit with one code unit, so only step 1
 * can change the length of the text.
 *
 * @param text - Text to normalize; line breaks are expected to be "\n".
 * @returns The normalized text.
 */
export function normalizeForFuzzyMatch(text: string): string {
	// Trim per line rather than on the whole string so that line structure is kept.
	const withoutTrailingWhitespace = text
		.split("\n")
		.map((line) => line.trimEnd())
		.join("\n");

	return (
		withoutTrailingWhitespace
			// Smart single quotes → '
			.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
			// Smart double quotes → "
			.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
			// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
			// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus → -
			.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
			// U+00A0 NBSP, U+2000-U+200A fixed-width spaces (the range starts at
			// U+2000 so that EN QUAD / EM QUAD are treated like EN SPACE / EM SPACE),
			// U+202F narrow NBSP, U+205F medium math space, U+3000 ideographic space → " "
			.replace(/[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g, " ")
	);
}
/** Outcome of searching for a piece of text with exact-then-fuzzy matching. */
export interface FuzzyMatchResult {
	/** True when the text was located, either exactly or after normalization. */
	found: boolean;
	/** Start offset of the match, measured in `contentForReplacement`. */
	index: number;
	/** Number of characters the match spans in `contentForReplacement`. */
	matchLength: number;
	/** True when the match needed fuzzy normalization; false for an exact match. */
	usedFuzzyMatch: boolean;
	/**
	 * The string that `index` and `matchLength` refer to and that a replacement
	 * must be applied to: the original content for an exact match, the
	 * normalized content for a fuzzy match.
	 */
	contentForReplacement: string;
}
/**
 * Locate `oldText` inside `content`, preferring an exact match and falling
 * back to a fuzzy match on normalized text.
 *
 * The fuzzy path compares `normalizeForFuzzyMatch(content)` against
 * `normalizeForFuzzyMatch(oldText)`. On a fuzzy hit, the returned `index` and
 * `matchLength` refer to the normalized content, which is also returned as
 * `contentForReplacement`. Applying a replacement to it therefore normalizes
 * the whole content, not only the matched region.
 *
 * @param content - Text to search in.
 * @param oldText - Text to search for.
 * @returns A match description; when nothing is found, `found` is false,
 *   `index` is -1 and `contentForReplacement` is the unmodified `content`.
 */
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
	// An exact hit always wins and leaves the content untouched.
	const exactIndex = content.indexOf(oldText);
	if (exactIndex !== -1) {
		return {
			found: true,
			index: exactIndex,
			matchLength: oldText.length,
			usedFuzzyMatch: false,
			contentForReplacement: content,
		};
	}

	// Fuzzy path: search entirely in normalized space.
	const fuzzyContent = normalizeForFuzzyMatch(content);
	const fuzzyOldText = normalizeForFuzzyMatch(oldText);

	// A whitespace-only oldText normalizes to "", and indexOf("") is always 0.
	// Treat that as "not found" so text that is absent from the content can
	// never produce a zero-length match at the start of the file.
	const fuzzyIndex = fuzzyOldText.length === 0 ? -1 : fuzzyContent.indexOf(fuzzyOldText);
	if (fuzzyIndex === -1) {
		return {
			found: false,
			index: -1,
			matchLength: 0,
			usedFuzzyMatch: false,
			contentForReplacement: content,
		};
	}

	// The replacement is performed on the normalized content, so the output
	// carries normalized whitespace/quotes/dashes. This is accepted because the
	// fuzzy path only exists to bridge minor formatting differences.
	return {
		found: true,
		index: fuzzyIndex,
		matchLength: fuzzyOldText.length,
		usedFuzzyMatch: true,
		contentForReplacement: fuzzyContent,
	};
}
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
export function stripBom(content: string): { bom: string; text: string } {
return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content };
@ -174,37 +265,43 @@ export async function computeEditDiff(
const normalizedOldText = normalizeToLF(oldText);
const normalizedNewText = normalizeToLF(newText);
// Check if old text exists
if (!normalizedContent.includes(normalizedOldText)) {
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
if (!matchResult.found) {
return {
error: `Could not find the exact text in ${path}. The old text must match exactly including all whitespace and newlines.`,
};
}
// Count occurrences
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
// Count occurrences using fuzzy-normalized content for consistency
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
if (occurrences > 1) {
return {
error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. Please provide more context to make it unique.`,
};
}
// Compute the new content
const index = normalizedContent.indexOf(normalizedOldText);
const normalizedNewContent =
normalizedContent.substring(0, index) +
// Compute the new content using the matched position
// When fuzzy matching was used, contentForReplacement is the normalized version
const baseContent = matchResult.contentForReplacement;
const newContent =
baseContent.substring(0, matchResult.index) +
normalizedNewText +
normalizedContent.substring(index + normalizedOldText.length);
baseContent.substring(matchResult.index + matchResult.matchLength);
// Check if it would actually change anything
if (normalizedContent === normalizedNewContent) {
if (baseContent === newContent) {
return {
error: `No changes would be made to ${path}. The replacement produces identical content.`,
};
}
// Generate the diff
return generateDiffString(normalizedContent, normalizedNewContent);
return generateDiffString(baseContent, newContent);
} catch (err) {
return { error: err instanceof Error ? err.message : String(err) };
}

View file

@ -2,7 +2,15 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises";
import { detectLineEnding, generateDiffString, normalizeToLF, restoreLineEndings, stripBom } from "./edit-diff.js";
import {
detectLineEnding,
fuzzyFindText,
generateDiffString,
normalizeForFuzzyMatch,
normalizeToLF,
restoreLineEndings,
stripBom,
} from "./edit-diff.js";
import { resolveToCwd } from "./path-utils.js";
const editSchema = Type.Object({
@ -116,8 +124,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
const normalizedOldText = normalizeToLF(oldText);
const normalizedNewText = normalizeToLF(newText);
// Check if old text exists
if (!normalizedContent.includes(normalizedOldText)) {
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
if (!matchResult.found) {
if (signal) {
signal.removeEventListener("abort", onAbort);
}
@ -129,8 +139,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
return;
}
// Count occurrences
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
// Count occurrences using fuzzy-normalized content for consistency
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
if (occurrences > 1) {
if (signal) {
@ -149,16 +161,16 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
return;
}
// Perform replacement using indexOf + substring (raw string replace, no special character interpretation)
// String.replace() interprets $ in the replacement string, so we do manual replacement
const index = normalizedContent.indexOf(normalizedOldText);
const normalizedNewContent =
normalizedContent.substring(0, index) +
// Perform replacement using the matched text position
// When fuzzy matching was used, contentForReplacement is the normalized version
const baseContent = matchResult.contentForReplacement;
const newContent =
baseContent.substring(0, matchResult.index) +
normalizedNewText +
normalizedContent.substring(index + normalizedOldText.length);
baseContent.substring(matchResult.index + matchResult.matchLength);
// Verify the replacement actually changed something
if (normalizedContent === normalizedNewContent) {
if (baseContent === newContent) {
if (signal) {
signal.removeEventListener("abort", onAbort);
}
@ -170,7 +182,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
return;
}
const finalContent = bom + restoreLineEndings(normalizedNewContent, originalEnding);
const finalContent = bom + restoreLineEndings(newContent, originalEnding);
await ops.writeFile(absolutePath, finalContent);
// Check if aborted after writing
@ -183,7 +195,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
signal.removeEventListener("abort", onAbort);
}
const diffResult = generateDiffString(normalizedContent, normalizedNewContent);
const diffResult = generateDiffString(baseContent, newContent);
resolve({
content: [
{

View file

@ -388,6 +388,147 @@ describe("Coding Agent Tools", () => {
});
});
// Exercises the edit tool's fuzzy fallback: each case writes a fixture file,
// runs an edit whose oldText differs cosmetically from the file, and checks
// either the written result or the rejection message.
describe("edit tool fuzzy matching", () => {
	let scratchDir: string;

	// Create a fixture file inside the per-test scratch directory and return its path.
	const writeFixture = (fileName: string, body: string): string => {
		const fixturePath = join(scratchDir, fileName);
		writeFileSync(fixturePath, body);
		return fixturePath;
	};

	beforeEach(() => {
		scratchDir = join(tmpdir(), `coding-agent-fuzzy-test-${Date.now()}`);
		mkdirSync(scratchDir, { recursive: true });
	});

	afterEach(() => {
		rmSync(scratchDir, { recursive: true, force: true });
	});

	it("should match text with trailing whitespace stripped", async () => {
		// The first two lines of the file end in spaces; oldText has none.
		const target = writeFixture("trailing-ws.txt", "line one \nline two \nline three\n");

		const outcome = await editTool.execute("test-fuzzy-1", {
			path: target,
			oldText: "line one\nline two\n",
			newText: "replaced\n",
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toBe("replaced\nline three\n");
	});

	it("should match smart single quotes to ASCII quotes", async () => {
		// File uses curly single quotes (U+2018, U+2019); oldText uses ASCII quotes.
		const target = writeFixture("smart-quotes.txt", "console.log(\u2018hello\u2019);\n");

		const outcome = await editTool.execute("test-fuzzy-2", {
			path: target,
			oldText: "console.log('hello');",
			newText: "console.log('world');",
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toContain("world");
	});

	it("should match smart double quotes to ASCII quotes", async () => {
		// File uses curly double quotes (U+201C, U+201D); oldText uses ASCII quotes.
		const target = writeFixture("smart-double-quotes.txt", "const msg = \u201CHello World\u201D;\n");

		const outcome = await editTool.execute("test-fuzzy-3", {
			path: target,
			oldText: 'const msg = "Hello World";',
			newText: 'const msg = "Goodbye";',
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toContain("Goodbye");
	});

	it("should match Unicode dashes to ASCII hyphen", async () => {
		// File uses an en-dash (U+2013) and an em-dash (U+2014); oldText uses hyphens.
		const target = writeFixture("unicode-dashes.txt", "range: 1\u20135\nbreak\u2014here\n");

		const outcome = await editTool.execute("test-fuzzy-4", {
			path: target,
			oldText: "range: 1-5\nbreak-here",
			newText: "range: 10-50\nbreak--here",
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toContain("10-50");
	});

	it("should match non-breaking space to regular space", async () => {
		// File uses a non-breaking space (U+00A0); oldText uses a regular space.
		const target = writeFixture("nbsp.txt", "hello\u00A0world\n");

		const outcome = await editTool.execute("test-fuzzy-5", {
			path: target,
			oldText: "hello world",
			newText: "hello universe",
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toContain("universe");
	});

	it("should prefer exact match over fuzzy match", async () => {
		// oldText occurs verbatim, so the rest of the file must come back unchanged.
		const target = writeFixture("exact-preferred.txt", "const x = 'exact';\nconst y = 'other';\n");

		const outcome = await editTool.execute("test-fuzzy-6", {
			path: target,
			oldText: "const x = 'exact';",
			newText: "const x = 'changed';",
		});

		expect(getTextOutput(outcome)).toContain("Successfully replaced");
		const written = readFileSync(target, "utf-8");
		expect(written).toBe("const x = 'changed';\nconst y = 'other';\n");
	});

	it("should still fail when text is not found even with fuzzy matching", async () => {
		const target = writeFixture("no-match.txt", "completely different content\n");

		await expect(
			editTool.execute("test-fuzzy-7", {
				path: target,
				oldText: "this does not exist",
				newText: "replacement",
			}),
		).rejects.toThrow(/Could not find the exact text/);
	});

	it("should detect duplicates after fuzzy normalization", async () => {
		// The two lines become identical once trailing whitespace is stripped.
		const target = writeFixture("fuzzy-dups.txt", "hello world \nhello world\n");

		await expect(
			editTool.execute("test-fuzzy-8", {
				path: target,
				oldText: "hello world",
				newText: "replaced",
			}),
		).rejects.toThrow(/Found 2 occurrences/);
	});
});
describe("edit tool CRLF handling", () => {
let testDir: string;