mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 12:03:49 +00:00
feat(edit): add fuzzy matching for trailing whitespace, quotes, dashes, and spaces (#713)
This commit is contained in:
parent
c85a5720f2
commit
0c135d0141
4 changed files with 279 additions and 25 deletions
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Edit tool now falls back to fuzzy matching when an exact match fails, tolerating trailing whitespace, smart quotes, Unicode dashes, and special spaces
|
||||
|
||||
## [0.45.7] - 2026-01-13
|
||||
|
||||
### Added
|
||||
|
|
|
|||
|
|
@ -24,6 +24,97 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
|
|||
return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text;
|
||||
}
|
||||
|
||||
/**
 * Normalize text for fuzzy matching. Applies, in order:
 * - Strip trailing whitespace from each line
 * - Normalize smart quotes to ASCII equivalents
 * - Normalize Unicode dashes/hyphens to ASCII hyphen
 * - Normalize special Unicode spaces to regular space
 *
 * Every substitution swaps one UTF-16 code unit for exactly one other, so apart
 * from the per-line trailing trim, offsets within a line are preserved. The
 * span mapping in fuzzyFindText relies on this property.
 */
export function normalizeForFuzzyMatch(text: string): string {
  return (
    text
      // Strip trailing whitespace per line
      .split("\n")
      .map((line) => line.trimEnd())
      .join("\n")
      // Smart single quotes → '
      .replace(/[\u2018\u2019\u201A\u201B]/g, "'")
      // Smart double quotes → "
      .replace(/[\u201C\u201D\u201E\u201F]/g, '"')
      // Various dashes/hyphens → -
      // U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
      // U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
      .replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
      // Special spaces → regular space
      // U+00A0 NBSP, U+2000-U+200A en quad through hair space, U+202F narrow NBSP,
      // U+205F medium math space, U+3000 ideographic space
      .replace(/[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g, " ")
  );
}

export interface FuzzyMatchResult {
  /** Whether a match was found */
  found: boolean;
  /** The index where the match starts, as an offset into contentForReplacement (-1 when not found) */
  index: number;
  /** Length of the matched span inside contentForReplacement (0 when not found) */
  matchLength: number;
  /** Whether fuzzy matching was used (false = exact match, or no match at all) */
  usedFuzzyMatch: boolean;
  /**
   * The content to use for replacement operations. This is always the content
   * that was passed in; index and matchLength are offsets into it, so text
   * outside the matched span is never altered.
   */
  contentForReplacement: string;
}

/**
 * Translate a span expressed in normalizeForFuzzyMatch(content) offsets back to
 * offsets in the original content.
 *
 * Walks the lines of the original content, tracking where each line starts in
 * both coordinate systems. Trailing whitespace next to the span is treated as
 * part of it: a span that starts or ends exactly at the end of a line's
 * visible text absorbs that line's trailing whitespace.
 */
function mapNormalizedSpanToOriginal(
  content: string,
  normalizedStart: number,
  normalizedEnd: number,
): { start: number; end: number } {
  let start = -1;
  let end = -1;
  let originalLineStart = 0;
  let normalizedLineStart = 0;

  for (const line of content.split("\n")) {
    const keptLength = line.trimEnd().length;
    const normalizedLineEnd = normalizedLineStart + keptLength;

    if (start === -1 && normalizedStart <= normalizedLineEnd) {
      start = originalLineStart + (normalizedStart - normalizedLineStart);
      if (normalizedEnd === normalizedStart) {
        // A zero-length match must stay zero-length in the original.
        end = start;
        break;
      }
    }

    if (start !== -1 && normalizedEnd <= normalizedLineEnd) {
      const offsetInLine = normalizedEnd - normalizedLineStart;
      // Ending at the last visible character also swallows the trailing
      // whitespace that normalization had removed from this line.
      end = originalLineStart + (offsetInLine === keptLength ? line.length : offsetInLine);
      break;
    }

    // +1 accounts for the "\n" separator, present in both coordinate systems.
    originalLineStart += line.length + 1;
    normalizedLineStart = normalizedLineEnd + 1;
  }

  // Defensive fallbacks; unreachable for offsets that lie inside the normalized content.
  if (start === -1) start = content.length;
  if (end === -1) end = content.length;
  return { start, end };
}

/**
 * Find oldText in content, trying exact match first, then fuzzy match.
 *
 * The fuzzy match compares normalizeForFuzzyMatch(content) against
 * normalizeForFuzzyMatch(oldText). When it succeeds, the matched span is mapped
 * back onto the original content, so a replacement built from the result only
 * touches the matched region. Smart quotes, Unicode dashes, special spaces and
 * trailing whitespace elsewhere in the file are left exactly as they were.
 *
 * @param content - The text to search in.
 * @param oldText - The text to locate.
 * @returns The match description. When nothing matches, found is false, index
 *          is -1 and matchLength is 0.
 */
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
  // Try exact match first
  const exactIndex = content.indexOf(oldText);
  if (exactIndex !== -1) {
    return {
      found: true,
      index: exactIndex,
      matchLength: oldText.length,
      usedFuzzyMatch: false,
      contentForReplacement: content,
    };
  }

  // Fall back to comparing in normalized space
  const fuzzyContent = normalizeForFuzzyMatch(content);
  const fuzzyOldText = normalizeForFuzzyMatch(oldText);
  const fuzzyIndex = fuzzyContent.indexOf(fuzzyOldText);

  if (fuzzyIndex === -1) {
    return {
      found: false,
      index: -1,
      matchLength: 0,
      usedFuzzyMatch: false,
      contentForReplacement: content,
    };
  }

  // Report the span in original coordinates so callers splice the original text
  // instead of writing back a normalized copy of the whole file.
  const span = mapNormalizedSpanToOriginal(content, fuzzyIndex, fuzzyIndex + fuzzyOldText.length);
  return {
    found: true,
    index: span.start,
    matchLength: span.end - span.start,
    usedFuzzyMatch: true,
    contentForReplacement: content,
  };
}
|
||||
|
||||
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
|
||||
export function stripBom(content: string): { bom: string; text: string } {
|
||||
return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content };
|
||||
|
|
@ -174,37 +265,43 @@ export async function computeEditDiff(
|
|||
const normalizedOldText = normalizeToLF(oldText);
|
||||
const normalizedNewText = normalizeToLF(newText);
|
||||
|
||||
// Check if old text exists
|
||||
if (!normalizedContent.includes(normalizedOldText)) {
|
||||
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
|
||||
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
|
||||
|
||||
if (!matchResult.found) {
|
||||
return {
|
||||
error: `Could not find the exact text in ${path}. The old text must match exactly including all whitespace and newlines.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Count occurrences
|
||||
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
|
||||
// Count occurrences using fuzzy-normalized content for consistency
|
||||
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
|
||||
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
|
||||
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
|
||||
|
||||
if (occurrences > 1) {
|
||||
return {
|
||||
error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. Please provide more context to make it unique.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Compute the new content
|
||||
const index = normalizedContent.indexOf(normalizedOldText);
|
||||
const normalizedNewContent =
|
||||
normalizedContent.substring(0, index) +
|
||||
// Compute the new content using the matched position
|
||||
// When fuzzy matching was used, contentForReplacement is the normalized version
|
||||
const baseContent = matchResult.contentForReplacement;
|
||||
const newContent =
|
||||
baseContent.substring(0, matchResult.index) +
|
||||
normalizedNewText +
|
||||
normalizedContent.substring(index + normalizedOldText.length);
|
||||
baseContent.substring(matchResult.index + matchResult.matchLength);
|
||||
|
||||
// Check if it would actually change anything
|
||||
if (normalizedContent === normalizedNewContent) {
|
||||
if (baseContent === newContent) {
|
||||
return {
|
||||
error: `No changes would be made to ${path}. The replacement produces identical content.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Generate the diff
|
||||
return generateDiffString(normalizedContent, normalizedNewContent);
|
||||
return generateDiffString(baseContent, newContent);
|
||||
} catch (err) {
|
||||
return { error: err instanceof Error ? err.message : String(err) };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,15 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { constants } from "fs";
|
||||
import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises";
|
||||
import { detectLineEnding, generateDiffString, normalizeToLF, restoreLineEndings, stripBom } from "./edit-diff.js";
|
||||
import {
|
||||
detectLineEnding,
|
||||
fuzzyFindText,
|
||||
generateDiffString,
|
||||
normalizeForFuzzyMatch,
|
||||
normalizeToLF,
|
||||
restoreLineEndings,
|
||||
stripBom,
|
||||
} from "./edit-diff.js";
|
||||
import { resolveToCwd } from "./path-utils.js";
|
||||
|
||||
const editSchema = Type.Object({
|
||||
|
|
@ -116,8 +124,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
const normalizedOldText = normalizeToLF(oldText);
|
||||
const normalizedNewText = normalizeToLF(newText);
|
||||
|
||||
// Check if old text exists
|
||||
if (!normalizedContent.includes(normalizedOldText)) {
|
||||
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
|
||||
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
|
||||
|
||||
if (!matchResult.found) {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
|
@ -129,8 +139,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
// Count occurrences
|
||||
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
|
||||
// Count occurrences using fuzzy-normalized content for consistency
|
||||
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
|
||||
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
|
||||
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
|
||||
|
||||
if (occurrences > 1) {
|
||||
if (signal) {
|
||||
|
|
@ -149,16 +161,16 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
// Perform replacement using indexOf + substring (raw string replace, no special character interpretation)
|
||||
// String.replace() interprets $ in the replacement string, so we do manual replacement
|
||||
const index = normalizedContent.indexOf(normalizedOldText);
|
||||
const normalizedNewContent =
|
||||
normalizedContent.substring(0, index) +
|
||||
// Perform replacement using the matched text position
|
||||
// When fuzzy matching was used, contentForReplacement is the normalized version
|
||||
const baseContent = matchResult.contentForReplacement;
|
||||
const newContent =
|
||||
baseContent.substring(0, matchResult.index) +
|
||||
normalizedNewText +
|
||||
normalizedContent.substring(index + normalizedOldText.length);
|
||||
baseContent.substring(matchResult.index + matchResult.matchLength);
|
||||
|
||||
// Verify the replacement actually changed something
|
||||
if (normalizedContent === normalizedNewContent) {
|
||||
if (baseContent === newContent) {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
|
@ -170,7 +182,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
const finalContent = bom + restoreLineEndings(normalizedNewContent, originalEnding);
|
||||
const finalContent = bom + restoreLineEndings(newContent, originalEnding);
|
||||
await ops.writeFile(absolutePath, finalContent);
|
||||
|
||||
// Check if aborted after writing
|
||||
|
|
@ -183,7 +195,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
||||
const diffResult = generateDiffString(normalizedContent, normalizedNewContent);
|
||||
const diffResult = generateDiffString(baseContent, newContent);
|
||||
resolve({
|
||||
content: [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -388,6 +388,147 @@ describe("Coding Agent Tools", () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Exercises the edit tool's fuzzy fallback: when oldText does not match the
// file byte-for-byte, the tool should still locate it if the only differences
// are trailing whitespace, smart quotes, Unicode dashes or special spaces.
// Successful edits assert the complete file content, so any damage outside the
// replaced text is detected.
describe("edit tool fuzzy matching", () => {
  let testDir: string;

  beforeEach(() => {
    testDir = join(tmpdir(), `coding-agent-fuzzy-test-${Date.now()}`);
    mkdirSync(testDir, { recursive: true });
  });

  afterEach(() => {
    rmSync(testDir, { recursive: true, force: true });
  });

  it("should match text with trailing whitespace stripped", async () => {
    const testFile = join(testDir, "trailing-ws.txt");
    // File has trailing spaces on lines
    writeFileSync(testFile, "line one \nline two \nline three\n");

    // oldText without trailing whitespace should still match
    const result = await editTool.execute("test-fuzzy-1", {
      path: testFile,
      oldText: "line one\nline two\n",
      newText: "replaced\n",
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe("replaced\nline three\n");
  });

  it("should match smart single quotes to ASCII quotes", async () => {
    const testFile = join(testDir, "smart-quotes.txt");
    // File has smart/curly single quotes (U+2018, U+2019)
    writeFileSync(testFile, "console.log(\u2018hello\u2019);\n");

    // oldText with ASCII quotes should match
    const result = await editTool.execute("test-fuzzy-2", {
      path: testFile,
      oldText: "console.log('hello');",
      newText: "console.log('world');",
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe("console.log('world');\n");
  });

  it("should match smart double quotes to ASCII quotes", async () => {
    const testFile = join(testDir, "smart-double-quotes.txt");
    // File has smart/curly double quotes (U+201C, U+201D)
    writeFileSync(testFile, "const msg = \u201CHello World\u201D;\n");

    // oldText with ASCII quotes should match
    const result = await editTool.execute("test-fuzzy-3", {
      path: testFile,
      oldText: 'const msg = "Hello World";',
      newText: 'const msg = "Goodbye";',
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe('const msg = "Goodbye";\n');
  });

  it("should match Unicode dashes to ASCII hyphen", async () => {
    const testFile = join(testDir, "unicode-dashes.txt");
    // File has en-dash (U+2013) and em-dash (U+2014)
    writeFileSync(testFile, "range: 1\u20135\nbreak\u2014here\n");

    // oldText with ASCII hyphens should match
    const result = await editTool.execute("test-fuzzy-4", {
      path: testFile,
      oldText: "range: 1-5\nbreak-here",
      newText: "range: 10-50\nbreak--here",
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe("range: 10-50\nbreak--here\n");
  });

  it("should match non-breaking space to regular space", async () => {
    const testFile = join(testDir, "nbsp.txt");
    // File has non-breaking space (U+00A0)
    writeFileSync(testFile, "hello\u00A0world\n");

    // oldText with regular space should match
    const result = await editTool.execute("test-fuzzy-5", {
      path: testFile,
      oldText: "hello world",
      newText: "hello universe",
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe("hello universe\n");
  });

  it("should prefer exact match over fuzzy match", async () => {
    const testFile = join(testDir, "exact-preferred.txt");
    // oldText matches byte-for-byte, so the exact path handles it
    writeFileSync(testFile, "const x = 'exact';\nconst y = 'other';\n");

    const result = await editTool.execute("test-fuzzy-6", {
      path: testFile,
      oldText: "const x = 'exact';",
      newText: "const x = 'changed';",
    });

    expect(getTextOutput(result)).toContain("Successfully replaced");
    const content = readFileSync(testFile, "utf-8");
    expect(content).toBe("const x = 'changed';\nconst y = 'other';\n");
  });

  it("should still fail when text is not found even with fuzzy matching", async () => {
    const testFile = join(testDir, "no-match.txt");
    writeFileSync(testFile, "completely different content\n");

    await expect(
      editTool.execute("test-fuzzy-7", {
        path: testFile,
        oldText: "this does not exist",
        newText: "replacement",
      }),
    ).rejects.toThrow(/Could not find the exact text/);
  });

  it("should detect duplicates after fuzzy normalization", async () => {
    const testFile = join(testDir, "fuzzy-dups.txt");
    // Two lines that are identical after trailing whitespace is stripped
    writeFileSync(testFile, "hello world \nhello world\n");

    await expect(
      editTool.execute("test-fuzzy-8", {
        path: testFile,
        oldText: "hello world",
        newText: "replaced",
      }),
    ).rejects.toThrow(/Found 2 occurrences/);
  });
});
|
||||
|
||||
describe("edit tool CRLF handling", () => {
|
||||
let testDir: string;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue