mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 23:01:30 +00:00
feat(edit): add fuzzy matching for trailing whitespace, quotes, dashes, and spaces (#713)
This commit is contained in:
parent
c85a5720f2
commit
0c135d0141
4 changed files with 279 additions and 25 deletions
|
|
@ -24,6 +24,97 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
|
|||
return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text;
|
||||
}
|
||||
|
||||
/**
 * Normalize text for fuzzy matching. Applies these transformations in order:
 * - Strip trailing whitespace from each line
 * - Normalize smart quotes to ASCII equivalents
 * - Normalize Unicode dashes/hyphens to ASCII hyphen
 * - Normalize special Unicode spaces to regular space
 *
 * Only the trailing-whitespace step can change the length of the text; every
 * other step replaces one character with exactly one ASCII character.
 *
 * @param text - Text to normalize. Lines are delimited by "\n".
 * @returns The normalized text.
 */
export function normalizeForFuzzyMatch(text: string): string {
	return (
		text
			// Strip trailing whitespace per line
			.split("\n")
			.map((line) => line.trimEnd())
			.join("\n")
			// Smart single quotes → '
			.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
			// Smart double quotes → "
			.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
			// Various dashes/hyphens → -
			// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
			// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
			.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
			// Special spaces → regular space
			// U+00A0 NBSP, U+2000-U+200A various spaces (including U+2000 en quad and
			// U+2001 em quad, which are canonically equivalent to U+2002/U+2003),
			// U+202F narrow NBSP, U+205F medium math space, U+3000 ideographic space
			.replace(/[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g, " ")
	);
}
|
||||
|
||||
/** Outcome of locating a piece of text inside a larger body of content. */
export interface FuzzyMatchResult {
	/** True when the text was located, either exactly or after normalization. */
	found: boolean;
	/**
	 * Start offset of the match. The offset is relative to
	 * `contentForReplacement`, which is the string callers should slice.
	 */
	index: number;
	/** Number of characters covered by the match, starting at `index`. */
	matchLength: number;
	/** True when the match was only found after normalization; false for an exact match. */
	usedFuzzyMatch: boolean;
	/**
	 * The string on which the replacement should be performed.
	 * Exact match: the original content. Fuzzy match: the normalized content.
	 */
	contentForReplacement: string;
}
|
||||
|
||||
/**
 * Find oldText in content, trying exact match first, then fuzzy match.
 * When fuzzy matching is used, the returned contentForReplacement is the
 * fuzzy-normalized version of the content (trailing whitespace stripped,
 * Unicode quotes/dashes/spaces normalized to ASCII), and index/matchLength
 * refer to positions in that normalized string.
 *
 * @param content - The text to search in.
 * @param oldText - The text to locate.
 * @returns A result describing whether and where the text was found. When
 *   nothing matches, `found` is false, `index` is -1, `matchLength` is 0 and
 *   `contentForReplacement` is the untouched `content`.
 */
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
	// Try exact match first
	const exactIndex = content.indexOf(oldText);
	if (exactIndex !== -1) {
		return {
			found: true,
			index: exactIndex,
			matchLength: oldText.length,
			usedFuzzyMatch: false,
			contentForReplacement: content,
		};
	}

	// Try fuzzy match - work entirely in normalized space
	const fuzzyContent = normalizeForFuzzyMatch(content);
	const fuzzyOldText = normalizeForFuzzyMatch(oldText);

	// A non-empty oldText can normalize to "" (e.g. it was only trailing
	// whitespace). indexOf("") is always 0, which would report a bogus
	// zero-length match at the start of the content, so treat it as not found.
	const fuzzyIndex = fuzzyOldText.length === 0 ? -1 : fuzzyContent.indexOf(fuzzyOldText);

	if (fuzzyIndex === -1) {
		return {
			found: false,
			index: -1,
			matchLength: 0,
			usedFuzzyMatch: false,
			contentForReplacement: content,
		};
	}

	// When fuzzy matching, we work in the normalized space for replacement.
	// This means the output will have normalized whitespace/quotes/dashes,
	// which is acceptable since we're fixing minor formatting differences anyway.
	return {
		found: true,
		index: fuzzyIndex,
		matchLength: fuzzyOldText.length,
		usedFuzzyMatch: true,
		contentForReplacement: fuzzyContent,
	};
}
|
||||
|
||||
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
|
||||
export function stripBom(content: string): { bom: string; text: string } {
|
||||
return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content };
|
||||
|
|
@ -174,37 +265,43 @@ export async function computeEditDiff(
|
|||
const normalizedOldText = normalizeToLF(oldText);
|
||||
const normalizedNewText = normalizeToLF(newText);
|
||||
|
||||
// Check if old text exists
|
||||
if (!normalizedContent.includes(normalizedOldText)) {
|
||||
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
|
||||
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
|
||||
|
||||
if (!matchResult.found) {
|
||||
return {
|
||||
error: `Could not find the exact text in ${path}. The old text must match exactly including all whitespace and newlines.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Count occurrences
|
||||
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
|
||||
// Count occurrences using fuzzy-normalized content for consistency
|
||||
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
|
||||
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
|
||||
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
|
||||
|
||||
if (occurrences > 1) {
|
||||
return {
|
||||
error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. Please provide more context to make it unique.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Compute the new content
|
||||
const index = normalizedContent.indexOf(normalizedOldText);
|
||||
const normalizedNewContent =
|
||||
normalizedContent.substring(0, index) +
|
||||
// Compute the new content using the matched position
|
||||
// When fuzzy matching was used, contentForReplacement is the normalized version
|
||||
const baseContent = matchResult.contentForReplacement;
|
||||
const newContent =
|
||||
baseContent.substring(0, matchResult.index) +
|
||||
normalizedNewText +
|
||||
normalizedContent.substring(index + normalizedOldText.length);
|
||||
baseContent.substring(matchResult.index + matchResult.matchLength);
|
||||
|
||||
// Check if it would actually change anything
|
||||
if (normalizedContent === normalizedNewContent) {
|
||||
if (baseContent === newContent) {
|
||||
return {
|
||||
error: `No changes would be made to ${path}. The replacement produces identical content.`,
|
||||
};
|
||||
}
|
||||
|
||||
// Generate the diff
|
||||
return generateDiffString(normalizedContent, normalizedNewContent);
|
||||
return generateDiffString(baseContent, newContent);
|
||||
} catch (err) {
|
||||
return { error: err instanceof Error ? err.message : String(err) };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,15 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { constants } from "fs";
|
||||
import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises";
|
||||
import { detectLineEnding, generateDiffString, normalizeToLF, restoreLineEndings, stripBom } from "./edit-diff.js";
|
||||
import {
|
||||
detectLineEnding,
|
||||
fuzzyFindText,
|
||||
generateDiffString,
|
||||
normalizeForFuzzyMatch,
|
||||
normalizeToLF,
|
||||
restoreLineEndings,
|
||||
stripBom,
|
||||
} from "./edit-diff.js";
|
||||
import { resolveToCwd } from "./path-utils.js";
|
||||
|
||||
const editSchema = Type.Object({
|
||||
|
|
@ -116,8 +124,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
const normalizedOldText = normalizeToLF(oldText);
|
||||
const normalizedNewText = normalizeToLF(newText);
|
||||
|
||||
// Check if old text exists
|
||||
if (!normalizedContent.includes(normalizedOldText)) {
|
||||
// Find the old text using fuzzy matching (tries exact match first, then fuzzy)
|
||||
const matchResult = fuzzyFindText(normalizedContent, normalizedOldText);
|
||||
|
||||
if (!matchResult.found) {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
|
@ -129,8 +139,10 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
// Count occurrences
|
||||
const occurrences = normalizedContent.split(normalizedOldText).length - 1;
|
||||
// Count occurrences using fuzzy-normalized content for consistency
|
||||
const fuzzyContent = normalizeForFuzzyMatch(normalizedContent);
|
||||
const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText);
|
||||
const occurrences = fuzzyContent.split(fuzzyOldText).length - 1;
|
||||
|
||||
if (occurrences > 1) {
|
||||
if (signal) {
|
||||
|
|
@ -149,16 +161,16 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
// Perform replacement using indexOf + substring (raw string replace, no special character interpretation)
|
||||
// String.replace() interprets $ in the replacement string, so we do manual replacement
|
||||
const index = normalizedContent.indexOf(normalizedOldText);
|
||||
const normalizedNewContent =
|
||||
normalizedContent.substring(0, index) +
|
||||
// Perform replacement using the matched text position
|
||||
// When fuzzy matching was used, contentForReplacement is the normalized version
|
||||
const baseContent = matchResult.contentForReplacement;
|
||||
const newContent =
|
||||
baseContent.substring(0, matchResult.index) +
|
||||
normalizedNewText +
|
||||
normalizedContent.substring(index + normalizedOldText.length);
|
||||
baseContent.substring(matchResult.index + matchResult.matchLength);
|
||||
|
||||
// Verify the replacement actually changed something
|
||||
if (normalizedContent === normalizedNewContent) {
|
||||
if (baseContent === newContent) {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
|
@ -170,7 +182,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
return;
|
||||
}
|
||||
|
||||
const finalContent = bom + restoreLineEndings(normalizedNewContent, originalEnding);
|
||||
const finalContent = bom + restoreLineEndings(newContent, originalEnding);
|
||||
await ops.writeFile(absolutePath, finalContent);
|
||||
|
||||
// Check if aborted after writing
|
||||
|
|
@ -183,7 +195,7 @@ export function createEditTool(cwd: string, options?: EditToolOptions): AgentToo
|
|||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
|
||||
const diffResult = generateDiffString(normalizedContent, normalizedNewContent);
|
||||
const diffResult = generateDiffString(baseContent, newContent);
|
||||
resolve({
|
||||
content: [
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue