tui: only check for emojis in visibleWidth when necessary

The initial render of a session, and any re-draws caused by terminal resizing are noticeably slow, especially on conversations with 20+ turns and many tool calls. From profiling with `bun --cpu-prof` (available since bun 1.3.2), the majority of the rendering (90%) is spent on detection of emojis in the string-width library, running the expensive `/\p{RGI_Emoji}$/v` regular expression on every individual grapheme cluster in the entire scrollback. I believe it essentially expands to a fixed search against every possible emoji sequence, hence the amount of CPU time spent in it. This change replaces the `stringWidth` from string-width with a `graphemeWidth` function that performs a similar check, but avoids running the `/\p{RGI_Emoji}$/v` regex for emoji detection unless it contains codepoints that could be emojis. The `visibleWidth` function also has two more optimisations: - Short-circuits string length detection for strings that are entirely printable ASCII characters - Adds a cache for non-ASCII segments to avoid recomputing string length when resizing
2026-04-17 05:00:16 +00:00 · 2025-12-30 16:40:06 +11:00 · 2025-12-30 16:40:06 +11:00 · 6e4270a286
commit 6e4270a286
parent 02175d908b
4 changed files with 130 additions and 34 deletions
--- a/packages/tui/src/utils.ts
+++ b/packages/tui/src/utils.ts
@ -1,13 +1,132 @@
-import stringWidth from "string-width";
+import { eastAsianWidth } from "get-east-asian-width";
+
+// Grapheme segmenter (shared instance)
+const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
+
+/**
+ * Get the shared grapheme segmenter instance.
+ */
+export function getSegmenter(): Intl.Segmenter {
+	return segmenter;
+}
+
+/**
+ * Check if a grapheme cluster (after segmentation) could possibly be an RGI emoji.
+ * This is a fast heuristic to avoid the expensive rgiEmojiRegex test.
+ * The tested Unicode blocks are deliberately broad to account for future
+ * Unicode additions.
+ */
+function couldBeEmoji(segment: string): boolean {
+	const cp = segment.codePointAt(0)!;
+	return (
+		(cp >= 0x1f000 && cp <= 0x1fbff) || // Emoji and Pictograph
+		(cp >= 0x2300 && cp <= 0x23ff) || // Misc technical
+		(cp >= 0x2600 && cp <= 0x27bf) || // Misc symbols, dingbats
+		(cp >= 0x2b50 && cp <= 0x2b55) || // Specific stars/circles
+		segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector)
+		segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.)
+	);
+}
+
+// Regexes for character classification (same as string-width library)
+const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v;
+const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v;
+const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;
+
+// Cache for non-ASCII strings
+const WIDTH_CACHE_SIZE = 512;
+const widthCache = new Map<string, number>();
+
+/**
+ * Calculate the terminal width of a single grapheme cluster.
+ * Based on code from the string-width library, but includes a possible-emoji
+ * check to avoid running the RGI_Emoji regex unnecessarily.
+ */
+function graphemeWidth(segment: string): number {
+	// Zero-width clusters
+	if (zeroWidthRegex.test(segment)) {
+		return 0;
+	}
+
+	// Emoji check with pre-filter
+	if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) {
+		return 2;
+	}
+
+	// Get base visible codepoint
+	const base = segment.replace(leadingNonPrintingRegex, "");
+	const cp = base.codePointAt(0);
+	if (cp === undefined) {
+		return 0;
+	}
+
+	let width = eastAsianWidth(cp);
+
+	// Trailing halfwidth/fullwidth forms
+	if (segment.length > 1) {
+		for (const char of segment.slice(1)) {
+			const c = char.codePointAt(0)!;
+			if (c >= 0xff00 && c <= 0xffef) {
+				width += eastAsianWidth(c);
+			}
+		}
+	}
+
+	return width;
+}

 /**
 * Calculate the visible width of a string in terminal columns.
 */
 export function visibleWidth(str: string): number {
-	if (!str) return 0;
-	// Replace tabs and strip Unicode format characters (Cf) that crash string-width
-	const normalized = str.replace(/\t/g, "   ").replace(/\p{Cf}/gu, "");
-	return stringWidth(normalized);
+	if (str.length === 0) {
+		return 0;
+	}
+
+	// Fast path: pure ASCII printable
+	let isPureAscii = true;
+	for (let i = 0; i < str.length; i++) {
+		const code = str.charCodeAt(i);
+		if (code < 0x20 || code > 0x7e) {
+			isPureAscii = false;
+			break;
+		}
+	}
+	if (isPureAscii) {
+		return str.length;
+	}
+
+	// Check cache
+	const cached = widthCache.get(str);
+	if (cached !== undefined) {
+		return cached;
+	}
+
+	// Normalize: tabs to 3 spaces, strip ANSI escape codes
+	let clean = str;
+	if (str.includes("\t")) {
+		clean = clean.replace(/\t/g, "   ");
+	}
+	if (clean.includes("\x1b")) {
+		clean = clean.replace(/\x1b\[[0-9;]*[mGKHJ]/g, "");
+	}
+
+	// Calculate width
+	let width = 0;
+	for (const { segment } of segmenter.segment(clean)) {
+		width += graphemeWidth(segment);
+	}
+
+	// Cache result
+	if (widthCache.size >= WIDTH_CACHE_SIZE) {
+		const firstKey = widthCache.keys().next().value;
+		if (firstKey !== undefined) {
+			widthCache.delete(firstKey);
+		}
+	}
+	widthCache.set(str, width);
+
+	return width;
 }

 /**
@ -408,15 +527,6 @@ function wrapSingleLine(line: string, width: number): string[] {
 	return wrapped.length > 0 ? wrapped : [""];
 }

-const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
-
-/**
- * Get the shared grapheme segmenter instance.
- */
-export function getSegmenter(): Intl.Segmenter {
-	return segmenter;
-}
-
 const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;

 /**