tui: only check for emojis in visibleWidth when necessary

The initial render of a session, and any redraws caused by terminal resizing, are noticeably slow, especially on conversations with 20+ turns and many tool calls. Profiling with `bun --cpu-prof` (available since Bun 1.3.2) shows that the majority of the rendering time (90%) is spent detecting emojis in the string-width library, which runs the expensive `/^\p{RGI_Emoji}$/v` regular expression on every individual grapheme cluster in the entire scrollback. I believe the regex essentially expands to a fixed search against every possible emoji sequence, hence the amount of CPU time spent in it. This change replaces `stringWidth` from string-width with a `graphemeWidth` function that performs a similar check but avoids running the `/^\p{RGI_Emoji}$/v` regex unless the grapheme cluster contains code points that could be emojis. The `visibleWidth` function also gains two more optimisations: (1) it short-circuits the width calculation for strings that consist entirely of printable ASCII characters, and (2) it caches the computed width of non-ASCII strings to avoid recomputing it when resizing.
2026-04-16 12:03:23 +00:00 · 2025-12-30 16:40:06 +11:00 · 2025-12-30 16:40:06 +11:00 · 6e4270a286
commit 6e4270a286
parent 02175d908b
4 changed files with 130 additions and 34 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -5529,22 +5529,6 @@
 				"safe-buffer": "~5.2.0"
 			}
 		},
-		"node_modules/string-width": {
-			"version": "8.1.0",
-			"resolved": "https://registry.npmjs.org/string-width/-/string-width-8.1.0.tgz",
-			"integrity": "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg==",
-			"license": "MIT",
-			"dependencies": {
-				"get-east-asian-width": "^1.3.0",
-				"strip-ansi": "^7.1.0"
-			},
-			"engines": {
-				"node": ">=20"
-			},
-			"funding": {
-				"url": "https://github.com/sponsors/sindresorhus"
-			}
-		},
 		"node_modules/string-width-cjs": {
 			"name": "string-width",
 			"version": "4.2.3",
@ -6614,9 +6598,9 @@
 			"dependencies": {
 				"@types/mime-types": "^2.1.4",
 				"chalk": "^5.5.0",
+				"get-east-asian-width": "^1.3.0",
 				"marked": "^15.0.12",
-				"mime-types": "^3.0.1",
-				"string-width": "^8.1.0"
+				"mime-types": "^3.0.1"
 			},
 			"devDependencies": {
 				"@xterm/headless": "^5.5.0",
--- a/packages/tui/CHANGELOG.md
+++ b/packages/tui/CHANGELOG.md
@ -12,11 +12,13 @@
 ### Changed

 - README.md completely rewritten with accurate component documentation, theme interfaces, and examples
+- `visibleWidth()` reimplemented with grapheme-based width calculation, 10x faster on Bun and ~15% faster on Node ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))

 ### Fixed

 - Markdown component now renders HTML tags as plain text instead of silently dropping them ([#359](https://github.com/badlogic/pi-mono/issues/359))
 - Crash in `visibleWidth()` and grapheme iteration when encountering undefined code points ([#372](https://github.com/badlogic/pi-mono/pull/372) by [@HACKE-RC](https://github.com/HACKE-RC))
+- ZWJ emoji sequences (rainbow flag, family, etc.) now render with correct width instead of being split into multiple characters ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))

 ## [0.29.0] - 2025-12-25

--- a/packages/tui/package.json
+++ b/packages/tui/package.json
@ -38,9 +38,9 @@
 	"dependencies": {
 		"@types/mime-types": "^2.1.4",
 		"chalk": "^5.5.0",
+		"get-east-asian-width": "^1.3.0",
 		"marked": "^15.0.12",
-		"mime-types": "^3.0.1",
-		"string-width": "^8.1.0"
+		"mime-types": "^3.0.1"
 	},
 	"devDependencies": {
 		"@xterm/headless": "^5.5.0",
--- a/packages/tui/src/utils.ts
+++ b/packages/tui/src/utils.ts
@ -1,13 +1,132 @@
-import stringWidth from "string-width";
+import { eastAsianWidth } from "get-east-asian-width";
+
+// Grapheme segmenter (shared instance)
+const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
+
+/**
+ * Get the shared grapheme segmenter instance.
+ */
+export function getSegmenter(): Intl.Segmenter {
+	return segmenter;
+}
+
+/**
+ * Check if a grapheme cluster (after segmentation) could possibly be an RGI emoji.
+ * This is a fast heuristic to avoid the expensive rgiEmojiRegex test.
+ * The tested Unicode blocks are deliberately broad to account for future
+ * Unicode additions.
+ */
+function couldBeEmoji(segment: string): boolean {
+	const cp = segment.codePointAt(0)!;
+	return (
+		(cp >= 0x1f000 && cp <= 0x1fbff) || // Emoji and Pictograph
+		(cp >= 0x2300 && cp <= 0x23ff) || // Misc technical
+		(cp >= 0x2600 && cp <= 0x27bf) || // Misc symbols, dingbats
+		(cp >= 0x2b50 && cp <= 0x2b55) || // Specific stars/circles
+		segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector)
+		segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.)
+	);
+}
+
+// Regexes for character classification (same as string-width library)
+const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v;
+const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v;
+const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;
+
+// Cache for non-ASCII strings
+const WIDTH_CACHE_SIZE = 512;
+const widthCache = new Map<string, number>();
+
+/**
+ * Calculate the terminal width of a single grapheme cluster.
+ * Based on code from the string-width library, but includes a possible-emoji
+ * check to avoid running the RGI_Emoji regex unnecessarily.
+ */
+function graphemeWidth(segment: string): number {
+	// Zero-width clusters
+	if (zeroWidthRegex.test(segment)) {
+		return 0;
+	}
+
+	// Emoji check with pre-filter
+	if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) {
+		return 2;
+	}
+
+	// Get base visible codepoint
+	const base = segment.replace(leadingNonPrintingRegex, "");
+	const cp = base.codePointAt(0);
+	if (cp === undefined) {
+		return 0;
+	}
+
+	let width = eastAsianWidth(cp);
+
+	// Trailing halfwidth/fullwidth forms
+	if (segment.length > 1) {
+		for (const char of segment.slice(1)) {
+			const c = char.codePointAt(0)!;
+			if (c >= 0xff00 && c <= 0xffef) {
+				width += eastAsianWidth(c);
+			}
+		}
+	}
+
+	return width;
+}

 /**
 * Calculate the visible width of a string in terminal columns.
 */
 export function visibleWidth(str: string): number {
-	if (!str) return 0;
-	// Replace tabs and strip Unicode format characters (Cf) that crash string-width
-	const normalized = str.replace(/\t/g, "   ").replace(/\p{Cf}/gu, "");
-	return stringWidth(normalized);
+	if (str.length === 0) {
+		return 0;
+	}
+
+	// Fast path: pure ASCII printable
+	let isPureAscii = true;
+	for (let i = 0; i < str.length; i++) {
+		const code = str.charCodeAt(i);
+		if (code < 0x20 || code > 0x7e) {
+			isPureAscii = false;
+			break;
+		}
+	}
+	if (isPureAscii) {
+		return str.length;
+	}
+
+	// Check cache
+	const cached = widthCache.get(str);
+	if (cached !== undefined) {
+		return cached;
+	}
+
+	// Normalize: tabs to 3 spaces, strip ANSI escape codes
+	let clean = str;
+	if (str.includes("\t")) {
+		clean = clean.replace(/\t/g, "   ");
+	}
+	if (clean.includes("\x1b")) {
+		clean = clean.replace(/\x1b\[[0-9;]*[mGKHJ]/g, "");
+	}
+
+	// Calculate width
+	let width = 0;
+	for (const { segment } of segmenter.segment(clean)) {
+		width += graphemeWidth(segment);
+	}
+
+	// Cache result
+	if (widthCache.size >= WIDTH_CACHE_SIZE) {
+		const firstKey = widthCache.keys().next().value;
+		if (firstKey !== undefined) {
+			widthCache.delete(firstKey);
+		}
+	}
+	widthCache.set(str, width);
+
+	return width;
 }

 /**
@ -408,15 +527,6 @@ function wrapSingleLine(line: string, width: number): string[] {
 	return wrapped.length > 0 ? wrapped : [""];
 }

-const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
-
-/**
- * Get the shared grapheme segmenter instance.
- */
-export function getSegmenter(): Intl.Segmenter {
-	return segmenter;
-}
-
 const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;

 /**