tui: only check for emojis in visibleWidth when necessary

The initial render of a session, and any re-draws caused by terminal
resizing are noticeably slow, especially on conversations with 20+
turns and many tool calls.

From profiling with `bun --cpu-prof` (available since bun 1.3.2), the
majority of the rendering (90%) is spent on detection of emojis in the
string-width library, running the expensive `/\p{RGI_Emoji}$/v`
regular expression on every individual grapheme cluster in the entire
scrollback. I believe it essentially expands to a fixed search against
every possible emoji sequence, hence the amount of CPU time spent in it.

This change replaces the `stringWidth` from string-width with a
`graphemeWidth` function that performs a similar check, but avoids
running the `/\p{RGI_Emoji}$/v` regex for emoji detection unless it
contains codepoints that could be emojis.

The `visibleWidth` function also has two more optimisations:
- Short-circuits string length detection for strings that are entirely
  printable ASCII characters
- Adds a cache for non-ASCII segments to avoid recomputing string length
  when resizing
This commit is contained in:
nathyong 2025-12-30 16:40:06 +11:00 committed by Mario Zechner
parent 02175d908b
commit 6e4270a286
4 changed files with 130 additions and 34 deletions

View file

@ -1,13 +1,132 @@
import stringWidth from "string-width";
import { eastAsianWidth } from "get-east-asian-width";
// Grapheme segmenter (shared instance)
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
/**
* Get the shared grapheme segmenter instance.
*/
export function getSegmenter(): Intl.Segmenter {
return segmenter;
}
/**
* Check if a grapheme cluster (after segmentation) could possibly be an RGI emoji.
* This is a fast heuristic to avoid the expensive rgiEmojiRegex test.
* The tested Unicode blocks are deliberately broad to account for future
* Unicode additions.
*/
function couldBeEmoji(segment: string): boolean {
const cp = segment.codePointAt(0)!;
return (
(cp >= 0x1f000 && cp <= 0x1fbff) || // Emoji and Pictograph
(cp >= 0x2300 && cp <= 0x23ff) || // Misc technical
(cp >= 0x2600 && cp <= 0x27bf) || // Misc symbols, dingbats
(cp >= 0x2b50 && cp <= 0x2b55) || // Specific stars/circles
segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector)
segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.)
);
}
// Regexes for character classification (same as string-width library)
const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v;
const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v;
const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;
// Cache for non-ASCII strings
const WIDTH_CACHE_SIZE = 512;
const widthCache = new Map<string, number>();
/**
* Calculate the terminal width of a single grapheme cluster.
* Based on code from the string-width library, but includes a possible-emoji
* check to avoid running the RGI_Emoji regex unnecessarily.
*/
function graphemeWidth(segment: string): number {
// Zero-width clusters
if (zeroWidthRegex.test(segment)) {
return 0;
}
// Emoji check with pre-filter
if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) {
return 2;
}
// Get base visible codepoint
const base = segment.replace(leadingNonPrintingRegex, "");
const cp = base.codePointAt(0);
if (cp === undefined) {
return 0;
}
let width = eastAsianWidth(cp);
// Trailing halfwidth/fullwidth forms
if (segment.length > 1) {
for (const char of segment.slice(1)) {
const c = char.codePointAt(0)!;
if (c >= 0xff00 && c <= 0xffef) {
width += eastAsianWidth(c);
}
}
}
return width;
}
/**
* Calculate the visible width of a string in terminal columns.
*/
export function visibleWidth(str: string): number {
if (!str) return 0;
// Replace tabs and strip Unicode format characters (Cf) that crash string-width
const normalized = str.replace(/\t/g, " ").replace(/\p{Cf}/gu, "");
return stringWidth(normalized);
if (str.length === 0) {
return 0;
}
// Fast path: pure ASCII printable
let isPureAscii = true;
for (let i = 0; i < str.length; i++) {
const code = str.charCodeAt(i);
if (code < 0x20 || code > 0x7e) {
isPureAscii = false;
break;
}
}
if (isPureAscii) {
return str.length;
}
// Check cache
const cached = widthCache.get(str);
if (cached !== undefined) {
return cached;
}
// Normalize: tabs to 3 spaces, strip ANSI escape codes
let clean = str;
if (str.includes("\t")) {
clean = clean.replace(/\t/g, " ");
}
if (clean.includes("\x1b")) {
clean = clean.replace(/\x1b\[[0-9;]*[mGKHJ]/g, "");
}
// Calculate width
let width = 0;
for (const { segment } of segmenter.segment(clean)) {
width += graphemeWidth(segment);
}
// Cache result
if (widthCache.size >= WIDTH_CACHE_SIZE) {
const firstKey = widthCache.keys().next().value;
if (firstKey !== undefined) {
widthCache.delete(firstKey);
}
}
widthCache.set(str, width);
return width;
}
/**
@ -408,15 +527,6 @@ function wrapSingleLine(line: string, width: number): string[] {
return wrapped.length > 0 ? wrapped : [""];
}
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
/**
* Get the shared grapheme segmenter instance.
*/
export function getSegmenter(): Intl.Segmenter {
return segmenter;
}
const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;
/**