tui: only check for emojis in visibleWidth when necessary

The initial render of a session and any redraws caused by terminal
resizing are noticeably slow, especially on conversations with 20+
turns and many tool calls.

From profiling with `bun --cpu-prof` (available since bun 1.3.2), the
majority of the rendering time (90%) is spent detecting emojis in the
string-width library, which runs the expensive `/^\p{RGI_Emoji}$/v`
regular expression on every individual grapheme cluster in the entire
scrollback. I believe it essentially expands to a fixed search against
every possible emoji sequence, hence the amount of CPU time spent in it.

This change replaces the `stringWidth` from string-width with a
`graphemeWidth` function that performs a similar check, but avoids
running the `/^\p{RGI_Emoji}$/v` regex for emoji detection unless the
grapheme cluster contains codepoints that could be emojis.

The `visibleWidth` function also has two more optimisations:
- Short-circuits string length detection for strings that are entirely
  printable ASCII characters
- Adds a cache for non-ASCII segments to avoid recomputing string length
  when resizing
This commit is contained in:
nathyong 2025-12-30 16:40:06 +11:00 committed by Mario Zechner
parent 02175d908b
commit 6e4270a286
4 changed files with 130 additions and 34 deletions

20
package-lock.json generated
View file

@ -5529,22 +5529,6 @@
"safe-buffer": "~5.2.0"
}
},
"node_modules/string-width": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-8.1.0.tgz",
"integrity": "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg==",
"license": "MIT",
"dependencies": {
"get-east-asian-width": "^1.3.0",
"strip-ansi": "^7.1.0"
},
"engines": {
"node": ">=20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/string-width-cjs": {
"name": "string-width",
"version": "4.2.3",
@ -6614,9 +6598,9 @@
"dependencies": {
"@types/mime-types": "^2.1.4",
"chalk": "^5.5.0",
"get-east-asian-width": "^1.3.0",
"marked": "^15.0.12",
"mime-types": "^3.0.1",
"string-width": "^8.1.0"
"mime-types": "^3.0.1"
},
"devDependencies": {
"@xterm/headless": "^5.5.0",

View file

@ -12,11 +12,13 @@
### Changed
- README.md completely rewritten with accurate component documentation, theme interfaces, and examples
- `visibleWidth()` reimplemented with grapheme-based width calculation, 10x faster on Bun and ~15% faster on Node ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))
### Fixed
- Markdown component now renders HTML tags as plain text instead of silently dropping them ([#359](https://github.com/badlogic/pi-mono/issues/359))
- Crash in `visibleWidth()` and grapheme iteration when encountering undefined code points ([#372](https://github.com/badlogic/pi-mono/pull/372) by [@HACKE-RC](https://github.com/HACKE-RC))
- ZWJ emoji sequences (rainbow flag, family, etc.) now render with correct width instead of being split into multiple characters ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))
## [0.29.0] - 2025-12-25

View file

@ -38,9 +38,9 @@
"dependencies": {
"@types/mime-types": "^2.1.4",
"chalk": "^5.5.0",
"get-east-asian-width": "^1.3.0",
"marked": "^15.0.12",
"mime-types": "^3.0.1",
"string-width": "^8.1.0"
"mime-types": "^3.0.1"
},
"devDependencies": {
"@xterm/headless": "^5.5.0",

View file

@ -1,13 +1,132 @@
import stringWidth from "string-width";
import { eastAsianWidth } from "get-east-asian-width";
// Single module-wide Intl.Segmenter configured for grapheme-cluster
// granularity; created once at load time and reused by every caller.
const segmenter: Intl.Segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });

/**
 * Expose the module's shared grapheme segmenter.
 *
 * @returns The same `Intl.Segmenter` instance on every call.
 */
export function getSegmenter(): Intl.Segmenter {
  return segmenter;
}
/**
 * Cheap pre-filter deciding whether a grapheme cluster is worth handing to
 * the expensive RGI emoji regex.
 *
 * A cluster is a candidate when it is longer than two UTF-16 code units,
 * contains VS16 (U+FE0F), or its first code point falls in one of a few
 * deliberately broad blocks (broad so that future Unicode additions are
 * still covered). A `true` result is only a "maybe"; a `false` result means
 * the regex can be skipped.
 *
 * @param segment - One grapheme cluster, as produced by the segmenter.
 * @returns `true` if the cluster might be an emoji, `false` otherwise.
 */
function couldBeEmoji(segment: string): boolean {
  // Multi-codepoint sequences (ZWJ, skin tones, etc.)
  if (segment.length > 2) {
    return true;
  }
  // Contains VS16 (emoji presentation selector)
  if (segment.includes("\uFE0F")) {
    return true;
  }

  const first = segment.codePointAt(0);
  if (first === undefined) {
    // Empty input: nothing that could be an emoji.
    return false;
  }

  // Emoji and Pictograph
  if (first >= 0x1f000 && first <= 0x1fbff) {
    return true;
  }
  // Misc technical
  if (first >= 0x2300 && first <= 0x23ff) {
    return true;
  }
  // Misc symbols, dingbats
  if (first >= 0x2600 && first <= 0x27bf) {
    return true;
  }
  // Specific stars/circles
  return first >= 0x2b50 && first <= 0x2b55;
}
// Regexes for character classification (same as string-width library)
// Matches a string made up entirely of default-ignorable, control, mark, or
// surrogate code points.
const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v;
// Matches the leading run of default-ignorable, control, format, mark, or
// surrogate code points at the start of a string.
const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v;
// Matches a string that is exactly one RGI emoji. `RGI_Emoji` is a property
// of strings, which is why the `v` flag is used.
const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;
// Cache for non-ASCII strings
// Upper bound on the number of entries kept in widthCache; visibleWidth()
// deletes the first-inserted key once the map has reached this size.
const WIDTH_CACHE_SIZE = 512;
// Maps an input string to the width computed for it.
const widthCache = new Map<string, number>();
/**
 * Compute how many terminal columns one grapheme cluster occupies.
 *
 * Follows the approach of the string-width library, except that the RGI
 * emoji regex only runs when `couldBeEmoji` says the cluster is a candidate.
 *
 * @param segment - A single grapheme cluster.
 * @returns 0 for zero-width clusters, 2 for RGI emoji, otherwise the width
 *   that `eastAsianWidth` reports for the first visible code point plus the
 *   width of any trailing halfwidth/fullwidth forms.
 */
function graphemeWidth(segment: string): number {
  // Clusters consisting solely of non-printing code points take no columns.
  if (zeroWidthRegex.test(segment)) return 0;

  // The cheap pre-filter runs first so the costly regex is usually skipped.
  const isEmoji = couldBeEmoji(segment) && rgiEmojiRegex.test(segment);
  if (isEmoji) return 2;

  // Drop leading non-printing code points to find the first visible one.
  const visible = segment.replace(leadingNonPrintingRegex, "");
  const baseCodePoint = visible.codePointAt(0);
  if (baseCodePoint === undefined) return 0;

  let columns = eastAsianWidth(baseCodePoint);

  // Trailing halfwidth/fullwidth forms (U+FF00-U+FFEF) add their own width.
  // slice(1) is empty for single-unit clusters, so the loop is a no-op there.
  for (const trailing of segment.slice(1)) {
    const code = trailing.codePointAt(0)!;
    if (code < 0xff00 || code > 0xffef) continue;
    columns += eastAsianWidth(code);
  }

  return columns;
}
/**
 * Calculate the visible width of a string in terminal columns.
 *
 * Strings made up only of printable ASCII (0x20-0x7e) are measured by their
 * length. Anything else is looked up in `widthCache`; on a miss, tabs are
 * expanded to three spaces, CSI escape sequences ending in m, G, K, H or J
 * are stripped, and the widths of the remaining grapheme clusters are summed
 * via `graphemeWidth`.
 *
 * @param str - Text to measure; may contain tabs and ANSI escape codes.
 * @returns Number of terminal columns the text occupies (0 for an empty string).
 */
export function visibleWidth(str: string): number {
  if (!str) {
    return 0;
  }

  // Fast path: pure ASCII printable, one column per UTF-16 code unit
  let isPureAscii = true;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code < 0x20 || code > 0x7e) {
      isPureAscii = false;
      break;
    }
  }
  if (isPureAscii) {
    return str.length;
  }

  // Check cache (keyed by the raw, un-normalized input)
  const cached = widthCache.get(str);
  if (cached !== undefined) {
    return cached;
  }

  // Normalize: tabs to 3 spaces, strip ANSI escape codes
  let clean = str;
  if (str.includes("\t")) {
    clean = clean.replace(/\t/g, "   ");
  }
  if (clean.includes("\x1b")) {
    clean = clean.replace(/\x1b\[[0-9;]*[mGKHJ]/g, "");
  }

  // Calculate width
  let width = 0;
  for (const { segment } of segmenter.segment(clean)) {
    width += graphemeWidth(segment);
  }

  // Cache result; when full, drop the oldest entry first (a Map iterates its
  // keys in insertion order, so the first key is the earliest one stored).
  if (widthCache.size >= WIDTH_CACHE_SIZE) {
    const firstKey = widthCache.keys().next().value;
    if (firstKey !== undefined) {
      widthCache.delete(firstKey);
    }
  }
  widthCache.set(str, width);
  return width;
}
/**
@ -408,15 +527,6 @@ function wrapSingleLine(line: string, width: number): string[] {
return wrapped.length > 0 ? wrapped : [""];
}
// NOTE(review): `segmenter` and `getSegmenter` are also declared near the top
// of this file, right after the imports. This second copy looks like the
// removed side of the diff hunk; keeping both would redeclare the same
// names — confirm and drop one.
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
/**
 * Get the shared grapheme segmenter instance.
 *
 * @returns The module-level `Intl.Segmenter` created with grapheme granularity.
 */
export function getSegmenter(): Intl.Segmenter {
return segmenter;
}
// Matches any single ASCII punctuation/symbol character from the listed set:
// brackets, quotes, sentence punctuation, and operators. Underscore is not in
// the set. The pattern is unanchored, so `test()` is true if the string
// contains at least one such character anywhere.
const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;
/**