diff --git a/package-lock.json b/package-lock.json index 3e84ed4d..d375064f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5529,22 +5529,6 @@ "safe-buffer": "~5.2.0" } }, - "node_modules/string-width": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-8.1.0.tgz", - "integrity": "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg==", - "license": "MIT", - "dependencies": { - "get-east-asian-width": "^1.3.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/string-width-cjs": { "name": "string-width", "version": "4.2.3", @@ -6614,9 +6598,9 @@ "dependencies": { "@types/mime-types": "^2.1.4", "chalk": "^5.5.0", + "get-east-asian-width": "^1.3.0", "marked": "^15.0.12", - "mime-types": "^3.0.1", - "string-width": "^8.1.0" + "mime-types": "^3.0.1" }, "devDependencies": { "@xterm/headless": "^5.5.0", diff --git a/packages/tui/CHANGELOG.md b/packages/tui/CHANGELOG.md index 3e066be9..87a22f25 100644 --- a/packages/tui/CHANGELOG.md +++ b/packages/tui/CHANGELOG.md @@ -12,11 +12,13 @@ ### Changed - README.md completely rewritten with accurate component documentation, theme interfaces, and examples +- `visibleWidth()` reimplemented with grapheme-based width calculation, 10x faster on Bun and ~15% faster on Node ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong)) ### Fixed - Markdown component now renders HTML tags as plain text instead of silently dropping them ([#359](https://github.com/badlogic/pi-mono/issues/359)) - Crash in `visibleWidth()` and grapheme iteration when encountering undefined code points ([#372](https://github.com/badlogic/pi-mono/pull/372) by [@HACKE-RC](https://github.com/HACKE-RC)) +- ZWJ emoji sequences (rainbow flag, family, etc.) 
now render with correct width instead of being split into multiple characters ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong)) ## [0.29.0] - 2025-12-25 diff --git a/packages/tui/package.json b/packages/tui/package.json index 5e65e545..0472a281 100644 --- a/packages/tui/package.json +++ b/packages/tui/package.json @@ -38,9 +38,9 @@ "dependencies": { "@types/mime-types": "^2.1.4", "chalk": "^5.5.0", + "get-east-asian-width": "^1.3.0", "marked": "^15.0.12", - "mime-types": "^3.0.1", - "string-width": "^8.1.0" + "mime-types": "^3.0.1" }, "devDependencies": { "@xterm/headless": "^5.5.0", diff --git a/packages/tui/src/utils.ts b/packages/tui/src/utils.ts index 3c4f80bf..1e87722b 100644 --- a/packages/tui/src/utils.ts +++ b/packages/tui/src/utils.ts @@ -1,13 +1,132 @@ -import stringWidth from "string-width"; +import { eastAsianWidth } from "get-east-asian-width"; + +// Grapheme segmenter (shared instance) +const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" }); + +/** + * Get the shared grapheme segmenter instance. + */ +export function getSegmenter(): Intl.Segmenter { + return segmenter; +} + +/** + * Check if a grapheme cluster (after segmentation) could possibly be an RGI emoji. + * This is a fast heuristic to avoid the expensive rgiEmojiRegex test. + * The tested Unicode blocks are deliberately broad to account for future + * Unicode additions. + */ +function couldBeEmoji(segment: string): boolean { + const cp = segment.codePointAt(0)!; + return ( + (cp >= 0x1f000 && cp <= 0x1fbff) || // Emoji and Pictograph + (cp >= 0x2300 && cp <= 0x23ff) || // Misc technical + (cp >= 0x2600 && cp <= 0x27bf) || // Misc symbols, dingbats + (cp >= 0x2b50 && cp <= 0x2b55) || // Specific stars/circles + segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector) + segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.) 
+ ); +} + +// Regexes for character classification (same as string-width library) +const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v; +const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v; +const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; + +// Cache for non-ASCII strings +const WIDTH_CACHE_SIZE = 512; +const widthCache = new Map(); + +/** + * Calculate the terminal width of a single grapheme cluster. + * Based on code from the string-width library, but includes a possible-emoji + * check to avoid running the RGI_Emoji regex unnecessarily. + */ +function graphemeWidth(segment: string): number { + // Zero-width clusters + if (zeroWidthRegex.test(segment)) { + return 0; + } + + // Emoji check with pre-filter + if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) { + return 2; + } + + // Get base visible codepoint + const base = segment.replace(leadingNonPrintingRegex, ""); + const cp = base.codePointAt(0); + if (cp === undefined) { + return 0; + } + + let width = eastAsianWidth(cp); + + // Trailing halfwidth/fullwidth forms + if (segment.length > 1) { + for (const char of segment.slice(1)) { + const c = char.codePointAt(0)!; + if (c >= 0xff00 && c <= 0xffef) { + width += eastAsianWidth(c); + } + } + } + + return width; +} /** * Calculate the visible width of a string in terminal columns. 
/**
 * Calculate the visible width of a string in terminal columns.
 *
 * Strings made only of printable ASCII (0x20-0x7e) are answered straight from
 * `str.length`. Anything else has tabs expanded to three spaces and ANSI
 * escape sequences removed, is split into grapheme clusters with the shared
 * `segmenter`, and the per-cluster widths from `graphemeWidth()` are summed.
 * Results for these strings are memoised in `widthCache`, keyed by the
 * original (un-normalized) input.
 *
 * @param str - Text to measure; may contain tabs and ANSI escape sequences.
 * @returns Width in terminal columns; 0 for an empty or missing string.
 */
export function visibleWidth(str: string): number {
	// Falsy check rather than `str.length === 0`: the previous implementation
	// returned 0 for null/undefined passed by untyped callers, keep doing so
	// instead of throwing on `.length`.
	if (!str) {
		return 0;
	}

	// Fast path: pure printable ASCII is one column per UTF-16 code unit.
	// Tabs (0x09) and ESC (0x1b) fall outside the range and take the slow path.
	let isPureAscii = true;
	for (let i = 0; i < str.length; i++) {
		const code = str.charCodeAt(i);
		if (code < 0x20 || code > 0x7e) {
			isPureAscii = false;
			break;
		}
	}
	if (isPureAscii) {
		return str.length;
	}

	// Check cache
	const cached = widthCache.get(str);
	if (cached !== undefined) {
		return cached;
	}

	// Normalize: tabs to 3 spaces, strip ANSI escape sequences
	let clean = str;
	if (clean.includes("\t")) {
		clean = clean.replace(/\t/g, "   ");
	}
	if (clean.includes("\x1b")) {
		// CSI: ESC [ parameter bytes (0x30-0x3f), intermediate bytes (0x20-0x2f),
		// final byte (0x40-0x7e). Covers SGR/cursor codes as well as
		// private-mode sequences such as ESC[?25l.
		clean = clean.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, "");
		// OSC (e.g. OSC 8 hyperlinks): ESC ] payload, terminated by BEL or ST (ESC \).
		clean = clean.replace(/\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g, "");
	}

	// Calculate width: one grapheme cluster at a time
	let width = 0;
	for (const { segment } of segmenter.segment(clean)) {
		width += graphemeWidth(segment);
	}

	// Cache result. Map iterates keys in insertion order, so dropping the first
	// key evicts the oldest-inserted entry once the cache is full.
	if (widthCache.size >= WIDTH_CACHE_SIZE) {
		const firstKey = widthCache.keys().next().value;
		if (firstKey !== undefined) {
			widthCache.delete(firstKey);
		}
	}
	widthCache.set(str, width);

	return width;
}