mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 12:03:23 +00:00
tui: only check for emojis in visibleWidth when necessary
The initial render of a session and any redraws caused by terminal
resizing are noticeably slow, especially in conversations with 20+
turns and many tool calls.
From profiling with `bun --cpu-prof` (available since bun 1.3.2), the
majority of the rendering time (90%) is spent detecting emojis in the
string-width library, which runs the expensive `/^\p{RGI_Emoji}$/v`
regular expression on every individual grapheme cluster in the entire
scrollback. I believe it essentially expands to a fixed search against
every possible emoji sequence, hence the amount of CPU time spent in it.
This change replaces the `stringWidth` from string-width with a
`graphemeWidth` function that performs a similar check, but avoids
running the `/^\p{RGI_Emoji}$/v` regex for emoji detection unless the
grapheme cluster contains code points that could be part of an emoji.
The `visibleWidth` function also has two more optimisations:
- Short-circuits the width calculation for strings that consist entirely
of printable ASCII characters
- Adds a cache for non-ASCII strings to avoid recomputing their width
when resizing
This commit is contained in:
parent
02175d908b
commit
6e4270a286
4 changed files with 130 additions and 34 deletions
20
package-lock.json
generated
20
package-lock.json
generated
|
|
@ -5529,22 +5529,6 @@
|
|||
"safe-buffer": "~5.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width": {
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-8.1.0.tgz",
|
||||
"integrity": "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"get-east-asian-width": "^1.3.0",
|
||||
"strip-ansi": "^7.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width-cjs": {
|
||||
"name": "string-width",
|
||||
"version": "4.2.3",
|
||||
|
|
@ -6614,9 +6598,9 @@
|
|||
"dependencies": {
|
||||
"@types/mime-types": "^2.1.4",
|
||||
"chalk": "^5.5.0",
|
||||
"get-east-asian-width": "^1.3.0",
|
||||
"marked": "^15.0.12",
|
||||
"mime-types": "^3.0.1",
|
||||
"string-width": "^8.1.0"
|
||||
"mime-types": "^3.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@xterm/headless": "^5.5.0",
|
||||
|
|
|
|||
|
|
@ -12,11 +12,13 @@
|
|||
### Changed
|
||||
|
||||
- README.md completely rewritten with accurate component documentation, theme interfaces, and examples
|
||||
- `visibleWidth()` reimplemented with grapheme-based width calculation, 10x faster on Bun and ~15% faster on Node ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Markdown component now renders HTML tags as plain text instead of silently dropping them ([#359](https://github.com/badlogic/pi-mono/issues/359))
|
||||
- Crash in `visibleWidth()` and grapheme iteration when encountering undefined code points ([#372](https://github.com/badlogic/pi-mono/pull/372) by [@HACKE-RC](https://github.com/HACKE-RC))
|
||||
- ZWJ emoji sequences (rainbow flag, family, etc.) now render with correct width instead of being split into multiple characters ([#369](https://github.com/badlogic/pi-mono/pull/369) by [@nathyong](https://github.com/nathyong))
|
||||
|
||||
## [0.29.0] - 2025-12-25
|
||||
|
||||
|
|
|
|||
|
|
@ -38,9 +38,9 @@
|
|||
"dependencies": {
|
||||
"@types/mime-types": "^2.1.4",
|
||||
"chalk": "^5.5.0",
|
||||
"get-east-asian-width": "^1.3.0",
|
||||
"marked": "^15.0.12",
|
||||
"mime-types": "^3.0.1",
|
||||
"string-width": "^8.1.0"
|
||||
"mime-types": "^3.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@xterm/headless": "^5.5.0",
|
||||
|
|
|
|||
|
|
@ -1,13 +1,132 @@
|
|||
import stringWidth from "string-width";
|
||||
import { eastAsianWidth } from "get-east-asian-width";
|
||||
|
||||
// Module-wide grapheme segmenter: built once here and handed out by getSegmenter().
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });

/**
 * Return the module's shared grapheme segmenter.
 *
 * Every call returns the same `Intl.Segmenter` instance (default locale,
 * `"grapheme"` granularity), so callers do not have to construct their own.
 *
 * @returns The shared `Intl.Segmenter` instance.
 */
export function getSegmenter(): Intl.Segmenter {
  return segmenter;
}
|
||||
|
||||
/**
 * Cheap pre-filter: could this grapheme cluster possibly be an RGI emoji?
 *
 * A `false` answer lets the caller skip the expensive RGI emoji regular
 * expression entirely. The tested Unicode ranges are deliberately broad to
 * account for future Unicode additions, so `true` only means "worth testing",
 * not "is an emoji".
 *
 * @param segment - A single grapheme cluster (after segmentation).
 * @returns `true` when the first code point falls in one of the tested
 *   ranges, the cluster contains U+FE0F, or the cluster is longer than two
 *   UTF-16 code units; `false` otherwise, including for an empty string.
 */
function couldBeEmoji(segment: string): boolean {
  const first = segment.codePointAt(0);
  if (first === undefined) {
    // Empty input: there is no code point to classify.
    return false;
  }

  return (
    (first >= 0x1f000 && first <= 0x1fbff) || // Emoji and Pictograph
    (first >= 0x2300 && first <= 0x23ff) || // Misc technical
    (first >= 0x2600 && first <= 0x27bf) || // Misc symbols, dingbats
    (first >= 0x2b50 && first <= 0x2b55) || // Specific stars/circles
    segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector)
    segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.)
  );
}
|
||||
|
||||
// Regexes for character classification (same as string-width library)
// Matches a cluster made up only of ignorable, control, mark, or surrogate code points.
const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v;
// Matches the leading run of non-printing code points, stripped to reach the first visible one.
const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v;
// Matches a cluster that is exactly one RGI emoji; expensive, so graphemeWidth pre-filters first.
const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;

// Cache for non-ASCII strings: maps an input string to its computed width.
// WIDTH_CACHE_SIZE is the entry limit that visibleWidth enforces when inserting.
const WIDTH_CACHE_SIZE = 512;
const widthCache = new Map<string, number>();

/**
 * Calculate the terminal width of a single grapheme cluster.
 *
 * Based on code from the string-width library, but includes a possible-emoji
 * check to avoid running the RGI_Emoji regex unnecessarily.
 *
 * @param segment - A single grapheme cluster.
 * @returns `0` for clusters that are entirely zero-width (or have no visible
 *   code point), `2` for RGI emoji, otherwise the East Asian width of the
 *   first visible code point plus the East Asian width of any later
 *   halfwidth/fullwidth-form code points (U+FF00-U+FFEF).
 */
function graphemeWidth(segment: string): number {
  // Zero-width clusters
  if (zeroWidthRegex.test(segment)) {
    return 0;
  }

  // Emoji check with pre-filter: the regex only runs when the heuristic says it might match.
  if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) {
    return 2;
  }

  // Get base visible codepoint
  const base = segment.replace(leadingNonPrintingRegex, "").codePointAt(0);
  if (base === undefined) {
    return 0;
  }

  let width = eastAsianWidth(base);

  // Trailing halfwidth/fullwidth forms. For a cluster of a single UTF-16 unit,
  // slice(1) is empty and the loop body never runs.
  for (const char of segment.slice(1)) {
    const code = char.codePointAt(0);
    if (code !== undefined && code >= 0xff00 && code <= 0xffef) {
      width += eastAsianWidth(code);
    }
  }

  return width;
}
|
||||
|
||||
/**
 * Calculate the visible width of a string in terminal columns.
 *
 * Steps, in order:
 * 1. An empty string has width 0.
 * 2. A string made only of printable ASCII (0x20-0x7e) has width `str.length`.
 * 3. Otherwise the original string is looked up in the width cache. On a miss,
 *    tabs are expanded to three spaces, ANSI sequences of the form
 *    `ESC [ digits/semicolons` ending in `m`, `G`, `K`, `H` or `J` are removed,
 *    and the widths of the remaining grapheme clusters are summed. The result
 *    is stored under the original string.
 *
 * @param str - Text to measure; may contain tabs and ANSI escape sequences.
 * @returns The number of terminal columns the text occupies.
 */
export function visibleWidth(str: string): number {
  if (str.length === 0) {
    return 0;
  }

  // Fast path: pure ASCII printable. Tabs and ESC are below 0x20, so any
  // string that needs normalizing falls through to the slow path.
  let isPureAscii = true;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code < 0x20 || code > 0x7e) {
      isPureAscii = false;
      break;
    }
  }
  if (isPureAscii) {
    return str.length;
  }

  // Check cache (keyed by the original, un-normalized string)
  const cached = widthCache.get(str);
  if (cached !== undefined) {
    return cached;
  }

  // Normalize: tabs to 3 spaces, strip ANSI escape codes
  let clean = str;
  if (str.includes("\t")) {
    clean = clean.replace(/\t/g, "   ");
  }
  if (clean.includes("\x1b")) {
    clean = clean.replace(/\x1b\[[0-9;]*[mGKHJ]/g, "");
  }

  // Calculate width
  let width = 0;
  for (const { segment } of segmenter.segment(clean)) {
    width += graphemeWidth(segment);
  }

  // Cache result. A Map iterates keys in insertion order, so the first key is
  // the oldest entry; drop it once the cache is full.
  if (widthCache.size >= WIDTH_CACHE_SIZE) {
    const firstKey = widthCache.keys().next().value;
    if (firstKey !== undefined) {
      widthCache.delete(firstKey);
    }
  }
  widthCache.set(str, width);

  return width;
}
|
||||
|
||||
/**
|
||||
|
|
@ -408,15 +527,6 @@ function wrapSingleLine(line: string, width: number): string[] {
|
|||
return wrapped.length > 0 ? wrapped : [""];
|
||||
}
|
||||
|
||||
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
|
||||
|
||||
/**
|
||||
* Get the shared grapheme segmenter instance.
|
||||
*/
|
||||
export function getSegmenter(): Intl.Segmenter {
|
||||
return segmenter;
|
||||
}
|
||||
|
||||
const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue