import { eastAsianWidth } from "get-east-asian-width"; // Grapheme segmenter (shared instance) const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" }); /** * Get the shared grapheme segmenter instance. */ export function getSegmenter(): Intl.Segmenter { return segmenter; } /** * Check if a grapheme cluster (after segmentation) could possibly be an RGI emoji. * This is a fast heuristic to avoid the expensive rgiEmojiRegex test. * The tested Unicode blocks are deliberately broad to account for future * Unicode additions. */ function couldBeEmoji(segment: string): boolean { const cp = segment.codePointAt(0)!; return ( (cp >= 0x1f000 && cp <= 0x1fbff) || // Emoji and Pictograph (cp >= 0x2300 && cp <= 0x23ff) || // Misc technical (cp >= 0x2600 && cp <= 0x27bf) || // Misc symbols, dingbats (cp >= 0x2b50 && cp <= 0x2b55) || // Specific stars/circles segment.includes("\uFE0F") || // Contains VS16 (emoji presentation selector) segment.length > 2 // Multi-codepoint sequences (ZWJ, skin tones, etc.) ); } // Regexes for character classification (same as string-width library) const zeroWidthRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Mark}|\p{Surrogate})+$/v; const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v; const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; // Cache for non-ASCII strings const WIDTH_CACHE_SIZE = 512; const widthCache = new Map(); /** * Calculate the terminal width of a single grapheme cluster. * Based on code from the string-width library, but includes a possible-emoji * check to avoid running the RGI_Emoji regex unnecessarily. */ function graphemeWidth(segment: string): number { // Zero-width clusters if (zeroWidthRegex.test(segment)) { return 0; } // Emoji check with pre-filter if (couldBeEmoji(segment) && rgiEmojiRegex.test(segment)) { return 2; } // Get base visible codepoint const base = segment.replace(leadingNonPrintingRegex, ""); const cp = base.codePointAt(0); if (cp === undefined) { return 0; } // Regional indicator symbols (U+1F1E6..U+1F1FF) are often rendered as // full-width emoji in terminals, even when isolated during streaming. // Keep width conservative (2) to avoid terminal auto-wrap drift artifacts. if (cp >= 0x1f1e6 && cp <= 0x1f1ff) { return 2; } let width = eastAsianWidth(cp); // Trailing halfwidth/fullwidth forms if (segment.length > 1) { for (const char of segment.slice(1)) { const c = char.codePointAt(0)!; if (c >= 0xff00 && c <= 0xffef) { width += eastAsianWidth(c); } } } return width; } /** * Calculate the visible width of a string in terminal columns. */ export function visibleWidth(str: string): number { if (str.length === 0) { return 0; } // Fast path: pure ASCII printable let isPureAscii = true; for (let i = 0; i < str.length; i++) { const code = str.charCodeAt(i); if (code < 0x20 || code > 0x7e) { isPureAscii = false; break; } } if (isPureAscii) { return str.length; } // Check cache const cached = widthCache.get(str); if (cached !== undefined) { return cached; } // Normalize: tabs to 3 spaces, strip ANSI escape codes let clean = str; if (str.includes("\t")) { clean = clean.replace(/\t/g, " "); } if (clean.includes("\x1b")) { // Strip supported ANSI/OSC/APC escape sequences in one pass. // This covers CSI styling/cursor codes, OSC hyperlinks and prompt markers, // and APC sequences like CURSOR_MARKER. let stripped = ""; let i = 0; while (i < clean.length) { const ansi = extractAnsiCode(clean, i); if (ansi) { i += ansi.length; continue; } stripped += clean[i]; i++; } clean = stripped; } // Calculate width let width = 0; for (const { segment } of segmenter.segment(clean)) { width += graphemeWidth(segment); } // Cache result if (widthCache.size >= WIDTH_CACHE_SIZE) { const firstKey = widthCache.keys().next().value; if (firstKey !== undefined) { widthCache.delete(firstKey); } } widthCache.set(str, width); return width; } /** * Extract ANSI escape sequences from a string at the given position. */ export function extractAnsiCode( str: string, pos: number, ): { code: string; length: number } | null { if (pos >= str.length || str[pos] !== "\x1b") return null; const next = str[pos + 1]; // CSI sequence: ESC [ ... m/G/K/H/J if (next === "[") { let j = pos + 2; while (j < str.length && !/[mGKHJ]/.test(str[j]!)) j++; if (j < str.length) return { code: str.substring(pos, j + 1), length: j + 1 - pos }; return null; } // OSC sequence: ESC ] ... BEL or ESC ] ... ST (ESC \) // Used for hyperlinks (OSC 8), window titles, etc. if (next === "]") { let j = pos + 2; while (j < str.length) { if (str[j] === "\x07") return { code: str.substring(pos, j + 1), length: j + 1 - pos }; if (str[j] === "\x1b" && str[j + 1] === "\\") return { code: str.substring(pos, j + 2), length: j + 2 - pos }; j++; } return null; } // APC sequence: ESC _ ... BEL or ESC _ ... ST (ESC \) // Used for cursor marker and application-specific commands if (next === "_") { let j = pos + 2; while (j < str.length) { if (str[j] === "\x07") return { code: str.substring(pos, j + 1), length: j + 1 - pos }; if (str[j] === "\x1b" && str[j + 1] === "\\") return { code: str.substring(pos, j + 2), length: j + 2 - pos }; j++; } return null; } return null; } /** * Track active ANSI SGR codes to preserve styling across line breaks. */ class AnsiCodeTracker { // Track individual attributes separately so we can reset them specifically private bold = false; private dim = false; private italic = false; private underline = false; private blink = false; private inverse = false; private hidden = false; private strikethrough = false; private fgColor: string | null = null; // Stores the full code like "31" or "38;5;240" private bgColor: string | null = null; // Stores the full code like "41" or "48;5;240" process(ansiCode: string): void { if (!ansiCode.endsWith("m")) { return; } // Extract the parameters between \x1b[ and m const match = ansiCode.match(/\x1b\[([\d;]*)m/); if (!match) return; const params = match[1]; if (params === "" || params === "0") { // Full reset this.reset(); return; } // Parse parameters (can be semicolon-separated) const parts = params.split(";"); let i = 0; while (i < parts.length) { const code = Number.parseInt(parts[i], 10); // Handle 256-color and RGB codes which consume multiple parameters if (code === 38 || code === 48) { // 38;5;N (256 color fg) or 38;2;R;G;B (RGB fg) // 48;5;N (256 color bg) or 48;2;R;G;B (RGB bg) if (parts[i + 1] === "5" && parts[i + 2] !== undefined) { // 256 color: 38;5;N or 48;5;N const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]}`; if (code === 38) { this.fgColor = colorCode; } else { this.bgColor = colorCode; } i += 3; continue; } else if (parts[i + 1] === "2" && parts[i + 4] !== undefined) { // RGB color: 38;2;R;G;B or 48;2;R;G;B const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]};${parts[i + 3]};${parts[i + 4]}`; if (code === 38) { this.fgColor = colorCode; } else { this.bgColor = colorCode; } i += 5; continue; } } // Standard SGR codes switch (code) { case 0: this.reset(); break; case 1: this.bold = true; break; case 2: this.dim = true; break; case 3: this.italic = true; break; case 4: this.underline = true; break; case 5: this.blink = true; break; case 7: this.inverse = true; break; case 8: this.hidden = true; break; case 9: this.strikethrough = true; break; case 21: this.bold = false; break; // Some terminals case 22: this.bold = false; this.dim = false; break; case 23: this.italic = false; break; case 24: this.underline = false; break; case 25: this.blink = false; break; case 27: this.inverse = false; break; case 28: this.hidden = false; break; case 29: this.strikethrough = false; break; case 39: this.fgColor = null; break; // Default fg case 49: this.bgColor = null; break; // Default bg default: // Standard foreground colors 30-37, 90-97 if ((code >= 30 && code <= 37) || (code >= 90 && code <= 97)) { this.fgColor = String(code); } // Standard background colors 40-47, 100-107 else if ((code >= 40 && code <= 47) || (code >= 100 && code <= 107)) { this.bgColor = String(code); } break; } i++; } } private reset(): void { this.bold = false; this.dim = false; this.italic = false; this.underline = false; this.blink = false; this.inverse = false; this.hidden = false; this.strikethrough = false; this.fgColor = null; this.bgColor = null; } /** Clear all state for reuse. */ clear(): void { this.reset(); } getActiveCodes(): string { const codes: string[] = []; if (this.bold) codes.push("1"); if (this.dim) codes.push("2"); if (this.italic) codes.push("3"); if (this.underline) codes.push("4"); if (this.blink) codes.push("5"); if (this.inverse) codes.push("7"); if (this.hidden) codes.push("8"); if (this.strikethrough) codes.push("9"); if (this.fgColor) codes.push(this.fgColor); if (this.bgColor) codes.push(this.bgColor); if (codes.length === 0) return ""; return `\x1b[${codes.join(";")}m`; } hasActiveCodes(): boolean { return ( this.bold || this.dim || this.italic || this.underline || this.blink || this.inverse || this.hidden || this.strikethrough || this.fgColor !== null || this.bgColor !== null ); } /** * Get reset codes for attributes that need to be turned off at line end, * specifically underline which bleeds into padding. * Returns empty string if no problematic attributes are active. */ getLineEndReset(): string { // Only underline causes visual bleeding into padding // Other attributes like colors don't visually bleed to padding if (this.underline) { return "\x1b[24m"; // Underline off only } return ""; } } function updateTrackerFromText(text: string, tracker: AnsiCodeTracker): void { let i = 0; while (i < text.length) { const ansiResult = extractAnsiCode(text, i); if (ansiResult) { tracker.process(ansiResult.code); i += ansiResult.length; } else { i++; } } } /** * Split text into words while keeping ANSI codes attached. */ function splitIntoTokensWithAnsi(text: string): string[] { const tokens: string[] = []; let current = ""; let pendingAnsi = ""; // ANSI codes waiting to be attached to next visible content let inWhitespace = false; let i = 0; while (i < text.length) { const ansiResult = extractAnsiCode(text, i); if (ansiResult) { // Hold ANSI codes separately - they'll be attached to the next visible char pendingAnsi += ansiResult.code; i += ansiResult.length; continue; } const char = text[i]; const charIsSpace = char === " "; if (charIsSpace !== inWhitespace && current) { // Switching between whitespace and non-whitespace, push current token tokens.push(current); current = ""; } // Attach any pending ANSI codes to this visible character if (pendingAnsi) { current += pendingAnsi; pendingAnsi = ""; } inWhitespace = charIsSpace; current += char; i++; } // Handle any remaining pending ANSI codes (attach to last token) if (pendingAnsi) { current += pendingAnsi; } if (current) { tokens.push(current); } return tokens; } /** * Wrap text with ANSI codes preserved. * * ONLY does word wrapping - NO padding, NO background colors. * Returns lines where each line is <= width visible chars. * Active ANSI codes are preserved across line breaks. * * @param text - Text to wrap (may contain ANSI codes and newlines) * @param width - Maximum visible width per line * @returns Array of wrapped lines (NOT padded to width) */ export function wrapTextWithAnsi(text: string, width: number): string[] { if (!text) { return [""]; } // Handle newlines by processing each line separately // Track ANSI state across lines so styles carry over after literal newlines const inputLines = text.split("\n"); const result: string[] = []; const tracker = new AnsiCodeTracker(); for (const inputLine of inputLines) { // Prepend active ANSI codes from previous lines (except for first line) const prefix = result.length > 0 ? tracker.getActiveCodes() : ""; result.push(...wrapSingleLine(prefix + inputLine, width)); // Update tracker with codes from this line for next iteration updateTrackerFromText(inputLine, tracker); } return result.length > 0 ? result : [""]; } function wrapSingleLine(line: string, width: number): string[] { if (!line) { return [""]; } const visibleLength = visibleWidth(line); if (visibleLength <= width) { return [line]; } const wrapped: string[] = []; const tracker = new AnsiCodeTracker(); const tokens = splitIntoTokensWithAnsi(line); let currentLine = ""; let currentVisibleLength = 0; for (const token of tokens) { const tokenVisibleLength = visibleWidth(token); const isWhitespace = token.trim() === ""; // Token itself is too long - break it character by character if (tokenVisibleLength > width && !isWhitespace) { if (currentLine) { // Add specific reset for underline only (preserves background) const lineEndReset = tracker.getLineEndReset(); if (lineEndReset) { currentLine += lineEndReset; } wrapped.push(currentLine); currentLine = ""; currentVisibleLength = 0; } // Break long token - breakLongWord handles its own resets const broken = breakLongWord(token, width, tracker); wrapped.push(...broken.slice(0, -1)); currentLine = broken[broken.length - 1]; currentVisibleLength = visibleWidth(currentLine); continue; } // Check if adding this token would exceed width const totalNeeded = currentVisibleLength + tokenVisibleLength; if (totalNeeded > width && currentVisibleLength > 0) { // Trim trailing whitespace, then add underline reset (not full reset, to preserve background) let lineToWrap = currentLine.trimEnd(); const lineEndReset = tracker.getLineEndReset(); if (lineEndReset) { lineToWrap += lineEndReset; } wrapped.push(lineToWrap); if (isWhitespace) { // Don't start new line with whitespace currentLine = tracker.getActiveCodes(); currentVisibleLength = 0; } else { currentLine = tracker.getActiveCodes() + token; currentVisibleLength = tokenVisibleLength; } } else { // Add to current line currentLine += token; currentVisibleLength += tokenVisibleLength; } updateTrackerFromText(token, tracker); } if (currentLine) { // No reset at end of final line - let caller handle it wrapped.push(currentLine); } // Trailing whitespace can cause lines to exceed the requested width return wrapped.length > 0 ? wrapped.map((line) => line.trimEnd()) : [""]; } const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/; /** * Check if a character is whitespace. */ export function isWhitespaceChar(char: string): boolean { return /\s/.test(char); } /** * Check if a character is punctuation. */ export function isPunctuationChar(char: string): boolean { return PUNCTUATION_REGEX.test(char); } function breakLongWord( word: string, width: number, tracker: AnsiCodeTracker, ): string[] { const lines: string[] = []; let currentLine = tracker.getActiveCodes(); let currentWidth = 0; // First, separate ANSI codes from visible content // We need to handle ANSI codes specially since they're not graphemes let i = 0; const segments: Array<{ type: "ansi" | "grapheme"; value: string }> = []; while (i < word.length) { const ansiResult = extractAnsiCode(word, i); if (ansiResult) { segments.push({ type: "ansi", value: ansiResult.code }); i += ansiResult.length; } else { // Find the next ANSI code or end of string let end = i; while (end < word.length) { const nextAnsi = extractAnsiCode(word, end); if (nextAnsi) break; end++; } // Segment this non-ANSI portion into graphemes const textPortion = word.slice(i, end); for (const seg of segmenter.segment(textPortion)) { segments.push({ type: "grapheme", value: seg.segment }); } i = end; } } // Now process segments for (const seg of segments) { if (seg.type === "ansi") { currentLine += seg.value; tracker.process(seg.value); continue; } const grapheme = seg.value; // Skip empty graphemes to avoid issues with string-width calculation if (!grapheme) continue; const graphemeWidth = visibleWidth(grapheme); if (currentWidth + graphemeWidth > width) { // Add specific reset for underline only (preserves background) const lineEndReset = tracker.getLineEndReset(); if (lineEndReset) { currentLine += lineEndReset; } lines.push(currentLine); currentLine = tracker.getActiveCodes(); currentWidth = 0; } currentLine += grapheme; currentWidth += graphemeWidth; } if (currentLine) { // No reset at end of final segment - caller handles continuation lines.push(currentLine); } return lines.length > 0 ? lines : [""]; } /** * Apply background color to a line, padding to full width. * * @param line - Line of text (may contain ANSI codes) * @param width - Total width to pad to * @param bgFn - Background color function * @returns Line with background applied and padded to width */ export function applyBackgroundToLine( line: string, width: number, bgFn: (text: string) => string, ): string { // Calculate padding needed const visibleLen = visibleWidth(line); const paddingNeeded = Math.max(0, width - visibleLen); const padding = " ".repeat(paddingNeeded); // Apply background to content + padding const withPadding = line + padding; return bgFn(withPadding); } /** * Truncate text to fit within a maximum visible width, adding ellipsis if needed. * Optionally pad with spaces to reach exactly maxWidth. * Properly handles ANSI escape codes (they don't count toward width). * * @param text - Text to truncate (may contain ANSI codes) * @param maxWidth - Maximum visible width * @param ellipsis - Ellipsis string to append when truncating (default: "...") * @param pad - If true, pad result with spaces to exactly maxWidth (default: false) * @returns Truncated text, optionally padded to exactly maxWidth */ export function truncateToWidth( text: string, maxWidth: number, ellipsis: string = "...", pad: boolean = false, ): string { const textVisibleWidth = visibleWidth(text); if (textVisibleWidth <= maxWidth) { return pad ? text + " ".repeat(maxWidth - textVisibleWidth) : text; } const ellipsisWidth = visibleWidth(ellipsis); const targetWidth = maxWidth - ellipsisWidth; if (targetWidth <= 0) { return ellipsis.substring(0, maxWidth); } // Separate ANSI codes from visible content using grapheme segmentation let i = 0; const segments: Array<{ type: "ansi" | "grapheme"; value: string }> = []; while (i < text.length) { const ansiResult = extractAnsiCode(text, i); if (ansiResult) { segments.push({ type: "ansi", value: ansiResult.code }); i += ansiResult.length; } else { // Find the next ANSI code or end of string let end = i; while (end < text.length) { const nextAnsi = extractAnsiCode(text, end); if (nextAnsi) break; end++; } // Segment this non-ANSI portion into graphemes const textPortion = text.slice(i, end); for (const seg of segmenter.segment(textPortion)) { segments.push({ type: "grapheme", value: seg.segment }); } i = end; } } // Build truncated string from segments let result = ""; let currentWidth = 0; for (const seg of segments) { if (seg.type === "ansi") { result += seg.value; continue; } const grapheme = seg.value; // Skip empty graphemes to avoid issues with string-width calculation if (!grapheme) continue; const graphemeWidth = visibleWidth(grapheme); if (currentWidth + graphemeWidth > targetWidth) { break; } result += grapheme; currentWidth += graphemeWidth; } // Add reset code before ellipsis to prevent styling leaking into it const truncated = `${result}\x1b[0m${ellipsis}`; if (pad) { const truncatedWidth = visibleWidth(truncated); return truncated + " ".repeat(Math.max(0, maxWidth - truncatedWidth)); } return truncated; } /** * Extract a range of visible columns from a line. Handles ANSI codes and wide chars. * @param strict - If true, exclude wide chars at boundary that would extend past the range */ export function sliceByColumn( line: string, startCol: number, length: number, strict = false, ): string { return sliceWithWidth(line, startCol, length, strict).text; } /** Like sliceByColumn but also returns the actual visible width of the result. */ export function sliceWithWidth( line: string, startCol: number, length: number, strict = false, ): { text: string; width: number } { if (length <= 0) return { text: "", width: 0 }; const endCol = startCol + length; let result = "", resultWidth = 0, currentCol = 0, i = 0, pendingAnsi = ""; while (i < line.length) { const ansi = extractAnsiCode(line, i); if (ansi) { if (currentCol >= startCol && currentCol < endCol) result += ansi.code; else if (currentCol < startCol) pendingAnsi += ansi.code; i += ansi.length; continue; } let textEnd = i; while (textEnd < line.length && !extractAnsiCode(line, textEnd)) textEnd++; for (const { segment } of segmenter.segment(line.slice(i, textEnd))) { const w = graphemeWidth(segment); const inRange = currentCol >= startCol && currentCol < endCol; const fits = !strict || currentCol + w <= endCol; if (inRange && fits) { if (pendingAnsi) { result += pendingAnsi; pendingAnsi = ""; } result += segment; resultWidth += w; } currentCol += w; if (currentCol >= endCol) break; } i = textEnd; if (currentCol >= endCol) break; } return { text: result, width: resultWidth }; } // Pooled tracker instance for extractSegments (avoids allocation per call) const pooledStyleTracker = new AnsiCodeTracker(); /** * Extract "before" and "after" segments from a line in a single pass. * Used for overlay compositing where we need content before and after the overlay region. * Preserves styling from before the overlay that should affect content after it. */ export function extractSegments( line: string, beforeEnd: number, afterStart: number, afterLen: number, strictAfter = false, ): { before: string; beforeWidth: number; after: string; afterWidth: number } { let before = "", beforeWidth = 0, after = "", afterWidth = 0; let currentCol = 0, i = 0; let pendingAnsiBefore = ""; let afterStarted = false; const afterEnd = afterStart + afterLen; // Track styling state so "after" inherits styling from before the overlay pooledStyleTracker.clear(); while (i < line.length) { const ansi = extractAnsiCode(line, i); if (ansi) { // Track all SGR codes to know styling state at afterStart pooledStyleTracker.process(ansi.code); // Include ANSI codes in their respective segments if (currentCol < beforeEnd) { pendingAnsiBefore += ansi.code; } else if ( currentCol >= afterStart && currentCol < afterEnd && afterStarted ) { // Only include after we've started "after" (styling already prepended) after += ansi.code; } i += ansi.length; continue; } let textEnd = i; while (textEnd < line.length && !extractAnsiCode(line, textEnd)) textEnd++; for (const { segment } of segmenter.segment(line.slice(i, textEnd))) { const w = graphemeWidth(segment); if (currentCol < beforeEnd) { if (pendingAnsiBefore) { before += pendingAnsiBefore; pendingAnsiBefore = ""; } before += segment; beforeWidth += w; } else if (currentCol >= afterStart && currentCol < afterEnd) { const fits = !strictAfter || currentCol + w <= afterEnd; if (fits) { // On first "after" grapheme, prepend inherited styling from before overlay if (!afterStarted) { after += pooledStyleTracker.getActiveCodes(); afterStarted = true; } after += segment; afterWidth += w; } } currentCol += w; // Early exit: done with "before" only, or done with both segments if (afterLen <= 0 ? currentCol >= beforeEnd : currentCol >= afterEnd) break; } i = textEnd; if (afterLen <= 0 ? currentCol >= beforeEnd : currentCol >= afterEnd) break; } return { before, beforeWidth, after, afterWidth }; }