co-mono/packages/tui/src/utils.ts
Mr. Rc bbf23bd5f1
Fix characters (#372)
* Fix cat command

* Fix text rendering crash from undefined code points in bash output

* Revert unintentional model parameter changes from fix cat command commit
2026-01-01 02:16:29 +01:00

598 lines
15 KiB
TypeScript

import stringWidth from "string-width";
/**
* Calculate the visible width of a string in terminal columns.
*/
export function visibleWidth(str: string): number {
if (!str) return 0;
const normalized = str.replace(/\t/g, " ");
return stringWidth(normalized);
}
/**
* Extract ANSI escape sequences from a string at the given position.
*/
function extractAnsiCode(str: string, pos: number): { code: string; length: number } | null {
if (pos >= str.length || str[pos] !== "\x1b" || str[pos + 1] !== "[") {
return null;
}
let j = pos + 2;
while (j < str.length && str[j] && !/[mGKHJ]/.test(str[j]!)) {
j++;
}
if (j < str.length) {
return {
code: str.substring(pos, j + 1),
length: j + 1 - pos,
};
}
return null;
}
/**
* Track active ANSI SGR codes to preserve styling across line breaks.
*/
class AnsiCodeTracker {
// Track individual attributes separately so we can reset them specifically
private bold = false;
private dim = false;
private italic = false;
private underline = false;
private blink = false;
private inverse = false;
private hidden = false;
private strikethrough = false;
private fgColor: string | null = null; // Stores the full code like "31" or "38;5;240"
private bgColor: string | null = null; // Stores the full code like "41" or "48;5;240"
process(ansiCode: string): void {
if (!ansiCode.endsWith("m")) {
return;
}
// Extract the parameters between \x1b[ and m
const match = ansiCode.match(/\x1b\[([\d;]*)m/);
if (!match) return;
const params = match[1];
if (params === "" || params === "0") {
// Full reset
this.reset();
return;
}
// Parse parameters (can be semicolon-separated)
const parts = params.split(";");
let i = 0;
while (i < parts.length) {
const code = Number.parseInt(parts[i], 10);
// Handle 256-color and RGB codes which consume multiple parameters
if (code === 38 || code === 48) {
// 38;5;N (256 color fg) or 38;2;R;G;B (RGB fg)
// 48;5;N (256 color bg) or 48;2;R;G;B (RGB bg)
if (parts[i + 1] === "5" && parts[i + 2] !== undefined) {
// 256 color: 38;5;N or 48;5;N
const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]}`;
if (code === 38) {
this.fgColor = colorCode;
} else {
this.bgColor = colorCode;
}
i += 3;
continue;
} else if (parts[i + 1] === "2" && parts[i + 4] !== undefined) {
// RGB color: 38;2;R;G;B or 48;2;R;G;B
const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]};${parts[i + 3]};${parts[i + 4]}`;
if (code === 38) {
this.fgColor = colorCode;
} else {
this.bgColor = colorCode;
}
i += 5;
continue;
}
}
// Standard SGR codes
switch (code) {
case 0:
this.reset();
break;
case 1:
this.bold = true;
break;
case 2:
this.dim = true;
break;
case 3:
this.italic = true;
break;
case 4:
this.underline = true;
break;
case 5:
this.blink = true;
break;
case 7:
this.inverse = true;
break;
case 8:
this.hidden = true;
break;
case 9:
this.strikethrough = true;
break;
case 21:
this.bold = false;
break; // Some terminals
case 22:
this.bold = false;
this.dim = false;
break;
case 23:
this.italic = false;
break;
case 24:
this.underline = false;
break;
case 25:
this.blink = false;
break;
case 27:
this.inverse = false;
break;
case 28:
this.hidden = false;
break;
case 29:
this.strikethrough = false;
break;
case 39:
this.fgColor = null;
break; // Default fg
case 49:
this.bgColor = null;
break; // Default bg
default:
// Standard foreground colors 30-37, 90-97
if ((code >= 30 && code <= 37) || (code >= 90 && code <= 97)) {
this.fgColor = String(code);
}
// Standard background colors 40-47, 100-107
else if ((code >= 40 && code <= 47) || (code >= 100 && code <= 107)) {
this.bgColor = String(code);
}
break;
}
i++;
}
}
private reset(): void {
this.bold = false;
this.dim = false;
this.italic = false;
this.underline = false;
this.blink = false;
this.inverse = false;
this.hidden = false;
this.strikethrough = false;
this.fgColor = null;
this.bgColor = null;
}
getActiveCodes(): string {
const codes: string[] = [];
if (this.bold) codes.push("1");
if (this.dim) codes.push("2");
if (this.italic) codes.push("3");
if (this.underline) codes.push("4");
if (this.blink) codes.push("5");
if (this.inverse) codes.push("7");
if (this.hidden) codes.push("8");
if (this.strikethrough) codes.push("9");
if (this.fgColor) codes.push(this.fgColor);
if (this.bgColor) codes.push(this.bgColor);
if (codes.length === 0) return "";
return `\x1b[${codes.join(";")}m`;
}
hasActiveCodes(): boolean {
return (
this.bold ||
this.dim ||
this.italic ||
this.underline ||
this.blink ||
this.inverse ||
this.hidden ||
this.strikethrough ||
this.fgColor !== null ||
this.bgColor !== null
);
}
/**
* Get reset codes for attributes that need to be turned off at line end,
* specifically underline which bleeds into padding.
* Returns empty string if no problematic attributes are active.
*/
getLineEndReset(): string {
// Only underline causes visual bleeding into padding
// Other attributes like colors don't visually bleed to padding
if (this.underline) {
return "\x1b[24m"; // Underline off only
}
return "";
}
}
function updateTrackerFromText(text: string, tracker: AnsiCodeTracker): void {
let i = 0;
while (i < text.length) {
const ansiResult = extractAnsiCode(text, i);
if (ansiResult) {
tracker.process(ansiResult.code);
i += ansiResult.length;
} else {
i++;
}
}
}
/**
* Split text into words while keeping ANSI codes attached.
*/
function splitIntoTokensWithAnsi(text: string): string[] {
const tokens: string[] = [];
let current = "";
let pendingAnsi = ""; // ANSI codes waiting to be attached to next visible content
let inWhitespace = false;
let i = 0;
while (i < text.length) {
const ansiResult = extractAnsiCode(text, i);
if (ansiResult) {
// Hold ANSI codes separately - they'll be attached to the next visible char
pendingAnsi += ansiResult.code;
i += ansiResult.length;
continue;
}
const char = text[i];
const charIsSpace = char === " ";
if (charIsSpace !== inWhitespace && current) {
// Switching between whitespace and non-whitespace, push current token
tokens.push(current);
current = "";
}
// Attach any pending ANSI codes to this visible character
if (pendingAnsi) {
current += pendingAnsi;
pendingAnsi = "";
}
inWhitespace = charIsSpace;
current += char;
i++;
}
// Handle any remaining pending ANSI codes (attach to last token)
if (pendingAnsi) {
current += pendingAnsi;
}
if (current) {
tokens.push(current);
}
return tokens;
}
/**
* Wrap text with ANSI codes preserved.
*
* ONLY does word wrapping - NO padding, NO background colors.
* Returns lines where each line is <= width visible chars.
* Active ANSI codes are preserved across line breaks.
*
* @param text - Text to wrap (may contain ANSI codes and newlines)
* @param width - Maximum visible width per line
* @returns Array of wrapped lines (NOT padded to width)
*/
export function wrapTextWithAnsi(text: string, width: number): string[] {
if (!text) {
return [""];
}
// Handle newlines by processing each line separately
// Track ANSI state across lines so styles carry over after literal newlines
const inputLines = text.split("\n");
const result: string[] = [];
const tracker = new AnsiCodeTracker();
for (const inputLine of inputLines) {
// Prepend active ANSI codes from previous lines (except for first line)
const prefix = result.length > 0 ? tracker.getActiveCodes() : "";
result.push(...wrapSingleLine(prefix + inputLine, width));
// Update tracker with codes from this line for next iteration
updateTrackerFromText(inputLine, tracker);
}
return result.length > 0 ? result : [""];
}
function wrapSingleLine(line: string, width: number): string[] {
if (!line) {
return [""];
}
const visibleLength = visibleWidth(line);
if (visibleLength <= width) {
return [line];
}
const wrapped: string[] = [];
const tracker = new AnsiCodeTracker();
const tokens = splitIntoTokensWithAnsi(line);
let currentLine = "";
let currentVisibleLength = 0;
for (const token of tokens) {
const tokenVisibleLength = visibleWidth(token);
const isWhitespace = token.trim() === "";
// Token itself is too long - break it character by character
if (tokenVisibleLength > width && !isWhitespace) {
if (currentLine) {
// Add specific reset for underline only (preserves background)
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
currentLine += lineEndReset;
}
wrapped.push(currentLine);
currentLine = "";
currentVisibleLength = 0;
}
// Break long token - breakLongWord handles its own resets
const broken = breakLongWord(token, width, tracker);
wrapped.push(...broken.slice(0, -1));
currentLine = broken[broken.length - 1];
currentVisibleLength = visibleWidth(currentLine);
continue;
}
// Check if adding this token would exceed width
const totalNeeded = currentVisibleLength + tokenVisibleLength;
if (totalNeeded > width && currentVisibleLength > 0) {
// Add specific reset for underline only (preserves background)
let lineToWrap = currentLine.trimEnd();
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
lineToWrap += lineEndReset;
}
wrapped.push(lineToWrap);
if (isWhitespace) {
// Don't start new line with whitespace
currentLine = tracker.getActiveCodes();
currentVisibleLength = 0;
} else {
currentLine = tracker.getActiveCodes() + token;
currentVisibleLength = tokenVisibleLength;
}
} else {
// Add to current line
currentLine += token;
currentVisibleLength += tokenVisibleLength;
}
updateTrackerFromText(token, tracker);
}
if (currentLine) {
// No reset at end of final line - let caller handle it
wrapped.push(currentLine);
}
return wrapped.length > 0 ? wrapped : [""];
}
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
/**
* Get the shared grapheme segmenter instance.
*/
export function getSegmenter(): Intl.Segmenter {
return segmenter;
}
const PUNCTUATION_REGEX = /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/;
/**
* Check if a character is whitespace.
*/
export function isWhitespaceChar(char: string): boolean {
return /\s/.test(char);
}
/**
* Check if a character is punctuation.
*/
export function isPunctuationChar(char: string): boolean {
return PUNCTUATION_REGEX.test(char);
}
function breakLongWord(word: string, width: number, tracker: AnsiCodeTracker): string[] {
const lines: string[] = [];
let currentLine = tracker.getActiveCodes();
let currentWidth = 0;
// First, separate ANSI codes from visible content
// We need to handle ANSI codes specially since they're not graphemes
let i = 0;
const segments: Array<{ type: "ansi" | "grapheme"; value: string }> = [];
while (i < word.length) {
const ansiResult = extractAnsiCode(word, i);
if (ansiResult) {
segments.push({ type: "ansi", value: ansiResult.code });
i += ansiResult.length;
} else {
// Find the next ANSI code or end of string
let end = i;
while (end < word.length) {
const nextAnsi = extractAnsiCode(word, end);
if (nextAnsi) break;
end++;
}
// Segment this non-ANSI portion into graphemes
const textPortion = word.slice(i, end);
for (const seg of segmenter.segment(textPortion)) {
segments.push({ type: "grapheme", value: seg.segment });
}
i = end;
}
}
// Now process segments
for (const seg of segments) {
if (seg.type === "ansi") {
currentLine += seg.value;
tracker.process(seg.value);
continue;
}
const grapheme = seg.value;
// Skip empty graphemes to avoid issues with string-width calculation
if (!grapheme) continue;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > width) {
// Add specific reset for underline only (preserves background)
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
currentLine += lineEndReset;
}
lines.push(currentLine);
currentLine = tracker.getActiveCodes();
currentWidth = 0;
}
currentLine += grapheme;
currentWidth += graphemeWidth;
}
if (currentLine) {
// No reset at end of final segment - caller handles continuation
lines.push(currentLine);
}
return lines.length > 0 ? lines : [""];
}
/**
* Apply background color to a line, padding to full width.
*
* @param line - Line of text (may contain ANSI codes)
* @param width - Total width to pad to
* @param bgFn - Background color function
* @returns Line with background applied and padded to width
*/
export function applyBackgroundToLine(line: string, width: number, bgFn: (text: string) => string): string {
// Calculate padding needed
const visibleLen = visibleWidth(line);
const paddingNeeded = Math.max(0, width - visibleLen);
const padding = " ".repeat(paddingNeeded);
// Apply background to content + padding
const withPadding = line + padding;
return bgFn(withPadding);
}
/**
* Truncate text to fit within a maximum visible width, adding ellipsis if needed.
* Properly handles ANSI escape codes (they don't count toward width).
*
* @param text - Text to truncate (may contain ANSI codes)
* @param maxWidth - Maximum visible width
* @param ellipsis - Ellipsis string to append when truncating (default: "...")
* @returns Truncated text with ellipsis if it exceeded maxWidth
*/
export function truncateToWidth(text: string, maxWidth: number, ellipsis: string = "..."): string {
const textVisibleWidth = visibleWidth(text);
if (textVisibleWidth <= maxWidth) {
return text;
}
const ellipsisWidth = visibleWidth(ellipsis);
const targetWidth = maxWidth - ellipsisWidth;
if (targetWidth <= 0) {
return ellipsis.substring(0, maxWidth);
}
// Separate ANSI codes from visible content using grapheme segmentation
let i = 0;
const segments: Array<{ type: "ansi" | "grapheme"; value: string }> = [];
while (i < text.length) {
const ansiResult = extractAnsiCode(text, i);
if (ansiResult) {
segments.push({ type: "ansi", value: ansiResult.code });
i += ansiResult.length;
} else {
// Find the next ANSI code or end of string
let end = i;
while (end < text.length) {
const nextAnsi = extractAnsiCode(text, end);
if (nextAnsi) break;
end++;
}
// Segment this non-ANSI portion into graphemes
const textPortion = text.slice(i, end);
for (const seg of segmenter.segment(textPortion)) {
segments.push({ type: "grapheme", value: seg.segment });
}
i = end;
}
}
// Build truncated string from segments
let result = "";
let currentWidth = 0;
for (const seg of segments) {
if (seg.type === "ansi") {
result += seg.value;
continue;
}
const grapheme = seg.value;
// Skip empty graphemes to avoid issues with string-width calculation
if (!grapheme) continue;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > targetWidth) {
break;
}
result += grapheme;
currentWidth += graphemeWidth;
}
// Add reset code before ellipsis to prevent styling leaking into it
return `${result}\x1b[0m${ellipsis}`;
}