Fix characters (#372)

* Fix cat command

* Fix text rendering crash from undefined code points in bash output

* Revert unintentional model parameter changes from fix cat command commit
This commit is contained in:
Mr. Rc 2026-01-01 06:46:29 +05:30 committed by GitHub
parent 46bb5dcde8
commit bbf23bd5f1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 40 additions and 9 deletions

View file

@ -13,6 +13,7 @@ import {
import stripAnsi from "strip-ansi";
import type { CustomTool } from "../../../core/custom-tools/types.js";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize } from "../../../core/tools/truncate.js";
import { sanitizeBinaryOutput } from "../../../utils/shell.js";
import { getLanguageFromPath, highlightCode, theme } from "../theme/theme.js";
import { renderDiff } from "./diff.js";
import { truncateToVisualLines } from "./visual-truncate.js";
@ -295,10 +296,8 @@ export class ToolExecutionComponent extends Container {
let output = textBlocks
.map((c: any) => {
let text = stripAnsi(c.text || "").replace(/\r/g, "");
text = text.replace(/\x1b./g, "");
text = text.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]/g, "");
return text;
// Use sanitizeBinaryOutput to handle binary data that crashes string-width
return sanitizeBinaryOutput(stripAnsi(c.text || "")).replace(/\r/g, "");
})
.join("\n");

View file

@ -100,13 +100,38 @@ export function getShellConfig(): { shell: string; args: string[] } {
* - Control characters (except tab, newline, carriage return)
* - Lone surrogates
* - Unicode Format characters (crash string-width due to a bug)
* - Characters with undefined code points
*/
export function sanitizeBinaryOutput(str: string): string {
	// Single pass over the input. The `u` flag makes the regex walk code points
	// rather than UTF-16 code units, so a valid surrogate pair (e.g. an emoji) is
	// seen as one astral character and kept; only unpaired surrogates match
	// \p{Surrogate}.
	//
	// Note: Array.from() / codePointAt() do NOT drop lone surrogates - they hand
	// them back unchanged - so they have to be matched explicitly here.
	//
	// Removed:
	// - \p{Format}: Unicode format chars (e.g. \u0601, \uFFF9-\uFFFB) that crash string-width
	// - \p{Surrogate}: lone surrogates from invalid UTF-8
	// - Control chars 0x00-0x1F, except \t (0x09), \n (0x0a) and \r (0x0d)
	return str.replace(/[\p{Format}\p{Surrogate}\x00-\x08\x0B\x0C\x0E-\x1F]/gu, "");
}
/**

View file

@ -4,6 +4,7 @@ import stringWidth from "string-width";
* Calculate the visible width of a string in terminal columns.
*/
export function visibleWidth(str: string): number {
	// Nothing to measure for an empty (falsy) string; skip the string-width call.
	if (!str) {
		return 0;
	}
	// Swap every tab for the space placeholder, then measure the result.
	return stringWidth(str.split("\t").join(" "));
}
@ -472,6 +473,9 @@ function breakLongWord(word: string, width: number, tracker: AnsiCodeTracker): s
}
const grapheme = seg.value;
// Skip empty graphemes to avoid issues with string-width calculation
if (!grapheme) continue;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > width) {
@ -576,6 +580,9 @@ export function truncateToWidth(text: string, maxWidth: number, ellipsis: string
}
const grapheme = seg.value;
// Skip empty graphemes to avoid issues with string-width calculation
if (!grapheme) continue;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > targetWidth) {