Merge pull request #924 from Perlence/fix/wrap-whitespace

fix(tui): rewrite word wrap to not strip whitespace
2026-04-15 13:03:42 +00:00 · 2026-01-24 03:02:48 +01:00 · 2026-01-24 03:02:48 +01:00 · cd8ce452c7
commit cd8ce452c7
parent 25bdfb0b8e 9090268b7d
2 changed files with 127 additions and 137 deletions
--- a/packages/tui/src/components/editor.ts
+++ b/packages/tui/src/components/editor.ts
@@ -11,7 +11,7 @@ const segmenter = getSegmenter();
 * Represents a chunk of text for word-wrap layout.
 * Tracks both the text content and its position in the original line.
 */
-interface TextChunk {
+export interface TextChunk {
 	text: string;
 	startIndex: number;
 	endIndex: number;
@@ -26,7 +26,7 @@ interface TextChunk {
 * @param maxWidth - Maximum visible width per chunk
 * @returns Array of chunks with text and position information
 */
-function wordWrapLine(line: string, maxWidth: number): TextChunk[] {
+export function wordWrapLine(line: string, maxWidth: number): TextChunk[] {
 	if (!line || maxWidth <= 0) {
 		return [{ text: "", startIndex: 0, endIndex: 0 }];
 	}
@@ -37,154 +37,56 @@ function wordWrapLine(line: string, maxWidth: number): TextChunk[] {
 	}

 	const chunks: TextChunk[] = [];
+	const segments = [...segmenter.segment(line)];

-	// Split into tokens (words and whitespace runs)
-	const tokens: { text: string; startIndex: number; endIndex: number; isWhitespace: boolean }[] = [];
-	let currentToken = "";
-	let tokenStart = 0;
-	let inWhitespace = false;
-	let charIndex = 0;
-
-	for (const seg of segmenter.segment(line)) {
-		const grapheme = seg.segment;
-		const graphemeIsWhitespace = isWhitespaceChar(grapheme);
-
-		if (currentToken === "") {
-			inWhitespace = graphemeIsWhitespace;
-			tokenStart = charIndex;
-		} else if (graphemeIsWhitespace !== inWhitespace) {
-			// Token type changed - save current token
-			tokens.push({
-				text: currentToken,
-				startIndex: tokenStart,
-				endIndex: charIndex,
-				isWhitespace: inWhitespace,
-			});
-			currentToken = "";
-			tokenStart = charIndex;
-			inWhitespace = graphemeIsWhitespace;
-		}
-
-		currentToken += grapheme;
-		charIndex += grapheme.length;
-	}
-
-	// Push final token
-	if (currentToken) {
-		tokens.push({
-			text: currentToken,
-			startIndex: tokenStart,
-			endIndex: charIndex,
-			isWhitespace: inWhitespace,
-		});
-	}
-
-	// Build chunks using word wrapping
-	let currentChunk = "";
 	let currentWidth = 0;
-	let chunkStartIndex = 0;
-	let atLineStart = true; // Track if we're at the start of a line (for skipping whitespace)
+	let chunkStart = 0;

-	for (const token of tokens) {
-		const tokenWidth = visibleWidth(token.text);
+	// Wrap opportunity: the position after the last whitespace before a non-whitespace
+	// grapheme, i.e. where a line break is allowed.
+	let wrapOppIndex = -1;
+	let wrapOppWidth = 0;

-		// Skip leading whitespace at line start
-		if (atLineStart && token.isWhitespace) {
-			chunkStartIndex = token.endIndex;
-			continue;
-		}
-		atLineStart = false;
+	for (let i = 0; i < segments.length; i++) {
+		const seg = segments[i]!;
+		const grapheme = seg.segment;
+		const gWidth = visibleWidth(grapheme);
+		const charIndex = seg.index;
+		const isWs = isWhitespaceChar(grapheme);

-		// If this single token is wider than maxWidth, we need to break it
-		if (tokenWidth > maxWidth) {
-			// First, push any accumulated chunk
-			if (currentChunk) {
-				chunks.push({
-					text: currentChunk,
-					startIndex: chunkStartIndex,
-					endIndex: token.startIndex,
-				});
-				currentChunk = "";
+		// Overflow check before advancing.
+		if (currentWidth + gWidth > maxWidth) {
+			if (wrapOppIndex >= 0) {
+				// Backtrack to last wrap opportunity.
+				chunks.push({ text: line.slice(chunkStart, wrapOppIndex), startIndex: chunkStart, endIndex: wrapOppIndex });
+				chunkStart = wrapOppIndex;
+				currentWidth -= wrapOppWidth;
+			} else if (chunkStart < charIndex) {
+				// No wrap opportunity: force-break at current position.
+				chunks.push({ text: line.slice(chunkStart, charIndex), startIndex: chunkStart, endIndex: charIndex });
+				chunkStart = charIndex;
 				currentWidth = 0;
-				chunkStartIndex = token.startIndex;
 			}
-
-			// Break the long token by grapheme
-			let tokenChunk = "";
-			let tokenChunkWidth = 0;
-			let tokenChunkStart = token.startIndex;
-			let tokenCharIndex = token.startIndex;
-
-			for (const seg of segmenter.segment(token.text)) {
-				const grapheme = seg.segment;
-				const graphemeWidth = visibleWidth(grapheme);
-
-				if (tokenChunkWidth + graphemeWidth > maxWidth && tokenChunk) {
-					chunks.push({
-						text: tokenChunk,
-						startIndex: tokenChunkStart,
-						endIndex: tokenCharIndex,
-					});
-					tokenChunk = grapheme;
-					tokenChunkWidth = graphemeWidth;
-					tokenChunkStart = tokenCharIndex;
-				} else {
-					tokenChunk += grapheme;
-					tokenChunkWidth += graphemeWidth;
-				}
-				tokenCharIndex += grapheme.length;
-			}
-
-			// Keep remainder as start of next chunk
-			if (tokenChunk) {
-				currentChunk = tokenChunk;
-				currentWidth = tokenChunkWidth;
-				chunkStartIndex = tokenChunkStart;
-			}
-			continue;
+			wrapOppIndex = -1;
 		}

-		// Check if adding this token would exceed width
-		if (currentWidth + tokenWidth > maxWidth) {
-			// Push current chunk (trimming trailing whitespace for display)
-			const trimmedChunk = currentChunk.trimEnd();
-			if (trimmedChunk || chunks.length === 0) {
-				chunks.push({
-					text: trimmedChunk,
-					startIndex: chunkStartIndex,
-					endIndex: chunkStartIndex + currentChunk.length,
-				});
-			}
+		// Advance.
+		currentWidth += gWidth;

-			// Start new line - skip leading whitespace
-			atLineStart = true;
-			if (token.isWhitespace) {
-				currentChunk = "";
-				currentWidth = 0;
-				chunkStartIndex = token.endIndex;
-			} else {
-				currentChunk = token.text;
-				currentWidth = tokenWidth;
-				chunkStartIndex = token.startIndex;
-				atLineStart = false;
-			}
-		} else {
-			// Add token to current chunk
-			currentChunk += token.text;
-			currentWidth += tokenWidth;
+		// Record wrap opportunity: whitespace followed by non-whitespace.
+		// Multiple spaces join (no break between them); the break point is
+		// after the last space before the next word.
+		const next = segments[i + 1];
+		if (isWs && next && !isWhitespaceChar(next.segment)) {
+			wrapOppIndex = next.index;
+			wrapOppWidth = currentWidth;
 		}
 	}

-	// Push final chunk
-	if (currentChunk) {
-		chunks.push({
-			text: currentChunk,
-			startIndex: chunkStartIndex,
-			endIndex: line.length,
-		});
-	}
+	// Push final chunk.
+	chunks.push({ text: line.slice(chunkStart), startIndex: chunkStart, endIndex: line.length });

-	return chunks.length > 0 ? chunks : [{ text: "", startIndex: 0, endIndex: 0 }];
+	return chunks;
 }

 // Kitty CSI-u sequences for printable keys, including optional shifted/base codepoints.
--- a/packages/tui/test/editor.test.ts
+++ b/packages/tui/test/editor.test.ts
@@ -2,7 +2,7 @@ import assert from "node:assert";
 import { describe, it } from "node:test";
 import { stripVTControlCharacters } from "node:util";
 import type { AutocompleteProvider } from "../src/autocomplete.js";
-import { Editor } from "../src/components/editor.js";
+import { Editor, wordWrapLine } from "../src/components/editor.js";
 import { TUI } from "../src/tui.js";
 import { visibleWidth } from "../src/utils.js";
 import { defaultEditorTheme } from "./test-themes.js";
@@ -698,6 +698,94 @@ describe("Editor component", () => {
 			const contentLine = stripVTControlCharacters(lines[1]!);
 			assert.ok(contentLine.includes("1234567890"), "Content should contain the word");
 		});
+
+		it("wraps word to next line when it ends exactly at terminal width", () => {
+			// "hello " (6) + "world" (5) = 11 fills the width exactly, so the space after "world"
+			// overflows. The break falls at the last wrap opportunity: "hello " stays on line 1.
+			const chunks = wordWrapLine("hello world test", 11);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "hello ");
+			assert.strictEqual(chunks[1]!.text, "world test");
+		});
+
+		it("keeps whitespace at terminal width boundary on same line", () => {
+			// "hello world " is exactly 12 chars (including trailing space)
+			// The space at position 12 should stay on the first line
+			const chunks = wordWrapLine("hello world test", 12);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "hello world ");
+			assert.strictEqual(chunks[1]!.text, "test");
+		});
+
+		it("handles unbreakable word filling width exactly followed by space", () => {
+			const chunks = wordWrapLine("aaaaaaaaaaaa aaaa", 12);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "aaaaaaaaaaaa");
+			assert.strictEqual(chunks[1]!.text, " aaaa");
+		});
+
+		it("wraps word to next line when it fits width but not remaining space", () => {
+			const chunks = wordWrapLine("      aaaaaaaaaaaa", 12);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "      ");
+			assert.strictEqual(chunks[1]!.text, "aaaaaaaaaaaa");
+		});
+
+		it("keeps word with multi-space and following word together when they fit", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,    consectetur", 30);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,    consectetur");
+		});
+
+		it("keeps word with multi-space and following word when they fill width exactly", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,              consectetur", 30);
+
+			assert.strictEqual(chunks.length, 2);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,              consectetur");
+		});
+
+		it("splits when word plus multi-space plus word exceeds width", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,               consectetur", 30);
+
+			assert.strictEqual(chunks.length, 3);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,               ");
+			assert.strictEqual(chunks[2]!.text, "consectetur");
+		});
+
+		it("breaks long whitespace at line boundary", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,                         consectetur", 30);
+
+			assert.strictEqual(chunks.length, 3);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,                         ");
+			assert.strictEqual(chunks[2]!.text, "consectetur");
+		});
+
+		it("breaks long whitespace at line boundary 2", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,                          consectetur", 30);
+
+			assert.strictEqual(chunks.length, 3);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,                         ");
+			assert.strictEqual(chunks[2]!.text, " consectetur");
+		});
+
+		it("breaks whitespace spanning full lines", () => {
+			const chunks = wordWrapLine("Lorem ipsum dolor sit amet,                                     consectetur", 30);
+
+			assert.strictEqual(chunks.length, 3);
+			assert.strictEqual(chunks[0]!.text, "Lorem ipsum dolor sit ");
+			assert.strictEqual(chunks[1]!.text, "amet,                         ");
+			assert.strictEqual(chunks[2]!.text, "            consectetur");
+		});
 	});

 	describe("Kill ring", () => {