diff --git a/.claude/commands/veet-generate.md b/.claude/commands/veet-generate.md
index 70e2059..f746403 100644
--- a/.claude/commands/veet-generate.md
+++ b/.claude/commands/veet-generate.md
@@ -72,7 +72,7 @@ def {function_name}({params}: {types}) -> {return_type}:
 
 ### Step 5: Write tests.py (CONSISTENT FORMAT)
 
-Follow this EXACT format for all tests:
+Follow this EXACT format for all tests. **CRITICAL: Use single-line assertions so the TUI can show inputs!**
 
 ```python
 """Tests for {problem_name}."""
@@ -85,15 +85,16 @@ class TestBasicCases:
 
     def test_example_one(self):
         """Test first example from problem description."""
-        assert {function_name}(...) == ...
+        # ALWAYS use single-line assert with function call inline
+        assert {function_name}(input1, input2) == expected_output
 
     def test_example_two(self):
         """Test second example from problem description."""
-        assert {function_name}(...) == ...
+        assert {function_name}(input1, input2) == expected_output
 
     def test_typical_case(self):
         """Test another common case."""
-        assert {function_name}(...) == ...
+        assert {function_name}(input1, input2) == expected_output
 
 
 class TestEdgeCases:
@@ -101,17 +102,23 @@ class TestEdgeCases:
 
     def test_empty_input(self):
         """Test with empty or minimal input."""
-        assert {function_name}(...) == ...
+        assert {function_name}([]) == []  # or appropriate empty case
 
     def test_single_element(self):
         """Test with single element input."""
-        assert {function_name}(...) == ...
+        assert {function_name}([1]) == expected
 
     def test_boundary_values(self):
         """Test boundary conditions."""
-        assert {function_name}(...) == ...
+        assert {function_name}(boundary_input) == expected
 
 
+# IMPORTANT TEST FORMAT RULES:
+# 1. ALWAYS use single-line assertions: assert func(args) == expected
+# 2. NEVER use: result = func(); assert result == expected (hides inputs in TUI)
+# 3. If output order doesn't matter, wrap in sorted(): assert sorted(func(...)) == sorted([...])
+# 4. Keep assertions simple - avoid 'or' conditions, use separate tests instead
+#
 # Test count by difficulty:
 # Easy: 4-5 tests (2 basic, 2-3 edge)
 # Medium: 6-8 tests (3 basic, 3-5 edge)
diff --git a/problems/easy/palindrome-checker/tests.py b/problems/easy/palindrome-checker/tests.py
index 63bda68..62b5449 100644
--- a/problems/easy/palindrome-checker/tests.py
+++ b/problems/easy/palindrome-checker/tests.py
@@ -6,39 +6,31 @@ from solution import is_palindrome
 class TestBasicCases:
     """Test basic functionality with typical inputs."""
 
+    def test_example_one(self):
+        """Test first example from problem description."""
+        assert is_palindrome("A man, a plan, a canal: Panama") == True
+
+    def test_example_two(self):
+        """Test second example from problem description."""
+        assert is_palindrome("race a car") == False
+
     def test_simple_palindrome(self):
         """Test basic palindrome word."""
         assert is_palindrome("racecar") == True
 
-    def test_sentence_palindrome(self):
-        """Test palindrome with spaces and punctuation."""
-        assert is_palindrome("A man, a plan, a canal: Panama") == True
-
-    def test_not_palindrome(self):
-        """Test non-palindrome string."""
-        assert is_palindrome("hello") == False
-
 
 class TestEdgeCases:
     """Test edge cases and boundary conditions."""
 
-    def test_empty_string(self):
-        """Test with empty input."""
+    def test_empty_input(self):
+        """Test with empty or minimal input."""
         assert is_palindrome("") == True
 
-    def test_single_character(self):
-        """Test with single character."""
+    def test_single_element(self):
+        """Test with single element input."""
         assert is_palindrome("a") == True
 
     def test_only_spaces(self):
         """Test with only whitespace."""
         assert is_palindrome("   ") == True
 
-    def test_mixed_case(self):
-        """Test case insensitivity."""
-        assert is_palindrome("RaceCar") == True
-
-    def test_numbers_in_string(self):
-        """Test with numbers."""
-        assert is_palindrome("12321") == True
-
diff --git a/problems/hard/lru-cache/tests.py b/problems/hard/lru-cache/tests.py
index c3655a3..01abae3 100644
--- a/problems/hard/lru-cache/tests.py
+++ b/problems/hard/lru-cache/tests.py
@@ -6,6 +6,24 @@ from solution import LRUCache
 class TestBasicCases:
     """Test basic functionality with typical inputs."""
 
+    def test_example_one(self):
+        """Test first example from problem description."""
+        cache = LRUCache(2)
+        cache.put("a", 1)
+        cache.put("b", 2)
+        assert cache.get("a") == 1
+        cache.put("c", 3)
+        assert cache.get("b") == -1
+        assert cache.get("c") == 3
+
+    def test_example_two(self):
+        """Test second example from problem description."""
+        cache = LRUCache(1)
+        cache.put("x", 10)
+        cache.put("y", 20)
+        assert cache.get("x") == -1
+        assert cache.get("y") == 20
+
     def test_basic_put_get(self):
         """Test basic put and get operations."""
         cache = LRUCache(2)
@@ -14,24 +32,6 @@ class TestBasicCases:
         assert cache.get("a") == 1
         assert cache.get("b") == 2
 
-    def test_eviction_lru(self):
-        """Test that least recently used is evicted."""
-        cache = LRUCache(2)
-        cache.put("a", 1)
-        cache.put("b", 2)
-        cache.get("a")        # a is now most recent
-        cache.put("c", 3)     # b should be evicted
-        assert cache.get("b") == -1
-        assert cache.get("a") == 1
-        assert cache.get("c") == 3
-
-    def test_update_existing_key(self):
-        """Test updating an existing key."""
-        cache = LRUCache(2)
-        cache.put("a", 1)
-        cache.put("a", 10)
-        assert cache.get("a") == 10
-
 
 class TestEdgeCases:
     """Test edge cases and boundary conditions."""
diff --git a/problems/hard/rate-limiter/tests.py b/problems/hard/rate-limiter/tests.py
index 2331e9e..ca39ac9 100644
--- a/problems/hard/rate-limiter/tests.py
+++ b/problems/hard/rate-limiter/tests.py
@@ -6,19 +6,22 @@ from solution import RateLimiter
 class TestBasicCases:
     """Test basic functionality with typical inputs."""
 
-    def test_allow_within_limit(self):
-        """Test requests within the limit are allowed."""
+    def test_example_one(self):
+        """Test first example from problem description."""
         limiter = RateLimiter(max_requests=3, window_seconds=60)
         assert limiter.allow_request("user1", 0) == True
         assert limiter.allow_request("user1", 30) == True
         assert limiter.allow_request("user1", 45) == True
+        assert limiter.allow_request("user1", 50) == False
+        assert limiter.allow_request("user1", 61) == True
 
-    def test_block_over_limit(self):
-        """Test requests over limit are blocked."""
-        limiter = RateLimiter(max_requests=2, window_seconds=60)
+    def test_example_two(self):
+        """Test second example from problem description."""
+        limiter = RateLimiter(max_requests=2, window_seconds=10)
         assert limiter.allow_request("user1", 0) == True
-        assert limiter.allow_request("user1", 30) == True
-        assert limiter.allow_request("user1", 45) == False
+        assert limiter.allow_request("user2", 0) == True
+        assert limiter.allow_request("user1", 5) == True
+        assert limiter.allow_request("user1", 8) == False
 
     def test_multiple_users_independent(self):
         """Test each user has independent limits."""
diff --git a/problems/medium/group-transactions/tests.py b/problems/medium/group-transactions/tests.py
index 32d8cbd..ba83dde 100644
--- a/problems/medium/group-transactions/tests.py
+++ b/problems/medium/group-transactions/tests.py
@@ -43,8 +43,7 @@ class TestEdgeCases:
             {"amount": 10, "category": "Food", "date": "2024-01-01"},
             {"amount": 20, "category": "food", "date": "2024-01-02"}
         ]
-        result = group_transactions(txns)
-        assert result == {"Food": 10, "food": 20}
+        assert group_transactions(txns) == {"Food": 10, "food": 20}
 
     def test_many_categories(self):
         """Test with many different categories."""
diff --git a/problems/medium/word-frequency/tests.py b/problems/medium/word-frequency/tests.py
index eebf55c..dca7427 100644
--- a/problems/medium/word-frequency/tests.py
+++ b/problems/medium/word-frequency/tests.py
@@ -6,47 +6,39 @@ from solution import top_words
 class TestBasicCases:
     """Test basic functionality with typical inputs."""
 
-    def test_basic_frequency(self):
-        """Test basic word counting."""
-        result = top_words("the quick brown fox jumps over the lazy dog the fox", 2)
-        assert result == [("the", 3), ("fox", 2)]
+    def test_example_one(self):
+        """Test first example from problem description."""
+        assert top_words("the quick brown fox jumps over the lazy dog the fox", 2) == [("the", 3), ("fox", 2)]
 
-    def test_all_unique(self):
-        """Test when all words are unique."""
-        result = top_words("one two three", 2)
-        assert result == [("one", 1), ("three", 1)] or result == [("one", 1), ("two", 1)]
+    def test_example_two(self):
+        """Test second example from problem description."""
+        assert top_words("hello world hello", 5) == [("hello", 2), ("world", 1)]
 
     def test_single_word_repeated(self):
         """Test with one word repeated."""
-        result = top_words("hello hello hello", 1)
-        assert result == [("hello", 3)]
+        assert top_words("hello hello hello", 1) == [("hello", 3)]
 
 
 class TestEdgeCases:
     """Test edge cases and boundary conditions."""
 
-    def test_empty_string(self):
-        """Test with empty input."""
-        result = top_words("", 5)
-        assert result == []
+    def test_empty_input(self):
+        """Test with empty or minimal input."""
+        assert top_words("", 5) == []
 
     def test_n_greater_than_unique_words(self):
         """Test when n exceeds unique word count."""
-        result = top_words("hello world", 10)
-        assert len(result) == 2
+        assert len(top_words("hello world", 10)) == 2
 
     def test_case_insensitive(self):
         """Test that counting is case-insensitive."""
-        result = top_words("Hello HELLO hello", 1)
-        assert result == [("hello", 3)]
+        assert top_words("Hello HELLO hello", 1) == [("hello", 3)]
 
     def test_punctuation_ignored(self):
         """Test that punctuation is stripped."""
-        result = top_words("hello, world! hello.", 1)
-        assert result == [("hello", 2)]
+        assert top_words("hello, world! hello.", 1) == [("hello", 2)]
 
     def test_alphabetical_tiebreaker(self):
         """Test alphabetical ordering for same frequency."""
-        result = top_words("cat bat ant", 3)
-        assert result == [("ant", 1), ("bat", 1), ("cat", 1)]
+        assert top_words("cat bat ant", 3) == [("ant", 1), ("bat", 1), ("cat", 1)]
 
diff --git a/veetcode/app.py b/veetcode/app.py
index d8f0334..976abab 100644
--- a/veetcode/app.py
+++ b/veetcode/app.py
@@ -40,6 +40,9 @@ class TestCase:
     name: str
     passed: bool
     error: str = ""
+    input_line: str = ""  # The function call with inputs
+    expected: str = ""
+    actual: str = ""
 
 
 @dataclass
@@ -131,15 +134,15 @@ def analyze_solution(solution_file: Path) -> SolutionStats:
 
 
 def parse_pytest_output(output: str) -> tuple[list[TestCase], float]:
-    """Parse pytest output to extract test results."""
+    """Parse pytest output to extract test results with detailed failure info."""
     test_cases: list[TestCase] = []
     total_time = 0.0
 
     # Strip ANSI codes for reliable parsing
     clean = strip_ansi(output)
 
-    # Match: tests.py::test_name PASSED or FAILED
-    for match in re.finditer(r"tests\.py::(\w+)\s+(PASSED|FAILED)", clean):
+    # Match: tests.py::ClassName::test_name or tests.py::test_name PASSED/FAILED
+    for match in re.finditer(r"tests\.py::(?:\w+::)?(\w+)\s+(PASSED|FAILED)", clean):
         test_cases.append(TestCase(
             name=match.group(1),
             passed=match.group(2) == "PASSED"
@@ -150,17 +153,73 @@ def parse_pytest_output(output: str) -> tuple[list[TestCase], float]:
     if time_match:
         total_time = float(time_match.group(1)) * 1000
 
-    # Extract errors for failed tests
-    for match in re.finditer(r"FAILED tests\.py::(\w+)\s*-\s*(\w+:.*?)(?=\n|$)", clean):
-        test_name, error = match.group(1), match.group(2).strip()[:100]
+    # Parse failure blocks for detailed info
+    # Split by test failure headers like "_______ TestClass.test_name _______"
+    failure_blocks = re.split(r"_{10,}\s+[\w.]+\s+_{10,}", clean)
+    
+    for block in failure_blocks[1:]:  # Skip the part before first failure
+        # Extract test name from the block
+        name_match = re.search(r"in\s+(\w+)\n", block)
+        if not name_match:
+            continue
+        test_name = name_match.group(1)
+        
+        # Find the input/function call from the test
+        # Look for either: "assert func(...) == ..." OR "result = func(...)" pattern
+        input_line = ""
+        lines_list = block.split('\n')
+        for i, line in enumerate(lines_list):
+            stripped = line.strip()
+            # Skip E lines (pytest error output)
+            if stripped.startswith('E'):
+                continue
+            # Pattern 1: Single-line assertion with function call
+            # e.g., "assert two_sum([2,7], 9) == [0,1]"
+            if stripped.startswith('assert') and '(' in stripped and '==' in stripped:
+                # Extract the function call part (left side of ==)
+                match = re.match(r'assert\s+(.+?)\s*==', stripped)
+                if match:
+                    call = match.group(1).strip()
+                    # Unwrap sorted/list/set/tuple wrappers to show actual call
+                    inner = re.search(r'(?:sorted|list|set|tuple)\((.+)\)$', call)
+                    input_line = inner.group(1) if inner else call
+                    break
+            # Pattern 2: Two-line format - "result = func(...)" followed by "assert result"
+            if '=' in stripped and not stripped.startswith('assert') and '(' in stripped:
+                # This looks like "result = func(...)"
+                match = re.search(r'=\s*(.+\(.*\))', stripped)
+                if match:
+                    input_line = match.group(1).strip()
+                    break
+        
+        # Extract actual and expected from AssertionError/assert lines
+        actual = ""
+        expected = ""
+        
+        # Use the first pytest "E" line containing "assert X == Y" (an "AssertionError:" prefix is fine)
+        for line in block.split('\n'):
+            if 'assert' in line and '==' in line and line.strip().startswith('E'):
+                # Split on == to get actual and expected
+                parts = line.split('==', 1)
+                # actual is after "assert " and before ==
+                actual_match = re.search(r'assert\s+(.+)$', parts[0])
+                if actual_match:
+                    actual = actual_match.group(1).strip()
+                if len(parts) > 1:
+                    expected = parts[1].strip()
+                break
+        
+        # Update the matching test case
         for tc in test_cases:
             if tc.name == test_name and not tc.passed:
-                tc.error = error
+                tc.input_line = input_line[:200]  # Truncate if too long
+                tc.expected = expected[:100]
+                tc.actual = actual[:100]
+                tc.error = f"expected {expected}, got {actual}" if expected else ""
                 break
 
     # Fallback: parse summary line for counts if no individual tests found
     if not test_cases:
-        # Match "1 passed" or "2 failed" etc
         passed_match = re.search(r"(\d+)\s+passed", clean)
         failed_match = re.search(r"(\d+)\s+failed", clean)
         if passed_match or failed_match:
@@ -376,26 +435,44 @@ class WatchScreen(Screen):
         # Status line
         time_str = f"{result.execution_time_ms:.0f}ms"
         if result.passed:
-            status = f"✓ {result.total} passed ({time_str})"
+            status = f"[green]✓ {result.total} passed[/green] ({time_str})"
             self.solved.add(self.problem.name)
             save_solved(self.solved_file, self.solved)
         else:
-            status = f"✗ {result.failed_count}/{result.total} failed ({time_str})"
+            status = f"[red]✗ {result.failed_count}/{result.total} failed[/red] ({time_str})"
         self.query_one("#summary-status", Static).update(status)
 
-        # Test list
+        # Test list with clean separation and input display
         lines = []
-        for i, tc in enumerate(result.test_cases, 1):
-            mark = "✓" if tc.passed else "✗"
-            lines.append(f"  {mark} {tc.name}")
-            if tc.error:
-                lines.append(f"    {tc.error[:80]}")
+        for tc in result.test_cases:
+            if tc.passed:
+                lines.append(f"[green]✓[/green] {tc.name}")
+            else:
+                lines.append(f"[red]✗[/red] [bold]{tc.name}[/bold]")
+                # Show input/expected/actual for failed tests
+                if tc.input_line:
+                    lines.append(f"  [dim]Input:[/dim]    {tc.input_line}")
+                if tc.expected:
+                    lines.append(f"  [dim]Expected:[/dim] [green]{tc.expected}[/green]")
+                if tc.actual:
+                    lines.append(f"  [dim]Got:[/dim]      [red]{tc.actual}[/red]")
+                lines.append("")  # Blank line for separation
+        
+        # Remove trailing blank line(s) left by the separator after a failed test
+        while lines and lines[-1] == "":
+            lines.pop()
+        
         self.query_one("#test-summary", Static).update("\n".join(lines) or "No tests")
 
-        # Verbose output
+        # Verbose output with clean formatting
         out = self.query_one("#output", RichLog)
         out.clear()
-        out.write(result.output or "No output")
+        if result.output:
+            # Separator rule: a single [dim]...[/dim] pair around 60 rule characters
+            out.write("[dim]" + "─" * 60 + "[/dim]")
+            out.write(result.output)
+        else:
+            out.write("No output")
 
     def action_back(self) -> None:
         self.stop_watcher()
diff --git a/veetcode/veetcode.tcss b/veetcode/veetcode.tcss
index ab2fb04..e7561e6 100644
--- a/veetcode/veetcode.tcss
+++ b/veetcode/veetcode.tcss
@@ -62,15 +62,15 @@ OptionList > .option-list--option-highlighted {
 /* Info Row */
 #info-row {
     width: 100%;
-    height: auto;
-    min-height: 6;
-    max-height: 14;
+    height: 40%;
+    min-height: 8;
+    max-height: 20;
     margin: 0 0 1 0;
 }
 
 /* Summary Pane - Left */
 #summary-pane {
-    width: 2fr;
+    width: 3fr;
     height: 100%;
     border: solid $primary-muted;
     padding: 0;
@@ -89,9 +89,9 @@ OptionList > .option-list--option-highlighted {
 
 #summary-scroll {
     width: 100%;
-    height: auto;
-    max-height: 8;
+    height: 1fr;
     padding: 0 1;
+    overflow-y: auto;
 }
 
 #test-summary {
@@ -101,7 +101,9 @@ OptionList > .option-list--option-highlighted {
 
 /* Stats Pane - Right */
 #stats-pane {
-    width: 1fr;
+    width: auto;
+    min-width: 30;
+    max-width: 35;
     height: 100%;
     border: solid $primary-muted;
     padding: 0;