This commit is contained in:
Harivansh Rathi 2025-12-14 17:15:37 -05:00
parent 76062021e9
commit c548626d7f
13 changed files with 570 additions and 1 deletions

View file

@ -0,0 +1,32 @@
"""
Word Frequency Counter
You're building a text analytics tool for a content marketing team.
Given a block of text, return the top N most frequently used words,
sorted by frequency (highest first), then alphabetically for ties.
Example 1:
Input: text = "the quick brown fox jumps over the lazy dog the fox"
n = 2
Output: [("the", 3), ("fox", 2)]
Explanation: "the" appears 3 times, "fox" appears 2 times
Example 2:
Input: text = "hello world hello"
n = 5
Output: [("hello", 2), ("world", 1)]
Explanation: Only 2 unique words, return all of them
Constraints:
- Words are separated by whitespace
- Case-insensitive (convert to lowercase)
- Ignore punctuation attached to words (e.g. "hello," and "hello." both count as "hello")
- n >= 1
- If fewer than n unique words exist, return all of them
"""
def top_words(text: str, n: int) -> list[tuple[str, int]]:
    """Return the top n words by frequency as a list of (word, count) tuples.

    Words are split on whitespace, lowercased, and stripped of leading and
    trailing ASCII punctuation. Results are ordered by descending count,
    with alphabetical order breaking ties.

    Args:
        text: The block of text to analyze.
        n: Maximum number of (word, count) pairs to return.

    Returns:
        Up to n (word, count) tuples. Fewer are returned when the text has
        fewer than n unique words; an empty list is returned for text with
        no words or for a non-positive n.
    """
    # Function-scope imports keep this block self-contained.
    from collections import Counter
    from string import punctuation

    # Guard explicitly: a negative n would otherwise slice from the end.
    if n <= 0:
        return []

    stripped = (token.strip(punctuation) for token in text.lower().split())
    # Tokens made only of punctuation (e.g. "--") strip down to "" and are
    # dropped rather than counted as a word.
    counts = Counter(word for word in stripped if word)

    # Negating the count sorts highest-first while the word itself keeps
    # ascending (alphabetical) order among equal counts.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:n]

View file

@ -0,0 +1,52 @@
"""Tests for word-frequency."""
import pytest
from solution import top_words
class TestBasicCases:
    """Test basic functionality with typical inputs."""

    def test_basic_frequency(self):
        """Test basic word counting."""
        result = top_words("the quick brown fox jumps over the lazy dog the fox", 2)
        assert result == [("the", 3), ("fox", 2)]

    def test_all_unique(self):
        """Test when all words are unique; ties must resolve alphabetically."""
        result = top_words("one two three", 2)
        # Alphabetically "one" < "three" < "two", so "two" is the word cut
        # off by n = 2. Accepting ("two", 1) here would let an
        # insertion-ordered implementation pass despite violating the
        # tie-break rule.
        assert result == [("one", 1), ("three", 1)]

    def test_single_word_repeated(self):
        """Test with one word repeated."""
        result = top_words("hello hello hello", 1)
        assert result == [("hello", 3)]
class TestEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_empty_string(self):
        """Test with empty input."""
        result = top_words("", 5)
        assert result == []

    def test_n_greater_than_unique_words(self):
        """Test when n exceeds unique word count."""
        result = top_words("hello world", 10)
        # Pin the full contents, not just the length, so wrong counts or
        # ordering cannot slip through.
        assert result == [("hello", 1), ("world", 1)]

    def test_case_insensitive(self):
        """Test that counting is case-insensitive."""
        result = top_words("Hello HELLO hello", 1)
        assert result == [("hello", 3)]

    def test_punctuation_ignored(self):
        """Test that punctuation is stripped."""
        result = top_words("hello, world! hello.", 1)
        assert result == [("hello", 2)]

    def test_alphabetical_tiebreaker(self):
        """Test alphabetical ordering for same frequency."""
        result = top_words("cat bat ant", 3)
        assert result == [("ant", 1), ("bat", 1), ("cat", 1)]