# url-shortner/app/encoding.py
#
# 52 lines
# 1.3 KiB
# Python

"""
Base62 encoding for short URL generation.
- Uses 0-9, A-Z, a-z (62 characters)
- URL-safe (no special characters)
- More compact than hex (base16) or base64
Length vs Capacity:
- 6 chars: 62^6 = 56.8 billion unique URLs
- 7 chars: 62^7 = 3.5 trillion unique URLs
"""
# Base62 alphabet; a character's position in this string is its digit value.
# Written as three adjacent literals (digits, uppercase, lowercase) that the
# parser concatenates into a single string.
CHARSET = (
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)
# Radix used by the encoder/decoder; derived from the alphabet so the two
# cannot drift apart (evaluates to 62).
BASE = len(CHARSET)
def base62_encode(num: int) -> str:
    """
    Convert a non-negative integer into its base62 text form.

    Digits are drawn from CHARSET and emitted most-significant first.
    Zero encodes to the single character CHARSET[0].

    Raises:
        ValueError: if ``num`` is negative.
    """
    if num < 0:
        raise ValueError("Cannot encode negative numbers")
    if num == 0:
        return CHARSET[0]

    digits = []
    remaining = num
    while remaining:
        # Peel off the least-significant base62 digit on each pass.
        remaining, digit_value = divmod(remaining, BASE)
        digits.append(CHARSET[digit_value])

    # Digits were collected least-significant first; flip to reading order.
    digits.reverse()
    return "".join(digits)
def base62_decode(encoded: str) -> int:
    """
    Convert a base62 string back into the integer it represents.

    Characters are read left to right as most-significant digit first,
    with each character's value being its position in CHARSET.

    Raises:
        ValueError: if ``encoded`` is empty, or contains a character that
            is not part of CHARSET.
    """
    if not encoded:
        raise ValueError("Cannot decode empty string")

    total = 0
    for char in encoded:
        # str.find returns -1 when the character is absent from the alphabet.
        digit_value = CHARSET.find(char)
        if digit_value == -1:
            raise ValueError(f"Invalid character: {char}")
        total = total * BASE + digit_value
    return total
def pad_to_length(encoded: str, length: int = 7) -> str:
    """
    Left-pad ``encoded`` with "0" characters up to ``length`` characters.

    Strings that are already ``length`` characters or longer are returned
    unchanged; nothing is ever truncated.

    Padding is done with ``str.zfill``, which inserts the zeros after a
    leading "+" or "-" sign if one is present.
    """
    if len(encoded) >= length:
        return encoded
    # Input is strictly shorter than ``length`` here, so zfill yields a
    # string of exactly ``length`` characters.
    return encoded.zfill(length)