# Reverse proxy in front of the `api` service: load-balances requests across
# the upstream, rate-limits clients per IP address, and exposes a local
# health-check endpoint that does not touch the backend.

events {
    worker_connections 1024;
}

http {
    # Upstream API servers.
    upstream api_servers {
        # least_conn sends each request to the server with the fewest active
        # connections. Alternatives: ip_hash, random. Round-robin is the
        # default and is selected by omitting a balancing directive.
        least_conn;

        # Docker Compose service name. nginx resolves this hostname when the
        # configuration is loaded; if it resolves to several addresses, each
        # one becomes an upstream server. After scaling the service, reload
        # nginx so the new set of addresses is picked up.
        server api:8000;

        # When scaling manually, list the servers explicitly instead:
        # server api_1:8000;
        # server api_2:8000;
        # server api_3:8000;
    }

    # Rate limiting zone: 10 requests per second per client IP, with state
    # kept in a 10 MB shared memory zone named api_limit.
    limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;

    server {
        listen 80;
        server_name localhost;

        # Health check endpoint, answered by nginx itself.
        location /health {
            access_log off;

            # `return` with a body already emits a Content-Type header taken
            # from default_type. Setting default_type here (rather than using
            # add_header, which appends a second Content-Type header) yields
            # exactly one `Content-Type: text/plain`.
            default_type text/plain;
            return 200 "OK\n";
        }

        # API endpoints.
        location / {
            # Apply the per-IP limit. Up to 20 requests above the rate are
            # served immediately (nodelay); anything beyond that is rejected.
            limit_req zone=api_limit burst=20 nodelay;

            proxy_pass http://api_servers;
            proxy_http_version 1.1;

            # Pass client info to the backend.
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeouts for connecting to, sending to, and reading from the
            # upstream.
            proxy_connect_timeout 5s;
            proxy_send_timeout 10s;
            proxy_read_timeout 10s;
        }
    }
}