This commit is contained in:
Harivansh Rathi 2026-01-23 15:06:41 -05:00
commit 9d85ca1ebb
9 changed files with 2928 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

10
.gitignore vendored Normal file
View file

@ -0,0 +1,10 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.10

170
README.md Normal file
View file

@ -0,0 +1,170 @@
# Audio Separator API
REST API for separating audio into vocal and instrumental stems using ML models.
## Quick Start
```bash
# Clone and install
git clone <repo-url>
cd sep
chmod +x install.sh test.sh
sudo ./install.sh
# Run tests
./test.sh
# Start the API
.venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000
```
## Requirements
- Python 3.10+
- FFmpeg
- 10GB+ disk space (for models)
- NVIDIA GPU with CUDA (optional, but recommended)
## API Endpoints
### Health Check
```bash
curl http://localhost:8000/health
```
Response:
```json
{
"status": "healthy",
"cuda_available": true,
"cuda_device": "NVIDIA GeForce RTX 5090"
}
```
### Separate Audio
```bash
curl -X POST "http://localhost:8000/separate?output_format=mp3" \
-F "file=@song.mp3"
```
Response:
```json
{
"job_id": "a1b2c3d4",
"status": "completed",
"vocals_url": "/download/song_(Vocals)_model_bs_roformer.mp3",
"instrumental_url": "/download/song_(Instrumental)_model_bs_roformer.mp3"
}
```
### Download Stems
```bash
curl -O "http://localhost:8000/download/song_(Vocals)_model_bs_roformer.mp3"
```
### List Models
```bash
curl http://localhost:8000/models
```
## Configuration
### Output Formats
- `mp3` (default) - Good compression, iOS compatible
- `wav` - Lossless, larger files
- `flac` - Lossless compression
### Models
| Model | Quality | Speed | Best For |
|-------|---------|-------|----------|
| BS-RoFormer (default) | Highest | Slow | Production use |
| UVR_MDXNET_KARA_2 | Good | Fast | Karaoke |
| Kim_Vocal_2 | Good | Medium | Vocal isolation |
## VM Deployment
### Using systemd (Linux)
The install script creates a systemd service:
```bash
sudo systemctl enable audio-separator
sudo systemctl start audio-separator
sudo systemctl status audio-separator
```
### Manual Start
```bash
.venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --workers 1
```
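Note: Use `--workers 1`. Each uvicorn worker is a separate process that loads its own copy of the model, and the API keeps a single shared separator instance per process that is not meant to handle several separations at once.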
## GPU Support
The API automatically detects CUDA GPUs. To verify:
```bash
./test.sh
```
Look for:
```
[PASS] CUDA available: NVIDIA GeForce RTX 5090 (32.0GB VRAM)
```
### CUDA Installation (Ubuntu)
```bash
# Add NVIDIA repo
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-toolkit-12-1
```
## iOS Integration
The API returns MP3 files by default, which are natively supported on iOS.
Example Swift code:
```swift
func separateAudio(fileURL: URL) async throws -> (vocals: URL, instrumental: URL) {
var request = URLRequest(url: URL(string: "http://your-vm:8000/separate")!)
request.httpMethod = "POST"
// Upload file and get response with download URLs
// ...
}
```
## File Cleanup
Uploaded and output files are automatically deleted after 5 minutes.
## Troubleshooting
### "CUDA not available"
1. Check NVIDIA drivers: `nvidia-smi`
2. Reinstall PyTorch with CUDA:
```bash
uv pip install torch --index-url https://download.pytorch.org/whl/cu121
```
### "Model download failed"
Check network access to huggingface.co and github.com.
### "Out of memory"
Reduce batch size or use a smaller model like `UVR_MDXNET_KARA_2`.

241
app.py Normal file
View file

@ -0,0 +1,241 @@
"""
Audio Separator API
Simple FastAPI service for stem separation using audio-separator
"""
import os
import uuid
import shutil
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, UploadFile, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
# ASGI application object served by uvicorn as "app:app".
app = FastAPI(
    version="1.0.0",
    title="Audio Separator API",
    description="Separate audio into vocal and instrumental stems using ML models",
)

# Working directories: uploaded inputs, separated stems, and model files.
UPLOAD_DIR = Path("/tmp/audio-separator/uploads")
OUTPUT_DIR = Path("/tmp/audio-separator/outputs")
MODEL_DIR = Path("/tmp/audio-separator/models")

# Created at import time so the request handlers can assume they exist.
for _storage_dir in (UPLOAD_DIR, OUTPUT_DIR, MODEL_DIR):
    _storage_dir.mkdir(parents=True, exist_ok=True)
# Shared Separator instance for this process; stays None until get_separator() builds it.
_separator = None


def get_separator():
    """Return the process-wide Separator, creating it on the first call.

    The audio_separator and torch imports happen here, on first use, rather than at
    module import time. Problems while probing CUDA are printed, not raised.
    """
    global _separator
    if _separator is not None:
        return _separator

    from audio_separator.separator import Separator

    # Ask torch whether a CUDA device is usable and print what was found.
    cuda_enabled = False
    try:
        import torch

        cuda_enabled = torch.cuda.is_available()
        status_line = (
            f"CUDA available: {torch.cuda.get_device_name(0)}"
            if cuda_enabled
            else "CUDA not available, using CPU"
        )
        print(status_line)
    except Exception as probe_error:
        print(f"Error checking CUDA: {probe_error}")

    # NOTE(review): confirm that the pinned audio-separator release still accepts the
    # `use_cuda` keyword; if it does not, this constructor call raises TypeError.
    _separator = Separator(
        output_dir=str(OUTPUT_DIR),
        model_file_dir=str(MODEL_DIR),
        use_cuda=cuda_enabled,
        output_format="mp3",
    )
    return _separator
# Separation options: output format (defaults to "mp3") and an optional model override.
# No endpoint visible in this file references this model.
class SeparationRequest(BaseModel):
    output_format: str | None = "mp3"
    model_name: str | None = None
# Body returned by POST /separate: job id, status text, and the download URL of each
# stem (None when that stem was not identified).
class SeparationResponse(BaseModel):
    job_id: str
    status: str
    vocals_url: str | None = None
    instrumental_url: str | None = None
    message: str | None = None
# Body returned by GET /health; cuda_device is only filled in when CUDA is available.
class HealthResponse(BaseModel):
    status: str
    cuda_available: bool
    cuda_device: str | None = None
def cleanup_files(file_paths: list[str], delay_seconds: int = 300):
    """Wait ``delay_seconds``, then delete every path in ``file_paths`` that exists.

    The call blocks for the whole delay. Paths that are already gone are skipped, and a
    failure to delete one path is printed without stopping the remaining deletions.
    """
    from time import sleep

    sleep(delay_seconds)
    for target in file_paths:
        try:
            if not os.path.exists(target):
                continue
            os.remove(target)
        except Exception as exc:
            print(f"Error cleaning up {target}: {exc}")
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Check API health and CUDA availability."""
    cuda_available = False
    cuda_device = None
    try:
        import torch

        cuda_available = torch.cuda.is_available()
        if cuda_available:
            cuda_device = torch.cuda.get_device_name(0)
    except ImportError:
        # torch is not importable: report "no CUDA" and keep the endpoint usable.
        pass
    except Exception as e:
        # Anything else points at a broken CUDA/driver setup. Print it (same message as
        # get_separator) instead of hiding it, but still answer the health probe.
        print(f"Error checking CUDA: {e}")
    return HealthResponse(
        status="healthy",
        cuda_available=cuda_available,
        cuda_device=cuda_device,
    )
def _pick_stems(output_files, input_stem):
    """Return ``(vocals, instrumental)`` chosen from *output_files*.

    Each entry is matched on its file name only, after dropping a leading *input_stem*
    (case-insensitively), so words in the uploaded file's own name (for example a song
    called "vocals.mp3") cannot decide the label. A stem that is not found is ``None``.
    """
    # NOTE(review): assumes output names start with the input file's stem; when they do
    # not, removeprefix() is a no-op and the whole file name is matched, as before.
    prefix = input_stem.lower()
    vocals = None
    instrumental = None
    for output_file in output_files:
        label = Path(output_file).name.lower().removeprefix(prefix)
        if "vocal" in label:
            vocals = output_file
        elif "instrum" in label:
            instrumental = output_file
    return vocals, instrumental


@app.post("/separate", response_model=SeparationResponse)
async def separate_audio(
    file: UploadFile,
    background_tasks: BackgroundTasks,
    output_format: str = "mp3",
    model_name: Optional[str] = None,
):
    """
    Separate audio file into vocal and instrumental stems.

    - **file**: Audio file (mp3, wav, flac, m4a, etc.)
    - **output_format**: Output format (mp3, wav, flac) - default: mp3
    - **model_name**: Model to use (optional, uses default if not specified)

    Returns URLs to download the separated stems.
    """
    # NOTE: the separator calls below are synchronous and run inside this coroutine, so
    # nothing else is served by this process until the separation finishes.
    job_id = str(uuid.uuid4())[:8]

    # Validate file
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    # The filename is chosen by the client: keep only its last path component so a name
    # like "../../x.mp3" cannot place the upload outside UPLOAD_DIR.
    upload_name = Path(file.filename).name
    allowed_extensions = {".mp3", ".wav", ".flac", ".m4a", ".ogg", ".wma", ".aac"}
    file_ext = Path(upload_name).suffix.lower()
    if file_ext not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file_ext}. Allowed: {allowed_extensions}",
        )

    # The format is handed to the separator to name its output files; accept plain
    # alphanumeric tokens only.
    if not output_format.isalnum():
        raise HTTPException(status_code=400, detail=f"Invalid output format: {output_format}")

    # Save uploaded file
    input_path = UPLOAD_DIR / f"{job_id}_{upload_name}"
    try:
        with open(input_path, "wb") as f:
            shutil.copyfileobj(file.file, f)
    except Exception as e:
        # Do not leave a partially written upload behind.
        input_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Failed to save file: {e}") from e

    # Run separation
    try:
        separator = get_separator()
        # Update output format if specified
        separator.output_format = output_format
        # Load the requested model, or the library default
        if model_name:
            separator.load_model(model_name)
        else:
            separator.load_model()
        output_files = separator.separate(str(input_path))
    except Exception as e:
        # Cleanup input file on error
        input_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Separation failed: {e}") from e

    # Raised outside the try block so the detail is not re-wrapped by the handler above.
    if not output_files or len(output_files) < 2:
        input_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail="Separation failed - no output files")

    vocals_path, instrumental_path = _pick_stems(output_files, input_path.stem)

    # Schedule cleanup after 5 minutes. Outputs are addressed as OUTPUT_DIR/<name>, the
    # same location /download serves, and every produced file is included so stems that
    # were not classified do not pile up.
    files_to_cleanup = [str(input_path)]
    files_to_cleanup.extend(str(OUTPUT_DIR / Path(produced).name) for produced in output_files)
    background_tasks.add_task(cleanup_files, files_to_cleanup, 300)

    return SeparationResponse(
        job_id=job_id,
        status="completed",
        vocals_url=f"/download/{Path(vocals_path).name}" if vocals_path else None,
        instrumental_url=(
            f"/download/{Path(instrumental_path).name}" if instrumental_path else None
        ),
    )
@app.get("/download/{filename}")
async def download_file(filename: str):
    """Download a separated stem file."""
    file_path = (OUTPUT_DIR / filename).resolve()
    # Serve only regular files that sit directly inside OUTPUT_DIR. A name such as ".."
    # resolves to an existing directory and is answered with 404 instead of failing
    # later inside FileResponse.
    if file_path.parent != OUTPUT_DIR.resolve() or not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    # Determine media type from the extension; unknown extensions get a generic type.
    media_types = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".flac": "audio/flac",
    }
    media_type = media_types.get(file_path.suffix.lower(), "application/octet-stream")
    return FileResponse(
        path=str(file_path),
        filename=file_path.name,
        media_type=media_type,
    )
@app.get("/models")
async def list_models():
    """List available separation models."""
    # (display name, id to pass as model_name, short description)
    # NOTE(review): confirm these ids are accepted by Separator.load_model() as written.
    catalogue = (
        ("BS-RoFormer (default)", None, "Best quality, slower"),
        ("UVR_MDXNET_KARA_2", "UVR_MDXNET_KARA_2", "Fast, good for karaoke"),
        ("UVR-MDX-NET-Inst_HQ_3", "UVR-MDX-NET-Inst_HQ_3", "High quality instrumentals"),
        ("Kim_Vocal_2", "Kim_Vocal_2", "Good vocal isolation"),
    )
    return {
        "models": [
            {"name": display_name, "id": model_id, "description": summary}
            for display_name, model_id, summary in catalogue
        ]
    }
if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)

191
install.sh Executable file
View file

@ -0,0 +1,191 @@
#!/bin/bash
# Installer for the Audio Separator API. `set -e` stops at the first failing command.
set -e

cat << 'BANNER'
===================================
Audio Separator API - Install Script
===================================
BANNER

# ANSI colour sequences; `echo -e` in the helpers below turns them into colours.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# print_status MESSAGE - report a finished step with a green [OK] tag.
print_status() {
    local message=$1
    echo -e "${GREEN}[OK]${NC} ${message}"
}

# print_warning MESSAGE - report a non-fatal problem with a yellow [WARN] tag.
print_warning() {
    local message=$1
    echo -e "${YELLOW}[WARN]${NC} ${message}"
}

# print_error MESSAGE - report a fatal problem with a red [ERROR] tag.
print_error() {
    local message=$1
    echo -e "${RED}[ERROR]${NC} ${message}"
}
# Running as root only produces a notice; the script continues either way.
if (( EUID == 0 )); then
    print_warning "Running as root - this is fine for VM setup"
fi

# Detect OS: take ID from /etc/os-release when that file exists, else stay "unknown".
OS="unknown"
if [[ -f /etc/os-release ]]; then
    source /etc/os-release
    OS=$ID
fi

printf '\nDetected OS: %s\n\n' "$OS"
# Step 1: Install system dependencies with the package manager that matches the OS.
echo "Step 1: Installing system dependencies..."
case "$OS" in
    ubuntu | debian)
        apt_packages=(python3.10 python3.10-venv python3-pip ffmpeg libsndfile1 curl git)
        apt-get update
        apt-get install -y "${apt_packages[@]}"
        print_status "System dependencies installed"
        ;;
    centos | rhel | fedora)
        dnf_packages=(python3.10 python3-pip ffmpeg libsndfile curl git)
        dnf install -y "${dnf_packages[@]}"
        print_status "System dependencies installed"
        ;;
    *)
        # No recognised Linux ID: macOS is handled through Homebrew, anything else is
        # left to the user.
        if [[ "$OSTYPE" == "darwin"* ]]; then
            if ! command -v brew > /dev/null 2>&1; then
                print_error "Homebrew not found. Please install it first."
                exit 1
            fi
            brew install python@3.10 ffmpeg libsndfile
            print_status "System dependencies installed"
        else
            print_warning "Unknown OS - please ensure python3.10, ffmpeg, and libsndfile are installed"
        fi
        ;;
esac
# Step 2: Install uv (fast Python package manager) unless it is already on PATH.
echo ""
echo "Step 2: Installing uv package manager..."
if command -v uv > /dev/null 2>&1; then
    print_status "uv already installed"
else
    curl -LsSf https://astral.sh/uv/install.sh | sh
    # Make the freshly installed binary visible to the rest of this script.
    export PATH="$HOME/.local/bin:$PATH"
    print_status "uv installed"
fi

# Step 3: Create a fresh virtual environment next to this script.
echo ""
echo "Step 3: Setting up Python environment..."
cd "$(dirname "$0")"
# Any existing .venv directory is discarded first.
if [[ -d .venv ]]; then
    rm -rf .venv
fi
uv venv --python 3.10
print_status "Virtual environment created"

# Step 4: Install the project in editable mode together with its "api" extra.
# (GPU-specific packages are handled separately in step 5.)
echo ""
echo "Step 4: Installing Python dependencies..."
uv pip install -e ".[api]"
print_status "Base dependencies installed"
# Step 5: If nvidia-smi is on PATH, show the GPU and install PyTorch from the CUDA 12.1 index.
echo ""
echo "Step 5: Checking GPU availability..."
if ! command -v nvidia-smi > /dev/null 2>&1; then
    print_warning "No NVIDIA GPU detected - will use CPU (slower)"
else
    echo ""
    nvidia-smi
    echo ""
    print_status "NVIDIA GPU detected"
    echo "Installing PyTorch with CUDA support..."
    uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
    print_status "PyTorch with CUDA installed"
fi

# Step 6: Load the default model once so it is already in the model directory
# (the same /tmp/audio-separator/models path the API uses).
echo ""
echo "Step 6: Pre-downloading default model (this may take a few minutes)..."
.venv/bin/python -c "
from audio_separator.separator import Separator
import os
os.makedirs('/tmp/audio-separator/models', exist_ok=True)
s = Separator(model_file_dir='/tmp/audio-separator/models')
s.load_model()
print('Model downloaded successfully')
"
print_status "Default model downloaded"
# Step 7: Write a systemd unit for the API. Only done when running as root on a machine
# that has /etc/systemd/system; otherwise the step is skipped with a warning.
echo ""
echo "Step 7: Creating systemd service file..."
SERVICE_FILE="/etc/systemd/system/audio-separator.service"
INSTALL_DIR="$(pwd)"
if [[ ! -d /etc/systemd/system || $EUID -ne 0 ]]; then
    print_warning "Skipping systemd service (not root or systemd not available)"
else
    # Unquoted delimiter: $INSTALL_DIR is expanded while the unit file is written.
    cat > "$SERVICE_FILE" << EOF
[Unit]
Description=Audio Separator API
After=network.target
[Service]
Type=simple
User=root
WorkingDirectory=$INSTALL_DIR
ExecStart=$INSTALL_DIR/.venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000
Restart=always
RestartSec=10
Environment="PATH=$INSTALL_DIR/.venv/bin:/usr/local/bin:/usr/bin:/bin"
[Install]
WantedBy=multi-user.target
EOF
    systemctl daemon-reload
    print_status "Systemd service created at $SERVICE_FILE"
    echo " To enable: systemctl enable audio-separator"
    echo " To start: systemctl start audio-separator"
fi
# Closing banner with the commands to run next.
cat << 'SUMMARY'

===================================
Installation complete!
===================================

To run the API:
 .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000

To run tests:
 ./test.sh

API will be available at http://localhost:8000
API docs at http://localhost:8000/docs

SUMMARY

37
pyproject.toml Normal file
View file

@ -0,0 +1,37 @@
[project]
name = "audio-separator-api"
version = "1.0.0"
description = "API service for audio stem separation using ML models"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"audio-separator>=0.35",
"fastapi>=0.109.0",
"uvicorn[standard]>=0.27.0",
"python-multipart>=0.0.6",
"pydantic>=2.0.0",
]
[project.optional-dependencies]
api = [
"fastapi>=0.109.0",
"uvicorn[standard]>=0.27.0",
"python-multipart>=0.0.6",
"pydantic>=2.0.0",
]
dev = [
"pytest>=8.0.0",
"httpx>=0.26.0",
"ruff>=0.1.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The project ships a single top-level module (app.py) and has no package directory
# named after the project, so hatchling needs to be told which file to put in the wheel.
# NOTE(review): verify with `uv pip install -e .` on a clean checkout.
[tool.hatch.build.targets.wheel]
include = ["app.py"]
[tool.ruff]
line-length = 100
target-version = "py310"
[tool.ruff.lint]
select = ["E", "F", "I", "W"]

191
test.sh Executable file
View file

@ -0,0 +1,191 @@
#!/bin/bash
# Environment checks for the Audio Separator API. `set -e` aborts on unexpected failures.
set -e

echo "==================================="
echo "Audio Separator API - Test Script"
echo "==================================="

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Result counters, reported in the summary at the end of the script.
PASS=0
FAIL=0

# test_pass MESSAGE - print a green [PASS] line and count it.
test_pass() {
    echo -e "${GREEN}[PASS]${NC} $1"
    # Plain assignment on purpose: `((PASS++))` returns status 1 while the counter is 0,
    # which makes `set -e` abort the script on the very first result.
    PASS=$((PASS + 1))
}

# test_fail MESSAGE - print a red [FAIL] line and count it.
test_fail() {
    echo -e "${RED}[FAIL]${NC} $1"
    FAIL=$((FAIL + 1))
}

# test_warn MESSAGE - print a yellow [WARN] line; warnings are not counted.
test_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}
# All relative paths below are resolved from the directory containing this script.
cd "$(dirname "$0")"

# Test 1: the virtual environment created by install.sh must exist; nothing else can
# run without it, so a missing interpreter ends the script.
echo ""
echo "Test 1: Python environment"
if [[ ! -f .venv/bin/python ]]; then
    test_fail "Python virtual environment not found. Run ./install.sh first"
    exit 1
fi
PYTHON_VERSION=$(.venv/bin/python --version 2>&1)
test_pass "Python found: $PYTHON_VERSION"
# Test 2: Check core dependencies
# The interpreter runs as the `if` condition: under `set -e` a non-zero exit in a
# stand-alone command would end the script before a later `$?` check could report it.
echo ""
echo "Test 2: Core dependencies"
if .venv/bin/python << 'EOF'
import sys

deps = [
    ("fastapi", "FastAPI"),
    ("uvicorn", "Uvicorn"),
    ("torch", "PyTorch"),
    ("audio_separator", "Audio Separator"),
    ("pydub", "PyDub"),
]
for module, name in deps:
    try:
        __import__(module)
        print(f" [OK] {name}")
    except ImportError as e:
        print(f" [FAIL] {name}: {e}")
        sys.exit(1)
EOF
then
    test_pass "All core dependencies available"
else
    test_fail "Missing dependencies"
fi
# Test 3: Check CUDA availability
# The probe prints one line: "CUDA_AVAILABLE|<device>|<GB>" or "CUDA_NOT_AVAILABLE".
# `|| CUDA_RESULT=""` keeps `set -e` from ending the script when the probe itself
# fails (for example because torch cannot be imported).
echo ""
echo "Test 3: CUDA / GPU availability"
CUDA_RESULT=$(.venv/bin/python << 'EOF'
import torch

if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(0)
    memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"CUDA_AVAILABLE|{device_name}|{memory_gb:.1f}")
else:
    print("CUDA_NOT_AVAILABLE")
EOF
) || CUDA_RESULT=""
if [[ "$CUDA_RESULT" == CUDA_AVAILABLE* ]]; then
    IFS='|' read -r _ GPU_NAME GPU_MEM <<< "$CUDA_RESULT"
    test_pass "CUDA available: $GPU_NAME (${GPU_MEM}GB VRAM)"
else
    test_warn "CUDA not available - will use CPU (slower)"
fi
# Test 4: Load the default model. The probe prints "MODEL_OK", or "MODEL_FAIL|<error>"
# when importing or loading raises.
echo ""
echo "Test 4: Model loading"
MODEL_RESULT=$(.venv/bin/python << 'EOF'
import os

os.makedirs('/tmp/audio-separator/models', exist_ok=True)
try:
    from audio_separator.separator import Separator

    s = Separator(model_file_dir='/tmp/audio-separator/models')
    s.load_model()
    print("MODEL_OK")
except Exception as e:
    print(f"MODEL_FAIL|{e}")
EOF
)
case "$MODEL_RESULT" in
    MODEL_OK)
        test_pass "Model loads successfully"
        ;;
    *)
        # Strip the "MODEL_FAIL|" tag so only the error text is shown.
        test_fail "Model loading failed: ${MODEL_RESULT#MODEL_FAIL|}"
        ;;
esac
# Test 5: ffmpeg must be on PATH; the first line of `ffmpeg -version` is reported.
echo ""
echo "Test 5: FFmpeg availability"
if ! command -v ffmpeg > /dev/null 2>&1; then
    test_fail "FFmpeg not found - required for audio processing"
else
    FFMPEG_VERSION=$(ffmpeg -version 2>&1 | head -n1)
    test_pass "FFmpeg found: $FFMPEG_VERSION"
fi
# Test 6: Probe the HTTP endpoints, but only when a server answers on localhost:8000.
echo ""
echo "Test 6: API endpoints"
API_URL="http://localhost:8000"
if ! curl -s --connect-timeout 2 "$API_URL/health" > /dev/null 2>&1; then
    test_warn "API server not running - skipping endpoint tests"
    echo " Start the server with: .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000"
else
    # Health endpoint: the body must contain "status":"healthy"
    HEALTH=$(curl -s "$API_URL/health")
    if grep -q '"status":"healthy"' <<< "$HEALTH"; then
        test_pass "Health endpoint responding"
    else
        test_fail "Health endpoint unhealthy"
    fi

    # Models endpoint: the body must contain a "models" key
    MODELS=$(curl -s "$API_URL/models")
    if grep -q '"models"' <<< "$MODELS"; then
        test_pass "Models endpoint responding"
    else
        test_fail "Models endpoint failed"
    fi

    # Docs endpoint: a missing docs page is only a warning
    if curl -s --connect-timeout 2 "$API_URL/docs" | grep -q "swagger"; then
        test_pass "API docs available at $API_URL/docs"
    else
        test_warn "API docs may not be accessible"
    fi
fi
# Test 7: Disk space check
# `df -Pk` (POSIX output, 1 KiB blocks) works on both Linux and macOS, unlike the
# GNU-only `-BG`. Column 4 of the second line is the available space; awk converts it
# to whole GiB.
echo ""
echo "Test 7: Disk space"
AVAILABLE_GB=$(df -Pk /tmp 2> /dev/null | awk 'NR == 2 { print int($4 / 1048576) }')
if [ -z "$AVAILABLE_GB" ]; then
    # Never feed an empty value into the numeric comparison below.
    test_warn "Could not determine free disk space in /tmp"
elif [ "$AVAILABLE_GB" -ge 10 ]; then
    test_pass "Sufficient disk space: ${AVAILABLE_GB}GB available in /tmp"
else
    test_warn "Low disk space: ${AVAILABLE_GB}GB available (recommend 10GB+)"
fi
# Summary: print both counters; the exit status is 1 when any test failed, 0 otherwise.
cat << 'HEADER'

===================================
Test Summary
===================================
HEADER
echo -e "Passed: ${GREEN}$PASS${NC}"
echo -e "Failed: ${RED}$FAIL${NC}"
echo ""
if (( FAIL == 0 )); then
    echo -e "${GREEN}All critical tests passed!${NC}"
    exit 0
fi
echo -e "${RED}Some tests failed. Please fix issues before running the API.${NC}"
exit 1

2087
uv.lock generated Normal file

File diff suppressed because it is too large Load diff