init

2026-04-15 07:04:48 +00:00 · 2026-01-23 15:06:41 -05:00 · 2026-01-23 15:06:41 -05:00 · 9d85ca1ebb
commit 9d85ca1ebb
9 changed files with 2928 additions and 0 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,10 @@
 # Python-generated files
 __pycache__/
 *.py[oc]
 build/
 dist/
 wheels/
 *.egg-info
 # Virtual environments
 .venv
--- a/.python-version
+++ b/.python-version
@ -0,0 +1 @@
 3.10
--- a/README.md
+++ b/README.md
@ -0,0 +1,170 @@
 # Audio Separator API
 REST API for separating audio into vocal and instrumental stems using ML models.
 ## Quick Start
 ```bash
 # Clone and install
 git clone <repo-url>
 cd sep
 chmod +x install.sh test.sh
 sudo ./install.sh
 # Run tests
 ./test.sh
 # Start the API
 .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000
 ```
 ## Requirements
 - Python 3.10+
 - FFmpeg
 - 10GB+ disk space (for models)
 - NVIDIA GPU with CUDA (optional, but recommended)
 ## API Endpoints
 ### Health Check
 ```bash
 curl http://localhost:8000/health
 ```
 Response:
 ```json
 {
  "status": "healthy",
  "cuda_available": true,
  "cuda_device": "NVIDIA GeForce RTX 5090"
 }
 ```
 ### Separate Audio
 ```bash
 curl -X POST "http://localhost:8000/separate?output_format=mp3" \
  -F "file=@song.mp3"
 ```
 Response:
 ```json
 {
  "job_id": "a1b2c3d4",
  "status": "completed",
  "vocals_url": "/download/song_(Vocals)_model_bs_roformer.mp3",
  "instrumental_url": "/download/song_(Instrumental)_model_bs_roformer.mp3"
 }
 ```
 ### Download Stems
 ```bash
 curl -O "http://localhost:8000/download/song_(Vocals)_model_bs_roformer.mp3"
 ```
 ### List Models
 ```bash
 curl http://localhost:8000/models
 ```
 ## Configuration
 ### Output Formats
 - `mp3` (default) - Good compression, iOS compatible
 - `wav` - Lossless, larger files
 - `flac` - Lossless compression
 ### Models
 | Model | Quality | Speed | Best For |
 |-------|---------|-------|----------|
 | BS-RoFormer (default) | Highest | Slow | Production use |
 | UVR_MDXNET_KARA_2 | Good | Fast | Karaoke |
 | Kim_Vocal_2 | Good | Medium | Vocal isolation |
 ## VM Deployment
 ### Using systemd (Linux)
 The install script creates a systemd service:
 ```bash
 sudo systemctl enable audio-separator
 sudo systemctl start audio-separator
 sudo systemctl status audio-separator
 ```
 ### Manual Start
 ```bash
 .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --workers 1
 ```
 Note: Use `--workers 1` because the ML model is not thread-safe.
 ## GPU Support
 The API automatically detects CUDA GPUs. To verify:
 ```bash
 ./test.sh
 ```
 Look for:
 ```
 [PASS] CUDA available: NVIDIA GeForce RTX 5090 (32.0GB VRAM)
 ```
 ### CUDA Installation (Ubuntu)
 ```bash
 # Add NVIDIA repo
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 sudo apt-get update
 sudo apt-get install -y cuda-toolkit-12-1
 ```
 ## iOS Integration
 The API returns MP3 files by default, which are natively supported on iOS.
 Example Swift code:
 ```swift
 func separateAudio(fileURL: URL) async throws -> (vocals: URL, instrumental: URL) {
    var request = URLRequest(url: URL(string: "http://your-vm:8000/separate")!)
    request.httpMethod = "POST"
    // Upload file and get response with download URLs
    // ...
 }
 ```
 ## File Cleanup
 Uploaded and output files are automatically deleted after 5 minutes.
 ## Troubleshooting
 ### "CUDA not available"
 1. Check NVIDIA drivers: `nvidia-smi`
 2. Reinstall PyTorch with CUDA:
   ```bash
   uv pip install torch --index-url https://download.pytorch.org/whl/cu121
   ```
 ### "Model download failed"
 Check network access to huggingface.co and github.com.
 ### "Out of memory"
 Reduce batch size or use a smaller model like `UVR_MDXNET_KARA_2`.
--- a/app.py
+++ b/app.py
@ -0,0 +1,241 @@
 """
 Audio Separator API
 Simple FastAPI service for stem separation using audio-separator
 """
 import os
 import uuid
 import shutil
 from pathlib import Path
 from typing import Optional
 from fastapi import FastAPI, UploadFile, HTTPException, BackgroundTasks
 from fastapi.responses import FileResponse, JSONResponse
 from pydantic import BaseModel
 # FastAPI application object; the route decorators below register on it.
 app = FastAPI(
    title="Audio Separator API",
    description="Separate audio into vocal and instrumental stems using ML models",
    version="1.0.0",
 )
 # Configuration
 # UPLOAD_DIR receives the uploaded input files, OUTPUT_DIR is handed to the
 # separator as its output directory and is the directory the download route
 # serves from, MODEL_DIR is handed to the separator as its model directory.
 UPLOAD_DIR = Path("/tmp/audio-separator/uploads")
 OUTPUT_DIR = Path("/tmp/audio-separator/outputs")
 MODEL_DIR = Path("/tmp/audio-separator/models")
 # The directories are created as a side effect of importing this module.
 UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 MODEL_DIR.mkdir(parents=True, exist_ok=True)
 # Lazy load separator to avoid import issues if CUDA not available
 # Holds the shared Separator instance once get_separator() has built it.
 _separator = None
 def get_separator():
    """Return the shared Separator, building it on the first call.

    The audio_separator and torch imports happen here rather than at module
    import time. CUDA is requested only when torch reports it as available;
    if the probe raises, the error is printed and whatever had been
    determined up to that point is used.
    """
    global _separator
    if _separator is not None:
        return _separator
    from audio_separator.separator import Separator
    # Probe for a GPU; any failure here must not prevent start-up.
    cuda_ok = False
    try:
        import torch
        cuda_ok = torch.cuda.is_available()
        if not cuda_ok:
            print("CUDA not available, using CPU")
        else:
            print(f"CUDA available: {torch.cuda.get_device_name(0)}")
    except Exception as err:
        print(f"Error checking CUDA: {err}")
    settings = {
        "output_dir": str(OUTPUT_DIR),
        "model_file_dir": str(MODEL_DIR),
        "use_cuda": cuda_ok,
        "output_format": "mp3",
    }
    _separator = Separator(**settings)
    return _separator
 class SeparationRequest(BaseModel):
    # Options for a separation job: the desired output format (defaults to
    # "mp3") and an optional model name.
    # NOTE(review): no endpoint visible in this file accepts this model; the
    # /separate handler declares the same two options as plain parameters.
    output_format: Optional[str] = "mp3"
    model_name: Optional[str] = None
 class SeparationResponse(BaseModel):
    # Response body of POST /separate.
    # job_id: short identifier generated for the request.
    # status: the handler in this file sets it to "completed".
    # vocals_url / instrumental_url: "/download/<file name>" paths, or None
    #   when the corresponding stem was not identified.
    # message: optional free text; the handler in this file never sets it.
    job_id: str
    status: str
    vocals_url: Optional[str] = None
    instrumental_url: Optional[str] = None
    message: Optional[str] = None
 class HealthResponse(BaseModel):
    # Response body of GET /health.
    # status: the handler in this file always reports "healthy".
    # cuda_available: whether torch reported a usable CUDA device.
    # cuda_device: name of CUDA device 0, or None when it could not be read.
    status: str
    cuda_available: bool
    cuda_device: Optional[str] = None
 def cleanup_files(file_paths: list[str], delay_seconds: int = 300):
    """Wait ``delay_seconds``, then delete every path in ``file_paths``.

    Paths that no longer exist are skipped. A failure on one path is printed
    and does not stop the remaining paths from being processed.
    """
    from time import sleep
    sleep(delay_seconds)
    for target in file_paths:
        try:
            if not os.path.exists(target):
                continue
            os.remove(target)
        except Exception as err:
            print(f"Error cleaning up {target}: {err}")
@app.get("/health", response_model=HealthResponse)
 async def health_check():
    """Report service health together with the CUDA status seen by torch."""
    device_name = None
    has_cuda = False
    try:
        import torch
        has_cuda = torch.cuda.is_available()
        if has_cuda:
            device_name = torch.cuda.get_device_name(0)
    except Exception:
        # Best effort: a failing torch import or probe simply leaves the
        # values gathered so far in place; the endpoint still answers.
        pass
    return HealthResponse(
        status="healthy",
        cuda_available=has_cuda,
        cuda_device=device_name,
    )
@app.post("/separate", response_model=SeparationResponse)
 async def separate_audio(
    file: UploadFile,
    background_tasks: BackgroundTasks,
    output_format: str = "mp3",
    model_name: Optional[str] = None,
 ):
    """
    Separate audio file into vocal and instrumental stems.
    - **file**: Audio file (mp3, wav, flac, m4a, etc.)
    - **output_format**: Output format (mp3, wav, flac) - default: mp3
    - **model_name**: Model to use (optional, uses default if not specified)
    Returns URLs to download the separated stems.

    Responds with 400 for a missing filename, an unsupported input extension
    or a malformed output format, and with 500 when saving the upload or the
    separation itself fails. The upload and every produced file are scheduled
    for deletion 300 seconds after a successful response.
    """
    job_id = str(uuid.uuid4())[:8]
    # Validate file
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")
    # The client chooses file.filename. Keep only its last path component
    # (treating "\" like "/") so "../" segments cannot place the upload
    # outside UPLOAD_DIR.
    safe_name = Path(file.filename.replace("\\", "/")).name
    if not safe_name:
        raise HTTPException(status_code=400, detail="No filename provided")
    allowed_extensions = {".mp3", ".wav", ".flac", ".m4a", ".ogg", ".wma", ".aac"}
    file_ext = Path(safe_name).suffix.lower()
    if file_ext not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file_ext}. Allowed: {allowed_extensions}"
        )
    # The format is handed to the separator, which names its output files;
    # README.md shows it as the file extension. Refuse anything that is not a
    # plain alphanumeric token so it cannot carry path characters.
    if not output_format.isalnum():
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported output format: {output_format}"
        )
    # Save uploaded file
    input_path = UPLOAD_DIR / f"{job_id}_{safe_name}"
    try:
        with open(input_path, "wb") as f:
            shutil.copyfileobj(file.file, f)
    except Exception as e:
        # Do not leave a partially written upload behind.
        input_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Failed to save file: {e}") from e
    # Run separation
    try:
        separator = get_separator()
        # Update output format if specified
        separator.output_format = output_format
        # Load model if specified
        if model_name:
            separator.load_model(model_name)
        else:
            separator.load_model()
        # Run separation
        output_files = separator.separate(str(input_path))
        if not output_files or len(output_files) < 2:
            raise HTTPException(status_code=500, detail="Separation failed - no output files")
        # The download route serves from OUTPUT_DIR by file name, so anchor
        # every returned entry there. This keeps cleanup working even if the
        # separator returned names relative to another directory.
        stem_paths = [OUTPUT_DIR / Path(name).name for name in output_files]
        # Find vocals and instrumental files
        vocals_path = None
        instrumental_path = None
        for stem_path in stem_paths:
            label = _stem_label(stem_path.name, input_path.stem)
            if "vocal" in label:
                vocals_path = stem_path
            elif "instrum" in label:
                instrumental_path = stem_path
        # Schedule cleanup of files after 5 minutes. Every produced file is
        # included, not only the two that were recognised.
        files_to_cleanup = [str(input_path), *(str(p) for p in stem_paths)]
        background_tasks.add_task(cleanup_files, files_to_cleanup, 300)
        return SeparationResponse(
            job_id=job_id,
            status="completed",
            vocals_url=f"/download/{vocals_path.name}" if vocals_path else None,
            instrumental_url=(
                f"/download/{instrumental_path.name}" if instrumental_path else None
            ),
        )
    except HTTPException:
        # Already a proper API error: clean up and pass it through unchanged
        # instead of wrapping it in a second "Separation failed" message.
        input_path.unlink(missing_ok=True)
        raise
    except Exception as e:
        # Cleanup input file on error
        input_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Separation failed: {e}") from e


 def _stem_label(output_name: str, input_stem: str) -> str:
    """Return the lower-cased part of ``output_name`` that names the stem.

    README.md shows output names of the form ``<input>_(<Stem>)_<model>``.
    Matching on the whole name mislabels files whenever the song title or the
    model name (for example ``Kim_Vocal_2``) contains "vocal", so the input
    name is stripped and only the first parenthesised token is returned.
    When the name does not start with ``input_stem`` the whole lower-cased
    name is returned, which is the previous whole-name matching.
    """
    if not input_stem or not output_name.startswith(input_stem):
        return output_name.lower()
    remainder = output_name[len(input_stem):].lower()
    opening = remainder.find("(")
    closing = remainder.find(")", opening + 1)
    if opening == -1 or closing == -1:
        return remainder
    return remainder[opening + 1:closing]
@app.get("/download/{filename}")
 async def download_file(filename: str):
    """Download a separated stem file.

    Only a bare file name that resolves to a regular file directly inside
    OUTPUT_DIR is served; anything else is answered with 404. The media type
    is chosen from the file extension, falling back to
    application/octet-stream.
    """
    # A name with directory parts, or "." / "..", could point outside
    # OUTPUT_DIR, so it is treated exactly like a missing file.
    if filename in {"", ".", ".."} or Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="File not found")
    file_path = OUTPUT_DIR / filename
    # is_file() rather than exists(): a directory cannot be sent as a file.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    # Determine media type
    media_types = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".flac": "audio/flac",
    }
    media_type = media_types.get(file_path.suffix.lower(), "application/octet-stream")
    return FileResponse(
        path=str(file_path),
        filename=filename,
        media_type=media_type,
    )
@app.get("/models")
 async def list_models():
    """Return the fixed catalogue of separation models offered by the API."""
    # (display name, id to pass as model_name, short description); an id of
    # None stands for the default model.
    catalogue = (
        ("BS-RoFormer (default)", None, "Best quality, slower"),
        ("UVR_MDXNET_KARA_2", "UVR_MDXNET_KARA_2", "Fast, good for karaoke"),
        ("UVR-MDX-NET-Inst_HQ_3", "UVR-MDX-NET-Inst_HQ_3", "High quality instrumentals"),
        ("Kim_Vocal_2", "Kim_Vocal_2", "Good vocal isolation"),
    )
    entries = [
        {"name": label, "id": model_id, "description": summary}
        for label, model_id, summary in catalogue
    ]
    return {"models": entries}
 # Running this file directly starts uvicorn on all interfaces, port 8000.
 if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/install.sh
+++ b/install.sh
@ -0,0 +1,191 @@
 #!/bin/bash
 set -e
 echo "==================================="
 echo "Audio Separator API - Install Script"
 echo "==================================="
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 # Print "$1" prefixed with a green [OK] tag.
 print_status() {
    echo -e "${GREEN}[OK]${NC} $1"
 }
 # Print "$1" prefixed with a yellow [WARN] tag.
 print_warning() {
    echo -e "${YELLOW}[WARN]${NC} $1"
 }
 # Print "$1" prefixed with a red [ERROR] tag; it does not exit the script.
 print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
 }
 # Check if running as root
 if [ "$EUID" -eq 0 ]; then
    print_warning "Running as root - this is fine for VM setup"
 fi
 # Detect OS
 OS="unknown"
 if [ -f /etc/os-release ]; then
    . /etc/os-release
    OS=$ID
 fi
 echo ""
 echo "Detected OS: $OS"
 echo ""
 # Step 1: Install system dependencies
 echo "Step 1: Installing system dependencies..."
 if [ "$OS" = "ubuntu" ] || [ "$OS" = "debian" ]; then
    apt-get update
    apt-get install -y \
        python3.10 \
        python3.10-venv \
        python3-pip \
        ffmpeg \
        libsndfile1 \
        curl \
        git
    print_status "System dependencies installed"
 elif [ "$OS" = "centos" ] || [ "$OS" = "rhel" ] || [ "$OS" = "fedora" ]; then
    dnf install -y \
        python3.10 \
        python3-pip \
        ffmpeg \
        libsndfile \
        curl \
        git
    print_status "System dependencies installed"
 elif [[ "$OSTYPE" == "darwin"* ]]; then
    if ! command -v brew &> /dev/null; then
        print_error "Homebrew not found. Please install it first."
        exit 1
    fi
    brew install python@3.10 ffmpeg libsndfile
    print_status "System dependencies installed"
 else
    print_warning "Unknown OS - please ensure python3.10, ffmpeg, and libsndfile are installed"
 fi
 # Step 2: Install uv (fast Python package manager)
 echo ""
 echo "Step 2: Installing uv package manager..."
 if ! command -v uv &> /dev/null; then
    curl -LsSf https://astral.sh/uv/install.sh | sh
    export PATH="$HOME/.local/bin:$PATH"
    print_status "uv installed"
 else
    print_status "uv already installed"
 fi
 # Step 3: Create virtual environment and install dependencies
 echo ""
 echo "Step 3: Setting up Python environment..."
 cd "$(dirname "$0")"
 # Remove existing venv if present
 if [ -d ".venv" ]; then
    rm -rf .venv
 fi
 uv venv --python 3.10
 print_status "Virtual environment created"
 # Step 4: Install Python dependencies
 echo ""
 echo "Step 4: Installing Python dependencies..."
 # Install with CUDA support detection
 uv pip install -e ".[api]"
 print_status "Base dependencies installed"
 # Step 5: Check for NVIDIA GPU and install CUDA dependencies
 echo ""
 echo "Step 5: Checking GPU availability..."
 if command -v nvidia-smi &> /dev/null; then
    echo ""
    nvidia-smi
    echo ""
    print_status "NVIDIA GPU detected"
    # Install PyTorch with CUDA
    echo "Installing PyTorch with CUDA support..."
    uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
    print_status "PyTorch with CUDA installed"
 else
    print_warning "No NVIDIA GPU detected - will use CPU (slower)"
 fi
 # Step 6: Pre-download default model
 echo ""
 echo "Step 6: Pre-downloading default model (this may take a few minutes)..."
 .venv/bin/python -c "
 from audio_separator.separator import Separator
 import os
 os.makedirs('/tmp/audio-separator/models', exist_ok=True)
 s = Separator(model_file_dir='/tmp/audio-separator/models')
 s.load_model()
 print('Model downloaded successfully')
 "
 print_status "Default model downloaded"
 # Step 7: Create systemd service file (optional)
 echo ""
 echo "Step 7: Creating systemd service file..."
 SERVICE_FILE="/etc/systemd/system/audio-separator.service"
 INSTALL_DIR="$(pwd)"
 if [ -d "/etc/systemd/system" ] && [ "$EUID" -eq 0 ]; then
    cat > "$SERVICE_FILE" << EOF
 [Unit]
 Description=Audio Separator API
 After=network.target
 [Service]
 Type=simple
 User=root
 WorkingDirectory=$INSTALL_DIR
 ExecStart=$INSTALL_DIR/.venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000
 Restart=always
 RestartSec=10
 Environment="PATH=$INSTALL_DIR/.venv/bin:/usr/local/bin:/usr/bin:/bin"
 [Install]
 WantedBy=multi-user.target
 EOF
    systemctl daemon-reload
    print_status "Systemd service created at $SERVICE_FILE"
    echo "    To enable: systemctl enable audio-separator"
    echo "    To start:  systemctl start audio-separator"
 else
    print_warning "Skipping systemd service (not root or systemd not available)"
 fi
 echo ""
 echo "==================================="
 echo "Installation complete!"
 echo "==================================="
 echo ""
 echo "To run the API:"
 echo "  .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000"
 echo ""
 echo "To run tests:"
 echo "  ./test.sh"
 echo ""
 echo "API will be available at http://localhost:8000"
 echo "API docs at http://localhost:8000/docs"
 echo ""
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,37 @@
 [project]
 name = "audio-separator-api"
 version = "1.0.0"
 description = "API service for audio stem separation using ML models"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
    "audio-separator>=0.35",
    "fastapi>=0.109.0",
    "uvicorn[standard]>=0.27.0",
    "python-multipart>=0.0.6",
    "pydantic>=2.0.0",
 ]
 [project.optional-dependencies]
 api = [
    "fastapi>=0.109.0",
    "uvicorn[standard]>=0.27.0",
    "python-multipart>=0.0.6",
    "pydantic>=2.0.0",
 ]
 dev = [
    "pytest>=8.0.0",
    "httpx>=0.26.0",
    "ruff>=0.1.0",
 ]
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 [tool.ruff]
 line-length = 100
 target-version = "py310"
 [tool.ruff.lint]
 select = ["E", "F", "I", "W"]
--- a/test.sh
+++ b/test.sh
@ -0,0 +1,191 @@
 #!/bin/bash
 set -e
 echo "==================================="
 echo "Audio Separator API - Test Script"
 echo "==================================="
 # Colors
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m'
 PASS=0
 FAIL=0
 # Print "$1" with a green [PASS] tag and count it in PASS.
 test_pass() {
    echo -e "${GREEN}[PASS]${NC} $1"
    # Plain assignment instead of ((PASS++)): the arithmetic command returns
    # status 1 when its value is 0, which under `set -e` ended the script on
    # the very first pass.
    PASS=$((PASS + 1))
 }
 # Print "$1" with a red [FAIL] tag and count it in FAIL.
 test_fail() {
    echo -e "${RED}[FAIL]${NC} $1"
    # Plain assignment instead of ((FAIL++)): the arithmetic command returns
    # status 1 when its value is 0, which under `set -e` ended the script on
    # the first failure, before the summary could be printed.
    FAIL=$((FAIL + 1))
 }
 # Print "$1" with a yellow [WARN] tag; warnings are not counted.
 test_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
 }
 cd "$(dirname "$0")"
 # Test 1: Check Python environment
 echo ""
 echo "Test 1: Python environment"
 if [ -f ".venv/bin/python" ]; then
    PYTHON_VERSION=$(.venv/bin/python --version 2>&1)
    test_pass "Python found: $PYTHON_VERSION"
 else
    test_fail "Python virtual environment not found. Run ./install.sh first"
    exit 1
 fi
 # Test 2: Check core dependencies
 echo ""
 echo "Test 2: Core dependencies"
 # The import check runs as the `if` condition. Under `set -e` a failing
 # command used as a plain statement aborts the script at once, so the old
 # `$?` test never saw a failure and the summary was never printed.
 if .venv/bin/python << 'EOF'
 import sys
 deps = [
    ("fastapi", "FastAPI"),
    ("uvicorn", "Uvicorn"),
    ("torch", "PyTorch"),
    ("audio_separator", "Audio Separator"),
    ("pydub", "PyDub"),
 ]
 for module, name in deps:
    try:
        __import__(module)
        print(f"  [OK] {name}")
    except ImportError as e:
        print(f"  [FAIL] {name}: {e}")
        sys.exit(1)
 EOF
 then
    test_pass "All core dependencies available"
 else
    test_fail "Missing dependencies"
 fi
 # Test 3: Check CUDA availability
 echo ""
 echo "Test 3: CUDA / GPU availability"
 CUDA_RESULT=$(.venv/bin/python << 'EOF'
 import torch
 if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(0)
    memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"CUDA_AVAILABLE|{device_name}|{memory_gb:.1f}")
 else:
    print("CUDA_NOT_AVAILABLE")
 EOF
 )
 if [[ "$CUDA_RESULT" == CUDA_AVAILABLE* ]]; then
    IFS='|' read -r _ GPU_NAME GPU_MEM <<< "$CUDA_RESULT"
    test_pass "CUDA available: $GPU_NAME (${GPU_MEM}GB VRAM)"
 else
    test_warn "CUDA not available - will use CPU (slower)"
 fi
 # Test 4: Check model access
 echo ""
 echo "Test 4: Model loading"
 MODEL_RESULT=$(.venv/bin/python << 'EOF'
 import os
 os.makedirs('/tmp/audio-separator/models', exist_ok=True)
 try:
    from audio_separator.separator import Separator
    s = Separator(model_file_dir='/tmp/audio-separator/models')
    s.load_model()
    print("MODEL_OK")
 except Exception as e:
    print(f"MODEL_FAIL|{e}")
 EOF
 )
 if [[ "$MODEL_RESULT" == "MODEL_OK" ]]; then
    test_pass "Model loads successfully"
 else
    test_fail "Model loading failed: ${MODEL_RESULT#MODEL_FAIL|}"
 fi
 # Test 5: Check FFmpeg
 echo ""
 echo "Test 5: FFmpeg availability"
 if command -v ffmpeg &> /dev/null; then
    FFMPEG_VERSION=$(ffmpeg -version 2>&1 | head -n1)
    test_pass "FFmpeg found: $FFMPEG_VERSION"
 else
    test_fail "FFmpeg not found - required for audio processing"
 fi
 # Test 6: Test API endpoints (if server is running)
 echo ""
 echo "Test 6: API endpoints"
 API_URL="http://localhost:8000"
 if curl -s --connect-timeout 2 "$API_URL/health" > /dev/null 2>&1; then
    # Health endpoint
    HEALTH=$(curl -s "$API_URL/health")
    if echo "$HEALTH" | grep -q '"status":"healthy"'; then
        test_pass "Health endpoint responding"
    else
        test_fail "Health endpoint unhealthy"
    fi
    # Models endpoint
    MODELS=$(curl -s "$API_URL/models")
    if echo "$MODELS" | grep -q '"models"'; then
        test_pass "Models endpoint responding"
    else
        test_fail "Models endpoint failed"
    fi
    # Docs endpoint
    if curl -s --connect-timeout 2 "$API_URL/docs" | grep -q "swagger"; then
        test_pass "API docs available at $API_URL/docs"
    else
        test_warn "API docs may not be accessible"
    fi
 else
    test_warn "API server not running - skipping endpoint tests"
    echo "        Start the server with: .venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000"
 fi
 # Test 7: Disk space check
 echo ""
 echo "Test 7: Disk space"
 AVAILABLE_GB=$(df -BG /tmp | tail -1 | awk '{print $4}' | tr -d 'G')
 if [ "$AVAILABLE_GB" -gt 10 ]; then
    test_pass "Sufficient disk space: ${AVAILABLE_GB}GB available in /tmp"
 else
    test_warn "Low disk space: ${AVAILABLE_GB}GB available (recommend 10GB+)"
 fi
 # Summary
 echo ""
 echo "==================================="
 echo "Test Summary"
 echo "==================================="
 echo -e "Passed: ${GREEN}$PASS${NC}"
 echo -e "Failed: ${RED}$FAIL${NC}"
 echo ""
 if [ $FAIL -gt 0 ]; then
    echo -e "${RED}Some tests failed. Please fix issues before running the API.${NC}"
    exit 1
 else
    echo -e "${GREEN}All critical tests passed!${NC}"
    exit 0
 fi
--- a/uv.lock
+++ b/uv.lock