co-mono/packages/pods/scripts/pod_setup.sh
2025-11-10 21:47:21 +01:00

336 lines
No EOL
11 KiB
Bash
Executable file

#!/usr/bin/env bash
# GPU pod bootstrap for vLLM deployment.
#
# Usage (normally invoked by the pi CLI):
#   pod_setup.sh --models-path <dir> --hf-token <token> --vllm-api-key <key> \
#                [--mount <command>] [--vllm <version>]
#
# Options and the variables they populate:
#   --mount <command>      -> MOUNT_COMMAND (default: empty)
#   --models-path <dir>    -> MODELS_PATH   (default: empty)
#   --hf-token <token>     -> HF_TOKEN      (default: empty)
#   --vllm-api-key <key>   -> PI_API_KEY    (default: empty)
#   --vllm <version>       -> VLLM_VERSION  (default: "release")
set -euo pipefail

# Parse arguments passed from pi CLI
MOUNT_COMMAND=""
MODELS_PATH=""
HF_TOKEN=""
PI_API_KEY=""
VLLM_VERSION="release" # Default to release

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mount | --models-path | --hf-token | --vllm-api-key | --vllm)
      # Every option takes exactly one value. Checking here gives a clear
      # message instead of bash's "$2: unbound variable" abort under `set -u`.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: Option $1 requires a value" >&2
        exit 1
      fi
      case "$1" in
        --mount) MOUNT_COMMAND="$2" ;;
        --models-path) MODELS_PATH="$2" ;;
        --hf-token) HF_TOKEN="$2" ;;
        --vllm-api-key) PI_API_KEY="$2" ;;
        --vllm) VLLM_VERSION="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "ERROR: Unknown option: $1" >&2
      exit 1
      ;;
  esac
done
# Validate required parameters. Checked in a fixed order; the first empty one
# aborts the script with its variable name in the message.
for required_var in HF_TOKEN PI_API_KEY MODELS_PATH; do
  # ${!name} is indirect expansion: the value of the variable called $name.
  if [[ -z "${!required_var}" ]]; then
    echo "ERROR: ${required_var} is required" >&2
    exit 1
  fi
done

# Fail fast on an unsupported vLLM flavour. The install step further down
# rejects it too, but only after the system packages, CUDA toolkit and Python
# environment have already been set up.
case "$VLLM_VERSION" in
  release | nightly | gpt-oss) ;;
  *)
    echo "ERROR: Unknown vLLM version: $VLLM_VERSION" >&2
    exit 1
    ;;
esac
echo "=== Starting pod setup ==="
# Install system dependencies.
# apt-get is used instead of apt: apt warns that its command-line interface is
# not stable for scripts, and the CUDA section of this script already uses
# apt-get. DEBIAN_FRONTEND=noninteractive keeps package configuration prompts
# from blocking an unattended run.
DEBIAN_FRONTEND=noninteractive apt-get update -y
DEBIAN_FRONTEND=noninteractive apt-get install -y \
  python3-pip python3-venv git build-essential cmake ninja-build \
  curl wget lsb-release htop pkg-config
# --- Install matching CUDA toolkit -------------------------------------------
# Reads the "CUDA Version" advertised by the driver (nvidia-smi), compares it
# with the release reported by nvcc (if present), installs cuda-toolkit-X-Y from
# NVIDIA's apt repository when they differ, and finally puts
# /usr/local/cuda-X.Y on PATH and LD_LIBRARY_PATH for the rest of the script.
# Sets: DRIVER_CUDA_VERSION, NVCC_VERSION.
echo "Checking CUDA driver version..."
# Match the version by pattern instead of by column position, and read all of
# nvidia-smi's output (no early exit) so the pipeline cannot fail on SIGPIPE
# under `pipefail`.
DRIVER_CUDA_VERSION=$(
  nvidia-smi | awk '
    !found && match($0, /CUDA Version: *[0-9]+\.[0-9]+/) {
      found = substr($0, RSTART, RLENGTH)
      sub(/^CUDA Version: */, "", found)
    }
    END { print found }
  '
)
if [[ ! "$DRIVER_CUDA_VERSION" =~ ^[0-9]+\.[0-9]+$ ]]; then
  echo "ERROR: Could not determine the driver's CUDA version from nvidia-smi" >&2
  exit 1
fi
echo "Driver supports CUDA: $DRIVER_CUDA_VERSION"

# Check if nvcc exists and its version
if command -v nvcc &> /dev/null; then
  # nvcc prints "Cuda compilation tools, release 12.4, V12.4.131". Extract the
  # number after "release"; taking the 6th field would yield "V12.4.131", which
  # never equals the driver's "12.4" and forces a reinstall on every run.
  NVCC_VERSION=$(nvcc --version \
    | sed -n 's/.*release \([0-9][0-9]*\.[0-9][0-9]*\).*/\1/p')
  echo "Current nvcc version: $NVCC_VERSION"
else
  NVCC_VERSION="none"
  echo "nvcc not found"
fi

# Install CUDA toolkit matching driver version if needed
if [[ "$NVCC_VERSION" != "$DRIVER_CUDA_VERSION" ]]; then
  echo "Installing CUDA Toolkit $DRIVER_CUDA_VERSION to match driver..."

  # Detect Ubuntu version
  UBUNTU_VERSION=$(lsb_release -rs)
  UBUNTU_CODENAME=$(lsb_release -cs)
  echo "Detected Ubuntu $UBUNTU_VERSION ($UBUNTU_CODENAME)"

  # Map Ubuntu version to NVIDIA repo path
  case "$UBUNTU_VERSION" in
    24.04) REPO_PATH="ubuntu2404" ;;
    22.04) REPO_PATH="ubuntu2204" ;;
    20.04) REPO_PATH="ubuntu2004" ;;
    *)
      echo "Warning: Unsupported Ubuntu version $UBUNTU_VERSION, trying ubuntu2204"
      REPO_PATH="ubuntu2204"
      ;;
  esac

  # Add NVIDIA package repositories. The keyring is downloaded to a temp file
  # so a leftover copy in the working directory cannot interfere.
  # NOTE(review): the repository path is hard-coded to x86_64.
  keyring_deb=$(mktemp)
  wget -O "$keyring_deb" \
    "https://developer.download.nvidia.com/compute/cuda/repos/${REPO_PATH}/x86_64/cuda-keyring_1.1-1_all.deb"
  dpkg -i "$keyring_deb"
  rm -f -- "$keyring_deb"
  apt-get update

  # Install specific CUDA toolkit version
  # Convert version format (12.9 -> 12-9)
  CUDA_VERSION_APT="${DRIVER_CUDA_VERSION/./-}"
  echo "Installing cuda-toolkit-${CUDA_VERSION_APT}..."
  apt-get install -y "cuda-toolkit-${CUDA_VERSION_APT}"
else
  echo "CUDA toolkit $NVCC_VERSION matches driver version"
fi

# Add CUDA to PATH (needed whether or not the toolkit was just installed)
export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}"

# Verify that an nvcc is reachable
nvcc --version
# --- Install uv (fast Python package manager) --------------------------------
# Pipe the upstream installer script straight into sh, then expose
# ~/.local/bin to the rest of this script.
uv_installer_url="https://astral.sh/uv/install.sh"
curl -LsSf "$uv_installer_url" | sh
PATH="$HOME/.local/bin:$PATH"
export PATH

# --- Install Python 3.12 if not available ------------------------------------
# Only ask uv for an interpreter when no python3.12 is already on PATH.
command -v python3.12 > /dev/null 2>&1 || {
  echo "Python 3.12 not found. Installing via uv..."
  uv python install 3.12
}
# --- Clean up existing environments and caches -------------------------------
# Start from a blank slate: drop the old venv plus the uv and vLLM caches.

# Delete directory $1 if it exists, printing message $2 first.
purge_dir() {
  local target=$1
  local notice=$2
  [[ -d "$target" ]] || return 0
  echo "$notice"
  rm -rf "$target"
}

echo "Cleaning up existing environments and caches..."
VENV="$HOME/venv"
purge_dir "$VENV" "Removing existing virtual environment..."
purge_dir "$HOME/.cache/uv" "Clearing uv cache..."
purge_dir "$HOME/.cache/vllm" "Clearing vLLM cache..."

# --- Create and activate venv ------------------------------------------------
# Build a Python 3.12 venv with uv (--seed) and activate it in this shell so
# the installs that follow land inside it.
echo "Creating fresh virtual environment..."
uv venv --python 3.12 --seed "$VENV"
. "$VENV/bin/activate"
# --- Install PyTorch and vLLM ------------------------------------------------
# Installs one of three vLLM flavours selected by VLLM_VERSION
# (release | nightly | gpt-oss) into the active environment. Any other value,
# or a failed vLLM install, aborts the script with exit code 1.
echo "Installing vLLM and dependencies (version: $VLLM_VERSION)..."
case "$VLLM_VERSION" in
  release)
    echo "Installing vLLM release with PyTorch..."
    # Install vLLM with automatic PyTorch backend selection.
    # The requirement MUST be quoted: unquoted, the shell parses `>=0.10.0` as
    # an output redirection, so uv would see a bare `vllm` and its stdout would
    # land in a file named "=0.10.0".
    if ! uv pip install 'vllm>=0.10.0' --torch-backend=auto; then
      echo "ERROR: Failed to install vLLM" >&2
      exit 1
    fi
    ;;
  nightly)
    echo "Installing vLLM nightly with PyTorch..."
    echo "This will install the latest nightly build of vLLM..."
    # Install vLLM nightly with PyTorch
    if ! uv pip install -U vllm \
      --torch-backend=auto \
      --extra-index-url https://wheels.vllm.ai/nightly; then
      echo "ERROR: Failed to install vLLM nightly" >&2
      exit 1
    fi
    echo "vLLM nightly successfully installed!"
    ;;
  gpt-oss)
    echo "Installing GPT-OSS special build with PyTorch nightly..."
    echo "WARNING: This build is ONLY for GPT-OSS models!"
    echo "Installing PyTorch nightly and cutting-edge dependencies..."
    # Convert CUDA version format for PyTorch (12.4 -> cu124)
    PYTORCH_CUDA="cu${DRIVER_CUDA_VERSION/./}"
    echo "Using PyTorch nightly with ${PYTORCH_CUDA} (driver supports ${DRIVER_CUDA_VERSION})"
    # The GPT-OSS build will pull PyTorch nightly and other dependencies
    # via the extra index URLs. We don't pre-install torch here to avoid conflicts.
    if ! uv pip install --pre 'vllm==0.10.1+gptoss' \
      --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
      --extra-index-url "https://download.pytorch.org/whl/nightly/${PYTORCH_CUDA}" \
      --index-strategy unsafe-best-match; then
      echo "ERROR: Failed to install GPT-OSS vLLM build" >&2
      echo "This automatically installs PyTorch nightly with ${PYTORCH_CUDA}, Triton nightly, and other dependencies" >&2
      exit 1
    fi
    # Install gpt-oss library for tool support (best effort: a failure only warns)
    if ! uv pip install gpt-oss; then
      echo "WARNING: Failed to install gpt-oss library (needed for tool use)" >&2
    fi
    ;;
  *)
    echo "ERROR: Unknown vLLM version: $VLLM_VERSION" >&2
    exit 1
    ;;
esac
# --- Install additional packages ---------------------------------------------
# Helper libraries installed next to vLLM.
# Note: tensorrt removed temporarily due to CUDA 13.0 compatibility issues
# TensorRT still depends on deprecated nvidia-cuda-runtime-cu13 package
echo "Installing additional packages..."
extra_packages=(huggingface-hub psutil hf_transfer)
uv pip install "${extra_packages[@]}"

# --- FlashInfer installation (optional, improves performance) ----------------
# Best effort: a failed install is reported but does not stop the script.
echo "Attempting FlashInfer installation (optional)..."
flashinfer_status="FlashInfer installed successfully"
uv pip install flashinfer-python \
  || flashinfer_status="FlashInfer not available, using Flash Attention instead"
echo "$flashinfer_status"
# --- Mount storage if provided -----------------------------------------------
# Runs only when a mount command was supplied. A failing mount command is
# reported and the script carries on.
if [[ -n "$MOUNT_COMMAND" ]]; then
  echo "Setting up mount..."

  # Make sure the mount point directory exists before mounting onto it
  mkdir -p "$MODELS_PATH"

  # NOTE(review): eval executes the --mount string verbatim in this shell;
  # presumably it always comes from the operator's own pi CLI configuration.
  if ! eval "$MOUNT_COMMAND"; then
    echo "WARNING: Mount command failed, continuing without mount"
  fi

  # Report whether MODELS_PATH is now a mount point (informational only)
  if mountpoint -q "$MODELS_PATH" 2>/dev/null; then
    mount_report="Storage successfully mounted at $MODELS_PATH"
  else
    mount_report="Note: $MODELS_PATH is not a mount point (might be local storage)"
  fi
  echo "$mount_report"
fi
# --- Model storage setup ------------------------------------------------------
# Ensures MODELS_PATH exists and is writable, creates
# $MODELS_PATH/huggingface/hub, and makes ~/.cache/huggingface a symlink to
# $MODELS_PATH/huggingface. Exits 1 if the path is not writable or the link
# cannot be verified.
echo ""
echo "=== Setting up model storage ==="
echo "Storage path: $MODELS_PATH"

# Check if the path exists and is writable
if [[ ! -d "$MODELS_PATH" ]]; then
  echo "Creating model storage directory: $MODELS_PATH"
  mkdir -p "$MODELS_PATH"
fi
if [[ ! -w "$MODELS_PATH" ]]; then
  echo "ERROR: Model storage path is not writable: $MODELS_PATH" >&2
  echo "Please check permissions" >&2
  exit 1
fi

hf_cache_link="$HOME/.cache/huggingface"
hf_store_dir="${MODELS_PATH}/huggingface"

# Create the huggingface cache directory structure in the models path
mkdir -p "${hf_store_dir}/hub"
# Create parent directory of the link if needed
mkdir -p "$HOME/.cache"

if [[ ! -L "$hf_cache_link" && "$hf_cache_link" -ef "$hf_store_dir" ]]; then
  # The cache location already IS the store directory (same inode, not via a
  # link at this path). Removing it would delete the store itself and the new
  # link would then point at itself, so leave it alone.
  echo "Note: ~/.cache/huggingface already resolves to ${hf_store_dir}; no symlink needed"
else
  # Remove any existing cache directory or symlink. rm on a symlink removes
  # only the link. Errors are not suppressed: if removal fails, stop here
  # rather than fail later in ln with an unrelated message.
  if [[ -e "$hf_cache_link" || -L "$hf_cache_link" ]]; then
    echo "Removing existing ~/.cache/huggingface..."
    rm -rf -- "$hf_cache_link"
  fi
  # Create symlink from ~/.cache/huggingface to the models path
  ln -s "$hf_store_dir" "$hf_cache_link"
  echo "Created symlink: ~/.cache/huggingface -> ${MODELS_PATH}/huggingface"
fi

# Verify the cache path resolves to a usable directory
if [[ -d "${hf_cache_link}/hub" ]]; then
  echo "✓ Model storage configured successfully"
  # Check available space. -P keeps each filesystem on one output line so the
  # 4th column of line 2 is always the "available" figure.
  AVAILABLE_SPACE=$(df -hP "$MODELS_PATH" | awk 'NR==2 {print $4}')
  echo "Available space: $AVAILABLE_SPACE"
else
  echo "ERROR: Could not verify model storage setup" >&2
  echo "The symlink was created but the target directory is not accessible" >&2
  exit 1
fi
# --- Configure environment ----------------------------------------------------
# Creates ~/.config/vllm/do_not_track, (re)writes the "Pi vLLM environment"
# block at the end of ~/.bashrc, and creates ~/.vllm_logs.
mkdir -p ~/.config/vllm
touch ~/.config/vllm/do_not_track

bashrc_file="$HOME/.bashrc"
env_header='# Pi vLLM environment'
env_footer='export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True'

# Drop blocks written by earlier runs so re-running setup replaces the
# environment instead of appending one more copy each time. A block is every
# line from the header through the footer line; a header with no matching
# footer is kept untouched so unrelated content is never deleted.
touch "$bashrc_file"
bashrc_tmp=$(mktemp)
awk -v header="$env_header" -v footer="$env_footer" '
  !held && $0 == header { held = 1; buf = $0 ORS; next }
  held {
    buf = buf $0 ORS
    if ($0 == footer) { held = 0; buf = "" }
    next
  }
  { print }
  END { if (held) printf "%s", buf }
' "$bashrc_file" > "$bashrc_tmp"
# Copy the contents back (rather than mv) so ~/.bashrc keeps its permissions.
cat "$bashrc_tmp" > "$bashrc_file"
rm -f -- "$bashrc_tmp"

# Write environment to .bashrc for persistence.
# Single-quoted text is intentionally literal: $HOME, $PATH and
# $LD_LIBRARY_PATH must expand when .bashrc is sourced, not now.
# Secrets go through printf %q so characters such as quotes, `$` or backticks
# in a token cannot break or inject into .bashrc.
# shellcheck disable=SC2016
{
  printf '%s\n' "$env_header"
  printf '%s\n' '[ -d "$HOME/venv" ] && source "$HOME/venv/bin/activate"'
  printf 'export PATH="/usr/local/cuda-%s/bin:$HOME/.local/bin:$PATH"\n' "$DRIVER_CUDA_VERSION"
  printf 'export LD_LIBRARY_PATH="/usr/local/cuda-%s/lib64:${LD_LIBRARY_PATH:-}"\n' "$DRIVER_CUDA_VERSION"
  printf 'export HF_TOKEN=%q\n' "$HF_TOKEN"
  printf 'export PI_API_KEY=%q\n' "$PI_API_KEY"
  printf 'export HUGGING_FACE_HUB_TOKEN=%q\n' "$HF_TOKEN"
  printf '%s\n' \
    'export HF_HUB_ENABLE_HF_TRANSFER=1' \
    'export VLLM_NO_USAGE_STATS=1' \
    'export VLLM_DO_NOT_TRACK=1' \
    'export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1' \
    "$env_footer"
} >> "$bashrc_file"

# Create log directory for vLLM
mkdir -p ~/.vllm_logs
# --- Output GPU info for pi CLI to parse -------------------------------------
# Emits one JSON object per GPU between the GPU_INFO start/end marker lines,
# followed by the closing banner.
printf '%s\n' "" "===GPU_INFO_START==="
nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader \
  | while IFS=, read -r gpu_index gpu_name gpu_memory; do
    # xargs with no command echoes its input back with the surrounding
    # whitespace stripped
    gpu_index=$(xargs <<< "$gpu_index")
    gpu_name=$(xargs <<< "$gpu_name")
    gpu_memory=$(xargs <<< "$gpu_memory")
    printf '{"id": %s, "name": "%s", "memory": "%s"}\n' \
      "$gpu_index" "$gpu_name" "$gpu_memory"
  done
printf '%s\n' \
  "===GPU_INFO_END===" \
  "" \
  "=== Setup complete ===" \
  "Pod is ready for vLLM deployments" \
  "Models will be cached at: $MODELS_PATH"