mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 15:03:31 +00:00
336 lines
No EOL
11 KiB
Bash
Executable file
336 lines
No EOL
11 KiB
Bash
Executable file
#!/usr/bin/env bash
# GPU pod bootstrap for vLLM deployment
#
# Options (each takes exactly one value):
#   --mount COMMAND       shell command that mounts the model storage (optional)
#   --models-path PATH    directory where models are stored
#   --hf-token TOKEN      Hugging Face token
#   --vllm-api-key KEY    API key, kept in PI_API_KEY
#   --vllm VERSION        vLLM flavour to install (defaults to "release")
set -euo pipefail

# Parse arguments passed from pi CLI
MOUNT_COMMAND=""
MODELS_PATH=""
HF_TOKEN=""
PI_API_KEY=""
VLLM_VERSION="release" # Default to release

# Abort with a readable message when an option is given without its value.
# Without this check "$2" would trip `set -u` and the caller would only see
# a bare "unbound variable" error.
#   $1 - option name as typed by the caller (used in the error message)
#   $2 - number of arguments still unparsed, including the option itself
require_value() {
  if (( $2 < 2 )); then
    echo "ERROR: Option $1 requires a value" >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mount)
      require_value "$1" "$#"
      MOUNT_COMMAND="$2"
      shift 2
      ;;
    --models-path)
      require_value "$1" "$#"
      MODELS_PATH="$2"
      shift 2
      ;;
    --hf-token)
      require_value "$1" "$#"
      HF_TOKEN="$2"
      shift 2
      ;;
    --vllm-api-key)
      require_value "$1" "$#"
      PI_API_KEY="$2"
      shift 2
      ;;
    --vllm)
      require_value "$1" "$#"
      VLLM_VERSION="$2"
      shift 2
      ;;
    *)
      echo "ERROR: Unknown option: $1" >&2
      exit 1
      ;;
  esac
done
|
|
|
|
# Validate required parameters: stop at the first one that is still empty,
# checking them in the order HF_TOKEN, PI_API_KEY, MODELS_PATH.
for required_param in HF_TOKEN PI_API_KEY MODELS_PATH; do
  # ${!required_param} reads the variable whose name is held in required_param.
  if [[ -z "${!required_param}" ]]; then
    echo "ERROR: ${required_param} is required" >&2
    exit 1
  fi
done
|
|
|
|
echo "=== Starting pod setup ==="

# Install system dependencies.
# apt-get is used rather than apt: apt warns that its command-line interface
# is not stable for scripts, and the CUDA section of this script already
# relies on apt-get. DEBIAN_FRONTEND=noninteractive keeps package
# configuration prompts from stalling an unattended bootstrap.
system_packages=(
  python3-pip
  python3-venv
  git
  build-essential
  cmake
  ninja-build
  curl
  wget
  lsb-release
  htop
  pkg-config
)
apt-get update -y
DEBIAN_FRONTEND=noninteractive apt-get install -y "${system_packages[@]}"
|
|
|
|
# --- Install matching CUDA toolkit -------------------------------------------
# Reads the CUDA version supported by the driver from nvidia-smi, compares it
# with the installed nvcc (if any) and installs cuda-toolkit-<major>-<minor>
# from NVIDIA's apt repository when they differ. In both cases PATH and
# LD_LIBRARY_PATH are pointed at /usr/local/cuda-<version>.
echo "Checking CUDA driver version..."
# On the nvidia-smi banner line containing "CUDA Version", the version is the
# 9th whitespace-separated field.
DRIVER_CUDA_VERSION=$(nvidia-smi | awk '/CUDA Version/ {print $9}') || {
  echo "ERROR: nvidia-smi failed; cannot determine the driver's CUDA version" >&2
  exit 1
}
# Everything below builds package names and paths from this value, so refuse
# to continue with anything that is not a plain <major>.<minor>.
if [[ ! "$DRIVER_CUDA_VERSION" =~ ^[0-9]+\.[0-9]+$ ]]; then
  echo "ERROR: Could not parse CUDA version from nvidia-smi (got '${DRIVER_CUDA_VERSION}')" >&2
  exit 1
fi
echo "Driver supports CUDA: $DRIVER_CUDA_VERSION"

# Check if nvcc exists and its version
if command -v nvcc &> /dev/null; then
  # nvcc reports "... release X.Y, VX.Y.Z". Keep only X.Y so the value is
  # comparable with DRIVER_CUDA_VERSION; taking the "VX.Y.Z" field instead
  # can never match and would force a reinstall on every run.
  NVCC_VERSION=$(nvcc --version | sed -n 's/.*release \([0-9][0-9.]*\),.*/\1/p')
  echo "Current nvcc version: $NVCC_VERSION"
else
  NVCC_VERSION="none"
  echo "nvcc not found"
fi

# Install CUDA toolkit matching driver version if needed
if [[ "$NVCC_VERSION" != "$DRIVER_CUDA_VERSION" ]]; then
  echo "Installing CUDA Toolkit $DRIVER_CUDA_VERSION to match driver..."

  # Detect Ubuntu version
  UBUNTU_VERSION=$(lsb_release -rs)
  UBUNTU_CODENAME=$(lsb_release -cs)

  echo "Detected Ubuntu $UBUNTU_VERSION ($UBUNTU_CODENAME)"

  # Map Ubuntu version to NVIDIA repo path
  case "$UBUNTU_VERSION" in
    24.04) REPO_PATH="ubuntu2404" ;;
    22.04) REPO_PATH="ubuntu2204" ;;
    20.04) REPO_PATH="ubuntu2004" ;;
    *)
      echo "Warning: Unsupported Ubuntu version $UBUNTU_VERSION, trying ubuntu2204"
      REPO_PATH="ubuntu2204"
      ;;
  esac

  # Add NVIDIA package repositories.
  # The keyring package is downloaded to a temporary file so nothing is left
  # in (or required of) the current working directory.
  # NOTE: the repository path is hard-coded to x86_64.
  keyring_deb=$(mktemp --suffix=.deb)
  wget -O "$keyring_deb" \
    "https://developer.download.nvidia.com/compute/cuda/repos/${REPO_PATH}/x86_64/cuda-keyring_1.1-1_all.deb"
  dpkg -i "$keyring_deb"
  rm -f -- "$keyring_deb"
  apt-get update

  # Install specific CUDA toolkit version
  # Convert version format (12.9 -> 12-9)
  CUDA_VERSION_APT=${DRIVER_CUDA_VERSION/./-}
  echo "Installing cuda-toolkit-${CUDA_VERSION_APT}..."
  apt-get install -y "cuda-toolkit-${CUDA_VERSION_APT}"

  # Add CUDA to PATH
  export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH"
  export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}"

  # Verify installation
  nvcc --version
else
  echo "CUDA toolkit $NVCC_VERSION matches driver version"
  export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH"
  export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}"
fi
|
|
|
|
# --- Install uv (fast Python package manager) --------------------------------
# Pipe the installer script from astral.sh straight into sh, then put
# ~/.local/bin on PATH for the rest of this script.
curl -LsSf https://astral.sh/uv/install.sh | sh
PATH="$HOME/.local/bin:$PATH"
export PATH

# --- Install Python 3.12 if not available ------------------------------------
# Only ask uv for an interpreter when no python3.12 is found on PATH.
command -v python3.12 > /dev/null 2>&1 || {
  echo "Python 3.12 not found. Installing via uv..."
  uv python install 3.12
}
|
|
|
|
# --- Clean up existing environments and caches -------------------------------
echo "Cleaning up existing environments and caches..."

# Location of the virtual environment; reused when the venv is created later.
VENV="$HOME/venv"

# Delete a directory if it exists, announcing it first.
#   $1 - message printed before the directory is removed
#   $2 - directory to remove
remove_dir_if_present() {
  local notice=$1
  local target=$2
  [ -d "$target" ] || return 0
  echo "$notice"
  rm -rf "$target"
}

# Old venv, uv cache and vLLM cache are all dropped so the install starts clean.
remove_dir_if_present "Removing existing virtual environment..." "$VENV"
remove_dir_if_present "Clearing uv cache..." "$HOME/.cache/uv"
remove_dir_if_present "Clearing vLLM cache..." "$HOME/.cache/vllm"
|
|
|
|
# --- Create and activate venv ------------------------------------------------
# Build a Python 3.12 environment at $VENV (seeded by uv) and activate it in
# this shell so the installs that follow land inside it.
printf '%s\n' "Creating fresh virtual environment..."
uv venv --seed --python 3.12 "$VENV"
. "$VENV/bin/activate"
|
|
|
|
# --- Install PyTorch and vLLM ------------------------------------------------
# Installs the vLLM flavour selected by VLLM_VERSION (release, nightly or
# gpt-oss) into the active environment; any other value aborts the script.
echo "Installing vLLM and dependencies (version: $VLLM_VERSION)..."
case "$VLLM_VERSION" in
  release)
    echo "Installing vLLM release with PyTorch..."
    # Install vLLM with automatic PyTorch backend selection
    # vLLM will automatically install the correct PyTorch version
    #
    # The requirement is quoted on purpose: unquoted, the shell treats
    # ">=0.10.0" as a redirection, installs plain "vllm" and writes uv's
    # stdout to a file named "=0.10.0".
    uv pip install 'vllm>=0.10.0' --torch-backend=auto || {
      echo "ERROR: Failed to install vLLM"
      exit 1
    }
    ;;
  nightly)
    echo "Installing vLLM nightly with PyTorch..."
    echo "This will install the latest nightly build of vLLM..."

    # Install vLLM nightly with PyTorch
    uv pip install -U vllm \
      --torch-backend=auto \
      --extra-index-url https://wheels.vllm.ai/nightly || {
      echo "ERROR: Failed to install vLLM nightly"
      exit 1
    }

    echo "vLLM nightly successfully installed!"
    ;;
  gpt-oss)
    echo "Installing GPT-OSS special build with PyTorch nightly..."
    echo "WARNING: This build is ONLY for GPT-OSS models!"
    echo "Installing PyTorch nightly and cutting-edge dependencies..."

    # Convert CUDA version format for PyTorch (12.4 -> cu124)
    PYTORCH_CUDA="cu${DRIVER_CUDA_VERSION/./}"
    echo "Using PyTorch nightly with ${PYTORCH_CUDA} (driver supports ${DRIVER_CUDA_VERSION})"

    # The GPT-OSS build will pull PyTorch nightly and other dependencies
    # via the extra index URLs. We don't pre-install torch here to avoid conflicts.
    uv pip install --pre 'vllm==0.10.1+gptoss' \
      --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
      --extra-index-url "https://download.pytorch.org/whl/nightly/${PYTORCH_CUDA}" \
      --index-strategy unsafe-best-match || {
      echo "ERROR: Failed to install GPT-OSS vLLM build"
      echo "This automatically installs PyTorch nightly with ${PYTORCH_CUDA}, Triton nightly, and other dependencies"
      exit 1
    }

    # Install gpt-oss library for tool support.
    # Best effort: a failure only produces a warning and the script continues.
    uv pip install gpt-oss || {
      echo "WARNING: Failed to install gpt-oss library (needed for tool use)"
    }
    ;;
  *)
    echo "ERROR: Unknown vLLM version: $VLLM_VERSION"
    exit 1
    ;;
esac
|
|
|
|
# --- Install additional packages ---------------------------------------------
echo "Installing additional packages..."
# Note: tensorrt removed temporarily due to CUDA 13.0 compatibility issues
# TensorRT still depends on deprecated nvidia-cuda-runtime-cu13 package
extra_packages=(huggingface-hub psutil hf_transfer)
uv pip install "${extra_packages[@]}"

# --- FlashInfer installation (optional, improves performance) ----------------
# A failed install is tolerated: only the message that gets printed differs.
echo "Attempting FlashInfer installation (optional)..."
flashinfer_notice="FlashInfer installed successfully"
uv pip install flashinfer-python \
  || flashinfer_notice="FlashInfer not available, using Flash Attention instead"
echo "$flashinfer_notice"
|
|
|
|
# --- Mount storage if provided -----------------------------------------------
# Skipped entirely when no --mount command was supplied.
if [[ -n "$MOUNT_COMMAND" ]]; then
  echo "Setting up mount..."

  # The mount point has to exist before the mount command runs.
  mkdir -p "$MODELS_PATH"

  # NOTE(review): eval executes the caller-supplied string verbatim in this
  # shell; it is only as trustworthy as whoever provides --mount.
  # A failing mount is reported but does not stop the setup.
  if ! eval "$MOUNT_COMMAND"; then
    echo "WARNING: Mount command failed, continuing without mount"
  fi

  # Informational only: the path may legitimately not be a mount point.
  mount_report="Note: $MODELS_PATH is not a mount point (might be local storage)"
  if mountpoint -q "$MODELS_PATH" 2>/dev/null; then
    mount_report="Storage successfully mounted at $MODELS_PATH"
  fi
  echo "$mount_report"
fi
|
|
|
|
# --- Model storage setup ------------------------------------------------------
# Ensures MODELS_PATH exists and is writable, then replaces
# ~/.cache/huggingface with a symlink to MODELS_PATH/huggingface.
printf '%s\n' "" "=== Setting up model storage ===" "Storage path: $MODELS_PATH"

# Create the storage directory on first use.
[ -d "$MODELS_PATH" ] || {
  echo "Creating model storage directory: $MODELS_PATH"
  mkdir -p "$MODELS_PATH"
}

# Nothing below can work without write access, so stop here if it is missing.
[ -w "$MODELS_PATH" ] || {
  echo "ERROR: Model storage path is not writable: $MODELS_PATH"
  echo "Please check permissions"
  exit 1
}

hf_store="${MODELS_PATH}/huggingface"
hf_link="$HOME/.cache/huggingface"

# Hugging Face cache layout inside the models path.
mkdir -p "${hf_store}/hub"

# Drop whatever currently sits at the link location. -L is tested as well
# because -e is false for a symlink whose target is missing.
if [ -e "$hf_link" ] || [ -L "$hf_link" ]; then
  echo "Removing existing ~/.cache/huggingface..."
  rm -rf "$hf_link" 2>/dev/null || true
fi

# Parent of the link, then the link itself.
mkdir -p "$HOME/.cache"
ln -s "$hf_store" "$hf_link"
echo "Created symlink: ~/.cache/huggingface -> ${MODELS_PATH}/huggingface"

# The hub directory must be reachable through the new link.
if [ ! -d "${hf_link}/hub" ]; then
  echo "ERROR: Could not verify model storage setup"
  echo "The symlink was created but the target directory is not accessible"
  exit 1
fi
echo "✓ Model storage configured successfully"

# Report free space: 4th column of the second line of `df -h`.
AVAILABLE_SPACE=$(df -h "$MODELS_PATH" | awk 'NR==2 {print $4}')
echo "Available space: $AVAILABLE_SPACE"
|
|
|
|
# --- Configure environment ----------------------------------------------------
mkdir -p ~/.config/vllm
touch ~/.config/vllm/do_not_track

# Print the shell snippet that is appended to ~/.bashrc.
# Reads HF_TOKEN, PI_API_KEY and DRIVER_CUDA_VERSION from the calling shell.
# The two secrets are emitted through printf %q so that a value containing
# quotes, "$", backticks or backslashes is restored verbatim when .bashrc is
# sourced, instead of being expanded or executed at login.
render_pi_env() {
  local hf_token_q pi_api_key_q
  printf -v hf_token_q '%q' "${HF_TOKEN:-}"
  printf -v pi_api_key_q '%q' "${PI_API_KEY:-}"
  # Unescaped ${...} is expanded now; \$... is left for .bashrc to expand.
  cat << EOF

# Pi vLLM environment
[ -d "\$HOME/venv" ] && source "\$HOME/venv/bin/activate"
export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:\$HOME/.local/bin:\$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:\${LD_LIBRARY_PATH:-}"
export HF_TOKEN=${hf_token_q}
export PI_API_KEY=${pi_api_key_q}
export HUGGING_FACE_HUB_TOKEN=${hf_token_q}
export HF_HUB_ENABLE_HF_TRANSFER=1
export VLLM_NO_USAGE_STATS=1
export VLLM_DO_NOT_TRACK=1
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
EOF
}

# Write environment to .bashrc for persistence
render_pi_env >> ~/.bashrc

# Create log directory for vLLM
mkdir -p ~/.vllm_logs
|
|
|
|
# --- Output GPU info for pi CLI to parse -------------------------------------
# One JSON-style line per GPU is printed between the two marker lines.
echo ""
echo "===GPU_INFO_START==="
nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader \
  | while IFS=, read -r gpu_index gpu_name gpu_memory; do
    # xargs with no command echoes its input back with surrounding
    # whitespace removed.
    gpu_index=$(xargs <<< "$gpu_index")
    gpu_name=$(xargs <<< "$gpu_name")
    gpu_memory=$(xargs <<< "$gpu_memory")
    printf '{"id": %s, "name": "%s", "memory": "%s"}\n' \
      "$gpu_index" "$gpu_name" "$gpu_memory"
  done
echo "===GPU_INFO_END==="

# Closing banner.
printf '%s\n' \
  "" \
  "=== Setup complete ===" \
  "Pod is ready for vLLM deployments" \
  "Models will be cached at: $MODELS_PATH"