mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 05:02:14 +00:00
Initial monorepo setup with npm workspaces and dual TypeScript configuration
- Set up npm workspaces for three packages: pi-tui, pi-agent, and pi (pods) - Implemented dual TypeScript configuration: - Root tsconfig.json with path mappings for development and type checking - Package-specific tsconfig.build.json for clean production builds - Configured lockstep versioning with sync script for inter-package dependencies - Added comprehensive documentation for development and publishing workflows - All packages at version 0.5.0 ready for npm publishing
This commit is contained in:
commit
a74c5da112
63 changed files with 14558 additions and 0 deletions
83
packages/pods/scripts/model_run.sh
Normal file
83
packages/pods/scripts/model_run.sh
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env bash
# Model runner script - runs sequentially, killed by pi stop
set -euo pipefail

# These values are replaced before upload by pi CLI
MODEL_ID="{{MODEL_ID}}"
NAME="{{NAME}}"
PORT="{{PORT}}"
VLLM_ARGS="{{VLLM_ARGS}}"

# Exit hook: report our status and reap anything we spawned so no vLLM
# process outlives the runner.
cleanup() {
  local rc=$?
  echo "Model runner exiting with code $rc"
  # Kill direct children; ignore "no matching process"
  pkill -P $$ 2>/dev/null || true
  exit "$rc"
}
trap cleanup EXIT TERM INT
|
||||
# Force colored output even when not a TTY
export FORCE_COLOR=1
export PYTHONUNBUFFERED=1
export TERM=xterm-256color
export RICH_FORCE_TERMINAL=1
export CLICOLOR_FORCE=1

# Source virtual environment (created during pod setup)
source /root/venv/bin/activate

echo "========================================="
echo "Model Run: $NAME"
echo "Model ID: $MODEL_ID"
echo "Port: $PORT"
if [ -n "$VLLM_ARGS" ]; then
  echo "vLLM Args: $VLLM_ARGS"
fi
echo "========================================="
echo ""

# Download model (with color progress bars).
# BUGFIX: the original ran the download and then checked `[ $? -ne 0 ]`;
# under `set -e` a failing download exits the script immediately, so that
# check was dead code. Test the command directly instead.
echo "Downloading model (will skip if cached)..."
if ! HF_HUB_ENABLE_HF_TRANSFER=1 hf download "$MODEL_ID"; then
  echo "❌ ERROR: Failed to download model" >&2
  exit 1
fi

echo ""
echo "✅ Model download complete"
echo ""
# Build the vLLM invocation as an argv array so MODEL_ID and PI_API_KEY
# survive arbitrary characters (the original single-quote string + `bash -c`
# broke on embedded quotes). VLLM_ARGS is a pre-templated flag string and is
# intentionally word-split.
# PI_API_KEY must come from the environment (pod setup writes it to .bashrc).
: "${PI_API_KEY:?PI_API_KEY must be set in the environment}"

VLLM_CMD=(vllm serve "$MODEL_ID" --port "$PORT" --api-key "$PI_API_KEY")
if [ -n "$VLLM_ARGS" ]; then
  # shellcheck disable=SC2206 -- deliberate splitting of templated args
  VLLM_CMD+=($VLLM_ARGS)
fi

echo "Starting vLLM server..."
echo "Command: ${VLLM_CMD[*]}"
echo "========================================="
echo ""

# Run vLLM in background so we can monitor it
echo "Starting vLLM process..."
"${VLLM_CMD[@]}" &
VLLM_PID=$!

# Monitor the vLLM process.
# BUGFIX: a bare `wait $VLLM_PID` aborts the whole script under `set -e`
# when vLLM fails, so the exit code was never captured and the error
# branch below was unreachable. Guard the wait to capture the status.
echo "Monitoring vLLM process (PID: $VLLM_PID)..."
VLLM_EXIT_CODE=0
wait "$VLLM_PID" || VLLM_EXIT_CODE=$?

if [ $VLLM_EXIT_CODE -ne 0 ]; then
  echo "❌ ERROR: vLLM exited with code $VLLM_EXIT_CODE" >&2
  # Make sure to exit the script command too
  kill -TERM $$ 2>/dev/null || true
  exit $VLLM_EXIT_CODE
fi

echo "✅ vLLM exited normally"
exit 0
334
packages/pods/scripts/pod_setup.sh
Executable file
334
packages/pods/scripts/pod_setup.sh
Executable file
|
|
@ -0,0 +1,334 @@
|
|||
#!/usr/bin/env bash
# GPU pod bootstrap for vLLM deployment
set -euo pipefail

# Arguments handed over by the pi CLI; everything defaults to empty except
# the vLLM channel, which falls back to the stable release.
MOUNT_COMMAND=""
MODELS_PATH=""
HF_TOKEN=""
PI_API_KEY=""
VLLM_VERSION="release" # Default to release

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mount)        MOUNT_COMMAND="$2"; shift 2 ;;
    --models-path)  MODELS_PATH="$2";   shift 2 ;;
    --hf-token)     HF_TOKEN="$2";      shift 2 ;;
    --vllm-api-key) PI_API_KEY="$2";    shift 2 ;;
    --vllm)         VLLM_VERSION="$2";  shift 2 ;;
    *)
      echo "ERROR: Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Validate required parameters (guard-clause style; same messages/exits)
if [ -z "$HF_TOKEN" ]; then
  echo "ERROR: HF_TOKEN is required" >&2
  exit 1
fi
if [ -z "$PI_API_KEY" ]; then
  echo "ERROR: PI_API_KEY is required" >&2
  exit 1
fi
if [ -z "$MODELS_PATH" ]; then
  echo "ERROR: MODELS_PATH is required" >&2
  exit 1
fi
|
||||
echo "=== Starting pod setup ==="

# Install system dependencies (apt-get: stable CLI for scripting; the
# original mixed `apt` and `apt-get`, and `apt` warns about an unstable CLI)
apt-get update -y
apt-get install -y python3-pip python3-venv git build-essential cmake ninja-build curl wget lsb-release htop pkg-config

# --- Install matching CUDA toolkit -------------------------------------------
echo "Checking CUDA driver version..."
# NOTE(review): field $9 of the nvidia-smi banner is fragile across driver
# versions — verify on the target pod images.
DRIVER_CUDA_VERSION=$(nvidia-smi | grep "CUDA Version" | awk '{print $9}')
echo "Driver supports CUDA: $DRIVER_CUDA_VERSION"

# Check if nvcc exists and its version
if command -v nvcc &> /dev/null; then
  NVCC_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d, -f1)
  echo "Current nvcc version: $NVCC_VERSION"
else
  NVCC_VERSION="none"
  echo "nvcc not found"
fi

# Install CUDA toolkit matching driver version if needed
if [[ "$NVCC_VERSION" != "$DRIVER_CUDA_VERSION" ]]; then
  echo "Installing CUDA Toolkit $DRIVER_CUDA_VERSION to match driver..."

  # Detect Ubuntu version
  UBUNTU_VERSION=$(lsb_release -rs)
  UBUNTU_CODENAME=$(lsb_release -cs)

  echo "Detected Ubuntu $UBUNTU_VERSION ($UBUNTU_CODENAME)"

  # Map Ubuntu version to NVIDIA repo path
  case "$UBUNTU_VERSION" in
    24.04) REPO_PATH="ubuntu2404" ;;
    22.04) REPO_PATH="ubuntu2204" ;;
    20.04) REPO_PATH="ubuntu2004" ;;
    *)
      echo "Warning: Unsupported Ubuntu version $UBUNTU_VERSION, trying ubuntu2204"
      REPO_PATH="ubuntu2204"
      ;;
  esac

  # Add NVIDIA package repositories
  wget "https://developer.download.nvidia.com/compute/cuda/repos/${REPO_PATH}/x86_64/cuda-keyring_1.1-1_all.deb"
  dpkg -i cuda-keyring_1.1-1_all.deb
  rm cuda-keyring_1.1-1_all.deb
  apt-get update

  # Install specific CUDA toolkit version.
  # Convert version format (12.9 -> 12-9); parameter expansion replaces
  # the original unquoted `echo | sed` pipeline (two forks, SC2086 risk).
  CUDA_VERSION_APT="${DRIVER_CUDA_VERSION/./-}"
  echo "Installing cuda-toolkit-${CUDA_VERSION_APT}..."
  apt-get install -y "cuda-toolkit-${CUDA_VERSION_APT}"

  # Add CUDA to PATH
  export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH"
  export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}"

  # Verify installation
  nvcc --version
else
  echo "CUDA toolkit $NVCC_VERSION matches driver version"
  export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH"
  export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}"
fi
|
||||
# --- Install uv (fast Python package manager) --------------------------------
curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"

# --- Install Python 3.12 if not available ------------------------------------
if ! command -v python3.12 &> /dev/null; then
  echo "Python 3.12 not found. Installing via uv..."
  uv python install 3.12
fi

# --- Clean up existing environments and caches -------------------------------
echo "Cleaning up existing environments and caches..."

# Remove a directory, if present, announcing it first.
wipe_dir() {
  local msg=$1 dir=$2
  if [ -d "$dir" ]; then
    echo "$msg"
    rm -rf "$dir"
  fi
}

# Start from a clean venv and clear caches so installs are fresh and
# version conflicts from previous runs cannot leak through.
VENV="$HOME/venv"
wipe_dir "Removing existing virtual environment..." "$VENV"
wipe_dir "Clearing uv cache..." "$HOME/.cache/uv"
wipe_dir "Clearing vLLM cache..." "$HOME/.cache/vllm"

# --- Create and activate venv ------------------------------------------------
echo "Creating fresh virtual environment..."
uv venv --python 3.12 --seed "$VENV"
source "$VENV/bin/activate"
||||
|
||||
# --- Install PyTorch and vLLM ------------------------------------------------
echo "Installing vLLM and dependencies (version: $VLLM_VERSION)..."
case "$VLLM_VERSION" in
  release)
    echo "Installing vLLM release with PyTorch..."
    # Install vLLM with automatic PyTorch backend selection;
    # vLLM will automatically install the correct PyTorch version.
    # BUGFIX: the version spec must be quoted — an unquoted `vllm>=0.10.0`
    # is parsed by the shell as a redirection (creates a file named
    # "=0.10.0") and silently installs an unconstrained vllm.
    uv pip install 'vllm>=0.10.0' --torch-backend=auto || {
      echo "ERROR: Failed to install vLLM"
      exit 1
    }
    ;;
  nightly)
    echo "Installing vLLM nightly with PyTorch..."
    echo "This will install the latest nightly build of vLLM..."

    # Install vLLM nightly with PyTorch
    uv pip install -U vllm \
      --torch-backend=auto \
      --extra-index-url https://wheels.vllm.ai/nightly || {
      echo "ERROR: Failed to install vLLM nightly"
      exit 1
    }

    echo "vLLM nightly successfully installed!"
    ;;
  gpt-oss)
    echo "Installing GPT-OSS special build with PyTorch nightly..."
    echo "WARNING: This build is ONLY for GPT-OSS models!"
    echo "Installing PyTorch nightly and cutting-edge dependencies..."

    # Convert CUDA version format for PyTorch (12.4 -> cu124);
    # parameter expansion replaces the original unquoted `echo | sed`.
    PYTORCH_CUDA="cu${DRIVER_CUDA_VERSION/./}"
    echo "Using PyTorch nightly with ${PYTORCH_CUDA} (driver supports ${DRIVER_CUDA_VERSION})"

    # The GPT-OSS build will pull PyTorch nightly and other dependencies
    # via the extra index URLs. We don't pre-install torch here to avoid conflicts.
    uv pip install --pre 'vllm==0.10.1+gptoss' \
      --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
      --extra-index-url "https://download.pytorch.org/whl/nightly/${PYTORCH_CUDA}" \
      --index-strategy unsafe-best-match || {
      echo "ERROR: Failed to install GPT-OSS vLLM build"
      echo "This automatically installs PyTorch nightly with ${PYTORCH_CUDA}, Triton nightly, and other dependencies"
      exit 1
    }

    # Install gpt-oss library for tool support
    uv pip install gpt-oss || {
      echo "WARNING: Failed to install gpt-oss library (needed for tool use)"
    }
    ;;
  *)
    echo "ERROR: Unknown vLLM version: $VLLM_VERSION"
    exit 1
    ;;
esac
||||
|
||||
# --- Install additional packages ---------------------------------------------
echo "Installing additional packages..."
EXTRA_PKGS=(huggingface-hub psutil tensorrt hf_transfer)
uv pip install "${EXTRA_PKGS[@]}"

# --- FlashInfer installation (optional, improves performance) ----------------
# Best-effort: fall back to Flash Attention when no wheel is available.
echo "Attempting FlashInfer installation (optional)..."
if ! uv pip install flashinfer-python; then
  echo "FlashInfer not available, using Flash Attention instead"
else
  echo "FlashInfer installed successfully"
fi
||||
|
||||
# --- Mount storage if provided -----------------------------------------------
if [ -n "$MOUNT_COMMAND" ]; then
  echo "Setting up mount..."

  # Make sure the mount point exists before mounting onto it
  mkdir -p "$MODELS_PATH"

  # Execute the mount command (supplied by the pi CLI).
  # Failure is deliberately non-fatal: local disk still works.
  if ! eval "$MOUNT_COMMAND"; then
    echo "WARNING: Mount command failed, continuing without mount"
  fi

  # Verify mount succeeded (optional, may not always be a mount point)
  if mountpoint -q "$MODELS_PATH" 2>/dev/null; then
    echo "Storage successfully mounted at $MODELS_PATH"
  else
    echo "Note: $MODELS_PATH is not a mount point (might be local storage)"
  fi
fi
||||
|
||||
# --- Model storage setup ------------------------------------------------------
echo ""
echo "=== Setting up model storage ==="
echo "Storage path: $MODELS_PATH"

# Ensure the storage path exists and is writable before linking caches to it
if [ ! -d "$MODELS_PATH" ]; then
  echo "Creating model storage directory: $MODELS_PATH"
  mkdir -p "$MODELS_PATH"
fi

if [ ! -w "$MODELS_PATH" ]; then
  echo "ERROR: Model storage path is not writable: $MODELS_PATH"
  echo "Please check permissions"
  exit 1
fi

# Create the huggingface cache directory structure in the models path
mkdir -p "${MODELS_PATH}/huggingface/hub"

# Drop any pre-existing cache directory or (possibly dangling) symlink
if [[ -e ~/.cache/huggingface || -L ~/.cache/huggingface ]]; then
  echo "Removing existing ~/.cache/huggingface..."
  rm -rf ~/.cache/huggingface 2>/dev/null || true
fi

# Create parent directory if needed
mkdir -p ~/.cache

# Point the default HF cache location at the model storage path
ln -s "${MODELS_PATH}/huggingface" ~/.cache/huggingface
echo "Created symlink: ~/.cache/huggingface -> ${MODELS_PATH}/huggingface"

# Verify the symlink resolves to the hub directory we just created
if [ -d ~/.cache/huggingface/hub ]; then
  echo "✓ Model storage configured successfully"

  # Report free space on the storage filesystem (column 4 of df's data row)
  AVAILABLE_SPACE=$(df -h "$MODELS_PATH" | awk 'NR==2 {print $4}')
  echo "Available space: $AVAILABLE_SPACE"
else
  echo "ERROR: Could not verify model storage setup"
  echo "The symlink was created but the target directory is not accessible"
  exit 1
fi
||||
|
||||
# --- Configure environment ----------------------------------------------------
# Opt out of vLLM usage tracking
mkdir -p ~/.config/vllm
touch ~/.config/vllm/do_not_track

# Write environment to .bashrc for persistence.
# NOTE(review): this appends on every run (re-running setup duplicates the
# block) and stores HF_TOKEN / PI_API_KEY in plaintext — presumably fine on
# a single-tenant pod, but confirm.
cat >> ~/.bashrc << EOF

# Pi vLLM environment
[ -d "\$HOME/venv" ] && source "\$HOME/venv/bin/activate"
export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:\$HOME/.local/bin:\$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:\${LD_LIBRARY_PATH:-}"
export HF_TOKEN="${HF_TOKEN}"
export PI_API_KEY="${PI_API_KEY}"
export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
export HF_HUB_ENABLE_HF_TRANSFER=1
export VLLM_NO_USAGE_STATS=1
export VLLM_DO_NOT_TRACK=1
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
EOF

# Create log directory for vLLM
mkdir -p ~/.vllm_logs

# --- Output GPU info for pi CLI to parse -------------------------------------
# Emit one JSON object per GPU between the sentinel markers.
echo ""
echo "===GPU_INFO_START==="
nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader | while IFS=, read -r gpu_id gpu_name gpu_mem; do
  # Trim surrounding whitespace from each CSV field
  gpu_id=$(echo "$gpu_id" | xargs)
  gpu_name=$(echo "$gpu_name" | xargs)
  gpu_mem=$(echo "$gpu_mem" | xargs)
  printf '{"id": %s, "name": "%s", "memory": "%s"}\n' "$gpu_id" "$gpu_name" "$gpu_mem"
done
echo "===GPU_INFO_END==="

echo ""
echo "=== Setup complete ==="
echo "Pod is ready for vLLM deployments"
echo "Models will be cached at: $MODELS_PATH"
Loading…
Add table
Add a link
Reference in a new issue