mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 12:00:15 +00:00
Initial monorepo setup with npm workspaces and dual TypeScript configuration
- Set up npm workspaces for three packages: pi-tui, pi-agent, and pi (pods) - Implemented dual TypeScript configuration: - Root tsconfig.json with path mappings for development and type checking - Package-specific tsconfig.build.json for clean production builds - Configured lockstep versioning with sync script for inter-package dependencies - Added comprehensive documentation for development and publishing workflows - All packages at version 0.5.0 ready for npm publishing
This commit is contained in:
commit
a74c5da112
63 changed files with 14558 additions and 0 deletions
703
packages/pods/src/commands/models.ts
Normal file
703
packages/pods/src/commands/models.ts
Normal file
|
|
@ -0,0 +1,703 @@
|
|||
import chalk from "chalk";
|
||||
import { spawn } from "child_process";
|
||||
import { readFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { getActivePod, loadConfig, saveConfig } from "../config.js";
|
||||
import { getModelConfig, getModelName, isKnownModel } from "../model-configs.js";
|
||||
import { sshExec } from "../ssh.js";
|
||||
import type { Pod } from "../types.js";
|
||||
|
||||
/**
|
||||
* Get the pod to use (active or override)
|
||||
*/
|
||||
const getPod = (podOverride?: string): { name: string; pod: Pod } => {
|
||||
if (podOverride) {
|
||||
const config = loadConfig();
|
||||
const pod = config.pods[podOverride];
|
||||
if (!pod) {
|
||||
console.error(chalk.red(`Pod '${podOverride}' not found`));
|
||||
process.exit(1);
|
||||
}
|
||||
return { name: podOverride, pod };
|
||||
}
|
||||
|
||||
const active = getActivePod();
|
||||
if (!active) {
|
||||
console.error(chalk.red("No active pod. Use 'pi pods active <name>' to set one."));
|
||||
process.exit(1);
|
||||
}
|
||||
return active;
|
||||
};
|
||||
|
||||
/**
|
||||
* Find next available port starting from 8001
|
||||
*/
|
||||
const getNextPort = (pod: Pod): number => {
|
||||
const usedPorts = Object.values(pod.models).map((m) => m.port);
|
||||
let port = 8001;
|
||||
while (usedPorts.includes(port)) {
|
||||
port++;
|
||||
}
|
||||
return port;
|
||||
};
|
||||
|
||||
/**
|
||||
* Select GPUs for model deployment (round-robin)
|
||||
*/
|
||||
const selectGPUs = (pod: Pod, count: number = 1): number[] => {
|
||||
if (count === pod.gpus.length) {
|
||||
// Use all GPUs
|
||||
return pod.gpus.map((g) => g.id);
|
||||
}
|
||||
|
||||
// Count GPU usage across all models
|
||||
const gpuUsage = new Map<number, number>();
|
||||
for (const gpu of pod.gpus) {
|
||||
gpuUsage.set(gpu.id, 0);
|
||||
}
|
||||
|
||||
for (const model of Object.values(pod.models)) {
|
||||
for (const gpuId of model.gpu) {
|
||||
gpuUsage.set(gpuId, (gpuUsage.get(gpuId) || 0) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort GPUs by usage (least used first)
|
||||
const sortedGPUs = Array.from(gpuUsage.entries())
|
||||
.sort((a, b) => a[1] - b[1])
|
||||
.map((entry) => entry[0]);
|
||||
|
||||
// Return the least used GPUs
|
||||
return sortedGPUs.slice(0, count);
|
||||
};
|
||||
|
||||
/**
|
||||
* Start a model
|
||||
*/
|
||||
export const startModel = async (
|
||||
modelId: string,
|
||||
name: string,
|
||||
options: {
|
||||
pod?: string;
|
||||
vllmArgs?: string[];
|
||||
memory?: string;
|
||||
context?: string;
|
||||
gpus?: number;
|
||||
},
|
||||
) => {
|
||||
const { name: podName, pod } = getPod(options.pod);
|
||||
|
||||
// Validation
|
||||
if (!pod.modelsPath) {
|
||||
console.error(chalk.red("Pod does not have a models path configured"));
|
||||
process.exit(1);
|
||||
}
|
||||
if (pod.models[name]) {
|
||||
console.error(chalk.red(`Model '${name}' already exists on pod '${podName}'`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const port = getNextPort(pod);
|
||||
|
||||
// Determine GPU allocation and vLLM args
|
||||
let gpus: number[] = [];
|
||||
let vllmArgs: string[] = [];
|
||||
let modelConfig = null;
|
||||
|
||||
if (options.vllmArgs?.length) {
|
||||
// Custom args override everything
|
||||
vllmArgs = options.vllmArgs;
|
||||
console.log(chalk.gray("Using custom vLLM args, GPU allocation managed by vLLM"));
|
||||
} else if (isKnownModel(modelId)) {
|
||||
// Handle --gpus parameter for known models
|
||||
if (options.gpus) {
|
||||
// Validate GPU count
|
||||
if (options.gpus > pod.gpus.length) {
|
||||
console.error(chalk.red(`Error: Requested ${options.gpus} GPUs but pod only has ${pod.gpus.length}`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Try to find config for requested GPU count
|
||||
modelConfig = getModelConfig(modelId, pod.gpus, options.gpus);
|
||||
if (modelConfig) {
|
||||
gpus = selectGPUs(pod, options.gpus);
|
||||
vllmArgs = [...(modelConfig.args || [])];
|
||||
} else {
|
||||
console.error(
|
||||
chalk.red(`Model '${getModelName(modelId)}' does not have a configuration for ${options.gpus} GPU(s)`),
|
||||
);
|
||||
console.error(chalk.yellow("Available configurations:"));
|
||||
|
||||
// Show available configurations
|
||||
for (let gpuCount = 1; gpuCount <= pod.gpus.length; gpuCount++) {
|
||||
const config = getModelConfig(modelId, pod.gpus, gpuCount);
|
||||
if (config) {
|
||||
console.error(chalk.gray(` - ${gpuCount} GPU(s)`));
|
||||
}
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
} else {
|
||||
// Find best config for this hardware (original behavior)
|
||||
for (let gpuCount = pod.gpus.length; gpuCount >= 1; gpuCount--) {
|
||||
modelConfig = getModelConfig(modelId, pod.gpus, gpuCount);
|
||||
if (modelConfig) {
|
||||
gpus = selectGPUs(pod, gpuCount);
|
||||
vllmArgs = [...(modelConfig.args || [])];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!modelConfig) {
|
||||
console.error(chalk.red(`Model '${getModelName(modelId)}' not compatible with this pod's GPUs`));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unknown model
|
||||
if (options.gpus) {
|
||||
console.error(chalk.red("Error: --gpus can only be used with predefined models"));
|
||||
console.error(chalk.yellow("For custom models, use --vllm with tensor-parallel-size or similar arguments"));
|
||||
process.exit(1);
|
||||
}
|
||||
// Single GPU default
|
||||
gpus = selectGPUs(pod, 1);
|
||||
console.log(chalk.gray("Unknown model, defaulting to single GPU"));
|
||||
}
|
||||
|
||||
// Apply memory/context overrides
|
||||
if (!options.vllmArgs?.length) {
|
||||
if (options.memory) {
|
||||
const fraction = parseFloat(options.memory.replace("%", "")) / 100;
|
||||
vllmArgs = vllmArgs.filter((arg) => !arg.includes("gpu-memory-utilization"));
|
||||
vllmArgs.push("--gpu-memory-utilization", String(fraction));
|
||||
}
|
||||
if (options.context) {
|
||||
const contextSizes: Record<string, number> = {
|
||||
"4k": 4096,
|
||||
"8k": 8192,
|
||||
"16k": 16384,
|
||||
"32k": 32768,
|
||||
"64k": 65536,
|
||||
"128k": 131072,
|
||||
};
|
||||
const maxTokens = contextSizes[options.context.toLowerCase()] || parseInt(options.context);
|
||||
vllmArgs = vllmArgs.filter((arg) => !arg.includes("max-model-len"));
|
||||
vllmArgs.push("--max-model-len", String(maxTokens));
|
||||
}
|
||||
}
|
||||
|
||||
// Show what we're doing
|
||||
console.log(chalk.green(`Starting model '${name}' on pod '${podName}'...`));
|
||||
console.log(`Model: ${modelId}`);
|
||||
console.log(`Port: ${port}`);
|
||||
console.log(`GPU(s): ${gpus.length ? gpus.join(", ") : "Managed by vLLM"}`);
|
||||
if (modelConfig?.notes) console.log(chalk.yellow(`Note: ${modelConfig.notes}`));
|
||||
console.log("");
|
||||
|
||||
// Read and customize model_run.sh script with our values
|
||||
const scriptPath = join(dirname(fileURLToPath(import.meta.url)), "../../scripts/model_run.sh");
|
||||
let scriptContent = readFileSync(scriptPath, "utf-8");
|
||||
|
||||
// Replace placeholders - no escaping needed, heredoc with 'EOF' is literal
|
||||
scriptContent = scriptContent
|
||||
.replace("{{MODEL_ID}}", modelId)
|
||||
.replace("{{NAME}}", name)
|
||||
.replace("{{PORT}}", String(port))
|
||||
.replace("{{VLLM_ARGS}}", vllmArgs.join(" "));
|
||||
|
||||
// Upload customized script
|
||||
const result = await sshExec(
|
||||
pod.ssh,
|
||||
`cat > /tmp/model_run_${name}.sh << 'EOF'
|
||||
${scriptContent}
|
||||
EOF
|
||||
chmod +x /tmp/model_run_${name}.sh`,
|
||||
);
|
||||
|
||||
// Prepare environment
|
||||
const env = [
|
||||
`HF_TOKEN='${process.env.HF_TOKEN}'`,
|
||||
`PI_API_KEY='${process.env.PI_API_KEY}'`,
|
||||
`HF_HUB_ENABLE_HF_TRANSFER=1`,
|
||||
`VLLM_NO_USAGE_STATS=1`,
|
||||
`PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True`,
|
||||
`FORCE_COLOR=1`,
|
||||
`TERM=xterm-256color`,
|
||||
...(gpus.length === 1 ? [`CUDA_VISIBLE_DEVICES=${gpus[0]}`] : []),
|
||||
...Object.entries(modelConfig?.env || {}).map(([k, v]) => `${k}='${v}'`),
|
||||
]
|
||||
.map((e) => `export ${e}`)
|
||||
.join("\n");
|
||||
|
||||
// Start the model runner with script command for pseudo-TTY (preserves colors)
|
||||
// Note: We use script to preserve colors and create a log file
|
||||
// setsid creates a new session so it survives SSH disconnection
|
||||
const startCmd = `
|
||||
${env}
|
||||
mkdir -p ~/.vllm_logs
|
||||
# Create a wrapper that monitors the script command
|
||||
cat > /tmp/model_wrapper_${name}.sh << 'WRAPPER'
|
||||
#!/bin/bash
|
||||
script -q -f -c "/tmp/model_run_${name}.sh" ~/.vllm_logs/${name}.log
|
||||
exit_code=$?
|
||||
echo "Script exited with code $exit_code" >> ~/.vllm_logs/${name}.log
|
||||
exit $exit_code
|
||||
WRAPPER
|
||||
chmod +x /tmp/model_wrapper_${name}.sh
|
||||
setsid /tmp/model_wrapper_${name}.sh </dev/null >/dev/null 2>&1 &
|
||||
echo $!
|
||||
exit 0
|
||||
`;
|
||||
|
||||
const pidResult = await sshExec(pod.ssh, startCmd);
|
||||
const pid = parseInt(pidResult.stdout.trim());
|
||||
if (!pid) {
|
||||
console.error(chalk.red("Failed to start model runner"));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Save to config
|
||||
const config = loadConfig();
|
||||
config.pods[podName].models[name] = { model: modelId, port, gpu: gpus, pid };
|
||||
saveConfig(config);
|
||||
|
||||
console.log(`Model runner started with PID: ${pid}`);
|
||||
console.log("Streaming logs... (waiting for startup)\n");
|
||||
|
||||
// Small delay to ensure log file is created
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
|
||||
// Stream logs with color support, watching for startup complete
|
||||
const sshParts = pod.ssh.split(" ");
|
||||
const sshCommand = sshParts[0]; // "ssh"
|
||||
const sshArgs = sshParts.slice(1); // ["root@86.38.238.55"]
|
||||
const host = sshArgs[0].split("@")[1] || "localhost";
|
||||
const tailCmd = `tail -f ~/.vllm_logs/${name}.log`;
|
||||
|
||||
// Build the full args array for spawn
|
||||
const fullArgs = [...sshArgs, tailCmd];
|
||||
|
||||
const logProcess = spawn(sshCommand, fullArgs, {
|
||||
stdio: ["inherit", "pipe", "pipe"], // capture stdout and stderr
|
||||
env: { ...process.env, FORCE_COLOR: "1" },
|
||||
});
|
||||
|
||||
let interrupted = false;
|
||||
let startupComplete = false;
|
||||
|
||||
// Handle Ctrl+C
|
||||
const sigintHandler = () => {
|
||||
interrupted = true;
|
||||
logProcess.kill();
|
||||
};
|
||||
process.on("SIGINT", sigintHandler);
|
||||
|
||||
// Process log output line by line
|
||||
const processOutput = (data: Buffer) => {
|
||||
const lines = data.toString().split("\n");
|
||||
for (const line of lines) {
|
||||
if (line) {
|
||||
console.log(line); // Echo the line to console
|
||||
|
||||
// Check for startup complete message
|
||||
if (line.includes("Application startup complete")) {
|
||||
startupComplete = true;
|
||||
logProcess.kill(); // Stop tailing logs
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
logProcess.stdout?.on("data", processOutput);
|
||||
logProcess.stderr?.on("data", processOutput);
|
||||
|
||||
await new Promise<void>((resolve) => logProcess.on("exit", resolve));
|
||||
process.removeListener("SIGINT", sigintHandler);
|
||||
|
||||
if (startupComplete) {
|
||||
// Model started successfully - output connection details
|
||||
console.log("\n" + chalk.green("✓ Model started successfully!"));
|
||||
console.log("\n" + chalk.bold("Connection Details:"));
|
||||
console.log(chalk.cyan("─".repeat(50)));
|
||||
console.log(chalk.white("Base URL: ") + chalk.yellow(`http://${host}:${port}/v1`));
|
||||
console.log(chalk.white("Model: ") + chalk.yellow(modelId));
|
||||
console.log(chalk.white("API Key: ") + chalk.yellow(process.env.PI_API_KEY || "(not set)"));
|
||||
console.log(chalk.cyan("─".repeat(50)));
|
||||
|
||||
console.log("\n" + chalk.bold("Export for shell:"));
|
||||
console.log(chalk.gray(`export OPENAI_BASE_URL="http://${host}:${port}/v1"`));
|
||||
console.log(chalk.gray(`export OPENAI_API_KEY="${process.env.PI_API_KEY || "your-api-key"}"`));
|
||||
console.log(chalk.gray(`export OPENAI_MODEL="${modelId}"`));
|
||||
|
||||
console.log("\n" + chalk.bold("Example usage:"));
|
||||
console.log(
|
||||
chalk.gray(`
|
||||
# Python
|
||||
from openai import OpenAI
|
||||
client = OpenAI() # Uses env vars
|
||||
response = client.chat.completions.create(
|
||||
model="${modelId}",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
|
||||
# CLI
|
||||
curl $OPENAI_BASE_URL/chat/completions \\
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"model":"${modelId}","messages":[{"role":"user","content":"Hi"}]}'`),
|
||||
);
|
||||
console.log("");
|
||||
console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
|
||||
console.log(chalk.cyan(`Interactive mode: pi agent ${name} -i`));
|
||||
console.log(chalk.cyan(`Monitor logs: pi logs ${name}`));
|
||||
console.log(chalk.cyan(`Stop model: pi stop ${name}`));
|
||||
} else if (interrupted) {
|
||||
console.log(chalk.yellow("\n\nStopped monitoring. Model deployment continues in background."));
|
||||
console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
|
||||
console.log(chalk.cyan(`Check status: pi logs ${name}`));
|
||||
console.log(chalk.cyan(`Stop model: pi stop ${name}`));
|
||||
} else {
|
||||
console.log(chalk.yellow("\n\nLog stream ended. Model may still be running."));
|
||||
console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
|
||||
console.log(chalk.cyan(`Check status: pi logs ${name}`));
|
||||
console.log(chalk.cyan(`Stop model: pi stop ${name}`));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Stop a model
|
||||
*/
|
||||
export const stopModel = async (name: string, options: { pod?: string }) => {
|
||||
const { name: podName, pod } = getPod(options.pod);
|
||||
|
||||
const model = pod.models[name];
|
||||
if (!model) {
|
||||
console.error(chalk.red(`Model '${name}' not found on pod '${podName}'`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(chalk.yellow(`Stopping model '${name}' on pod '${podName}'...`));
|
||||
|
||||
// Kill the script process and all its children
|
||||
// Using pkill to kill the process and all children
|
||||
const killCmd = `
|
||||
# Kill the script process and all its children
|
||||
pkill -TERM -P ${model.pid} 2>/dev/null || true
|
||||
kill ${model.pid} 2>/dev/null || true
|
||||
`;
|
||||
await sshExec(pod.ssh, killCmd);
|
||||
|
||||
// Remove from config
|
||||
const config = loadConfig();
|
||||
delete config.pods[podName].models[name];
|
||||
saveConfig(config);
|
||||
|
||||
console.log(chalk.green(`✓ Model '${name}' stopped`));
|
||||
};
|
||||
|
||||
/**
|
||||
* Stop all models on a pod
|
||||
*/
|
||||
export const stopAllModels = async (options: { pod?: string }) => {
|
||||
const { name: podName, pod } = getPod(options.pod);
|
||||
|
||||
const modelNames = Object.keys(pod.models);
|
||||
if (modelNames.length === 0) {
|
||||
console.log(`No models running on pod '${podName}'`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(chalk.yellow(`Stopping ${modelNames.length} model(s) on pod '${podName}'...`));
|
||||
|
||||
// Kill all script processes and their children
|
||||
const pids = Object.values(pod.models).map((m) => m.pid);
|
||||
const killCmd = `
|
||||
for PID in ${pids.join(" ")}; do
|
||||
pkill -TERM -P $PID 2>/dev/null || true
|
||||
kill $PID 2>/dev/null || true
|
||||
done
|
||||
`;
|
||||
await sshExec(pod.ssh, killCmd);
|
||||
|
||||
// Clear all models from config
|
||||
const config = loadConfig();
|
||||
config.pods[podName].models = {};
|
||||
saveConfig(config);
|
||||
|
||||
console.log(chalk.green(`✓ Stopped all models: ${modelNames.join(", ")}`));
|
||||
};
|
||||
|
||||
/**
|
||||
* List all models
|
||||
*/
|
||||
export const listModels = async (options: { pod?: string }) => {
|
||||
const { name: podName, pod } = getPod(options.pod);
|
||||
|
||||
const modelNames = Object.keys(pod.models);
|
||||
if (modelNames.length === 0) {
|
||||
console.log(`No models running on pod '${podName}'`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Get pod SSH host for URL display
|
||||
const sshParts = pod.ssh.split(" ");
|
||||
const host = sshParts.find((p) => p.includes("@"))?.split("@")[1] || "unknown";
|
||||
|
||||
console.log(`Models on pod '${chalk.bold(podName)}':`);
|
||||
for (const name of modelNames) {
|
||||
const model = pod.models[name];
|
||||
const gpuStr =
|
||||
model.gpu.length > 1
|
||||
? `GPUs ${model.gpu.join(",")}`
|
||||
: model.gpu.length === 1
|
||||
? `GPU ${model.gpu[0]}`
|
||||
: "GPU unknown";
|
||||
console.log(` ${chalk.green(name)} - Port ${model.port} - ${gpuStr} - PID ${model.pid}`);
|
||||
console.log(` Model: ${chalk.gray(model.model)}`);
|
||||
console.log(` URL: ${chalk.cyan(`http://${host}:${model.port}/v1`)}`);
|
||||
}
|
||||
|
||||
// Optionally verify processes are still running
|
||||
console.log("");
|
||||
console.log("Verifying processes...");
|
||||
let anyDead = false;
|
||||
for (const name of modelNames) {
|
||||
const model = pod.models[name];
|
||||
// Check both the wrapper process and if vLLM is responding
|
||||
const checkCmd = `
|
||||
# Check if wrapper process exists
|
||||
if ps -p ${model.pid} > /dev/null 2>&1; then
|
||||
# Process exists, now check if vLLM is responding
|
||||
if curl -s -f http://localhost:${model.port}/health > /dev/null 2>&1; then
|
||||
echo "running"
|
||||
else
|
||||
# Check if it's still starting up
|
||||
if tail -n 20 ~/.vllm_logs/${name}.log 2>/dev/null | grep -q "ERROR\\|Failed\\|Cuda error\\|died"; then
|
||||
echo "crashed"
|
||||
else
|
||||
echo "starting"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "dead"
|
||||
fi
|
||||
`;
|
||||
const result = await sshExec(pod.ssh, checkCmd);
|
||||
const status = result.stdout.trim();
|
||||
if (status === "dead") {
|
||||
console.log(chalk.red(` ${name}: Process ${model.pid} is not running`));
|
||||
anyDead = true;
|
||||
} else if (status === "crashed") {
|
||||
console.log(chalk.red(` ${name}: vLLM crashed (check logs with 'pi logs ${name}')`));
|
||||
anyDead = true;
|
||||
} else if (status === "starting") {
|
||||
console.log(chalk.yellow(` ${name}: Still starting up...`));
|
||||
}
|
||||
}
|
||||
|
||||
if (anyDead) {
|
||||
console.log("");
|
||||
console.log(chalk.yellow("Some models are not running. Clean up with:"));
|
||||
console.log(chalk.cyan(" pi stop <name>"));
|
||||
} else {
|
||||
console.log(chalk.green("✓ All processes verified"));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* View model logs
|
||||
*/
|
||||
export const viewLogs = async (name: string, options: { pod?: string }) => {
|
||||
const { name: podName, pod } = getPod(options.pod);
|
||||
|
||||
const model = pod.models[name];
|
||||
if (!model) {
|
||||
console.error(chalk.red(`Model '${name}' not found on pod '${podName}'`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(chalk.green(`Streaming logs for '${name}' on pod '${podName}'...`));
|
||||
console.log(chalk.gray("Press Ctrl+C to stop"));
|
||||
console.log("");
|
||||
|
||||
// Stream logs with color preservation
|
||||
const sshParts = pod.ssh.split(" ");
|
||||
const sshCommand = sshParts[0]; // "ssh"
|
||||
const sshArgs = sshParts.slice(1); // ["root@86.38.238.55"]
|
||||
const tailCmd = `tail -f ~/.vllm_logs/${name}.log`;
|
||||
|
||||
const logProcess = spawn(sshCommand, [...sshArgs, tailCmd], {
|
||||
stdio: "inherit",
|
||||
env: {
|
||||
...process.env,
|
||||
FORCE_COLOR: "1",
|
||||
},
|
||||
});
|
||||
|
||||
// Wait for process to exit
|
||||
await new Promise<void>((resolve) => {
|
||||
logProcess.on("exit", () => resolve());
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Show known models and their hardware requirements
|
||||
*/
|
||||
export const showKnownModels = async () => {
|
||||
const modelsJson = await import("../models.json", { assert: { type: "json" } });
|
||||
const models = modelsJson.default.models;
|
||||
|
||||
// Get active pod info if available
|
||||
const activePod = getActivePod();
|
||||
let podGpuCount = 0;
|
||||
let podGpuType = "";
|
||||
|
||||
if (activePod) {
|
||||
podGpuCount = activePod.pod.gpus.length;
|
||||
// Extract GPU type from name (e.g., "NVIDIA H200" -> "H200")
|
||||
podGpuType = activePod.pod.gpus[0]?.name?.replace("NVIDIA", "")?.trim()?.split(" ")[0] || "";
|
||||
|
||||
console.log(chalk.bold(`Known Models for ${activePod.name} (${podGpuCount}x ${podGpuType || "GPU"}):\n`));
|
||||
} else {
|
||||
console.log(chalk.bold("Known Models:\n"));
|
||||
console.log(chalk.yellow("No active pod. Use 'pi pods active <name>' to filter compatible models.\n"));
|
||||
}
|
||||
|
||||
console.log("Usage: pi start <model> --name <name> [options]\n");
|
||||
|
||||
// Group models by compatibility and family
|
||||
const compatible: Record<string, Array<{ id: string; name: string; config: string; notes?: string }>> = {};
|
||||
const incompatible: Record<string, Array<{ id: string; name: string; minGpu: string; notes?: string }>> = {};
|
||||
|
||||
for (const [modelId, info] of Object.entries(models)) {
|
||||
const modelInfo = info as any;
|
||||
const family = modelInfo.name.split("-")[0] || "Other";
|
||||
|
||||
let isCompatible = false;
|
||||
let compatibleConfig = "";
|
||||
let minGpu = "Unknown";
|
||||
let minNotes: string | undefined;
|
||||
|
||||
if (modelInfo.configs && modelInfo.configs.length > 0) {
|
||||
// Sort configs by GPU count to find minimum
|
||||
const sortedConfigs = [...modelInfo.configs].sort((a: any, b: any) => (a.gpuCount || 1) - (b.gpuCount || 1));
|
||||
|
||||
// Find minimum requirements
|
||||
const minConfig = sortedConfigs[0];
|
||||
const minGpuCount = minConfig.gpuCount || 1;
|
||||
const gpuTypes = minConfig.gpuTypes?.join("/") || "H100/H200";
|
||||
|
||||
if (minGpuCount === 1) {
|
||||
minGpu = `1x ${gpuTypes}`;
|
||||
} else {
|
||||
minGpu = `${minGpuCount}x ${gpuTypes}`;
|
||||
}
|
||||
|
||||
minNotes = minConfig.notes || modelInfo.notes;
|
||||
|
||||
// Check compatibility with active pod
|
||||
if (activePod && podGpuCount > 0) {
|
||||
// Find best matching config for this pod
|
||||
for (const config of sortedConfigs) {
|
||||
const configGpuCount = config.gpuCount || 1;
|
||||
const configGpuTypes = config.gpuTypes || [];
|
||||
|
||||
// Check if we have enough GPUs
|
||||
if (configGpuCount <= podGpuCount) {
|
||||
// Check if GPU type matches (if specified)
|
||||
if (
|
||||
configGpuTypes.length === 0 ||
|
||||
configGpuTypes.some((type: string) => podGpuType.includes(type) || type.includes(podGpuType))
|
||||
) {
|
||||
isCompatible = true;
|
||||
if (configGpuCount === 1) {
|
||||
compatibleConfig = `1x ${podGpuType}`;
|
||||
} else {
|
||||
compatibleConfig = `${configGpuCount}x ${podGpuType}`;
|
||||
}
|
||||
minNotes = config.notes || modelInfo.notes;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const modelEntry = {
|
||||
id: modelId,
|
||||
name: modelInfo.name,
|
||||
notes: minNotes,
|
||||
};
|
||||
|
||||
if (activePod && isCompatible) {
|
||||
if (!compatible[family]) {
|
||||
compatible[family] = [];
|
||||
}
|
||||
compatible[family].push({ ...modelEntry, config: compatibleConfig });
|
||||
} else {
|
||||
if (!incompatible[family]) {
|
||||
incompatible[family] = [];
|
||||
}
|
||||
incompatible[family].push({ ...modelEntry, minGpu });
|
||||
}
|
||||
}
|
||||
|
||||
// Display compatible models first
|
||||
if (activePod && Object.keys(compatible).length > 0) {
|
||||
console.log(chalk.green.bold("✓ Compatible Models:\n"));
|
||||
|
||||
const sortedFamilies = Object.keys(compatible).sort();
|
||||
for (const family of sortedFamilies) {
|
||||
console.log(chalk.cyan(`${family} Models:`));
|
||||
|
||||
const modelList = compatible[family].sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
for (const model of modelList) {
|
||||
console.log(` ${chalk.green(model.id)}`);
|
||||
console.log(` Name: ${model.name}`);
|
||||
console.log(` Config: ${model.config}`);
|
||||
if (model.notes) {
|
||||
console.log(chalk.gray(` Note: ${model.notes}`));
|
||||
}
|
||||
console.log("");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Display incompatible models
|
||||
if (Object.keys(incompatible).length > 0) {
|
||||
if (activePod && Object.keys(compatible).length > 0) {
|
||||
console.log(chalk.red.bold("✗ Incompatible Models (need more/different GPUs):\n"));
|
||||
}
|
||||
|
||||
const sortedFamilies = Object.keys(incompatible).sort();
|
||||
for (const family of sortedFamilies) {
|
||||
if (!activePod) {
|
||||
console.log(chalk.cyan(`${family} Models:`));
|
||||
} else {
|
||||
console.log(chalk.gray(`${family} Models:`));
|
||||
}
|
||||
|
||||
const modelList = incompatible[family].sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
for (const model of modelList) {
|
||||
const color = activePod ? chalk.gray : chalk.green;
|
||||
console.log(` ${color(model.id)}`);
|
||||
console.log(chalk.gray(` Name: ${model.name}`));
|
||||
console.log(chalk.gray(` Min Hardware: ${model.minGpu}`));
|
||||
if (model.notes && !activePod) {
|
||||
console.log(chalk.gray(` Note: ${model.notes}`));
|
||||
}
|
||||
if (activePod) {
|
||||
console.log(""); // Less verbose for incompatible models when filtered
|
||||
} else {
|
||||
console.log("");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(chalk.gray("\nFor unknown models, defaults to single GPU deployment."));
|
||||
console.log(chalk.gray("Use --vllm to pass custom arguments to vLLM."));
|
||||
};
|
||||
205
packages/pods/src/commands/pods.ts
Normal file
205
packages/pods/src/commands/pods.ts
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
import chalk from "chalk";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { addPod, loadConfig, removePod, setActivePod } from "../config.js";
|
||||
import { scpFile, sshExec, sshExecStream } from "../ssh.js";
|
||||
import type { GPU, Pod } from "../types.js";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
/**
|
||||
* List all pods
|
||||
*/
|
||||
export const listPods = () => {
|
||||
const config = loadConfig();
|
||||
const podNames = Object.keys(config.pods);
|
||||
|
||||
if (podNames.length === 0) {
|
||||
console.log("No pods configured. Use 'pi pods setup' to add a pod.");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("Configured pods:");
|
||||
for (const name of podNames) {
|
||||
const pod = config.pods[name];
|
||||
const isActive = config.active === name;
|
||||
const marker = isActive ? chalk.green("*") : " ";
|
||||
const gpuCount = pod.gpus?.length || 0;
|
||||
const gpuInfo = gpuCount > 0 ? `${gpuCount}x ${pod.gpus[0].name}` : "no GPUs detected";
|
||||
const vllmInfo = pod.vllmVersion ? ` (vLLM: ${pod.vllmVersion})` : "";
|
||||
console.log(`${marker} ${chalk.bold(name)} - ${gpuInfo}${vllmInfo} - ${pod.ssh}`);
|
||||
if (pod.modelsPath) {
|
||||
console.log(` Models: ${pod.modelsPath}`);
|
||||
}
|
||||
if (pod.vllmVersion === "gpt-oss") {
|
||||
console.log(chalk.yellow(` ⚠️ GPT-OSS build - only for GPT-OSS models`));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Setup a new pod
|
||||
*/
|
||||
export const setupPod = async (
|
||||
name: string,
|
||||
sshCmd: string,
|
||||
options: { mount?: string; modelsPath?: string; vllm?: "release" | "nightly" | "gpt-oss" },
|
||||
) => {
|
||||
// Validate environment variables
|
||||
const hfToken = process.env.HF_TOKEN;
|
||||
const vllmApiKey = process.env.PI_API_KEY;
|
||||
|
||||
if (!hfToken) {
|
||||
console.error(chalk.red("ERROR: HF_TOKEN environment variable is required"));
|
||||
console.error("Get a token from: https://huggingface.co/settings/tokens");
|
||||
console.error("Then run: export HF_TOKEN=your_token_here");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if (!vllmApiKey) {
|
||||
console.error(chalk.red("ERROR: PI_API_KEY environment variable is required"));
|
||||
console.error("Set an API key: export PI_API_KEY=your_api_key_here");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Determine models path
|
||||
let modelsPath = options.modelsPath;
|
||||
if (!modelsPath && options.mount) {
|
||||
// Extract path from mount command if not explicitly provided
|
||||
// e.g., "mount -t nfs ... /mnt/sfs" -> "/mnt/sfs"
|
||||
const parts = options.mount.split(" ");
|
||||
modelsPath = parts[parts.length - 1];
|
||||
}
|
||||
|
||||
if (!modelsPath) {
|
||||
console.error(chalk.red("ERROR: --models-path is required (or must be extractable from --mount)"));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(chalk.green(`Setting up pod '${name}'...`));
|
||||
console.log(`SSH: ${sshCmd}`);
|
||||
console.log(`Models path: ${modelsPath}`);
|
||||
console.log(
|
||||
`vLLM version: ${options.vllm || "release"} ${options.vllm === "gpt-oss" ? chalk.yellow("(GPT-OSS special build)") : ""}`,
|
||||
);
|
||||
if (options.mount) {
|
||||
console.log(`Mount command: ${options.mount}`);
|
||||
}
|
||||
console.log("");
|
||||
|
||||
// Test SSH connection
|
||||
console.log("Testing SSH connection...");
|
||||
const testResult = await sshExec(sshCmd, "echo 'SSH OK'");
|
||||
if (testResult.exitCode !== 0) {
|
||||
console.error(chalk.red("Failed to connect via SSH"));
|
||||
console.error(testResult.stderr);
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(chalk.green("✓ SSH connection successful"));
|
||||
|
||||
// Copy setup script
|
||||
console.log("Copying setup script...");
|
||||
const scriptPath = join(__dirname, "../../scripts/pod_setup.sh");
|
||||
const success = await scpFile(sshCmd, scriptPath, "/tmp/pod_setup.sh");
|
||||
if (!success) {
|
||||
console.error(chalk.red("Failed to copy setup script"));
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(chalk.green("✓ Setup script copied"));
|
||||
|
||||
// Build setup command
|
||||
let setupCmd = `bash /tmp/pod_setup.sh --models-path '${modelsPath}' --hf-token '${hfToken}' --vllm-api-key '${vllmApiKey}'`;
|
||||
if (options.mount) {
|
||||
setupCmd += ` --mount '${options.mount}'`;
|
||||
}
|
||||
// Add vLLM version flag
|
||||
const vllmVersion = options.vllm || "release";
|
||||
setupCmd += ` --vllm '${vllmVersion}'`;
|
||||
|
||||
// Run setup script
|
||||
console.log("");
|
||||
console.log(chalk.yellow("Running setup (this will take 2-5 minutes)..."));
|
||||
console.log("");
|
||||
|
||||
// Use forceTTY to preserve colors from apt, pip, etc.
|
||||
const exitCode = await sshExecStream(sshCmd, setupCmd, { forceTTY: true });
|
||||
if (exitCode !== 0) {
|
||||
console.error(chalk.red("\nSetup failed. Check the output above for errors."));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Parse GPU info from setup output
|
||||
console.log("");
|
||||
console.log("Detecting GPU configuration...");
|
||||
const gpuResult = await sshExec(sshCmd, "nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader");
|
||||
|
||||
const gpus: GPU[] = [];
|
||||
if (gpuResult.exitCode === 0 && gpuResult.stdout) {
|
||||
const lines = gpuResult.stdout.trim().split("\n");
|
||||
for (const line of lines) {
|
||||
const [id, name, memory] = line.split(",").map((s) => s.trim());
|
||||
if (id !== undefined) {
|
||||
gpus.push({
|
||||
id: parseInt(id),
|
||||
name: name || "Unknown",
|
||||
memory: memory || "Unknown",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(chalk.green(`✓ Detected ${gpus.length} GPU(s)`));
|
||||
for (const gpu of gpus) {
|
||||
console.log(` GPU ${gpu.id}: ${gpu.name} (${gpu.memory})`);
|
||||
}
|
||||
|
||||
// Save pod configuration
|
||||
const pod: Pod = {
|
||||
ssh: sshCmd,
|
||||
gpus,
|
||||
models: {},
|
||||
modelsPath,
|
||||
vllmVersion: options.vllm || "release",
|
||||
};
|
||||
|
||||
addPod(name, pod);
|
||||
console.log("");
|
||||
console.log(chalk.green(`✓ Pod '${name}' setup complete and set as active pod`));
|
||||
console.log("");
|
||||
console.log("You can now deploy models with:");
|
||||
console.log(chalk.cyan(` pi start <model> --name <name>`));
|
||||
};
|
||||
|
||||
/**
|
||||
* Switch active pod
|
||||
*/
|
||||
export const switchActivePod = (name: string) => {
|
||||
const config = loadConfig();
|
||||
if (!config.pods[name]) {
|
||||
console.error(chalk.red(`Pod '${name}' not found`));
|
||||
console.log("\nAvailable pods:");
|
||||
for (const podName of Object.keys(config.pods)) {
|
||||
console.log(` ${podName}`);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
setActivePod(name);
|
||||
console.log(chalk.green(`✓ Switched active pod to '${name}'`));
|
||||
};
|
||||
|
||||
/**
|
||||
* Remove a pod from config
|
||||
*/
|
||||
export const removePodCommand = (name: string) => {
|
||||
const config = loadConfig();
|
||||
if (!config.pods[name]) {
|
||||
console.error(chalk.red(`Pod '${name}' not found`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
removePod(name);
|
||||
console.log(chalk.green(`✓ Removed pod '${name}' from configuration`));
|
||||
console.log(chalk.yellow("Note: This only removes the local configuration. The remote pod is not affected."));
|
||||
};
|
||||
85
packages/pods/src/commands/prompt.ts
Normal file
85
packages/pods/src/commands/prompt.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { main as agentMain } from "@mariozechner/pi-agent";
|
||||
import chalk from "chalk";
|
||||
import { getActivePod, loadConfig } from "../config.js";
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────────
|
||||
// Types
|
||||
// ────────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
interface PromptOptions {
|
||||
pod?: string;
|
||||
apiKey?: string;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────────
|
||||
// Main prompt function
|
||||
// ────────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function promptModel(modelName: string, userArgs: string[], opts: PromptOptions = {}) {
|
||||
// Get pod and model configuration
|
||||
const activePod = opts.pod ? { name: opts.pod, pod: loadConfig().pods[opts.pod] } : getActivePod();
|
||||
|
||||
if (!activePod) {
|
||||
console.error(chalk.red("No active pod. Use 'pi pods active <name>' to set one."));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const { name: podName, pod } = activePod;
|
||||
const modelConfig = pod.models[modelName];
|
||||
|
||||
if (!modelConfig) {
|
||||
console.error(chalk.red(`Model '${modelName}' not found on pod '${podName}'`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Extract host from SSH string
|
||||
const host =
|
||||
pod.ssh
|
||||
.split(" ")
|
||||
.find((p) => p.includes("@"))
|
||||
?.split("@")[1] ?? "localhost";
|
||||
|
||||
// Build the system prompt for code navigation
|
||||
const systemPrompt = `You help the user understand and navigate the codebase in the current working directory.
|
||||
|
||||
You can read files, list directories, and execute shell commands via the respective tools.
|
||||
|
||||
Do not output file contents you read via the read_file tool directly, unless asked to.
|
||||
|
||||
Do not output markdown tables as part of your responses.
|
||||
|
||||
Keep your responses concise and relevant to the user's request.
|
||||
|
||||
File paths you output must include line numbers where possible, e.g. "src/index.ts:10-20" for lines 10 to 20 in src/index.ts.
|
||||
|
||||
Current working directory: ${process.cwd()}`;
|
||||
|
||||
// Build arguments for agent main function
|
||||
const args: string[] = [];
|
||||
|
||||
// Add base configuration that we control
|
||||
args.push(
|
||||
"--base-url",
|
||||
`http://${host}:${modelConfig.port}/v1`,
|
||||
"--model",
|
||||
modelConfig.model,
|
||||
"--api-key",
|
||||
opts.apiKey || process.env.PI_API_KEY || "dummy",
|
||||
"--api",
|
||||
modelConfig.model.toLowerCase().includes("gpt-oss") ? "responses" : "completions",
|
||||
"--system-prompt",
|
||||
systemPrompt,
|
||||
);
|
||||
|
||||
// Pass through all user-provided arguments
|
||||
// This includes messages, --continue, --json, etc.
|
||||
args.push(...userArgs);
|
||||
|
||||
// Call agent main function directly
|
||||
try {
|
||||
await agentMain(args);
|
||||
} catch (err: any) {
|
||||
console.error(chalk.red(`Agent error: ${err.message}`));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue