From 5c64043892d991280b76a1a4b65f3b6179427839 Mon Sep 17 00:00:00 2001
From: Celes Renata
Date: Wed, 29 Apr 2026 04:16:44 +0000
Subject: [PATCH] feat: add Rocky 9.7 prerequisites and GPU passthrough for
 ollama container

---
Reviewer notes (this section is ignored by git am):
Line breaks and indentation were reconstructed from a flattened copy of this
patch. If context lines do not match the target files, apply with
`git apply --ignore-whitespace`.
The hunks were edited by hand during review, so the `index` blob hash for
deploy-docker.sh describes the original post-image, not this one.
Review changes: open port 8088 (Superset) in the firewall, report firewall
failures instead of hiding them, fall back to `sudo docker` for the GPU probe
when the new docker group is not yet active, and warn when a user-supplied
OLLAMA_URL is overridden.

 deploy-docker.sh   | 141 +++++++++++++++++++++++++++++++++++----------
 docker-compose.yml |   7 +++
 2 files changed, 117 insertions(+), 31 deletions(-)

diff --git a/deploy-docker.sh b/deploy-docker.sh
index 50bda39..1aff84f 100755
--- a/deploy-docker.sh
+++ b/deploy-docker.sh
@@ -39,7 +39,106 @@ done
 echo "=== Stonks Oracle Docker Deployment ==="
 echo " Target: ${REMOTE_HOST}:${REMOTE_DIR}"
 echo " Model: ${OLLAMA_MODEL}"
-echo " Ollama: ${OLLAMA_URL:-auto-detect}"
+echo " Ollama: Docker container (GPU-accelerated)"
+echo ""
+
+# -------------------------------------------------------
+# Step 0: Ensure prerequisites on Rocky 9.7
+# -------------------------------------------------------
+echo "--- Step 0: Checking prerequisites (Rocky 9.7) ---"
+ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT'
+set -euo pipefail
+
+# Verify we're on a RHEL-compatible system
+if ! grep -qi "rocky\|rhel\|centos" /etc/os-release 2>/dev/null; then
+  echo " ⚠ Warning: This script is designed for Rocky Linux 9.7 — detected different OS"
+fi
+
+# --- Docker Engine ---
+if command -v docker &>/dev/null && docker info &>/dev/null; then
+  echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))"
+else
+  echo " Installing Docker CE..."
+  sudo dnf -y install dnf-plugins-core
+  sudo dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
+  sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+  sudo systemctl enable --now docker
+  # Add current user to docker group (takes effect on next login)
+  sudo usermod -aG docker "$(whoami)" || true
+  echo " ✓ Docker installed and started"
+fi
+
+# --- Docker Compose plugin ---
+if docker compose version &>/dev/null; then
+  echo " ✓ Docker Compose plugin available ($(docker compose version --short))"
+else
+  echo " ERROR: docker compose plugin not found after Docker install"
+  exit 1
+fi
+
+# --- NVIDIA Driver ---
+if ! command -v nvidia-smi &>/dev/null; then
+  echo " Installing NVIDIA drivers..."
+  sudo dnf -y install epel-release
+  sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
+  sudo dnf -y module install nvidia-driver:latest-dkms
+  echo " ✓ NVIDIA driver installed (reboot may be required)"
+else
+  echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))"
+fi
+
+# --- NVIDIA Container Toolkit ---
+if command -v nvidia-ctk &>/dev/null; then
+  echo " ✓ NVIDIA Container Toolkit already installed"
+else
+  echo " Installing NVIDIA Container Toolkit..."
+  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
+    sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null
+  sudo dnf -y install nvidia-container-toolkit
+  # Configure Docker runtime for NVIDIA
+  sudo nvidia-ctk runtime configure --runtime=docker
+  sudo systemctl restart docker
+  echo " ✓ NVIDIA Container Toolkit installed and Docker configured"
+fi
+
+# --- Verify GPU is accessible from Docker ---
+# A docker group membership added above is not active in this SSH session,
+# so fall back to sudo instead of misreporting a socket error as a GPU failure.
+if docker info &>/dev/null; then
+  DOCKER=(docker)
+else
+  DOCKER=(sudo docker)
+fi
+if "${DOCKER[@]}" run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null; then
+  echo " ✓ GPU passthrough verified"
+else
+  echo " ⚠ GPU passthrough test failed — NVIDIA Container Toolkit may need a reboot"
+  echo " Run: sudo reboot, then re-run this script"
+fi
+
+# --- Git ---
+if ! command -v git &>/dev/null; then
+  echo " Installing git..."
+  sudo dnf -y install git
+  echo " ✓ Git installed"
+fi
+
+# --- Firewall (open required ports) ---
+if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then
+  echo " Configuring firewall..."
+  fw_failed=0
+  # 8088 is the Superset UI advertised in the deployment summary
+  for port in 3000 8001 8002 8003 8004 8088 9000 9001 11434; do
+    sudo firewall-cmd --permanent --add-port="${port}/tcp" 2>/dev/null || fw_failed=1
+  done
+  sudo firewall-cmd --reload 2>/dev/null || fw_failed=1
+  if [ "$fw_failed" -eq 0 ]; then
+    echo " ✓ Firewall ports opened"
+  else
+    echo " ⚠ Some firewall rules could not be applied — check: sudo firewall-cmd --list-ports"
+  fi
+fi
+REMOTE_SCRIPT
 echo ""
 
 # -------------------------------------------------------
@@ -70,35 +169,15 @@ echo ""
 # Step 2: Detect or configure Ollama
 # -------------------------------------------------------
 echo "--- Step 2: Configuring Ollama ---"
-if [ -z "$OLLAMA_URL" ]; then
-  # Auto-detect: check if Ollama is running on the remote host
-  OLLAMA_URL=$(ssh "$REMOTE_HOST" bash -s <<'DETECT_SCRIPT'
-# Check common Ollama ports
-for port in 11434 2701; do
-  if curl -sf --connect-timeout 2 "http://localhost:$port/api/tags" > /dev/null 2>&1; then
-    echo "http://localhost:$port"
-    exit 0
-  fi
-done
-echo ""
-DETECT_SCRIPT
-  )
-
-  if [ -n "$OLLAMA_URL" ]; then
-    echo " ✓ Found existing Ollama at: $OLLAMA_URL"
-  else
-    echo " No Ollama detected — will use Docker container"
-    OLLAMA_URL="http://ollama:11434"
-  fi
-else
-  echo " Using provided Ollama URL: $OLLAMA_URL"
-fi
-
-# Determine if we need the Docker Ollama container
-USE_DOCKER_OLLAMA=false
-if [ "$OLLAMA_URL" = "http://ollama:11434" ]; then
-  USE_DOCKER_OLLAMA=true
-fi
+# Always use the Docker Ollama container with GPU passthrough
+# The ollama/ollama image ships with CUDA runtime built-in
+if [ -n "${OLLAMA_URL:-}" ] && [ "$OLLAMA_URL" != "http://ollama:11434" ]; then
+  echo " ⚠ Ignoring provided Ollama URL (${OLLAMA_URL}) — this deployment always uses the Docker Ollama container"
+fi
+USE_DOCKER_OLLAMA=true
+OLLAMA_URL="http://ollama:11434"
+echo " Using Docker Ollama container (GPU-accelerated via NVIDIA passthrough)"
+echo " Host-accessible at localhost:11434"
 echo ""
 
 # -------------------------------------------------------
@@ -318,7 +397,7 @@ echo " Trading Engine: http://${REMOTE_IP}:8002"
 echo " Risk Engine: http://${REMOTE_IP}:8003"
 echo " MinIO Console: http://${REMOTE_IP}:9001"
 echo " Superset: http://${REMOTE_IP}:8088"
-echo " Ollama: ${OLLAMA_URL}"
+echo " Ollama: http://${REMOTE_IP}:11434"
 echo ""
 echo "Commands:"
 echo " ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose logs -f'"
diff --git a/docker-compose.yml b/docker-compose.yml
index 2c979d9..52f19d4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -82,6 +82,13 @@ services:
       - "11434:11434"
     volumes:
       - ollama_models:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
 
   trino:
     image: trinodb/trino:latest