feat: add Rocky 9.7 prerequisites and GPU passthrough for ollama container
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled

This commit is contained in:
Celes Renata
2026-04-29 04:16:44 +00:00
parent 11c6457559
commit 5c64043892
2 changed files with 101 additions and 31 deletions
+93 -30
View File
@@ -39,7 +39,93 @@ done
# Deployment banner: where we deploy to, which model is pulled, and how Ollama
# is provided. Ollama always runs as the GPU-accelerated Docker container, so
# only that line is printed (the old URL/auto-detect line contradicted it).
echo "=== Stonks Oracle Docker Deployment ==="
echo " Target: ${REMOTE_HOST}:${REMOTE_DIR}"
echo " Model: ${OLLAMA_MODEL}"
echo " Ollama: Docker container (GPU-accelerated)"
echo ""
# -------------------------------------------------------
# Step 0: Ensure prerequisites on Rocky 9.7
# -------------------------------------------------------
# Runs a single remote bash session that installs whatever is missing:
# Docker CE (with the Compose plugin), the NVIDIA driver, the NVIDIA Container
# Toolkit and git, then opens the published service ports in firewalld.
# The script is streamed over stdin with no TTY, so sudo on the remote host
# presumably has to work without a password prompt.
echo "--- Step 0: Checking prerequisites (Rocky 9.7) ---"
ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT'
set -euo pipefail

# Register the NVIDIA runtime in Docker's daemon config and restart the
# daemon so the change takes effect.
configure_nvidia_runtime() {
  sudo nvidia-ctk runtime configure --runtime=docker
  sudo systemctl restart docker
}

# Verify we're on a RHEL-compatible system (warning only, never fatal)
if ! grep -qiE 'rocky|rhel|centos' /etc/os-release 2>/dev/null; then
  echo " ⚠ Warning: This script is designed for Rocky Linux 9.7 — detected different OS"
fi

# --- Docker Engine ---
if command -v docker &>/dev/null && docker info &>/dev/null; then
  echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))"
else
  echo " Installing Docker CE..."
  sudo dnf -y install dnf-plugins-core
  sudo dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
  sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
  sudo systemctl enable --now docker
  # Add current user to docker group (takes effect on next login)
  sudo usermod -aG docker "$(whoami)" || true
  echo " ✓ Docker installed and started"
fi

# The group change above does not apply to this SSH session, so the daemon
# socket may still be inaccessible; route the remaining daemon calls through
# sudo in that case instead of misreporting a GPU/reboot problem later.
docker_cmd=(docker)
if ! docker info &>/dev/null; then
  docker_cmd=(sudo docker)
fi

# --- Docker Compose plugin ---
if docker compose version &>/dev/null; then
  echo " ✓ Docker Compose plugin available ($(docker compose version --short))"
else
  echo " ERROR: docker compose plugin not found after Docker install"
  exit 1
fi

# --- NVIDIA Driver ---
if ! command -v nvidia-smi &>/dev/null; then
  echo " Installing NVIDIA drivers..."
  sudo dnf -y install epel-release
  sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
  sudo dnf -y module install nvidia-driver:latest-dkms
  echo " ✓ NVIDIA driver installed (reboot may be required)"
else
  echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))"
fi

# --- NVIDIA Container Toolkit ---
if command -v nvidia-ctk &>/dev/null; then
  echo " ✓ NVIDIA Container Toolkit already installed"
  # A pre-existing toolkit does not guarantee Docker knows the runtime (e.g.
  # Docker was installed just now), so register it when it is missing.
  # Captured into a variable rather than piped to grep -q, which could trip
  # pipefail via SIGPIPE.
  docker_runtimes=$("${docker_cmd[@]}" info --format '{{json .Runtimes}}' 2>/dev/null || true)
  if [[ "$docker_runtimes" != *nvidia* ]]; then
    configure_nvidia_runtime
    echo " ✓ Docker configured for NVIDIA runtime"
  fi
else
  echo " Installing NVIDIA Container Toolkit..."
  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
    sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null
  sudo dnf -y install nvidia-container-toolkit
  # Configure Docker runtime for NVIDIA
  configure_nvidia_runtime
  echo " ✓ NVIDIA Container Toolkit installed and Docker configured"
fi

# --- Verify GPU is accessible from Docker (non-fatal) ---
if "${docker_cmd[@]}" run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null; then
  echo " ✓ GPU passthrough verified"
else
  echo " ⚠ GPU passthrough test failed — NVIDIA Container Toolkit may need a reboot"
  echo " Run: sudo reboot, then re-run this script"
fi

# --- Git ---
if ! command -v git &>/dev/null; then
  echo " Installing git..."
  sudo dnf -y install git
  echo " ✓ Git installed"
fi

# --- Firewall (open required ports) ---
if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then
  echo " Configuring firewall..."
  fw_failed=()
  # 8088 is included because the deployment summary advertises Superset there.
  # NOTE(review): 11434 publishes the Ollama API, which has no built-in
  # authentication — confirm it should be reachable from the network.
  for port in 3000 8001 8002 8003 8004 8088 9000 9001 11434; do
    # Best-effort: stderr stays quiet (re-runs emit ALREADY_ENABLED warnings),
    # but real failures are recorded instead of being silently dropped.
    sudo firewall-cmd --permanent --add-port="${port}/tcp" 2>/dev/null || fw_failed+=("${port}/tcp")
  done
  sudo firewall-cmd --reload 2>/dev/null || fw_failed+=("reload")
  if (( ${#fw_failed[@]} == 0 )); then
    echo " ✓ Firewall ports opened"
  else
    echo " ⚠ Firewall configuration incomplete (failed: ${fw_failed[*]})"
  fi
fi
REMOTE_SCRIPT
echo ""
# -------------------------------------------------------
@@ -70,35 +156,12 @@ echo ""
# Step 2: Configure Ollama (Docker container)
# -------------------------------------------------------
echo "--- Step 2: Configuring Ollama ---"

# Always use the Docker Ollama container with GPU passthrough
# The ollama/ollama image ships with CUDA runtime built-in
#
# OLLAMA_URL is forced to the Compose service address below, so probing the
# remote host for an existing Ollama would be wasted work, and leaving
# USE_DOCKER_OLLAMA=false would point the stack at a container that is never
# started. A caller-supplied URL is therefore reported as ignored rather than
# silently overridden.
if [ -n "${OLLAMA_URL:-}" ] && [ "${OLLAMA_URL}" != "http://ollama:11434" ]; then
  echo " ⚠ Ignoring OLLAMA_URL=${OLLAMA_URL} — the Docker Ollama container is always used"
fi

USE_DOCKER_OLLAMA=true
OLLAMA_URL="http://ollama:11434"
echo " Using Docker Ollama container (GPU-accelerated via NVIDIA passthrough)"
echo " Host-accessible at localhost:11434"
echo ""
# -------------------------------------------------------
@@ -318,7 +381,7 @@ echo " Trading Engine: http://${REMOTE_IP}:8002"
echo " Risk Engine: http://${REMOTE_IP}:8003"
echo " MinIO Console: http://${REMOTE_IP}:9001"
echo " Superset: http://${REMOTE_IP}:8088"
# Show the host-published Ollama port only. OLLAMA_URL holds the Compose
# service address (http://ollama:11434), which presumably does not resolve
# from outside the Docker network, so printing it here would mislead.
echo " Ollama: http://${REMOTE_IP}:11434"
echo ""
echo "Commands:"
echo " ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose logs -f'"
+7
View File
@@ -82,6 +82,13 @@ services:
- "11434:11434"
volumes:
- ollama_models:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
trino:
image: trinodb/trino:latest