feat: add Rocky 9.7 prerequisites and GPU passthrough for ollama container
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
This commit is contained in:
+94
-31
@@ -39,7 +39,93 @@ done
|
||||
# Deployment banner: show the operator which host, directory and model this
# run targets.
echo "=== Stonks Oracle Docker Deployment ==="
echo " Target: ${REMOTE_HOST}:${REMOTE_DIR}"
echo " Model: ${OLLAMA_MODEL}"
# Ollama is always deployed as the GPU-enabled Docker container, so a single
# "Ollama:" line is printed; echoing ${OLLAMA_URL:-auto-detect} as well would
# advertise a detection mode the script does not honour.
echo " Ollama: Docker container (GPU-accelerated)"
echo ""
|
||||
|
||||
# -------------------------------------------------------
# Step 0: Ensure prerequisites on Rocky 9.7
# -------------------------------------------------------
# Runs a single bash session on $REMOTE_HOST (script fed on stdin; the quoted
# heredoc delimiter means nothing below is expanded locally). The remote script
# installs or verifies, in order: Docker Engine, the Compose plugin, the NVIDIA
# driver, the NVIDIA Container Toolkit, GPU passthrough, git, and firewalld
# port rules. It uses sudo for every privileged step, so the remote user needs
# non-interactive sudo. Hard failures abort via `set -euo pipefail`; the OS
# check, GPU passthrough check and firewall setup only warn.
echo "--- Step 0: Checking prerequisites (Rocky 9.7) ---"
ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT'
set -euo pipefail

# Verify we're on a RHEL-compatible system (warning only)
if ! grep -qi "rocky\|rhel\|centos" /etc/os-release 2>/dev/null; then
  echo " ⚠ Warning: This script is designed for Rocky Linux 9.7 — detected different OS"
fi

# --- Docker Engine ---
# "Is Docker installed" is decided by the binary being present. Whether the
# daemon is reachable is handled separately below, so a stopped daemon or a
# missing group membership no longer triggers a reinstall.
if command -v docker &>/dev/null; then
  echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))"
else
  echo " Installing Docker CE..."
  sudo dnf -y install dnf-plugins-core
  sudo dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
  sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
  sudo systemctl enable --now docker
  # Add current user to docker group (takes effect on next login)
  sudo usermod -aG docker "$(whoami)" || true
  echo " ✓ Docker installed and started"
fi

# Choose how to reach the daemon for the rest of this session. Group
# membership granted by usermod is not active in the current login, so fall
# back to sudo instead of failing with "permission denied".
if docker info &>/dev/null; then
  DOCKER=(docker)
else
  if ! systemctl is-active --quiet docker; then
    sudo systemctl enable --now docker
  fi
  if docker info &>/dev/null; then
    DOCKER=(docker)
  else
    DOCKER=(sudo docker)
  fi
fi

# --- Docker Compose plugin ---
# `docker compose version` is a client-side check and needs no daemon access.
if docker compose version &>/dev/null; then
  echo " ✓ Docker Compose plugin available ($(docker compose version --short))"
else
  echo " ERROR: docker compose plugin not found after Docker install" >&2
  exit 1
fi

# --- NVIDIA Driver ---
# NOTE(review): the DKMS module stream normally needs kernel-devel/headers
# matching the running kernel — confirm they are present on the target image.
if ! command -v nvidia-smi &>/dev/null; then
  echo " Installing NVIDIA drivers..."
  sudo dnf -y install epel-release
  sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
  sudo dnf -y module install nvidia-driver:latest-dkms
  echo " ✓ NVIDIA driver installed (reboot may be required)"
else
  echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))"
fi

# --- NVIDIA Container Toolkit ---
toolkit_fresh=false
if command -v nvidia-ctk &>/dev/null; then
  echo " ✓ NVIDIA Container Toolkit already installed"
else
  echo " Installing NVIDIA Container Toolkit..."
  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
    sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null
  sudo dnf -y install nvidia-container-toolkit
  toolkit_fresh=true
fi

# Configure Docker runtime for NVIDIA. This also runs when the toolkit was
# already present but Docker does not list an nvidia runtime (e.g. Docker was
# installed after the toolkit), which the install-only path would miss.
if [[ "$toolkit_fresh" == true ]] \
  || ! "${DOCKER[@]}" info --format '{{json .Runtimes}}' 2>/dev/null | grep -q nvidia; then
  sudo nvidia-ctk runtime configure --runtime=docker
  sudo systemctl restart docker
  if [[ "$toolkit_fresh" == true ]]; then
    echo " ✓ NVIDIA Container Toolkit installed and Docker configured"
  else
    echo " ✓ Docker runtime configured for NVIDIA"
  fi
fi

# --- Verify GPU is accessible from Docker ---
# Non-fatal by design: a freshly installed driver is not loaded until reboot.
# The container output is kept so the real cause is visible on failure.
if gpu_check_out=$("${DOCKER[@]}" run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi 2>&1); then
  echo " ✓ GPU passthrough verified"
else
  echo " ⚠ GPU passthrough test failed — NVIDIA Container Toolkit may need a reboot"
  echo " Run: sudo reboot, then re-run this script"
  printf '%s\n' "$gpu_check_out" | tail -n 3 | sed 's/^/   /'
fi

# --- Git ---
if ! command -v git &>/dev/null; then
  echo " Installing git..."
  sudo dnf -y install git
  echo " ✓ Git installed"
fi

# --- Firewall (open required ports) ---
# Best-effort: failures do not abort the deployment, but they are collected and
# reported instead of being followed by an unconditional success message.
# NOTE(review): the port list does not include 8088 — confirm whether that is
# intentional.
if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then
  echo " Configuring firewall..."
  fw_failed=()
  for port in 3000 8001 8002 8003 8004 9000 9001 11434; do
    sudo firewall-cmd --permanent --add-port="${port}/tcp" 2>/dev/null || fw_failed+=("${port}/tcp")
  done
  sudo firewall-cmd --reload 2>/dev/null || fw_failed+=("reload")
  if (( ${#fw_failed[@]} == 0 )); then
    echo " ✓ Firewall ports opened"
  else
    echo " ⚠ Firewall configuration incomplete (failed: ${fw_failed[*]})"
  fi
fi
REMOTE_SCRIPT
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
@@ -70,35 +156,12 @@ echo ""
|
||||
# Step 2: Configure Ollama (always the bundled Docker container)
# -------------------------------------------------------
echo "--- Step 2: Configuring Ollama ---"

# Always use the Docker Ollama container with GPU passthrough
# The ollama/ollama image ships with CUDA runtime built-in
#
# No host-side detection is performed: USE_DOCKER_OLLAMA and OLLAMA_URL are
# fixed below, so probing the remote host over ssh would only add latency and
# print messages that contradict the final configuration. A caller-supplied
# OLLAMA_URL is overridden as well; warn so that is not silent.
if [ -n "${OLLAMA_URL:-}" ] && [ "${OLLAMA_URL}" != "http://ollama:11434" ]; then
  echo " ⚠ Ignoring provided Ollama URL (${OLLAMA_URL}) — the Docker Ollama container is always used" >&2
fi

USE_DOCKER_OLLAMA=true
# Compose-network address used by the other services to reach the container.
OLLAMA_URL="http://ollama:11434"
echo " Using Docker Ollama container (GPU-accelerated via NVIDIA passthrough)"
echo " Host-accessible at localhost:11434"
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
@@ -318,7 +381,7 @@ echo " Trading Engine: http://${REMOTE_IP}:8002"
|
||||
# Tail of the post-deployment summary: service endpoints reachable from the
# operator's machine, followed by follow-up commands.
echo " Risk Engine: http://${REMOTE_IP}:8003"
echo " MinIO Console: http://${REMOTE_IP}:9001"
echo " Superset: http://${REMOTE_IP}:8088"
# Only the host-reachable Ollama address is shown. ${OLLAMA_URL} holds the
# compose-internal name (http://ollama:11434), which does not resolve outside
# the Docker network, so printing it here would duplicate the entry with an
# unusable URL.
echo " Ollama: http://${REMOTE_IP}:11434"
echo ""
echo "Commands:"
echo " ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose logs -f'"
|
||||
|
||||
Reference in New Issue
Block a user