feat: multi-distro support in deploy-docker.sh
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled

Step 0 now detects the OS and package manager, supporting:
- Debian/Ubuntu (apt)
- RHEL/Rocky/Fedora/CentOS (dnf/yum)
- Arch Linux (pacman)
- openSUSE (zypper)
- WSL (uses host Windows NVIDIA driver, skips driver install)

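Step 0 also handles installing Docker, the NVIDIA driver, and the NVIDIA Container Toolkit,
and opens the required firewall ports (firewalld or ufw) on the supported distros.
On openSUSE the NVIDIA driver and Container Toolkit are not installed automatically; the script only prints a manual-install notice.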
This commit is contained in:
Celes Renata
2026-04-29 18:59:40 +00:00
parent 6f54fd07fa
commit 82892b7a3e
+164 -35
View File
@@ -43,15 +43,81 @@ echo " Ollama: Docker container (GPU-accelerated)"
echo "" echo ""
# ------------------------------------------------------- # -------------------------------------------------------
# Step 0: Ensure prerequisites on Rocky 9.7 # Step 0: Ensure prerequisites (multi-distro support)
# ------------------------------------------------------- # -------------------------------------------------------
echo "--- Step 0: Checking prerequisites (Rocky 9.7) ---" echo "--- Step 0: Checking prerequisites ---"
ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT' ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT'
set -euo pipefail set -euo pipefail
# Verify we're on a RHEL-compatible system # --- Detect OS and package manager ---
if ! grep -qi "rocky\|rhel\|centos" /etc/os-release 2>/dev/null; then detect_os() {
echo " ⚠ Warning: This script is designed for Rocky Linux 9.7 — detected different OS" if [ -f /etc/os-release ]; then
. /etc/os-release
OS_ID="${ID:-unknown}"
OS_LIKE="${ID_LIKE:-$OS_ID}"
elif [ -f /etc/redhat-release ]; then
OS_ID="rhel"
OS_LIKE="rhel"
else
OS_ID="unknown"
OS_LIKE="unknown"
fi
# Detect WSL
IS_WSL=false
if grep -qi microsoft /proc/version 2>/dev/null; then
IS_WSL=true
fi
# Determine package manager
if command -v apt-get &>/dev/null; then
PKG_MGR="apt"
elif command -v dnf &>/dev/null; then
PKG_MGR="dnf"
elif command -v yum &>/dev/null; then
PKG_MGR="yum"
elif command -v pacman &>/dev/null; then
PKG_MGR="pacman"
elif command -v zypper &>/dev/null; then
PKG_MGR="zypper"
else
PKG_MGR="unknown"
fi
echo " Detected: OS=$OS_ID (like=$OS_LIKE), pkg=$PKG_MGR, WSL=$IS_WSL"
}
#######################################
# Install one or more packages with the detected package manager.
# Globals:   PKG_MGR (read) - one of apt, dnf, yum, pacman, zypper
# Arguments: $@ - package name(s); at least one is required
# Outputs:   package-manager output on stdout; diagnostics on stderr
# Returns:   status of the package manager; exits 1 when no package is
#            given or PKG_MGR is not a supported manager
#######################################
install_pkg() {
  # Fail with a readable message instead of an opaque "unbound variable"
  # abort (set -u) or an install of the empty string.
  if [[ $# -eq 0 || -z "$1" ]]; then
    echo " ERROR: install_pkg: no package name given" >&2
    exit 1
  fi
  # "$@" forwards every requested package; previously only "$1" was used and
  # any additional names were silently dropped.
  case "${PKG_MGR:-unknown}" in
    apt)    sudo apt-get install -y "$@" ;;
    dnf)    sudo dnf -y install "$@" ;;
    yum)    sudo yum -y install "$@" ;;
    pacman) sudo pacman -S --noconfirm "$@" ;;
    zypper) sudo zypper install -y "$@" ;;
    *)
      # Diagnostics belong on stderr so they are not mistaken for normal output.
      echo " ERROR: Unknown package manager" >&2
      exit 1
      ;;
  esac
}
#######################################
# Refresh the package index for the detected package manager.
# Globals:   PKG_MGR (read) - one of apt, dnf, yum, pacman, zypper
# Arguments: none
# Returns:   status of the refresh command; 0 when nothing needs to be done
#######################################
update_pkg_cache() {
  case "${PKG_MGR:-unknown}" in
    apt)
      sudo apt-get update -qq
      ;;
    dnf|yum)
      # Nothing to do: dnf/yum refresh their metadata on demand.
      :
      ;;
    pacman)
      # NOTE(review): refreshing the sync databases without a full upgrade
      # (-Sy followed by -S <pkg>) is a partial upgrade, which Arch does not
      # support — consider -Syu if upgrading the host is acceptable.
      sudo pacman -Sy
      ;;
    zypper)
      # --quiet is a zypper *global* option and must precede the subcommand;
      # "zypper refresh -q" passes it as a refresh option instead.
      # --non-interactive keeps zypper from prompting (e.g. about repository
      # keys) while this script is being fed to bash over stdin.
      sudo zypper --non-interactive --quiet refresh
      ;;
    *)
      # Unknown manager: nothing to refresh here; no error is raised.
      :
      ;;
  esac
}
detect_os
# --- Git ---
if ! command -v git &>/dev/null; then
echo " Installing git..."
update_pkg_cache
install_pkg git
echo " ✓ Git installed"
else
echo " ✓ Git present"
fi fi
# --- Docker Engine --- # --- Docker Engine ---
@@ -59,12 +125,39 @@ if command -v docker &>/dev/null && docker info &>/dev/null; then
echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))" echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))"
else else
echo " Installing Docker CE..." echo " Installing Docker CE..."
sudo dnf -y install dnf-plugins-core case "$PKG_MGR" in
sudo dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo apt)
sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin # Debian/Ubuntu/WSL
sudo systemctl enable --now docker sudo apt-get update -qq
# Add current user to docker group (takes effect on next login) sudo apt-get install -y ca-certificates curl gnupg
sudo usermod -aG docker "$(whoami)" || true sudo install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/${OS_ID}/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg 2>/dev/null
sudo chmod a+r /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/${OS_ID} $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update -qq
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
;;
dnf|yum)
# RHEL/Rocky/Fedora/CentOS
sudo "$PKG_MGR" -y install dnf-plugins-core 2>/dev/null || true
local repo_distro="rhel"
if [[ "$OS_ID" == "fedora" ]]; then repo_distro="fedora"; fi
sudo dnf config-manager --add-repo "https://download.docker.com/linux/${repo_distro}/docker-ce.repo" 2>/dev/null || \
sudo yum-config-manager --add-repo "https://download.docker.com/linux/${repo_distro}/docker-ce.repo" 2>/dev/null
sudo "$PKG_MGR" -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
;;
pacman)
# Arch Linux
sudo pacman -S --noconfirm docker docker-compose docker-buildx
;;
zypper)
# openSUSE
sudo zypper install -y docker docker-compose docker-buildx
;;
esac
sudo systemctl enable --now docker 2>/dev/null || true
sudo usermod -aG docker "$(whoami)" 2>/dev/null || true
echo " ✓ Docker installed and started" echo " ✓ Docker installed and started"
fi fi
@@ -76,13 +169,31 @@ else
exit 1 exit 1
fi fi
# --- NVIDIA Driver --- # --- NVIDIA Driver (skip on WSL — uses host driver) ---
if ! command -v nvidia-smi &>/dev/null; then if [ "$IS_WSL" = "true" ]; then
echo " ✓ WSL detected — using host Windows NVIDIA driver"
elif ! command -v nvidia-smi &>/dev/null; then
echo " Installing NVIDIA drivers..." echo " Installing NVIDIA drivers..."
sudo dnf -y install epel-release case "$PKG_MGR" in
sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo apt)
sudo dnf -y module install nvidia-driver:latest-dkms sudo apt-get install -y nvidia-driver-560 2>/dev/null || \
echo " ✓ NVIDIA driver installed (reboot may be required)" sudo apt-get install -y nvidia-driver 2>/dev/null || \
echo " ⚠ NVIDIA driver install failed — install manually"
;;
dnf|yum)
sudo dnf -y install epel-release 2>/dev/null || true
sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo 2>/dev/null || true
sudo dnf -y module install nvidia-driver:latest-dkms 2>/dev/null || \
echo " ⚠ NVIDIA driver install failed — install manually"
;;
pacman)
sudo pacman -S --noconfirm nvidia nvidia-utils 2>/dev/null || \
echo " ⚠ NVIDIA driver install failed — install manually"
;;
zypper)
echo " ⚠ NVIDIA driver: install manually for openSUSE"
;;
esac
else else
echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))" echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))"
fi fi
@@ -90,40 +201,58 @@ fi
# --- NVIDIA Container Toolkit --- # --- NVIDIA Container Toolkit ---
if command -v nvidia-ctk &>/dev/null; then if command -v nvidia-ctk &>/dev/null; then
echo " ✓ NVIDIA Container Toolkit already installed" echo " ✓ NVIDIA Container Toolkit already installed"
elif [ "$IS_WSL" = "true" ] && docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null 2>&1; then
echo " ✓ WSL GPU passthrough working (no nvidia-ctk needed)"
else else
echo " Installing NVIDIA Container Toolkit..." echo " Installing NVIDIA Container Toolkit..."
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \ case "$PKG_MGR" in
sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null apt)
sudo dnf -y install nvidia-container-toolkit curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg 2>/dev/null
# Configure Docker runtime for NVIDIA curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sudo nvidia-ctk runtime configure --runtime=docker sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo systemctl restart docker sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null
sudo apt-get update -qq
sudo apt-get install -y nvidia-container-toolkit
;;
dnf|yum)
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null
sudo "$PKG_MGR" -y install nvidia-container-toolkit
;;
pacman)
sudo pacman -S --noconfirm nvidia-container-toolkit 2>/dev/null || \
echo " ⚠ Install nvidia-container-toolkit from AUR"
;;
zypper)
echo " ⚠ NVIDIA Container Toolkit: install manually for openSUSE"
;;
esac
sudo nvidia-ctk runtime configure --runtime=docker 2>/dev/null || true
sudo systemctl restart docker 2>/dev/null || true
echo " ✓ NVIDIA Container Toolkit installed and Docker configured" echo " ✓ NVIDIA Container Toolkit installed and Docker configured"
fi fi
# --- Verify GPU is accessible from Docker --- # --- Verify GPU is accessible from Docker ---
if docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null; then if docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null 2>&1; then
echo " ✓ GPU passthrough verified" echo " ✓ GPU passthrough verified"
else else
echo " ⚠ GPU passthrough test failed — NVIDIA Container Toolkit may need a reboot" echo " ⚠ GPU passthrough test failed — may need a reboot or manual NVIDIA setup"
echo " Run: sudo reboot, then re-run this script"
fi fi
# --- Git --- # --- Firewall (open required ports if firewall is active) ---
if ! command -v git &>/dev/null; then
echo " Installing git..."
sudo dnf -y install git
echo " ✓ Git installed"
fi
# --- Firewall (open required ports) ---
if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then
echo " Configuring firewall..." echo " Configuring firewalld..."
for port in 3000 8001 8002 8003 8004 9000 9001 11434; do for port in 3000 8001 8002 8003 8004 9000 9001 11434; do
sudo firewall-cmd --permanent --add-port="${port}/tcp" 2>/dev/null || true sudo firewall-cmd --permanent --add-port="${port}/tcp" 2>/dev/null || true
done done
sudo firewall-cmd --reload 2>/dev/null || true sudo firewall-cmd --reload 2>/dev/null || true
echo " ✓ Firewall ports opened" echo " ✓ Firewall ports opened"
elif command -v ufw &>/dev/null && sudo ufw status 2>/dev/null | grep -q "active"; then
echo " Configuring ufw..."
for port in 3000 8001 8002 8003 8004 9000 9001 11434; do
sudo ufw allow "${port}/tcp" 2>/dev/null || true
done
echo " ✓ UFW ports opened"
fi fi
REMOTE_SCRIPT REMOTE_SCRIPT
echo "" echo ""