feat: pipeline on/off toggle with per-stage Helm control
- Added pipelineEnabled flag to Helm values (default: true)
- Worker services (scheduler, ingestion, parser, extractor, aggregation, recommendation, broker-adapter, lake-publisher) scale to 0 when disabled
- API services always run regardless of toggle
- Redis-based runtime toggle: POST /api/ops/pipeline/toggle
- Scheduler checks the flag before each cycle
- Frontend: green/red Pipeline ON/OFF button on the pipeline page
- Beta defaults to pipelineEnabled: false
- Base values.yaml: blanked external URLs (Ollama, Polygon, Alpaca) so stages only connect to what they explicitly configure
This commit is contained in:
@@ -227,3 +227,55 @@ jobs:
|
||||
with:
|
||||
name: inttest-results
|
||||
path: inttest-results.json
|
||||
|
||||
beta-gate:
  # Runs after integration-test on pushes to main: installs the CLI tooling if
  # the runner lacks it, then hands the deploy -> test -> promote decision to
  # infra/inttest/promote.sh and uploads its JSON results.
  needs: [integration-test]
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  runs-on: self-hosted-gremlin
  permissions:
    contents: read
    packages: read
  steps:
    - uses: actions/checkout@v5

    - name: Install kubectl
      run: |
        if ! command -v kubectl &> /dev/null; then
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page, which would otherwise be installed as "kubectl".
          KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
          curl -fsSLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/kubectl
        fi
        kubectl version --client

    - name: Install Helm
      run: |
        if ! command -v helm &> /dev/null; then
          # NOTE(review): this pipes an unpinned script from the helm main
          # branch into a root shell; consider pinning a release or checksum.
          curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | sudo bash
        fi
        helm version

    - name: Configure kubectl
      run: |
        # When a service-account token is mounted, build a kubeconfig context
        # from it; otherwise whatever kubeconfig the runner already has is used.
        if [ -f /var/run/secrets/kubernetes.io/serviceaccount/token ]; then
          kubectl config set-cluster in-cluster \
            --server=https://kubernetes.default.svc \
            --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          kubectl config set-credentials runner \
            --token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
          kubectl config set-context runner --cluster=in-cluster --user=runner
          kubectl config use-context runner
        fi
        kubectl cluster-info || echo "WARNING: kubectl cannot reach cluster API"

    - name: Run beta gate (deploy → test → promote)
      env:
        IMAGE_TAG: ${{ github.sha }}
      run: |
        bash infra/inttest/promote.sh \
          --image-tag "$IMAGE_TAG" \
          --results-file beta-gate-results.json

    - name: Upload beta gate results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: beta-gate-results
        path: beta-gate-results.json
|
||||
|
||||
Binary file not shown.
@@ -533,6 +533,14 @@ export function useRetryFailedExtractions() {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Mutation hook that switches the pipeline on or off.
 *
 * The mutation POSTs `{ enabled }` to `/api/ops/pipeline/toggle`; when the
 * request succeeds the `pipeline-health` query key is invalidated.
 */
export function usePipelineToggle() {
  const queryClient = useQueryClient();

  const postToggle = (enabled: boolean) =>
    apiPost<{ pipeline_enabled: boolean }>('query', '/api/ops/pipeline/toggle', { enabled });

  const invalidateHealth = () => queryClient.invalidateQueries({ queryKey: ['pipeline-health'] });

  return useMutation({
    mutationFn: postToggle,
    onSuccess: invalidateHealth,
  });
}
|
||||
|
||||
/**
 * Query hook for the ingestion summary endpoint.
 *
 * @param hours - Value sent as the `hours` query parameter and used as part of
 *   the query key (defaults to 24).
 */
export function useIngestionSummary(hours = 24) {
  const summaryPath = `/api/ops/ingestion/summary?hours=${hours}`;
  return useGet<Record<string, unknown>>(['ingestion-summary', hours], 'query', summaryPath);
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { usePipelineHealth, useRetryFailedExtractions } from '../api/hooks';
|
||||
import { usePipelineHealth, useRetryFailedExtractions, usePipelineToggle } from '../api/hooks';
|
||||
import { LoadingSpinner, DateRangeSelector, Card } from '../components/ui';
|
||||
|
||||
const QUEUE_LABELS: Record<string, string> = {
|
||||
@@ -54,12 +54,14 @@ export function OpsPipelinePage() {
|
||||
const { data, isLoading } = usePipelineHealth(hours);
|
||||
const stream = usePipelineStream();
|
||||
const retryMutation = useRetryFailedExtractions();
|
||||
const toggleMutation = usePipelineToggle();
|
||||
|
||||
if (isLoading) return <LoadingSpinner />;
|
||||
|
||||
const parsing = (data?.parsing ?? {}) as Record<string, unknown>;
|
||||
const extraction = (data?.extraction ?? {}) as Record<string, unknown>;
|
||||
const aggregation = (data?.aggregation ?? {}) as Record<string, unknown>;
|
||||
const pipelineEnabled = (data?.pipeline_enabled ?? true) as boolean;
|
||||
|
||||
// Prefer live stream data for queue depths and doc stages, fall back to initial fetch
|
||||
const queueDepths = stream?.queue_depths
|
||||
@@ -82,6 +84,14 @@ export function OpsPipelinePage() {
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-semibold text-gray-100">Pipeline Health</h1>
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => toggleMutation.mutate(!pipelineEnabled)}
|
||||
disabled={toggleMutation.isPending}
|
||||
className={`rounded-md px-3 py-1.5 text-xs font-medium text-white ${pipelineEnabled ? 'bg-green-600 hover:bg-green-500' : 'bg-red-600 hover:bg-red-500'} disabled:opacity-50`}
|
||||
>
|
||||
{toggleMutation.isPending ? '…' : pipelineEnabled ? 'Pipeline ON' : 'Pipeline OFF'}
|
||||
</button>
|
||||
{failedCount > 0 && (
|
||||
<button
|
||||
type="button"
|
||||
|
||||
@@ -106,6 +106,8 @@ export const handlers = [
|
||||
http.delete('/api/admin/trading/lockouts/:id', () => HttpResponse.json({ status: 'deleted' })),
|
||||
http.get('/api/ops/pipeline/health', () => HttpResponse.json({ hours: 24, document_stages: [{ status: 'extracted', doc_count: 5 }], parsing: {}, extraction: {}, aggregation: {}, queue_depths: {} })),
|
||||
http.post('/api/ops/pipeline/retry-failed', () => HttpResponse.json({ retried: 10, message: 'Re-enqueued 10 documents for extraction' })),
|
||||
http.get('/api/ops/pipeline/toggle', () => HttpResponse.json({ pipeline_enabled: true })),
|
||||
http.post('/api/ops/pipeline/toggle', () => HttpResponse.json({ pipeline_enabled: true })),
|
||||
http.get('/api/ops/ingestion/summary', () => HttpResponse.json({ total_runs: 10, completed: 8, failed: 2, total_items_fetched: 50, total_items_new: 12, by_source_type: [] })),
|
||||
http.get('/api/ops/ingestion/throughput', () => HttpResponse.json([])),
|
||||
http.get('/api/ops/model/performance', () => HttpResponse.json({ total_extractions: 20, success_rate: 0.9, avg_duration_ms: 1500, retry_rate: 0.05, avg_confidence: 0.8 })),
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
{{- include "stonks.labels" $root | nindent 4 }}
|
||||
stonks-oracle/tier: {{ $svc.tier }}
|
||||
spec:
|
||||
replicas: {{ $svc.replicas }}
|
||||
replicas: {{ if and (hasKey $svc "pipeline") $svc.pipeline (not $root.Values.pipelineEnabled) }}0{{ else }}{{ $svc.replicas }}{{ end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ $svc.image }}
|
||||
|
||||
@@ -6,31 +6,11 @@
|
||||
image:
|
||||
tag: latest
|
||||
|
||||
## Single replica for API services, disable pipeline workers
|
||||
## Beta is for API testing only — no ingestion/extraction/aggregation
|
||||
## Pipeline OFF by default — beta is for API testing only
|
||||
pipelineEnabled: false
|
||||
|
||||
## Single replica for API services
|
||||
services:
|
||||
scheduler:
|
||||
replicas: 0
|
||||
symbolRegistry:
|
||||
replicas: 1
|
||||
ingestion:
|
||||
replicas: 0
|
||||
parser:
|
||||
replicas: 0
|
||||
extractor:
|
||||
replicas: 0
|
||||
aggregation:
|
||||
replicas: 0
|
||||
recommendation:
|
||||
replicas: 0
|
||||
tradingEngine:
|
||||
replicas: 1
|
||||
riskEngine:
|
||||
replicas: 1
|
||||
brokerAdapter:
|
||||
replicas: 0
|
||||
lakePublisher:
|
||||
replicas: 0
|
||||
queryApi:
|
||||
replicas: 1
|
||||
dashboard:
|
||||
|
||||
@@ -4,10 +4,16 @@ image:
|
||||
pullPolicy: Always
|
||||
tag: latest
|
||||
|
||||
## Pipeline toggle — when false, all worker services (ingestion, parsing,
|
||||
## extraction, aggregation, recommendation, broker, lake-publisher, scheduler)
|
||||
## are scaled to 0. API services always run.
|
||||
pipelineEnabled: true
|
||||
|
||||
## Service deployments — replicas and resource overrides
|
||||
services:
|
||||
scheduler:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: scheduler
|
||||
command: "python -m services.scheduler.app"
|
||||
tier: orchestration
|
||||
@@ -32,6 +38,7 @@ services:
|
||||
|
||||
ingestion:
|
||||
replicas: 2
|
||||
pipeline: true
|
||||
image: ingestion
|
||||
command: "python -m services.ingestion.worker"
|
||||
tier: ingestion
|
||||
@@ -42,6 +49,7 @@ services:
|
||||
|
||||
parser:
|
||||
replicas: 2
|
||||
pipeline: true
|
||||
image: parser
|
||||
command: "python -m services.parser.worker"
|
||||
tier: processing
|
||||
@@ -52,6 +60,7 @@ services:
|
||||
|
||||
extractor:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: extractor
|
||||
command: "python -m services.extractor.main"
|
||||
tier: processing
|
||||
@@ -62,6 +71,7 @@ services:
|
||||
|
||||
aggregation:
|
||||
replicas: 4
|
||||
pipeline: true
|
||||
image: aggregation
|
||||
command: "python -m services.aggregation.main"
|
||||
tier: processing
|
||||
@@ -72,6 +82,7 @@ services:
|
||||
|
||||
recommendation:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: recommendation
|
||||
command: "python -m services.recommendation.main"
|
||||
tier: processing
|
||||
@@ -107,6 +118,7 @@ services:
|
||||
|
||||
brokerAdapter:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: broker-adapter
|
||||
command: "python -m services.adapters.broker_service"
|
||||
tier: trading
|
||||
@@ -117,6 +129,7 @@ services:
|
||||
|
||||
lakePublisher:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: lake-publisher
|
||||
command: "python -m services.lake_publisher.jobs"
|
||||
tier: analytics
|
||||
|
||||
Executable
+409
@@ -0,0 +1,409 @@
|
||||
#!/bin/bash
|
||||
# Beta-to-Paper promotion gate
|
||||
#
|
||||
# Deploys the given image tag to the beta namespace, runs integration tests
|
||||
# against the live beta services, and promotes to paper-trading if all pass.
|
||||
#
|
||||
# This script is the single source of truth for the promotion decision.
|
||||
# CI calls it; humans can call it too.
|
||||
#
|
||||
# Usage: bash infra/inttest/promote.sh [OPTIONS]
|
||||
#
|
||||
# Options:
|
||||
# --image-tag TAG Docker image tag to deploy (required)
|
||||
# --skip-promote Run tests but don't promote even if green
|
||||
# --skip-teardown Leave beta namespace running after tests
|
||||
# --results-file PATH Path for JSON results output (default: beta-gate-results.json)
|
||||
# --timeout SECONDS Max wait for services to become ready (default: 180)
|
||||
# -h, --help Show usage
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 All tests passed, promotion succeeded (or --skip-promote)
|
||||
# 1 Test failures — promotion blocked
|
||||
# 2 Infrastructure/deployment failure
|
||||
# 3 Promotion step failed (tests passed but helm upgrade failed)
|
||||
set -euo pipefail
|
||||
|
||||
# Absolute directory of this script, and the repository root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# ── Defaults ─────────────────────────────────────────────────────────────────
# Values the command-line options may override.
IMAGE_TAG=""                              # --image-tag (required)
SKIP_PROMOTE=false                        # --skip-promote
SKIP_TEARDOWN=false                       # --skip-teardown
RESULTS_FILE="beta-gate-results.json"     # --results-file
READY_TIMEOUT=180                         # --timeout, in seconds

# Fixed namespaces: where the candidate is tested and where it is promoted to.
BETA_NAMESPACE="stonks-oracle-beta"
PAPER_NAMESPACE="stonks-oracle"
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
# Print the command-line help to stdout, then terminate the script with status 0.
usage() {
  # Quoted delimiter: the help text is emitted verbatim, with no expansion.
  cat <<'USAGE_TEXT'
Usage: bash infra/inttest/promote.sh [OPTIONS]

Options:
--image-tag TAG Docker image tag to deploy (required)
--skip-promote Run tests but don't promote even if green
--skip-teardown Leave beta namespace running after tests
--results-file PATH Path for JSON results output (default: beta-gate-results.json)
--timeout SECONDS Max wait for services to become ready (default: 180)
-h, --help Show usage

Exit codes:
0 All tests passed, promotion succeeded (or --skip-promote)
1 Test failures — promotion blocked
2 Infrastructure/deployment failure
3 Promotion step failed (tests passed but helm upgrade failed)
USAGE_TEXT
  exit 0
}
|
||||
|
||||
# Write one line to stdout: "[HH:MM:SS] [beta-gate] <message>", with the time in UTC.
# All arguments are joined with spaces to form the message.
log() {
  printf '[%s] [beta-gate] %s\n' "$(date -u +"%H:%M:%S")" "$*"
}
|
||||
|
||||
# Log the message with a "FATAL: " prefix and abort the script with status 2
# (the exit code documented for infrastructure/deployment failures).
die() {
  local message="FATAL: $*"
  log "$message"
  exit 2
}
|
||||
|
||||
# ── Parse CLI args ───────────────────────────────────────────────────────────

# Report a command-line error and stop with status 2.
# usage() always exits 0, so it is run in a subshell purely for its output;
# exiting 0 on a bad invocation would let CI treat a gate that never ran as green.
bad_invocation() {
  echo "$1" >&2
  ( usage ) >&2 || true
  exit 2
}

# Guard for options that take a value: call as `require_value "$@"`.
# Without this, a trailing option trips `set -u` with an "unbound variable" error.
require_value() {
  [[ $# -ge 2 ]] || bad_invocation "ERROR: $1 requires a value"
}

while [[ $# -gt 0 ]]; do
  case $1 in
    --image-tag)
      require_value "$@"
      IMAGE_TAG="$2"
      shift 2
      ;;
    --skip-promote)
      SKIP_PROMOTE=true
      shift
      ;;
    --skip-teardown)
      SKIP_TEARDOWN=true
      shift
      ;;
    --results-file)
      require_value "$@"
      RESULTS_FILE="$2"
      shift 2
      ;;
    --timeout)
      require_value "$@"
      READY_TIMEOUT="$2"
      shift 2
      ;;
    -h|--help)
      # Explicit help request: print usage and exit 0.
      usage
      ;;
    *)
      bad_invocation "Unknown option: $1"
      ;;
  esac
done

if [ -z "$IMAGE_TAG" ]; then
  bad_invocation "ERROR: --image-tag is required"
fi

# The timeout is interpolated into helm's --timeout "<n>s"; reject anything else up front.
if ! [[ "$READY_TIMEOUT" =~ ^[0-9]+$ ]]; then
  bad_invocation "ERROR: --timeout must be a whole number of seconds"
fi

# Wall-clock markers for the results file and the duration calculation.
STARTED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
PIPELINE_START=$(date +%s)

log "Beta gate starting"
log " Image tag: $IMAGE_TAG"
log " Beta NS: $BETA_NAMESPACE"
log " Paper NS: $PAPER_NAMESPACE"
log " Skip promote: $SKIP_PROMOTE"
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Stage 1: Deploy to beta namespace
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
log "▶ Stage 1: Deploy to beta"

# Ensure beta namespace exists (an "already exists" error is expected and ignored)
kubectl create namespace "$BETA_NAMESPACE" 2>/dev/null || true

# Create beta database if it doesn't exist.
# Best effort: the CREATE only runs when the lookup prints no "1", and any
# failure of either command is ignored.
log "Ensuring beta database exists ..."
kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
  psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = 'stonks_beta'" \
  | grep -q 1 || \
  kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
    psql -U postgres -c "CREATE DATABASE stonks_beta OWNER stonks;" 2>/dev/null || true

# Apply migrations to beta database.
# The glob expands in sorted order, so no `ls | sort` (which would also split
# paths containing whitespace) is needed.
log "Applying migrations to beta database ..."
for migration in "$REPO_ROOT/infra/migrations/"*.sql; do
  # An unmatched glob stays literal; skip it instead of feeding psql a missing file.
  [ -e "$migration" ] || continue
  # -i is required: without it kubectl exec does not forward stdin, so
  # `psql -f -` would read nothing and the migration would silently not run.
  if ! kubectl exec -i -n postgresql-service postgresql-1 -c postgres -- \
    psql -U stonks -d stonks_beta -f - < "$migration" 2>/dev/null; then
    # Still best effort (as before), but no longer silent.
    log "WARNING: migration $(basename "$migration") exited non-zero — continuing"
  fi
done

# Deploy via Helm with beta values
log "Helm upgrade to beta namespace ..."
if helm upgrade --install stonks-oracle-beta \
  "$REPO_ROOT/infra/helm/stonks-oracle" \
  -n "$BETA_NAMESPACE" \
  -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml" \
  --set "image.tag=$IMAGE_TAG" \
  --wait \
  --timeout "${READY_TIMEOUT}s"; then
  DEPLOY_STATUS="ok"
else
  log "Helm deploy to beta failed"
  DEPLOY_STATUS="failed"
fi

if [ "$DEPLOY_STATUS" != "ok" ]; then
  # Dump pod and recent event state for diagnosis, record the failure, and stop
  # with the infrastructure exit code.
  log "Beta deployment failed — checking pod status"
  kubectl get pods -n "$BETA_NAMESPACE" -o wide 2>&1 || true
  kubectl get events -n "$BETA_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true

  cat > "$RESULTS_FILE" <<EOF
{
  "run_id": "beta-gate-${IMAGE_TAG}",
  "image_tag": "${IMAGE_TAG}",
  "started_at": "${STARTED_AT}",
  "completed_at": "$(date -u +"%Y-%m-%dT%H:%M:%SZ")",
  "exit_code": 2,
  "stage": "deploy",
  "deploy_status": "failed",
  "test_status": "skipped",
  "promote_status": "blocked",
  "tests": {"total": 0, "passed": 0, "failed": 0, "errors": 0}
}
EOF
  exit 2
fi

log "✓ Beta deployment ready"
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Stage 2: Seed beta database
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
log "▶ Stage 2: Seed beta data"

# Run seed against beta database via a temporary pod.
# SEED_IMAGE is also reused by the test-runner pod in Stage 3.
SEED_IMAGE="registry.celestium.life/stonks-oracle/query-api:${IMAGE_TAG}"

# Clean up any previous seed pod
kubectl delete pod seed-beta -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# NOTE(review): the Postgres and MinIO credentials below are hard-coded in the
# repository; consider sourcing them from a Kubernetes Secret or the CI environment.
#
# Seeding stays best effort (a failure only warns), but stderr is no longer
# discarded and the success line is only printed when the seed pod succeeded.
if kubectl run seed-beta \
  --image="$SEED_IMAGE" \
  --restart=Never \
  --rm \
  --attach \
  --pod-running-timeout=3m \
  --namespace="$BETA_NAMESPACE" \
  --image-pull-policy=Always \
  --env="POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local" \
  --env="POSTGRES_PORT=5432" \
  --env="POSTGRES_DB=stonks_beta" \
  --env="POSTGRES_USER=stonks" \
  --env="POSTGRES_PASSWORD=St0nks0racl3!" \
  --env="MINIO_ENDPOINT=minio.minio-service.svc.cluster.local:80" \
  --env="MINIO_SECURE=false" \
  --env="MINIO_ACCESS_KEY=minioadmin" \
  --env="MINIO_SECRET_KEY=minioadmin" \
  --command -- python -m tests.integration.seed_sandbox; then
  log "✓ Beta data seeded"
else
  log "WARNING: Seed may have partially failed (could be idempotent re-run)"
fi
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Stage 3: Run integration tests against beta
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
log "▶ Stage 3: Run integration tests"

# Determine service URLs within the beta namespace
QUERY_API_URL="http://query-api.${BETA_NAMESPACE}.svc.cluster.local:8000"
REGISTRY_API_URL="http://symbol-registry.${BETA_NAMESPACE}.svc.cluster.local:8000"
RISK_API_URL="http://risk.${BETA_NAMESPACE}.svc.cluster.local:8000"
TRADING_API_URL="http://trading-engine.${BETA_NAMESPACE}.svc.cluster.local:8000"

# Clean up any previous runner
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# Run tests as a pod in the beta namespace.
# NOTE(review): the Postgres password below is hard-coded in the repository;
# consider sourcing it from a Kubernetes Secret or the CI environment.
log "Starting test runner pod ..."
kubectl run beta-test-runner \
  --image="$SEED_IMAGE" \
  --restart=Never \
  --namespace="$BETA_NAMESPACE" \
  --image-pull-policy=Always \
  --env="QUERY_API_URL=$QUERY_API_URL" \
  --env="REGISTRY_API_URL=$REGISTRY_API_URL" \
  --env="RISK_API_URL=$RISK_API_URL" \
  --env="TRADING_API_URL=$TRADING_API_URL" \
  --env="POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local" \
  --env="POSTGRES_PORT=5432" \
  --env="POSTGRES_DB=stonks_beta" \
  --env="POSTGRES_USER=stonks" \
  --env="POSTGRES_PASSWORD=St0nks0racl3!" \
  --env="REDIS_HOST=redis-master.redis-service.svc.cluster.local" \
  --env="REDIS_PORT=6379" \
  --env="REDIS_DB=1" \
  --env="REDIS_PASSWORD=" \
  --env="BROKER_MODE=paper" \
  --env="LOG_LEVEL=INFO" \
  --command -- python -m pytest tests/integration/ -v --tb=short -q

# Wait for the test runner to complete
log "Waiting for test runner (timeout: 600s) ..."
TEST_EXIT_CODE=0
# The outcome of this wait is deliberately ignored; the polling loop below is
# what actually decides when the pod has finished.
kubectl wait --for=condition=Ready=false pod/beta-test-runner \
  -n "$BETA_NAMESPACE" --timeout=600s 2>/dev/null || true

# Wait for pod to reach terminal state (120 polls x 5s = up to 600s)
for _ in $(seq 1 120); do
  POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
    -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
  if [ "$POD_PHASE" = "Succeeded" ] || [ "$POD_PHASE" = "Failed" ]; then
    break
  fi
  sleep 5
done

# Collect results
TEST_OUTPUT=$(kubectl logs beta-test-runner -n "$BETA_NAMESPACE" 2>/dev/null || true)
POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
  -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")

# Only a pod that ran to completion successfully may open the gate. Previously
# just "Failed" blocked it, so a runner still Pending/Running after the timeout
# (or one whose phase could not be read) produced 0 failures / 0 errors and the
# untested image was promoted.
if [ "$POD_PHASE" != "Succeeded" ]; then
  if [ "$POD_PHASE" != "Failed" ]; then
    log "Test runner did not reach a terminal state (pod phase: ${POD_PHASE}) — treating as failure"
  fi
  TEST_EXIT_CODE=1
fi

# Parse test counts from the last matching pytest summary figures in the log.
# With pipefail, a non-matching grep triggers the `|| echo "0"` fallback.
TESTS_PASSED=$(echo "$TEST_OUTPUT" | grep -oP '\d+(?= passed)' | tail -1 || echo "0")
TESTS_FAILED=$(echo "$TEST_OUTPUT" | grep -oP '\d+(?= failed)' | tail -1 || echo "0")
TESTS_ERRORS=$(echo "$TEST_OUTPUT" | grep -oP '\d+(?= error)' | tail -1 || echo "0")
TESTS_PASSED=${TESTS_PASSED:-0}
TESTS_FAILED=${TESTS_FAILED:-0}
TESTS_ERRORS=${TESTS_ERRORS:-0}
TESTS_TOTAL=$(( TESTS_PASSED + TESTS_FAILED + TESTS_ERRORS ))

log "Test results: ${TESTS_PASSED} passed, ${TESTS_FAILED} failed, ${TESTS_ERRORS} errors"

# Print test output for CI visibility
if [ -n "$TEST_OUTPUT" ]; then
  echo "─── Test Output ───"
  echo "$TEST_OUTPUT" | tail -60
  echo "─── End Test Output ───"
fi

# Clean up test runner
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Stage 4: Promotion decision
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Outcome defaults: nothing promoted, exit code 0 until a branch below says otherwise.
PROMOTE_STATUS="blocked"
FINAL_EXIT=0

# The gate is red when any test failed or errored, or the runner reported failure.
gate_is_red=false
if [ "$TESTS_FAILED" -gt 0 ] || [ "$TESTS_ERRORS" -gt 0 ] || [ "$TEST_EXIT_CODE" -ne 0 ]; then
  gate_is_red=true
fi

if [ "$gate_is_red" = true ]; then
  log "✗ GATE FAILED — ${TESTS_FAILED} failures, ${TESTS_ERRORS} errors"
  log " Promotion to paper-trading BLOCKED"
  PROMOTE_STATUS="blocked"
  FINAL_EXIT=1
elif [ "$SKIP_PROMOTE" = true ]; then
  log "✓ Tests passed — promotion skipped (--skip-promote)"
  PROMOTE_STATUS="skipped"
  FINAL_EXIT=0
else
  log "▶ Stage 4: Promoting to paper-trading"
  log " Upgrading $PAPER_NAMESPACE with image tag $IMAGE_TAG ..."

  # Same chart as beta, installed into the paper namespace with only the image tag overridden.
  promote_args=(
    upgrade --install stonks-oracle
    "$REPO_ROOT/infra/helm/stonks-oracle"
    -n "$PAPER_NAMESPACE"
    --set "image.tag=$IMAGE_TAG"
    --wait
    --timeout 300s
  )

  if ! helm "${promote_args[@]}"; then
    # Tests were green but the release could not be upgraded: exit code 3.
    log "✗ Promotion failed — helm upgrade error"
    PROMOTE_STATUS="failed"
    FINAL_EXIT=3
  else
    log "✓ PROMOTED — paper-trading now running $IMAGE_TAG"
    PROMOTE_STATUS="promoted"
    FINAL_EXIT=0

    # Rolling restart to pick up new images (best effort; errors are ignored)
    log "Rolling restart of API services ..."
    restart_targets=(
      deployment/query-api deployment/symbol-registry
      deployment/trading-engine deployment/risk-engine
      deployment/aggregation deployment/recommendation
    )
    kubectl rollout restart "${restart_targets[@]}" \
      -n "$PAPER_NAMESPACE" 2>/dev/null || true
  fi
fi
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Stage 5: Cleanup beta (optional)
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Only after a successful promotion, and unless --skip-teardown was given, set
# every beta service to zero replicas. The release and namespace are kept.
if [ "$SKIP_TEARDOWN" = false ] && [ "$PROMOTE_STATUS" = "promoted" ]; then
  log "Scaling down beta deployment (keeping namespace for next run) ..."

  # One "--set services.<name>.replicas=0" override per service.
  zero_replica_args=()
  for svc_key in \
    queryApi symbolRegistry tradingEngine riskEngine \
    scheduler ingestion parser extractor \
    aggregation recommendation brokerAdapter lakePublisher \
    dashboard; do
    zero_replica_args+=(--set "services.${svc_key}.replicas=0")
  done

  # Best effort: a failed scale-down does not change the gate result.
  helm upgrade stonks-oracle-beta \
    "$REPO_ROOT/infra/helm/stonks-oracle" \
    -n "$BETA_NAMESPACE" \
    -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml" \
    --set "image.tag=$IMAGE_TAG" \
    "${zero_replica_args[@]}" \
    2>/dev/null || true
  log "Beta scaled to zero"
fi
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Write results
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
COMPLETED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
PIPELINE_END=$(date +%s)
PIPELINE_DURATION=$(( PIPELINE_END - PIPELINE_START ))

# test_status describes the test run itself, not the promotion. Deriving it from
# FINAL_EXIT reported "error" for exit code 3, which by definition means the
# tests passed and only the helm promotion failed. Use the runner pod's final
# phase instead: a terminal phase means the run completed.
case "${POD_PHASE:-Unknown}" in
  Succeeded|Failed) TEST_STATUS="completed" ;;
  *)                TEST_STATUS="error" ;;
esac

cat > "$RESULTS_FILE" <<EOF
{
  "run_id": "beta-gate-${IMAGE_TAG}",
  "image_tag": "${IMAGE_TAG}",
  "started_at": "${STARTED_AT}",
  "completed_at": "${COMPLETED_AT}",
  "duration_s": ${PIPELINE_DURATION},
  "exit_code": ${FINAL_EXIT},
  "deploy_status": "${DEPLOY_STATUS}",
  "test_status": "${TEST_STATUS}",
  "promote_status": "${PROMOTE_STATUS}",
  "tests": {
    "total": ${TESTS_TOTAL},
    "passed": ${TESTS_PASSED},
    "failed": ${TESTS_FAILED},
    "errors": ${TESTS_ERRORS}
  }
}
EOF

log "Results written to $RESULTS_FILE"
echo ""
log "═══════════════════════════════════════════════════"
log " Beta Gate Summary"
log "═══════════════════════════════════════════════════"
log " Image: $IMAGE_TAG"
log " Duration: ${PIPELINE_DURATION}s"
log " Tests: ${TESTS_PASSED}/${TESTS_TOTAL} passed"
log " Promotion: ${PROMOTE_STATUS}"
log " Exit code: $FINAL_EXIT"
log "═══════════════════════════════════════════════════"
echo ""

# Propagate the gate decision to the caller (see "Exit codes" in the header).
exit "$FINAL_EXIT"
|
||||
@@ -65,6 +65,11 @@ trivy:
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
# Enable Redis cache layer for faster manifest lookups (avoids upstream checks)
|
||||
cache:
|
||||
enabled: true
|
||||
expireHours: 24
|
||||
|
||||
# Resource limits — conservative for a 4-node cluster
|
||||
core:
|
||||
resources:
|
||||
|
||||
+34
-1
@@ -41,7 +41,7 @@ from services.shared.audit import get_entity_audit_trail, get_order_audit_trail,
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.redis_keys import QUEUE_PREFIX, queue_key
|
||||
from services.shared.redis_keys import PREFIX, QUEUE_PREFIX, queue_key
|
||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||
|
||||
logger = logging.getLogger("query_api")
|
||||
@@ -1787,8 +1787,13 @@ async def get_pipeline_health(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Pipeline enabled flag
|
||||
pipeline_flag = await rds.get(_PIPELINE_ENABLED_KEY) if rds else None
|
||||
pipeline_enabled = pipeline_flag != "0" if pipeline_flag is not None else True
|
||||
|
||||
return {
|
||||
"hours": hours,
|
||||
"pipeline_enabled": pipeline_enabled,
|
||||
"document_stages": [_row_to_dict(r) for r in doc_stages],
|
||||
"parsing": _row_to_dict(parse_quality) if parse_quality else {},
|
||||
"extraction": _row_to_dict(extraction_stats) if extraction_stats else {},
|
||||
@@ -1927,6 +1932,34 @@ async def retry_failed_extractions_endpoint():
|
||||
return {"retried": len(doc_ids), "message": f"Re-enqueued {len(doc_ids)} documents for extraction"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline On/Off Toggle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
||||
|
||||
|
||||
@app.get("/api/ops/pipeline/toggle")
async def get_pipeline_toggle():
    """Get the current pipeline enabled/disabled state.

    Returns:
        ``{"pipeline_enabled": bool}`` — ``False`` only when the stored flag is
        ``"0"``; a missing key or an unavailable Redis client counts as enabled.
    """
    # Same guard as get_pipeline_health: tolerate a missing Redis client
    # instead of failing with an AttributeError.
    val = await rds.get(_PIPELINE_ENABLED_KEY) if rds else None
    # Accept both str and bytes so the answer does not depend on whether the
    # Redis client is configured to decode responses.
    enabled = val not in ("0", b"0")
    return {"pipeline_enabled": enabled}
|
||||
|
||||
|
||||
@app.post("/api/ops/pipeline/toggle")
async def set_pipeline_toggle(body: dict[str, Any]):
    """Toggle the pipeline on or off.

    Accepts: { "enabled": true/false }
    Workers check this flag before processing jobs.

    A missing ``enabled`` key means "enable". String values are interpreted
    textually (``"false"``, ``"0"``, ``"off"``, ``"no"`` and the empty string
    disable) rather than by truthiness, so ``{"enabled": "false"}`` no longer
    turns the pipeline on.

    Returns:
        ``{"pipeline_enabled": bool, "message": str}`` — the flag is always a
        real boolean, whatever type the client sent.
    """
    requested = body.get("enabled", True)
    if isinstance(requested, str):
        enabled = requested.strip().lower() not in ("", "0", "false", "off", "no")
    else:
        enabled = bool(requested)

    # Stored as "1"/"0"; readers treat anything other than "0" as enabled.
    await rds.set(_PIPELINE_ENABLED_KEY, "1" if enabled else "0")
    return {"pipeline_enabled": enabled, "message": f"Pipeline {'enabled' if enabled else 'disabled'}"}
|
||||
|
||||
|
||||
@app.get("/api/ops/sources/coverage-gaps")
|
||||
async def get_source_coverage_gaps():
|
||||
"""Identify symbols with missing or insufficient source coverage.
|
||||
|
||||
@@ -19,6 +19,7 @@ from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import (
|
||||
PREFIX,
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
@@ -499,12 +500,19 @@ async def main() -> None:
|
||||
rds = get_redis(config)
|
||||
|
||||
logger.info("Scheduler started (tick=%ds)", SCHEDULER_TICK)
|
||||
pipeline_key = f"{PREFIX}:pipeline:enabled"
|
||||
recovery_counter = 0
|
||||
retry_counter = 0
|
||||
cleanup_counter = 0
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
# Check pipeline toggle — skip cycle if disabled
|
||||
flag = await rds.get(pipeline_key)
|
||||
if flag == "0":
|
||||
await asyncio.sleep(SCHEDULER_TICK)
|
||||
continue
|
||||
|
||||
if await acquire_lock(rds, "scheduler_cycle", ttl=30):
|
||||
try:
|
||||
await schedule_cycle(pool, rds)
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
"""Database migration runner using asyncpg.
|
||||
|
||||
Applies all SQL migration files from infra/migrations/ in sorted order.
|
||||
Each file is split on semicolons and executed statement-by-statement.
|
||||
Idempotent — migrations use IF NOT EXISTS / CREATE OR REPLACE patterns.
|
||||
|
||||
Usage:
|
||||
python -m services.shared.migrate
|
||||
"""
|
||||
import asyncio
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger("migrate")
|
||||
|
||||
|
||||
async def run_migrations() -> None:
    """Apply every ``*.sql`` file from ``infra/migrations`` in sorted path order.

    Connection settings are read from the ``POSTGRES_*`` environment variables.
    Each file is split on ``;`` and executed one statement at a time; when a
    statement raises, a warning is logged and the rest of that file is skipped.
    The process exits with status 1 if the migrations directory is missing.
    """
    settings = {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", "5432")),
        "user": os.getenv("POSTGRES_USER", "stonks"),
        "password": os.getenv("POSTGRES_PASSWORD", ""),
        "database": os.getenv("POSTGRES_DB", "stonks"),
    }

    # <this file's directory>/../../infra/migrations, normalised.
    migrations_dir = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "..", "..", "infra", "migrations")
    )
    if not os.path.isdir(migrations_dir):
        logger.error("Migrations directory not found: %s", migrations_dir)
        sys.exit(1)

    sql_paths = sorted(glob.glob(os.path.join(migrations_dir, "*.sql")))
    if not sql_paths:
        logger.warning("No migration files found in %s", migrations_dir)
        return

    logger.info(
        "Connecting to %s@%s:%d/%s",
        settings["user"], settings["host"], settings["port"], settings["database"],
    )
    conn = await asyncpg.connect(**settings)

    try:
        for sql_path in sql_paths:
            file_name = os.path.basename(sql_path)
            with open(sql_path) as handle:
                contents = handle.read()
            # NOTE(review): a plain split on ";" would also cut through any
            # semicolon inside a string literal or function body — confirm the
            # migration files never contain one.
            statements = [piece.strip() for piece in contents.split(";") if piece.strip()]
            try:
                for statement in statements:
                    await conn.execute(statement)
            except Exception as exc:
                logger.warning(" ⚠ %s: %s", file_name, exc)
            else:
                logger.info(" ✓ %s (%d statements)", file_name, len(statements))
    finally:
        await conn.close()

    logger.info("Migrations complete (%d files)", len(sql_paths))
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: set up INFO logging, then run the migrations."""
    log_settings = {
        "level": logging.INFO,
        "format": "%(asctime)s %(name)s %(message)s",
        "datefmt": "%H:%M:%S",
    }
    logging.basicConfig(**log_settings)
    migration_run = run_migrations()
    asyncio.run(migration_run)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,376 @@
|
||||
"""Integration tests for cross-service signal flow contracts.
|
||||
|
||||
These tests validate the end-to-end data flow that the trading engine
|
||||
depends on. They catch schema drift and contract violations between
|
||||
services that unit tests miss.
|
||||
|
||||
Flow under test:
|
||||
1. Symbol Registry has companies with exposure profiles and competitors
|
||||
2. Query API returns trends with correct schema for trading engine consumption
|
||||
3. Risk engine evaluates orders using data from query API
|
||||
4. Trading engine receives valid recommendation payloads
|
||||
|
||||
These are the "beta gate" tests — if any fail, promotion to paper is blocked.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
# Apply the asyncio marker to every test in this module (all tests are coroutines).
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Symbol Registry → Query API company data consistency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegistryToQueryContract:
|
||||
"""Verify that company data in the registry matches what query API exposes."""
|
||||
|
||||
async def test_company_ids_consistent(self, registry_client, query_client, seed_ids):
|
||||
"""Company IDs from registry match those returned by query API."""
|
||||
reg_resp = await registry_client.get("/companies")
|
||||
assert reg_resp.status_code == 200
|
||||
reg_companies = {c["id"]: c["ticker"] for c in reg_resp.json()}
|
||||
|
||||
query_resp = await query_client.get("/api/companies")
|
||||
assert query_resp.status_code == 200
|
||||
query_companies = {c["id"]: c["ticker"] for c in query_resp.json()}
|
||||
|
||||
# Every company in registry should appear in query API
|
||||
for cid, ticker in reg_companies.items():
|
||||
assert cid in query_companies, (
|
||||
f"Company {ticker} ({cid}) in registry but missing from query API"
|
||||
)
|
||||
assert query_companies[cid] == ticker
|
||||
|
||||
async def test_exposure_profiles_accessible(self, registry_client, seed_ids):
|
||||
"""Exposure profiles required by macro signal layer are accessible."""
|
||||
company_id = seed_ids["companies"]["AAPL"]
|
||||
resp = await registry_client.get(f"/companies/{company_id}/exposure")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
# Trading engine needs these fields for macro impact scoring
|
||||
assert "geographic_revenue_mix" in data
|
||||
assert "supply_chain_regions" in data
|
||||
assert "key_input_commodities" in data
|
||||
assert "market_position_tier" in data
|
||||
assert "export_dependency_pct" in data
|
||||
# Values must be valid types
|
||||
assert isinstance(data["geographic_revenue_mix"], dict)
|
||||
assert isinstance(data["supply_chain_regions"], list)
|
||||
assert isinstance(data["export_dependency_pct"], (int, float))
|
||||
assert 0 <= data["export_dependency_pct"] <= 1
|
||||
|
||||
async def test_competitor_relationships_bidirectional(self, registry_client, seed_ids):
|
||||
"""Competitor relationships are queryable from both sides."""
|
||||
aapl_id = seed_ids["companies"]["AAPL"]
|
||||
msft_id = seed_ids["companies"]["MSFT"]
|
||||
|
||||
# Query from AAPL side
|
||||
resp_a = await registry_client.get(f"/companies/{aapl_id}/competitors")
|
||||
assert resp_a.status_code == 200
|
||||
aapl_competitors = resp_a.json()
|
||||
|
||||
# Query from MSFT side
|
||||
resp_b = await registry_client.get(f"/companies/{msft_id}/competitors")
|
||||
assert resp_b.status_code == 200
|
||||
msft_competitors = resp_b.json()
|
||||
|
||||
# AAPL should see MSFT and vice versa
|
||||
aapl_partner_ids = set()
|
||||
for rel in aapl_competitors:
|
||||
if rel.get("company_a_id") == aapl_id:
|
||||
aapl_partner_ids.add(rel["company_b_id"])
|
||||
else:
|
||||
aapl_partner_ids.add(rel["company_a_id"])
|
||||
|
||||
msft_partner_ids = set()
|
||||
for rel in msft_competitors:
|
||||
if rel.get("company_a_id") == msft_id:
|
||||
msft_partner_ids.add(rel["company_b_id"])
|
||||
else:
|
||||
msft_partner_ids.add(rel["company_a_id"])
|
||||
|
||||
assert msft_id in aapl_partner_ids, "MSFT not in AAPL's competitors"
|
||||
assert aapl_id in msft_partner_ids, "AAPL not in MSFT's competitors"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Query API → Trading Engine trend data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTrendToTradingContract:
|
||||
"""Verify trend data has the schema the trading engine expects."""
|
||||
|
||||
async def test_trend_has_required_trading_fields(self, query_client, seed_ids):
|
||||
"""Trends must include fields the trading engine uses for decisions."""
|
||||
resp = await query_client.get("/api/trends")
|
||||
assert resp.status_code == 200
|
||||
trends = resp.json()
|
||||
assert len(trends) >= 1
|
||||
|
||||
for trend in trends:
|
||||
# Fields the trading engine reads
|
||||
assert "id" in trend
|
||||
assert "trend_direction" in trend
|
||||
assert "confidence" in trend
|
||||
assert "trend_strength" in trend
|
||||
# Direction must be a valid enum value
|
||||
assert trend["trend_direction"] in (
|
||||
"bullish", "bearish", "mixed", "neutral",
|
||||
), f"Invalid direction: {trend['trend_direction']}"
|
||||
# Confidence and strength must be normalized [0, 1]
|
||||
assert 0 <= trend["confidence"] <= 1, (
|
||||
f"Confidence out of range: {trend['confidence']}"
|
||||
)
|
||||
assert 0 <= trend["trend_strength"] <= 1, (
|
||||
f"Strength out of range: {trend['trend_strength']}"
|
||||
)
|
||||
|
||||
async def test_trend_detail_has_evidence(self, query_client, seed_ids):
|
||||
"""Individual trend detail includes evidence the trading engine logs."""
|
||||
trend_id = seed_ids["trends"]["TREND_01"]
|
||||
resp = await query_client.get(f"/api/trends/{trend_id}")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
|
||||
# Trading engine logs these for audit trail
|
||||
assert "top_supporting_evidence" in data
|
||||
assert "top_opposing_evidence" in data
|
||||
assert "dominant_catalysts" in data
|
||||
assert isinstance(data["top_supporting_evidence"], list)
|
||||
assert isinstance(data["top_opposing_evidence"], list)
|
||||
assert isinstance(data["dominant_catalysts"], list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Recommendation → Risk Engine → Trading Engine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRecommendationToRiskContract:
|
||||
"""Verify recommendations produce valid risk evaluation inputs."""
|
||||
|
||||
async def test_recommendation_has_risk_fields(self, query_client, seed_ids):
|
||||
"""Recommendations include fields needed for risk evaluation."""
|
||||
resp = await query_client.get(
|
||||
"/api/recommendations", params={"latest": "false"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
recs = resp.json()
|
||||
assert len(recs) >= 1
|
||||
|
||||
for rec in recs:
|
||||
assert "ticker" in rec
|
||||
assert "action" in rec
|
||||
assert "confidence" in rec
|
||||
assert "mode" in rec
|
||||
# Action must be valid
|
||||
assert rec["action"] in ("buy", "sell", "hold", "watch")
|
||||
# Mode determines if it reaches trading engine
|
||||
assert rec["mode"] in (
|
||||
"informational", "paper_eligible", "live_eligible",
|
||||
)
|
||||
# Confidence must be normalized
|
||||
assert 0 <= rec["confidence"] <= 1
|
||||
|
||||
async def test_risk_evaluation_schema(self, risk_client):
|
||||
"""Risk engine returns evaluation with all fields trading engine needs."""
|
||||
payload = {
|
||||
"order": {
|
||||
"ticker": "AAPL",
|
||||
"action": "buy",
|
||||
"quantity": 5,
|
||||
"estimated_value": 925.0,
|
||||
"confidence": 0.75,
|
||||
"recommendation_id": None,
|
||||
"sector": "Technology",
|
||||
},
|
||||
}
|
||||
resp = await risk_client.post("/evaluate", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
|
||||
# Trading engine reads these fields from risk evaluation
|
||||
assert "evaluation_id" in data
|
||||
assert "ticker" in data
|
||||
assert "eligible" in data
|
||||
assert "rejection_reasons" in data
|
||||
assert "checks" in data
|
||||
assert "evaluated_at" in data
|
||||
# Types
|
||||
assert isinstance(data["eligible"], bool)
|
||||
assert isinstance(data["rejection_reasons"], list)
|
||||
assert isinstance(data["checks"], list)
|
||||
# Each check should have name and passed
|
||||
for check in data["checks"]:
|
||||
assert "name" in check
|
||||
assert "passed" in check
|
||||
assert isinstance(check["passed"], bool)
|
||||
|
||||
async def test_risk_rejects_oversized_order(self, risk_client):
|
||||
"""Risk engine correctly rejects an order exceeding position cap."""
|
||||
payload = {
|
||||
"order": {
|
||||
"ticker": "AAPL",
|
||||
"action": "buy",
|
||||
"quantity": 1000,
|
||||
"estimated_value": 185000.0,
|
||||
"confidence": 0.9,
|
||||
"recommendation_id": None,
|
||||
"sector": "Technology",
|
||||
},
|
||||
"config": {
|
||||
"absolute_position_cap": 10000.0,
|
||||
},
|
||||
}
|
||||
resp = await risk_client.post("/evaluate", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
# Should be rejected due to position cap
|
||||
assert data["eligible"] is False
|
||||
assert len(data["rejection_reasons"]) > 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Trading Engine state consistency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTradingEngineState:
|
||||
"""Verify trading engine exposes consistent state for the promotion gate."""
|
||||
|
||||
async def test_status_reflects_config(self, trading_client):
|
||||
"""Engine status fields are consistent with each other."""
|
||||
resp = await trading_client.get("/api/trading/status")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
|
||||
# If paused, open_positions should still be reported
|
||||
assert "open_positions" in data
|
||||
assert isinstance(data["open_positions"], int)
|
||||
assert data["open_positions"] >= 0
|
||||
|
||||
# Risk tier must be valid
|
||||
assert data["risk_tier"] in ("conservative", "moderate", "aggressive")
|
||||
|
||||
# Pool values must be non-negative
|
||||
assert data["active_pool"] >= 0
|
||||
assert data["reserve_pool"] >= 0
|
||||
|
||||
async def test_decisions_have_audit_fields(self, trading_client, seed_ids):
|
||||
"""Trading decisions include full audit trail fields."""
|
||||
resp = await trading_client.get("/api/trading/decisions")
|
||||
assert resp.status_code == 200
|
||||
decisions = resp.json()
|
||||
|
||||
if len(decisions) > 0:
|
||||
d = decisions[0]
|
||||
assert "id" in d
|
||||
assert "decision" in d
|
||||
assert "ticker" in d
|
||||
assert "created_at" in d
|
||||
# Decision type must be valid
|
||||
assert d["decision"] in ("act", "skip")
|
||||
|
||||
async def test_metrics_numeric_consistency(self, trading_client):
|
||||
"""Portfolio metrics are all numeric and internally consistent."""
|
||||
resp = await trading_client.get("/api/trading/metrics")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
|
||||
# All values must be numeric
|
||||
numeric_fields = [
|
||||
"total_portfolio_value", "active_pool", "reserve_pool",
|
||||
"unrealized_pnl", "realized_pnl", "daily_pnl",
|
||||
"win_rate", "sharpe_ratio", "max_drawdown", "portfolio_heat",
|
||||
]
|
||||
for field in numeric_fields:
|
||||
assert field in data, f"Missing field: {field}"
|
||||
assert isinstance(data[field], (int, float)), (
|
||||
f"{field} should be numeric, got {type(data[field])}"
|
||||
)
|
||||
|
||||
# Win rate and portfolio heat should be bounded
|
||||
assert 0 <= data["win_rate"] <= 1 or data["win_rate"] == 0
|
||||
assert 0 <= data["portfolio_heat"] <= 1 or data["portfolio_heat"] == 0
|
||||
|
||||
# Total portfolio = active + reserve + unrealized (approximately)
|
||||
# Allow some tolerance for rounding
|
||||
expected_total = data["active_pool"] + data["reserve_pool"] + data["unrealized_pnl"]
|
||||
if data["total_portfolio_value"] > 0:
|
||||
diff = abs(data["total_portfolio_value"] - expected_total)
|
||||
assert diff < data["total_portfolio_value"] * 0.1, (
|
||||
f"Portfolio value inconsistency: total={data['total_portfolio_value']}, "
|
||||
f"active+reserve+unrealized={expected_total}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Approval workflow integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApprovalWorkflowContract:
|
||||
"""Verify the approval workflow is accessible and returns valid schemas."""
|
||||
|
||||
async def test_pending_approvals_schema(self, risk_client):
|
||||
"""Pending approvals list returns valid schema (may be empty)."""
|
||||
resp = await risk_client.get("/approvals/pending")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
# If there are pending approvals, validate schema
|
||||
for approval in data:
|
||||
assert "id" in approval
|
||||
assert "status" in approval
|
||||
assert "ticker" in approval
|
||||
assert "side" in approval
|
||||
assert "quantity" in approval
|
||||
assert "created_at" in approval
|
||||
|
||||
async def test_approval_not_found_returns_404(self, risk_client):
|
||||
"""Non-existent approval ID returns 404, not 500."""
|
||||
fake_id = "00000000-0000-4000-ffff-ffffffffffff"
|
||||
resp = await risk_client.get(f"/approvals/{fake_id}")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contract: Cross-service health (all services must be up for paper trading)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCrossServiceHealth:
|
||||
"""All services must be healthy before promotion to paper trading."""
|
||||
|
||||
async def test_all_services_healthy(
|
||||
self, query_client, registry_client, risk_client, trading_client,
|
||||
):
|
||||
"""Every service responds to health check."""
|
||||
services = {
|
||||
"query-api": query_client,
|
||||
"symbol-registry": registry_client,
|
||||
"risk-engine": risk_client,
|
||||
"trading-engine": trading_client,
|
||||
}
|
||||
for name, client in services.items():
|
||||
resp = await client.get("/health")
|
||||
assert resp.status_code == 200, (
|
||||
f"{name} health check failed with status {resp.status_code}"
|
||||
)
|
||||
data = resp.json()
|
||||
assert data.get("status") == "ok", (
|
||||
f"{name} reported unhealthy: {data}"
|
||||
)
|
||||
|
||||
async def test_trading_engine_ready(self, trading_client):
|
||||
"""Trading engine readiness probe passes (DB + Redis connected)."""
|
||||
resp = await trading_client.get("/ready")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["ready"] is True, (
|
||||
f"Trading engine not ready: {data}"
|
||||
)
|
||||
Reference in New Issue
Block a user