phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,317 @@
|
||||
"""Prometheus metrics for all Stonks Oracle pipeline stages.
|
||||
|
||||
Provides counters, histograms, and gauges covering:
|
||||
- Ingestion: items fetched, new items, errors, adapter latency
|
||||
- Parsing: documents parsed, quality scores, low-quality flags
|
||||
- Extraction: attempts, successes, failures, latency, confidence, retries
|
||||
- Aggregation: trend windows computed, signal counts, contradiction scores
|
||||
- Lake publication: facts published per table, write latency
|
||||
- Trading: orders submitted, rejected, filled, risk evaluations
|
||||
|
||||
Requirements: 12.1, 12.2
|
||||
Design: Section 12 (Observability and Operations)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from prometheus_client import Counter, Gauge, Histogram, Info
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Service info
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SERVICE_INFO = Info("stonks_oracle", "Stonks Oracle service metadata")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ingestion metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
INGESTION_JOBS_TOTAL = Counter(
|
||||
"stonks_ingestion_jobs_total",
|
||||
"Total ingestion jobs processed",
|
||||
["source_type", "status"],
|
||||
)
|
||||
|
||||
INGESTION_ITEMS_FETCHED = Counter(
|
||||
"stonks_ingestion_items_fetched_total",
|
||||
"Total items fetched from external sources",
|
||||
["source_type"],
|
||||
)
|
||||
|
||||
INGESTION_ITEMS_NEW = Counter(
|
||||
"stonks_ingestion_items_new_total",
|
||||
"New (non-duplicate) items ingested",
|
||||
["source_type"],
|
||||
)
|
||||
|
||||
INGESTION_ITEMS_DEDUPED = Counter(
|
||||
"stonks_ingestion_items_deduped_total",
|
||||
"Items skipped due to deduplication",
|
||||
["source_type"],
|
||||
)
|
||||
|
||||
INGESTION_ERRORS = Counter(
|
||||
"stonks_ingestion_errors_total",
|
||||
"Ingestion errors by source type",
|
||||
["source_type"],
|
||||
)
|
||||
|
||||
INGESTION_ADAPTER_DURATION = Histogram(
|
||||
"stonks_ingestion_adapter_duration_seconds",
|
||||
"Adapter fetch latency in seconds",
|
||||
["source_type"],
|
||||
buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60),
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PARSE_JOBS_TOTAL = Counter(
|
||||
"stonks_parse_jobs_total",
|
||||
"Total parse jobs processed",
|
||||
["status"],
|
||||
)
|
||||
|
||||
PARSE_QUALITY_SCORE = Histogram(
|
||||
"stonks_parse_quality_score",
|
||||
"Distribution of parser quality scores",
|
||||
buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
|
||||
)
|
||||
|
||||
PARSE_LOW_QUALITY_TOTAL = Counter(
|
||||
"stonks_parse_low_quality_total",
|
||||
"Documents flagged as low quality by the parser",
|
||||
)
|
||||
|
||||
PARSE_DURATION = Histogram(
|
||||
"stonks_parse_duration_seconds",
|
||||
"Parse job duration in seconds",
|
||||
buckets=(0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10),
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extraction metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
EXTRACTION_JOBS_TOTAL = Counter(
|
||||
"stonks_extraction_jobs_total",
|
||||
"Total extraction jobs processed",
|
||||
["status"],
|
||||
)
|
||||
|
||||
EXTRACTION_ATTEMPTS = Counter(
|
||||
"stonks_extraction_attempts_total",
|
||||
"Total Ollama extraction attempts (including retries)",
|
||||
)
|
||||
|
||||
EXTRACTION_RETRIES = Counter(
|
||||
"stonks_extraction_retries_total",
|
||||
"Extraction retry count",
|
||||
)
|
||||
|
||||
EXTRACTION_DURATION = Histogram(
|
||||
"stonks_extraction_duration_seconds",
|
||||
"Extraction total duration in seconds",
|
||||
buckets=(1, 2, 5, 10, 20, 30, 60, 120),
|
||||
)
|
||||
|
||||
EXTRACTION_CONFIDENCE = Histogram(
|
||||
"stonks_extraction_confidence",
|
||||
"Distribution of extraction confidence scores",
|
||||
buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
|
||||
)
|
||||
|
||||
EXTRACTION_VALIDATION_ERRORS = Counter(
|
||||
"stonks_extraction_validation_errors_total",
|
||||
"Total validation errors across extractions",
|
||||
)
|
||||
|
||||
EXTRACTION_TOKEN_ESTIMATE = Counter(
|
||||
"stonks_extraction_tokens_total",
|
||||
"Estimated token usage",
|
||||
["direction"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aggregation metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
AGGREGATION_WINDOWS_COMPUTED = Counter(
|
||||
"stonks_aggregation_windows_total",
|
||||
"Trend windows computed",
|
||||
["window"],
|
||||
)
|
||||
|
||||
AGGREGATION_SIGNALS_PROCESSED = Counter(
|
||||
"stonks_aggregation_signals_total",
|
||||
"Signals processed during aggregation",
|
||||
["window"],
|
||||
)
|
||||
|
||||
AGGREGATION_CONTRADICTION_SCORE = Histogram(
|
||||
"stonks_aggregation_contradiction_score",
|
||||
"Distribution of contradiction scores in trend windows",
|
||||
buckets=(0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0),
|
||||
)
|
||||
|
||||
AGGREGATION_DURATION = Histogram(
|
||||
"stonks_aggregation_duration_seconds",
|
||||
"Aggregation job duration in seconds",
|
||||
["window"],
|
||||
buckets=(0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10),
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recommendation metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
RECOMMENDATION_GENERATED = Counter(
|
||||
"stonks_recommendations_total",
|
||||
"Recommendations generated",
|
||||
["action", "mode"],
|
||||
)
|
||||
|
||||
RECOMMENDATION_SUPPRESSED = Counter(
|
||||
"stonks_recommendations_suppressed_total",
|
||||
"Recommendations suppressed due to low data quality",
|
||||
)
|
||||
|
||||
RECOMMENDATION_CONFIDENCE = Histogram(
|
||||
"stonks_recommendation_confidence",
|
||||
"Distribution of recommendation confidence scores",
|
||||
buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lake publication metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
LAKE_FACTS_PUBLISHED = Counter(
|
||||
"stonks_lake_facts_published_total",
|
||||
"Analytical facts published to the lakehouse",
|
||||
["table_name"],
|
||||
)
|
||||
|
||||
LAKE_PUBLISH_DURATION = Histogram(
|
||||
"stonks_lake_publish_duration_seconds",
|
||||
"Lake publication write latency in seconds",
|
||||
["table_name"],
|
||||
buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5),
|
||||
)
|
||||
|
||||
LAKE_PUBLISH_ERRORS = Counter(
|
||||
"stonks_lake_publish_errors_total",
|
||||
"Lake publication errors",
|
||||
["table_name"],
|
||||
)
|
||||
|
||||
LAKE_PUBLISH_BYTES = Counter(
|
||||
"stonks_lake_publish_bytes_total",
|
||||
"Total bytes written to the lakehouse",
|
||||
["table_name"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trading / broker metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ORDERS_SUBMITTED = Counter(
|
||||
"stonks_orders_submitted_total",
|
||||
"Orders submitted to broker",
|
||||
["side", "order_type", "mode"],
|
||||
)
|
||||
|
||||
ORDERS_REJECTED = Counter(
|
||||
"stonks_orders_rejected_total",
|
||||
"Orders rejected before broker submission",
|
||||
["reason_category"],
|
||||
)
|
||||
|
||||
ORDERS_FILLED = Counter(
|
||||
"stonks_orders_filled_total",
|
||||
"Orders filled by broker",
|
||||
["side"],
|
||||
)
|
||||
|
||||
ORDERS_DUPLICATES_PREVENTED = Counter(
|
||||
"stonks_orders_duplicates_prevented_total",
|
||||
"Duplicate orders prevented by idempotency checks",
|
||||
["detected_via"],
|
||||
)
|
||||
|
||||
RISK_EVALUATIONS_TOTAL = Counter(
|
||||
"stonks_risk_evaluations_total",
|
||||
"Risk evaluations performed",
|
||||
["result"],
|
||||
)
|
||||
|
||||
RISK_CHECK_FAILURES = Counter(
|
||||
"stonks_risk_check_failures_total",
|
||||
"Individual risk check failures",
|
||||
["check_name"],
|
||||
)
|
||||
|
||||
POSITIONS_SYNCED = Counter(
|
||||
"stonks_positions_synced_total",
|
||||
"Position sync operations completed",
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active gauges
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ACTIVE_JOBS = Gauge(
|
||||
"stonks_active_jobs",
|
||||
"Currently processing jobs by stage",
|
||||
["stage"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Alerting metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ALERTS_FIRED = Counter(
|
||||
"stonks_alerts_fired_total",
|
||||
"Total alerts fired by rule",
|
||||
["rule", "severity"],
|
||||
)
|
||||
|
||||
ALERTS_RESOLVED = Counter(
|
||||
"stonks_alerts_resolved_total",
|
||||
"Total alerts resolved by rule",
|
||||
["rule"],
|
||||
)
|
||||
|
||||
ALERT_CHECK_DURATION = Histogram(
|
||||
"stonks_alert_check_duration_seconds",
|
||||
"Duration of alert evaluation cycle",
|
||||
buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5),
|
||||
)
|
||||
|
||||
ALERT_ACTIVE = Gauge(
|
||||
"stonks_alert_active",
|
||||
"Whether an alert rule is currently firing (1) or resolved (0)",
|
||||
["rule"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dead-letter queue metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DLQ_ITEMS_TOTAL = Counter(
|
||||
"stonks_dlq_items_total",
|
||||
"Jobs sent to dead-letter queues",
|
||||
["queue"],
|
||||
)
|
||||
|
||||
DLQ_REPLAYED_TOTAL = Counter(
|
||||
"stonks_dlq_replayed_total",
|
||||
"Jobs replayed from dead-letter queues",
|
||||
["queue"],
|
||||
)
|
||||
|
||||
DLQ_DEPTH = Gauge(
|
||||
"stonks_dlq_depth",
|
||||
"Current dead-letter queue depth",
|
||||
["queue"],
|
||||
)
|
||||
Reference in New Issue
Block a user