fix: pipeline stop now halts all workers and flushes queues
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
Workers (ingestion, parser, extractor, aggregation, recommendation, broker, lake-publisher) now check the pipeline:enabled Redis flag at the top of each loop iteration and sleep while the pipeline is disabled. By default, the toggle endpoint also flushes all pipeline queues when the pipeline is disabled, so already-queued jobs are discarded instead of being picked up later by the workers. Broker/trading queues are excluded from the flush to avoid dropping in-flight orders.
This commit is contained in:
@@ -76,7 +76,7 @@ from services.shared.metrics import (
|
||||
RISK_CHECK_FAILURES,
|
||||
RISK_EVALUATIONS_TOTAL,
|
||||
)
|
||||
from services.shared.redis_keys import QUEUE_BROKER, queue_key
|
||||
from services.shared.redis_keys import QUEUE_BROKER, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger("broker_service")
|
||||
|
||||
@@ -923,6 +923,9 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
result = await rds.lpop(queue)
|
||||
raw = str(result) if result else None
|
||||
if raw:
|
||||
|
||||
@@ -23,6 +23,7 @@ from services.shared.logging import inject_trace_context, setup_logging
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_AGGREGATION,
|
||||
QUEUE_RECOMMENDATION,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
|
||||
@@ -134,6 +135,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
raw = await redis_client.lpop(queue)
|
||||
if raw is None:
|
||||
await asyncio.sleep(1)
|
||||
|
||||
+26
-3
@@ -41,7 +41,7 @@ from services.shared.audit import get_entity_audit_trail, get_order_audit_trail,
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.redis_keys import PREFIX, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.redis_keys import PIPELINE_ENABLED_KEY, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||
|
||||
logger = logging.getLogger("query_api")
|
||||
@@ -1948,7 +1948,7 @@ async def retry_failed_extractions_endpoint():
|
||||
# Pipeline On/Off Toggle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
||||
_PIPELINE_ENABLED_KEY = PIPELINE_ENABLED_KEY
|
||||
|
||||
|
||||
@app.get("/api/ops/pipeline/toggle")
|
||||
@@ -1966,10 +1966,33 @@ async def set_pipeline_toggle(body: dict[str, Any]):
|
||||
|
||||
Accepts: { "enabled": true/false }
|
||||
Workers check this flag before processing jobs.
|
||||
When disabling, optionally flush all pipeline queues so in-flight
|
||||
work stops immediately.
|
||||
"""
|
||||
enabled = body.get("enabled", True)
|
||||
flush = body.get("flush", not enabled) # default: flush when disabling
|
||||
await rds.set(_PIPELINE_ENABLED_KEY, "1" if enabled else "0")
|
||||
return {"pipeline_enabled": enabled, "message": f"Pipeline {'enabled' if enabled else 'disabled'}"}
|
||||
|
||||
flushed_counts: dict[str, int] = {}
|
||||
if flush and not enabled:
|
||||
from services.shared.redis_keys import QUEUE_PREFIX
|
||||
# Flush all pipeline queues
|
||||
queue_names = [
|
||||
"ingestion", "parsing", "extraction", "macro_classification",
|
||||
"aggregation", "recommendation", "lake_publish",
|
||||
]
|
||||
for qname in queue_names:
|
||||
qkey = f"{QUEUE_PREFIX}:{qname}"
|
||||
count = await rds.llen(qkey)
|
||||
if count > 0:
|
||||
await rds.delete(qkey)
|
||||
flushed_counts[qname] = count
|
||||
|
||||
msg = f"Pipeline {'enabled' if enabled else 'disabled'}"
|
||||
if flushed_counts:
|
||||
total = sum(flushed_counts.values())
|
||||
msg += f" — flushed {total} queued jobs"
|
||||
return {"pipeline_enabled": enabled, "flushed": flushed_counts, "message": msg}
|
||||
|
||||
|
||||
@app.get("/api/ops/sources/coverage-gaps")
|
||||
|
||||
@@ -27,6 +27,7 @@ from services.shared.redis_keys import (
|
||||
QUEUE_AGGREGATION,
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
|
||||
@@ -421,6 +422,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
# Alternate: every 3rd job from macro queue, rest from extraction
|
||||
# This prevents macro events from starving regular extractions
|
||||
raw = None
|
||||
|
||||
@@ -41,6 +41,7 @@ from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_PARSING,
|
||||
dedupe_key,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
from services.shared.storage import (
|
||||
@@ -265,6 +266,9 @@ async def main():
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
raw = await rds.lpop(queue)
|
||||
if raw:
|
||||
job = json.loads(raw)
|
||||
|
||||
@@ -54,7 +54,7 @@ from services.lake_publisher.worker import (
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_minio, get_pg_pool, get_redis
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, queue_key
|
||||
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -865,6 +865,9 @@ async def run_worker(
|
||||
logger.info("Lake publisher worker started, listening on %s", queue)
|
||||
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(poll_interval)
|
||||
continue
|
||||
raw = await rds.lpop(queue) # type: ignore[misc]
|
||||
if raw is None:
|
||||
await asyncio.sleep(poll_interval)
|
||||
|
||||
@@ -35,7 +35,13 @@ from services.shared.metrics import (
|
||||
PARSE_LOW_QUALITY_TOTAL,
|
||||
PARSE_QUALITY_SCORE,
|
||||
)
|
||||
from services.shared.redis_keys import QUEUE_EXTRACTION, QUEUE_MACRO_CLASSIFICATION, QUEUE_PARSING, queue_key
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
QUEUE_PARSING,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
from services.shared.storage import upload_normalized_text, upload_parser_output
|
||||
|
||||
logger = logging.getLogger("parser_worker")
|
||||
@@ -260,6 +266,9 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
raw = await rds.lpop(queue)
|
||||
if raw:
|
||||
job = json.loads(raw)
|
||||
|
||||
@@ -12,7 +12,7 @@ from services.recommendation.worker import generate_recommendation
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
from services.shared.config import OllamaConfig, load_config
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, queue_key
|
||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger("recommendation_main")
|
||||
|
||||
@@ -62,6 +62,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
raw = await redis_client.lpop(queue)
|
||||
if raw is None:
|
||||
await asyncio.sleep(1)
|
||||
|
||||
@@ -20,7 +20,7 @@ from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import (
|
||||
PREFIX,
|
||||
PIPELINE_ENABLED_KEY,
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
@@ -501,7 +501,7 @@ async def main() -> None:
|
||||
rds = get_redis(config)
|
||||
|
||||
logger.info("Scheduler started (tick=%ds)", SCHEDULER_TICK)
|
||||
pipeline_key = f"{PREFIX}:pipeline:enabled"
|
||||
pipeline_key = PIPELINE_ENABLED_KEY
|
||||
|
||||
# If PIPELINE_DEFAULT_OFF is set, initialize the toggle to OFF on first boot
|
||||
# (only if the key doesn't already exist — preserves manual overrides)
|
||||
|
||||
@@ -89,3 +89,18 @@ def trading_cb_key(trigger_type: str) -> str:
|
||||
def trading_notification_rate_key(channel: str) -> str:
|
||||
"""Return the notification rate-limit key for a given channel."""
|
||||
return f"{TRADING_NOTIFICATION_RATE}:{channel}"
|
||||
|
||||
|
||||
# --- Pipeline toggle ---
|
||||
PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
||||
|
||||
|
||||
async def is_pipeline_enabled(rds: "redis.asyncio.Redis") -> bool:  # type: ignore[name-defined]  # noqa: F821
    """Check whether the pipeline is enabled via the Redis toggle.

    Returns True (enabled) when the key is absent or set to anything
    other than ``"0"``.  Workers should call this at the top of each
    loop iteration and sleep when it returns False.

    The stored value is normalised to ``str`` before the comparison, so
    the check gives the same answer whether the client returns ``str``
    or ``bytes``.

    Args:
        rds: Async Redis client used to read ``PIPELINE_ENABLED_KEY``.

    Returns:
        False only when the stored value is ``"0"`` (or ``b"0"``);
        True otherwise, including when the key does not exist.
    """
    val = await rds.get(PIPELINE_ENABLED_KEY)
    # A client that does not decode responses hands back bytes; without
    # this step b"0" != "0" would make the "disabled" state undetectable.
    if isinstance(val, (bytes, bytearray)):
        val = bytes(val).decode("utf-8", "replace")
    return val != "0"
|
||||
|
||||
Reference in New Issue
Block a user