fix: pipeline stop now halts all workers and flushes queues
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
Workers (ingestion, parser, extractor, aggregation, recommendation, broker, lake-publisher) now check the pipeline:enabled Redis flag on each loop iteration and sleep while it is disabled. When the pipeline is disabled, the toggle endpoint also flushes all pipeline queues, so jobs that were already queued are not processed later when workers resume polling. Broker/trading queues are excluded from the flush to avoid dropping in-flight orders.
This commit is contained in:
@@ -76,7 +76,7 @@ from services.shared.metrics import (
|
|||||||
RISK_CHECK_FAILURES,
|
RISK_CHECK_FAILURES,
|
||||||
RISK_EVALUATIONS_TOTAL,
|
RISK_EVALUATIONS_TOTAL,
|
||||||
)
|
)
|
||||||
from services.shared.redis_keys import QUEUE_BROKER, queue_key
|
from services.shared.redis_keys import QUEUE_BROKER, is_pipeline_enabled, queue_key
|
||||||
|
|
||||||
logger = logging.getLogger("broker_service")
|
logger = logging.getLogger("broker_service")
|
||||||
|
|
||||||
@@ -923,6 +923,9 @@ async def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(rds):
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
continue
|
||||||
result = await rds.lpop(queue)
|
result = await rds.lpop(queue)
|
||||||
raw = str(result) if result else None
|
raw = str(result) if result else None
|
||||||
if raw:
|
if raw:
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from services.shared.logging import inject_trace_context, setup_logging
|
|||||||
from services.shared.redis_keys import (
|
from services.shared.redis_keys import (
|
||||||
QUEUE_AGGREGATION,
|
QUEUE_AGGREGATION,
|
||||||
QUEUE_RECOMMENDATION,
|
QUEUE_RECOMMENDATION,
|
||||||
|
is_pipeline_enabled,
|
||||||
queue_key,
|
queue_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -134,6 +135,10 @@ async def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(redis_client):
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
raw = await redis_client.lpop(queue)
|
raw = await redis_client.lpop(queue)
|
||||||
if raw is None:
|
if raw is None:
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|||||||
+26
-3
@@ -41,7 +41,7 @@ from services.shared.audit import get_entity_audit_trail, get_order_audit_trail,
|
|||||||
from services.shared.config import load_config
|
from services.shared.config import load_config
|
||||||
from services.shared.db import get_pg_pool, get_redis
|
from services.shared.db import get_pg_pool, get_redis
|
||||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||||
from services.shared.redis_keys import PREFIX, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
from services.shared.redis_keys import PIPELINE_ENABLED_KEY, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||||
|
|
||||||
logger = logging.getLogger("query_api")
|
logger = logging.getLogger("query_api")
|
||||||
@@ -1948,7 +1948,7 @@ async def retry_failed_extractions_endpoint():
|
|||||||
# Pipeline On/Off Toggle
|
# Pipeline On/Off Toggle
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
_PIPELINE_ENABLED_KEY = PIPELINE_ENABLED_KEY
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/ops/pipeline/toggle")
|
@app.get("/api/ops/pipeline/toggle")
|
||||||
@@ -1966,10 +1966,33 @@ async def set_pipeline_toggle(body: dict[str, Any]):
|
|||||||
|
|
||||||
Accepts: { "enabled": true/false }
|
Accepts: { "enabled": true/false }
|
||||||
Workers check this flag before processing jobs.
|
Workers check this flag before processing jobs.
|
||||||
|
When disabling, optionally flush all pipeline queues so in-flight
|
||||||
|
work stops immediately.
|
||||||
"""
|
"""
|
||||||
enabled = body.get("enabled", True)
|
enabled = body.get("enabled", True)
|
||||||
|
flush = body.get("flush", not enabled) # default: flush when disabling
|
||||||
await rds.set(_PIPELINE_ENABLED_KEY, "1" if enabled else "0")
|
await rds.set(_PIPELINE_ENABLED_KEY, "1" if enabled else "0")
|
||||||
return {"pipeline_enabled": enabled, "message": f"Pipeline {'enabled' if enabled else 'disabled'}"}
|
|
||||||
|
flushed_counts: dict[str, int] = {}
|
||||||
|
if flush and not enabled:
|
||||||
|
from services.shared.redis_keys import QUEUE_PREFIX
|
||||||
|
# Flush all pipeline queues
|
||||||
|
queue_names = [
|
||||||
|
"ingestion", "parsing", "extraction", "macro_classification",
|
||||||
|
"aggregation", "recommendation", "lake_publish",
|
||||||
|
]
|
||||||
|
for qname in queue_names:
|
||||||
|
qkey = f"{QUEUE_PREFIX}:{qname}"
|
||||||
|
count = await rds.llen(qkey)
|
||||||
|
if count > 0:
|
||||||
|
await rds.delete(qkey)
|
||||||
|
flushed_counts[qname] = count
|
||||||
|
|
||||||
|
msg = f"Pipeline {'enabled' if enabled else 'disabled'}"
|
||||||
|
if flushed_counts:
|
||||||
|
total = sum(flushed_counts.values())
|
||||||
|
msg += f" — flushed {total} queued jobs"
|
||||||
|
return {"pipeline_enabled": enabled, "flushed": flushed_counts, "message": msg}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/ops/sources/coverage-gaps")
|
@app.get("/api/ops/sources/coverage-gaps")
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from services.shared.redis_keys import (
|
|||||||
QUEUE_AGGREGATION,
|
QUEUE_AGGREGATION,
|
||||||
QUEUE_EXTRACTION,
|
QUEUE_EXTRACTION,
|
||||||
QUEUE_MACRO_CLASSIFICATION,
|
QUEUE_MACRO_CLASSIFICATION,
|
||||||
|
is_pipeline_enabled,
|
||||||
queue_key,
|
queue_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -421,6 +422,10 @@ async def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(redis_client):
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
# Alternate: every 3rd job from macro queue, rest from extraction
|
# Alternate: every 3rd job from macro queue, rest from extraction
|
||||||
# This prevents macro events from starving regular extractions
|
# This prevents macro events from starving regular extractions
|
||||||
raw = None
|
raw = None
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ from services.shared.redis_keys import (
|
|||||||
QUEUE_INGESTION,
|
QUEUE_INGESTION,
|
||||||
QUEUE_PARSING,
|
QUEUE_PARSING,
|
||||||
dedupe_key,
|
dedupe_key,
|
||||||
|
is_pipeline_enabled,
|
||||||
queue_key,
|
queue_key,
|
||||||
)
|
)
|
||||||
from services.shared.storage import (
|
from services.shared.storage import (
|
||||||
@@ -265,6 +266,9 @@ async def main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(rds):
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
continue
|
||||||
raw = await rds.lpop(queue)
|
raw = await rds.lpop(queue)
|
||||||
if raw:
|
if raw:
|
||||||
job = json.loads(raw)
|
job = json.loads(raw)
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ from services.lake_publisher.worker import (
|
|||||||
from services.shared.config import load_config
|
from services.shared.config import load_config
|
||||||
from services.shared.db import get_minio, get_pg_pool, get_redis
|
from services.shared.db import get_minio, get_pg_pool, get_redis
|
||||||
from services.shared.logging import setup_logging
|
from services.shared.logging import setup_logging
|
||||||
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, queue_key
|
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, is_pipeline_enabled, queue_key
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -865,6 +865,9 @@ async def run_worker(
|
|||||||
logger.info("Lake publisher worker started, listening on %s", queue)
|
logger.info("Lake publisher worker started, listening on %s", queue)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(rds):
|
||||||
|
await asyncio.sleep(poll_interval)
|
||||||
|
continue
|
||||||
raw = await rds.lpop(queue) # type: ignore[misc]
|
raw = await rds.lpop(queue) # type: ignore[misc]
|
||||||
if raw is None:
|
if raw is None:
|
||||||
await asyncio.sleep(poll_interval)
|
await asyncio.sleep(poll_interval)
|
||||||
|
|||||||
@@ -35,7 +35,13 @@ from services.shared.metrics import (
|
|||||||
PARSE_LOW_QUALITY_TOTAL,
|
PARSE_LOW_QUALITY_TOTAL,
|
||||||
PARSE_QUALITY_SCORE,
|
PARSE_QUALITY_SCORE,
|
||||||
)
|
)
|
||||||
from services.shared.redis_keys import QUEUE_EXTRACTION, QUEUE_MACRO_CLASSIFICATION, QUEUE_PARSING, queue_key
|
from services.shared.redis_keys import (
|
||||||
|
QUEUE_EXTRACTION,
|
||||||
|
QUEUE_MACRO_CLASSIFICATION,
|
||||||
|
QUEUE_PARSING,
|
||||||
|
is_pipeline_enabled,
|
||||||
|
queue_key,
|
||||||
|
)
|
||||||
from services.shared.storage import upload_normalized_text, upload_parser_output
|
from services.shared.storage import upload_normalized_text, upload_parser_output
|
||||||
|
|
||||||
logger = logging.getLogger("parser_worker")
|
logger = logging.getLogger("parser_worker")
|
||||||
@@ -260,6 +266,9 @@ async def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(rds):
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
continue
|
||||||
raw = await rds.lpop(queue)
|
raw = await rds.lpop(queue)
|
||||||
if raw:
|
if raw:
|
||||||
job = json.loads(raw)
|
job = json.loads(raw)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from services.recommendation.worker import generate_recommendation
|
|||||||
from services.shared.agent_config import AgentConfigResolver
|
from services.shared.agent_config import AgentConfigResolver
|
||||||
from services.shared.config import OllamaConfig, load_config
|
from services.shared.config import OllamaConfig, load_config
|
||||||
from services.shared.logging import setup_logging
|
from services.shared.logging import setup_logging
|
||||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, queue_key
|
from services.shared.redis_keys import QUEUE_RECOMMENDATION, is_pipeline_enabled, queue_key
|
||||||
|
|
||||||
logger = logging.getLogger("recommendation_main")
|
logger = logging.getLogger("recommendation_main")
|
||||||
|
|
||||||
@@ -62,6 +62,10 @@ async def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if not await is_pipeline_enabled(redis_client):
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
raw = await redis_client.lpop(queue)
|
raw = await redis_client.lpop(queue)
|
||||||
if raw is None:
|
if raw is None:
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ from services.shared.config import load_config
|
|||||||
from services.shared.db import get_pg_pool, get_redis
|
from services.shared.db import get_pg_pool, get_redis
|
||||||
from services.shared.logging import setup_logging
|
from services.shared.logging import setup_logging
|
||||||
from services.shared.redis_keys import (
|
from services.shared.redis_keys import (
|
||||||
PREFIX,
|
PIPELINE_ENABLED_KEY,
|
||||||
QUEUE_EXTRACTION,
|
QUEUE_EXTRACTION,
|
||||||
QUEUE_INGESTION,
|
QUEUE_INGESTION,
|
||||||
QUEUE_MACRO_CLASSIFICATION,
|
QUEUE_MACRO_CLASSIFICATION,
|
||||||
@@ -501,7 +501,7 @@ async def main() -> None:
|
|||||||
rds = get_redis(config)
|
rds = get_redis(config)
|
||||||
|
|
||||||
logger.info("Scheduler started (tick=%ds)", SCHEDULER_TICK)
|
logger.info("Scheduler started (tick=%ds)", SCHEDULER_TICK)
|
||||||
pipeline_key = f"{PREFIX}:pipeline:enabled"
|
pipeline_key = PIPELINE_ENABLED_KEY
|
||||||
|
|
||||||
# If PIPELINE_DEFAULT_OFF is set, initialize the toggle to OFF on first boot
|
# If PIPELINE_DEFAULT_OFF is set, initialize the toggle to OFF on first boot
|
||||||
# (only if the key doesn't already exist — preserves manual overrides)
|
# (only if the key doesn't already exist — preserves manual overrides)
|
||||||
|
|||||||
@@ -89,3 +89,18 @@ def trading_cb_key(trigger_type: str) -> str:
|
|||||||
def trading_notification_rate_key(channel: str) -> str:
|
def trading_notification_rate_key(channel: str) -> str:
|
||||||
"""Return the notification rate-limit key for a given channel."""
|
"""Return the notification rate-limit key for a given channel."""
|
||||||
return f"{TRADING_NOTIFICATION_RATE}:{channel}"
|
return f"{TRADING_NOTIFICATION_RATE}:{channel}"
|
||||||
|
|
||||||
|
|
||||||
|
# --- Pipeline toggle ---
|
||||||
|
PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
||||||
|
|
||||||
|
|
||||||
|
async def is_pipeline_enabled(rds: "redis.asyncio.Redis") -> bool:  # type: ignore[name-defined]  # noqa: F821
    """Check whether the pipeline is enabled via the Redis toggle.

    Reads ``PIPELINE_ENABLED_KEY`` from Redis and treats the pipeline as
    enabled unless the stored value is exactly ``"0"``.

    Args:
        rds: Async Redis client exposing an awaitable ``get(key)``.

    Returns:
        ``True`` when the key is absent or holds anything other than
        ``"0"``; ``False`` when it holds ``"0"`` (as ``str`` or ``bytes``).

    Workers should call this at the top of each loop iteration and sleep
    when it returns ``False``.
    """
    val = await rds.get(PIPELINE_ENABLED_KEY)
    if isinstance(val, (bytes, bytearray)):
        # A client created without decode_responses=True returns raw bytes.
        # b"0" != "0" is always True in Python, which would make the
        # toggle impossible to switch off, so normalise to str first.
        val = bytes(val).decode("utf-8", errors="replace")
    return val != "0"
|
||||||
|
|||||||
Reference in New Issue
Block a user