fix: thesis rewriter now routes to vLLM when provider is vllm
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- thesis_llm.py: add _call_vllm_thesis() using /v1/chat/completions
- thesis_llm.py: check resolved model_provider and route accordingly
- values.yaml: set OLLAMA_BASE_URL to http://10.1.1.12:2701
This commit is contained in:
@@ -174,7 +174,7 @@ config:
|
|||||||
REDIS_DB: "0"
|
REDIS_DB: "0"
|
||||||
MINIO_ENDPOINT: "minio.minio-service.svc.cluster.local:80"
|
MINIO_ENDPOINT: "minio.minio-service.svc.cluster.local:80"
|
||||||
MINIO_SECURE: "false"
|
MINIO_SECURE: "false"
|
||||||
OLLAMA_BASE_URL: "http://ollama.ollama-service.svc.cluster.local:11434"
|
OLLAMA_BASE_URL: "http://10.1.1.12:2701"
|
||||||
OLLAMA_MODEL: "qwen3.5:9b-fast"
|
OLLAMA_MODEL: "qwen3.5:9b-fast"
|
||||||
OLLAMA_TIMEOUT: "240"
|
OLLAMA_TIMEOUT: "240"
|
||||||
OLLAMA_MAX_RETRIES: "2"
|
OLLAMA_MAX_RETRIES: "2"
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import asyncpg
|
|||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||||
from services.shared.config import OllamaConfig
|
from services.shared.config import OllamaConfig, VLLMConfig
|
||||||
from services.shared.schemas import TrendSummary
|
from services.shared.schemas import TrendSummary
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -115,26 +115,42 @@ async def rewrite_thesis_with_llm(
|
|||||||
|
|
||||||
# Resolve thesis-rewriter config from DB for variant override
|
# Resolve thesis-rewriter config from DB for variant override
|
||||||
resolved: ResolvedAgentConfig | None = None
|
resolved: ResolvedAgentConfig | None = None
|
||||||
effective_config = config
|
effective_config: OllamaConfig | VLLMConfig = config
|
||||||
|
use_vllm = False
|
||||||
if pool is not None:
|
if pool is not None:
|
||||||
try:
|
try:
|
||||||
resolver = AgentConfigResolver(pool, ttl_seconds=60)
|
resolver = AgentConfigResolver(pool, ttl_seconds=60)
|
||||||
resolved = await resolver.resolve("thesis-rewriter")
|
resolved = await resolver.resolve("thesis-rewriter")
|
||||||
if resolved is not None:
|
if resolved is not None:
|
||||||
effective_config = OllamaConfig(
|
provider = (resolved.model_provider or "").strip().lower()
|
||||||
base_url=config.base_url,
|
if provider == "vllm":
|
||||||
model=resolved.model_name,
|
use_vllm = True
|
||||||
timeout=resolved.timeout_seconds,
|
# Import load_config to get vllm base_url from env
|
||||||
max_retries=resolved.max_retries,
|
from services.shared.config import load_config as _load_config
|
||||||
retry_base_delay=config.retry_base_delay,
|
_cfg = _load_config()
|
||||||
retry_max_delay=config.retry_max_delay,
|
effective_config = VLLMConfig(
|
||||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
base_url=_cfg.vllm.base_url,
|
||||||
max_tokens=resolved.max_tokens,
|
model=resolved.model_name,
|
||||||
context_window=resolved.context_window,
|
timeout=resolved.timeout_seconds,
|
||||||
)
|
max_retries=resolved.max_retries,
|
||||||
|
max_tokens=resolved.max_tokens,
|
||||||
|
temperature=0.0,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
effective_config = OllamaConfig(
|
||||||
|
base_url=config.base_url,
|
||||||
|
model=resolved.model_name,
|
||||||
|
timeout=resolved.timeout_seconds,
|
||||||
|
max_retries=resolved.max_retries,
|
||||||
|
retry_base_delay=config.retry_base_delay,
|
||||||
|
retry_max_delay=config.retry_max_delay,
|
||||||
|
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||||
|
max_tokens=resolved.max_tokens,
|
||||||
|
context_window=resolved.context_window,
|
||||||
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Thesis rewriter using resolved config: model=%s variant=%s",
|
"Thesis rewriter using resolved config: model=%s variant=%s provider=%s",
|
||||||
resolved.model_name, resolved.variant_id,
|
resolved.model_name, resolved.variant_id, provider or "ollama",
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@@ -177,7 +193,10 @@ async def rewrite_thesis_with_llm(
|
|||||||
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
|
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts)
|
if use_vllm:
|
||||||
|
rewritten = await _call_vllm_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||||
|
else:
|
||||||
|
rewritten = await _call_ollama_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||||
|
|
||||||
if rewritten:
|
if rewritten:
|
||||||
@@ -318,3 +337,55 @@ async def _call_ollama_thesis(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return content.strip()
|
return content.strip()
|
||||||
|
|
||||||
|
|
||||||
|
async def _call_vllm_thesis(
    client: httpx.AsyncClient,
    config: VLLMConfig,
    prompts: dict[str, str],
) -> str:
    """Make a vLLM chat completion call for thesis rewriting.

    Uses the OpenAI-compatible /v1/chat/completions endpoint.

    Args:
        client: HTTP client used to send the request.
        config: vLLM settings; ``base_url``, ``model``, ``max_tokens``,
            ``temperature`` and ``api_key`` are read.
        prompts: Must contain ``"system"`` and ``"user"`` entries.

    Returns:
        The stripped text of the first choice's message, or an empty
        string when the response carries no usable text content
        (no choices, malformed choice, or ``content`` is null).

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
            Errors raised by ``client.post`` itself are not caught here
            either; the caller is expected to handle them.
        KeyError: If ``prompts`` lacks ``"system"`` or ``"user"``.
    """
    start = time.monotonic()

    payload: dict[str, object] = {
        "model": config.model,
        "messages": [
            {"role": "system", "content": prompts["system"]},
            {"role": "user", "content": prompts["user"]},
        ],
        "max_tokens": config.max_tokens,
        "temperature": config.temperature,
        "stream": False,
    }

    headers: dict[str, str] = {"Content-Type": "application/json"}
    if config.api_key:
        headers["Authorization"] = f"Bearer {config.api_key}"

    # Tolerate a configured base URL that ends with "/" so the request
    # path never becomes "//v1/chat/completions".
    endpoint = f"{config.base_url.rstrip('/')}/v1/chat/completions"
    resp = await client.post(endpoint, json=payload, headers=headers)
    _ = resp.raise_for_status()

    duration_ms = int((time.monotonic() - start) * 1000)

    body: object = resp.json()
    choices = body.get("choices") if isinstance(body, dict) else None

    content: str = ""
    if isinstance(choices, list) and choices:
        first = choices[0]
        msg = first.get("message") if isinstance(first, dict) else None
        raw = msg.get("content") if isinstance(msg, dict) else None
        # The "content" key can be present with a null value; only accept
        # real text so len()/strip() below cannot fail on None.
        if isinstance(raw, str):
            content = raw

    logger.debug(
        "vLLM thesis call completed in %dms, response length=%d",
        duration_ms,
        len(content),
    )

    return content.strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user