fix: thesis rewriter now routes to vLLM when provider is vllm
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- thesis_llm.py: add _call_vllm_thesis() using /v1/chat/completions
- thesis_llm.py: check resolved model_provider and route accordingly
- values.yaml: set OLLAMA_BASE_URL to http://10.1.1.12:2701
This commit is contained in:
@@ -20,7 +20,7 @@ import asyncpg
|
||||
import httpx
|
||||
|
||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||
from services.shared.config import OllamaConfig
|
||||
from services.shared.config import OllamaConfig, VLLMConfig
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -115,26 +115,42 @@ async def rewrite_thesis_with_llm(
|
||||
|
||||
# Resolve thesis-rewriter config from DB for variant override
|
||||
resolved: ResolvedAgentConfig | None = None
|
||||
effective_config = config
|
||||
effective_config: OllamaConfig | VLLMConfig = config
|
||||
use_vllm = False
|
||||
if pool is not None:
|
||||
try:
|
||||
resolver = AgentConfigResolver(pool, ttl_seconds=60)
|
||||
resolved = await resolver.resolve("thesis-rewriter")
|
||||
if resolved is not None:
|
||||
effective_config = OllamaConfig(
|
||||
base_url=config.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
retry_base_delay=config.retry_base_delay,
|
||||
retry_max_delay=config.retry_max_delay,
|
||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||
max_tokens=resolved.max_tokens,
|
||||
context_window=resolved.context_window,
|
||||
)
|
||||
provider = (resolved.model_provider or "").strip().lower()
|
||||
if provider == "vllm":
|
||||
use_vllm = True
|
||||
# Import load_config to get vllm base_url from env
|
||||
from services.shared.config import load_config as _load_config
|
||||
_cfg = _load_config()
|
||||
effective_config = VLLMConfig(
|
||||
base_url=_cfg.vllm.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
max_tokens=resolved.max_tokens,
|
||||
temperature=0.0,
|
||||
)
|
||||
else:
|
||||
effective_config = OllamaConfig(
|
||||
base_url=config.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
retry_base_delay=config.retry_base_delay,
|
||||
retry_max_delay=config.retry_max_delay,
|
||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||
max_tokens=resolved.max_tokens,
|
||||
context_window=resolved.context_window,
|
||||
)
|
||||
logger.info(
|
||||
"Thesis rewriter using resolved config: model=%s variant=%s",
|
||||
resolved.model_name, resolved.variant_id,
|
||||
"Thesis rewriter using resolved config: model=%s variant=%s provider=%s",
|
||||
resolved.model_name, resolved.variant_id, provider or "ollama",
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
@@ -177,7 +193,10 @@ async def rewrite_thesis_with_llm(
|
||||
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
|
||||
|
||||
try:
|
||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts)
|
||||
if use_vllm:
|
||||
rewritten = await _call_vllm_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||
else:
|
||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
if rewritten:
|
||||
@@ -318,3 +337,55 @@ async def _call_ollama_thesis(
|
||||
)
|
||||
|
||||
return content.strip()
|
||||
|
||||
|
||||
async def _call_vllm_thesis(
    client: httpx.AsyncClient,
    config: VLLMConfig,
    prompts: dict[str, str],
) -> str:
    """Make a vLLM chat completion call for thesis rewriting.

    Uses the OpenAI-compatible /v1/chat/completions endpoint.

    Args:
        client: HTTP client used to send the request.
        config: vLLM settings; this function reads ``base_url``, ``model``,
            ``max_tokens``, ``temperature`` and ``api_key``.
        prompts: Must contain the ``"system"`` and ``"user"`` prompt texts.

    Returns:
        The stripped text of the first choice's message, or an empty string
        when the response carries no usable text content (no choices, a
        malformed choice, or a null / non-string ``content``).

    Raises:
        httpx.HTTPError: On a transport failure, or on a 4xx/5xx status
            (raised by ``raise_for_status``). Errors are not swallowed here.
        ValueError: If the response body is not valid JSON.
        KeyError: If ``prompts`` lacks ``"system"`` or ``"user"``.
    """
    start = time.monotonic()

    payload: dict[str, object] = {
        "model": config.model,
        "messages": [
            {"role": "system", "content": prompts["system"]},
            {"role": "user", "content": prompts["user"]},
        ],
        "max_tokens": config.max_tokens,
        "temperature": config.temperature,
        "stream": False,
    }

    headers: dict[str, str] = {"Content-Type": "application/json"}
    if config.api_key:
        headers["Authorization"] = f"Bearer {config.api_key}"

    # Tolerate a trailing slash in the configured base URL so the request
    # path never becomes "//v1/chat/completions".
    base_url = config.base_url.rstrip("/")
    resp = await client.post(
        f"{base_url}/v1/chat/completions",
        json=payload,
        headers=headers,
    )
    _ = resp.raise_for_status()

    duration_ms = int((time.monotonic() - start) * 1000)

    # Walk the response defensively: every level is validated before use so
    # a malformed or partial body degrades to "" instead of raising.
    body: object = resp.json()
    content = ""
    if isinstance(body, dict):
        choices = body.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            msg = choices[0].get("message")
            if isinstance(msg, dict):
                raw_content = msg.get("content")
                # The OpenAI chat schema allows "content" to be null; only a
                # real string is usable as the rewritten thesis.
                if isinstance(raw_content, str):
                    content = raw_content

    logger.debug(
        "vLLM thesis call completed in %dms, response length=%d",
        duration_ms,
        len(content),
    )

    return content.strip()
|
||||
|
||||
Reference in New Issue
Block a user