phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,354 @@
|
||||
"""Deterministic recommendation eligibility logic.
|
||||
|
||||
Evaluates trend summaries against configurable thresholds to decide:
|
||||
- Whether a recommendation should be generated at all
|
||||
- What action type (buy/sell/hold/watch) is appropriate
|
||||
- What execution mode (informational/paper_eligible/live_eligible) is allowed
|
||||
- Position sizing guidance based on portfolio rules
|
||||
|
||||
All decisions are rule-based with no model involvement. The LLM is only
|
||||
used downstream for optional thesis wording (a separate task).
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
|
||||
from services.shared.schemas import (
|
||||
ActionType,
|
||||
PositionSizing,
|
||||
RecommendationMode,
|
||||
TrendDirection,
|
||||
TrendSummary,
|
||||
)
|
||||
|
||||
|
||||
class RejectionReason(str, Enum):
    """Why a trend summary was deemed ineligible for a recommendation.

    Members mix in ``str``, so each member is also a plain string equal to
    its value. One member exists per hard gate applied by ``_check_gates``.
    """

    # Confidence is below ``EligibilityConfig.min_confidence``.
    LOW_CONFIDENCE = "low_confidence"
    # Trend strength is below ``EligibilityConfig.min_trend_strength``.
    LOW_TREND_STRENGTH = "low_trend_strength"
    # Contradiction score is above ``EligibilityConfig.max_contradiction_score``.
    HIGH_CONTRADICTION = "high_contradiction"
    # Supporting + opposing evidence count is below ``min_evidence_count``.
    INSUFFICIENT_EVIDENCE = "insufficient_evidence"
    # Trend direction is ``TrendDirection.NEUTRAL``.
    NEUTRAL_DIRECTION = "neutral_direction"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EligibilityConfig:
    """Tunable thresholds for recommendation eligibility.

    All thresholds are deterministic — no model inference involved.

    The dataclass is frozen: instances cannot be mutated after construction,
    so a different set of thresholds requires building a new instance.
    """

    # --- Gate thresholds (below these → no recommendation) ---
    # Each gate is evaluated independently; every failing gate adds its own
    # rejection reason.
    min_confidence: float = 0.35
    min_trend_strength: float = 0.10
    # This one is an upper bound: a score strictly above it is rejected.
    max_contradiction_score: float = 0.60
    min_evidence_count: int = 2  # combined supporting + opposing

    # --- Action mapping thresholds ---
    # Trend strength above this → buy/sell; below → hold/watch
    # (the comparison is ``>=``, so a strength equal to the threshold counts
    # as a strong signal)
    action_strength_threshold: float = 0.25
    # Confidence above this → hold (rather than watch) for weak signals
    hold_confidence_threshold: float = 0.50

    # --- Mode escalation thresholds ---
    # Confidence required for paper_eligible (below → informational)
    paper_confidence_threshold: float = 0.50
    # Confidence required for live_eligible (below → paper at most)
    live_confidence_threshold: float = 0.70
    # Contradiction must be below this for live eligibility
    # (the comparison is ``<=``, so a score equal to this still qualifies)
    live_max_contradiction: float = 0.25
    # Minimum evidence count for live eligibility
    live_min_evidence: int = 5

    # --- Position sizing rules (Requirement 7.3) ---
    # Base portfolio allocation percentage
    base_portfolio_pct: float = 0.02
    # Maximum portfolio allocation percentage
    max_portfolio_pct: float = 0.05
    # Base max loss percentage
    base_max_loss_pct: float = 0.005
    # Maximum max loss percentage
    max_max_loss_pct: float = 0.01
    # Confidence scaling: higher confidence → larger position (linear)
    confidence_sizing_weight: float = 0.5
    # Contradiction penalty: higher contradiction → smaller position
    contradiction_sizing_penalty: float = 0.3


# Shared default instance; used as the default ``config`` argument of
# ``evaluate_eligibility``.
DEFAULT_ELIGIBILITY_CONFIG = EligibilityConfig()
|
||||
|
||||
|
||||
@dataclass
class EligibilityResult:
    """Output of the deterministic eligibility evaluation.

    Captures the decision, the reasoning, and all inputs used so the
    full decision trace is reproducible (Requirement 8.3).
    """

    # True when no gate check produced a rejection reason.
    eligible: bool
    # Action derived from trend direction and strength; it is computed even
    # when the summary was rejected, so the trace stays complete.
    action: ActionType
    # Highest allowed execution mode; forced to informational when
    # ``eligible`` is False.
    mode: RecommendationMode
    # Portfolio allocation / max-loss guidance.
    position_sizing: PositionSizing
    # One entry per failed gate; empty when eligible.
    rejection_reasons: list[RejectionReason] = field(default_factory=list)
    # Horizon label mapped from the summary's trend window.
    time_horizon: str = ""
    # Human-readable conditions under which the recommendation becomes stale.
    invalidation_conditions: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gate checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _check_gates(
    summary: TrendSummary,
    config: EligibilityConfig,
) -> list[RejectionReason]:
    """Run every hard gate and report which ones the summary fails.

    All gates are evaluated (there is no short-circuit), so the result lists
    every failing gate in a fixed order: confidence, trend strength,
    contradiction, evidence count, neutral direction.

    Args:
        summary: Trend summary under evaluation.
        config: Thresholds the gates compare against.

    Returns:
        The rejection reasons for the failed gates; empty when all pass.
    """
    # Each entry pairs "did this gate fail?" with the reason to report.
    gate_outcomes = (
        (
            summary.confidence < config.min_confidence,
            RejectionReason.LOW_CONFIDENCE,
        ),
        (
            summary.trend_strength < config.min_trend_strength,
            RejectionReason.LOW_TREND_STRENGTH,
        ),
        (
            summary.contradiction_score > config.max_contradiction_score,
            RejectionReason.HIGH_CONTRADICTION,
        ),
        (
            # Evidence on both sides counts toward the minimum.
            len(summary.top_supporting_evidence) + len(summary.top_opposing_evidence)
            < config.min_evidence_count,
            RejectionReason.INSUFFICIENT_EVIDENCE,
        ),
        (
            summary.trend_direction == TrendDirection.NEUTRAL,
            RejectionReason.NEUTRAL_DIRECTION,
        ),
    )
    return [reason for failed, reason in gate_outcomes if failed]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action mapping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _determine_action(
    summary: TrendSummary,
    config: EligibilityConfig,
) -> ActionType:
    """Translate trend direction, strength and confidence into an action.

    * Bullish with strength at or above ``action_strength_threshold`` → BUY.
    * Bearish with strength at or above ``action_strength_threshold`` → SELL.
    * Bullish/bearish but weaker → HOLD when confidence reaches
      ``hold_confidence_threshold``, otherwise WATCH.
    * Any other direction (mixed, neutral, unrecognised) → WATCH.
    """
    trend = summary.trend_direction

    # Pick the action a decisive signal would produce; anything that is not
    # clearly bullish or bearish is only worth watching.
    if trend == TrendDirection.BULLISH:
        decisive_action = ActionType.BUY
    elif trend == TrendDirection.BEARISH:
        decisive_action = ActionType.SELL
    else:
        return ActionType.WATCH

    if summary.trend_strength >= config.action_strength_threshold:
        return decisive_action

    # Weak directional signal: confidence decides between HOLD and WATCH.
    if summary.confidence >= config.hold_confidence_threshold:
        return ActionType.HOLD
    return ActionType.WATCH
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode escalation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _determine_mode(
    summary: TrendSummary,
    action: ActionType,
    config: EligibilityConfig,
) -> RecommendationMode:
    """Pick the highest execution mode the signal qualifies for.

    WATCH and HOLD never trigger trades, so they are always informational.
    BUY/SELL are live-eligible only when confidence, contradiction and
    evidence count all meet the live thresholds; otherwise they are
    paper-eligible when confidence reaches ``paper_confidence_threshold``,
    and informational when it does not.
    """
    if action == ActionType.WATCH or action == ActionType.HOLD:
        return RecommendationMode.INFORMATIONAL

    signal_confidence = summary.confidence
    evidence_total = len(summary.top_supporting_evidence) + len(summary.top_opposing_evidence)

    # Live eligibility is the strictest tier, so it is tested first.
    meets_live_bar = (
        signal_confidence >= config.live_confidence_threshold
        and summary.contradiction_score <= config.live_max_contradiction
        and evidence_total >= config.live_min_evidence
    )
    if meets_live_bar:
        return RecommendationMode.LIVE_ELIGIBLE

    return (
        RecommendationMode.PAPER_ELIGIBLE
        if signal_confidence >= config.paper_confidence_threshold
        else RecommendationMode.INFORMATIONAL
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Position sizing (Requirement 7.3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_position_sizing(
    summary: TrendSummary,
    config: EligibilityConfig,
) -> PositionSizing:
    """Derive portfolio-allocation and max-loss guidance from signal quality.

    Both figures follow the same recipe: start at the configured base, add a
    confidence-weighted share of the gap between base and maximum, shrink the
    result by the contradiction penalty, then clamp it to
    ``[base * 0.5, maximum]``. Results are rounded to 6 decimal places.
    """
    # Multiplicative shrink factor: higher contradiction → smaller position.
    penalty_factor = 1.0 - config.contradiction_sizing_penalty * summary.contradiction_score

    def _scaled(base: float, ceiling: float) -> float:
        """Apply the confidence boost, the penalty and the clamp to one figure."""
        boosted = base + (
            config.confidence_sizing_weight
            * summary.confidence
            * (ceiling - base)
        )
        penalised = boosted * penalty_factor
        return max(base * 0.5, min(penalised, ceiling))

    allocation = _scaled(config.base_portfolio_pct, config.max_portfolio_pct)
    loss_cap = _scaled(config.base_max_loss_pct, config.max_max_loss_pct)

    return PositionSizing(
        portfolio_pct=round(allocation, 6),
        max_loss_pct=round(loss_cap, 6),
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Time horizon mapping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Known trend windows and the horizon label each one maps to.
_WINDOW_TO_HORIZON: dict[str, str] = dict(
    [
        ("intraday", "intraday"),
        ("1d", "swing_1d_3d"),
        ("7d", "swing_1d_10d"),
        ("30d", "position_10d_30d"),
        ("90d", "position_30d_90d"),
    ]
)


def _map_time_horizon(window: str) -> str:
    """Return the horizon label for ``window``.

    Windows missing from the lookup table fall back to ``"window_<window>"``.
    """
    if window in _WINDOW_TO_HORIZON:
        return _WINDOW_TO_HORIZON[window]
    return f"window_{window}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Invalidation conditions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _derive_invalidation_conditions(
    summary: TrendSummary,
    action: ActionType,
    config: EligibilityConfig = DEFAULT_ELIGIBILITY_CONFIG,
) -> list[str]:
    """Generate deterministic invalidation conditions for the recommendation.

    These describe when the recommendation should be considered stale or wrong.

    Args:
        summary: Trend summary the recommendation is based on.
        action: Action chosen for the recommendation; only BUY and SELL add a
            direction-reversal condition.
        config: Thresholds in force for this evaluation. The contradiction
            limit quoted in the output comes from
            ``config.max_contradiction_score`` so the text always matches the
            gate that was applied. Defaults to the module default, which
            reproduces the previously hard-coded ``0.60``.

    Returns:
        Human-readable condition strings, in a fixed order.
    """
    conditions: list[str] = []

    if action == ActionType.BUY:
        conditions.append(
            f"Trend direction for {summary.entity_id} reverses to bearish"
        )
    elif action == ActionType.SELL:
        conditions.append(
            f"Trend direction for {summary.entity_id} reverses to bullish"
        )

    if summary.contradiction_score > 0.0:
        conditions.append(
            f"Contradiction score exceeds {config.max_contradiction_score:.2f} "
            f"(currently {summary.contradiction_score:.2f})"
        )

    if summary.confidence > 0.0:
        # Invalidate once confidence loses 30% of its current value.
        conditions.append(
            f"Confidence drops below {summary.confidence * 0.7:.2f}"
        )

    if summary.material_risks:
        # Only the first listed risk is surfaced.
        conditions.append(
            f"Material risk materialises: {summary.material_risks[0]}"
        )

    return conditions


# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------


def evaluate_eligibility(
    summary: TrendSummary,
    config: EligibilityConfig = DEFAULT_ELIGIBILITY_CONFIG,
) -> EligibilityResult:
    """Evaluate a trend summary for recommendation eligibility.

    This is the single deterministic entry point. It:
      1. Applies gate checks (confidence, strength, contradiction, evidence)
      2. Maps trend direction + strength to an action type
      3. Determines the highest allowed execution mode
      4. Computes position sizing from portfolio rules
      5. Derives invalidation conditions

    Args:
        summary: Trend summary to evaluate.
        config: Thresholds to apply; defaults to the module default.

    Returns:
        An EligibilityResult with the full decision trace. When any gate
        fails, ``eligible`` is False and ``mode`` is informational.
    """
    rejection_reasons = _check_gates(summary, config)

    # Even if rejected, we still compute action/mode for the trace
    action = _determine_action(summary, config)
    mode = _determine_mode(summary, action, config)
    sizing = _compute_position_sizing(summary, config)
    horizon = _map_time_horizon(summary.window.value)
    # Pass the active config so the quoted contradiction limit matches the
    # threshold used by the gate checks above.
    invalidation = _derive_invalidation_conditions(summary, action, config)

    eligible = not rejection_reasons

    # If not eligible, force mode to informational (Requirement 7.4)
    if not eligible:
        mode = RecommendationMode.INFORMATIONAL

    return EligibilityResult(
        eligible=eligible,
        action=action,
        mode=mode,
        position_sizing=sizing,
        rejection_reasons=rejection_reasons,
        time_horizon=horizon,
        invalidation_conditions=invalidation,
    )
|
||||
@@ -0,0 +1,71 @@
|
||||
"""Recommendation worker entrypoint - polls Redis for recommendation jobs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
from minio import Minio
|
||||
|
||||
from services.recommendation.worker import generate_recommendation
|
||||
from services.shared.config import load_config
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, queue_key
|
||||
|
||||
logger = logging.getLogger("recommendation_main")
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the recommendation worker loop.

    Loads configuration, sets up logging, a PostgreSQL pool, a MinIO client
    and a Redis client, then polls the recommendation queue. Each job is
    expected to be a JSON object; ``ticker`` defaults to ``""`` and
    ``window`` to ``"7d"`` when absent.

    A payload that is not valid JSON, or is not a JSON object, is logged and
    discarded so that one bad message cannot stop the worker. Failures while
    generating a single recommendation are likewise logged and the loop
    continues. The pool and the Redis client are closed when the loop exits.
    """
    config = load_config()
    setup_logging("recommendation", level=config.log_level, json_output=config.json_logs)

    pool = await asyncpg.create_pool(dsn=config.postgres.dsn, min_size=2, max_size=8)
    minio_client = Minio(
        config.minio.endpoint,
        access_key=config.minio.access_key,
        secret_key=config.minio.secret_key,
        secure=config.minio.secure,
    )

    import redis.asyncio as aioredis

    redis_client = aioredis.from_url(config.redis.url)
    queue = queue_key(QUEUE_RECOMMENDATION)
    logger.info("Recommendation worker started, polling %s", queue)

    try:
        while True:
            raw = await redis_client.lpop(queue)
            if raw is None:
                # Queue is empty: back off briefly before polling again.
                await asyncio.sleep(1)
                continue

            # Parse defensively: the payload comes from outside this process.
            try:
                job = json.loads(raw)
            except (TypeError, ValueError):
                logger.exception(
                    "Discarding malformed recommendation job payload: %r", raw
                )
                continue
            if not isinstance(job, dict):
                logger.error(
                    "Discarding recommendation job that is not a JSON object: %r", job
                )
                continue

            ticker = job.get("ticker", "")
            window = job.get("window", "7d")

            logger.info("Processing recommendation job for %s/%s", ticker, window)

            try:
                rec = await generate_recommendation(
                    pool, ticker, window,
                    minio_client=minio_client,
                )
                if rec:
                    logger.info(
                        "Recommendation generated for %s: %s %s",
                        ticker, rec.action.value, rec.mode.value,
                    )
                else:
                    logger.info("No recommendation generated for %s (no trend data)", ticker)
            except Exception:
                # Top-level boundary: keep the worker alive for the next job.
                logger.exception("Recommendation failed for %s", ticker)
    finally:
        # Close Redis even if closing the pool raises.
        try:
            await pool.close()
        finally:
            await redis_client.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,241 @@
|
||||
"""Suppression logic for low-quality data or low confidence.
|
||||
|
||||
Evaluates the quality of the underlying data feeding a trend summary
|
||||
and suppresses automated trade eligibility when data quality is poor.
|
||||
Suppressed recommendations are marked as informational only.
|
||||
|
||||
This layer runs *before* the eligibility engine and acts as a pre-filter
|
||||
on data quality. The eligibility engine handles signal-level thresholds
|
||||
(confidence, strength, contradiction); this module handles data-level
|
||||
quality concerns (stale evidence, low extraction quality, poor source
|
||||
diversity, insufficient valid documents).
|
||||
|
||||
Requirements: 7.4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SuppressionReason(str, Enum):
    """Why a recommendation was suppressed due to data quality.

    Members mix in ``str``, so each member is also a plain string equal to
    its value.
    """

    # Average extraction confidence is below the configured minimum, or the
    # overall data quality score is below ``min_data_quality_score``.
    LOW_DATA_CONFIDENCE = "low_data_confidence"
    # Newest evidence is older than the staleness limit, or documents exist
    # but carry no timestamp.
    STALE_EVIDENCE = "stale_evidence"
    # Fewer distinct source types than ``min_source_types``.
    LOW_SOURCE_DIVERSITY = "low_source_diversity"
    # Share of failed documents exceeds ``max_extraction_failure_rate``.
    HIGH_EXTRACTION_FAILURE_RATE = "high_extraction_failure_rate"
    # Fewer valid documents than ``min_valid_documents``.
    INSUFFICIENT_VALID_DOCUMENTS = "insufficient_valid_documents"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SuppressionConfig:
    """Tunable thresholds for data quality suppression.

    These thresholds focus on the quality of the *input data* rather
    than the trend signal itself (which is handled by EligibilityConfig).

    The dataclass is frozen: instances cannot be mutated after construction.
    """

    # Minimum average extraction confidence across evidence documents.
    # Below this, the underlying data is too unreliable for trade decisions.
    min_avg_extraction_confidence: float = 0.40

    # Maximum age (hours) of the most recent evidence document.
    # If the freshest evidence is older than this, the trend is stale.
    # The same value normalises the freshness component of the quality score.
    max_evidence_staleness_hours: float = 168.0  # 7 days

    # Minimum number of distinct source types (e.g. news, filings, market)
    # represented in the evidence. Low diversity means the signal may be
    # driven by a single unreliable source class.
    # The check is applied only when at least one document is present.
    min_source_types: int = 1

    # Maximum tolerable extraction failure rate (0-1).
    # If more than this fraction of documents failed extraction,
    # the data pipeline is unreliable for this ticker.
    max_extraction_failure_rate: float = 0.50

    # Minimum number of valid (non-failed) documents that contributed
    # to the trend. Below this, there isn't enough data to act on.
    min_valid_documents: int = 2

    # Overall data quality confidence threshold.
    # The computed data quality score must exceed this for the
    # recommendation to be eligible for automated trading.
    # (The comparison is strict "<", so a score equal to the threshold passes.)
    min_data_quality_score: float = 0.30


# Shared default instance; used as the default ``config`` argument of
# ``evaluate_suppression``.
DEFAULT_SUPPRESSION_CONFIG = SuppressionConfig()
|
||||
|
||||
|
||||
@dataclass
class DataQualityContext:
    """Quality metrics about the data underlying a trend summary.

    Populated by querying document and extraction metadata for the
    ticker and window. When not available from the database, callers
    can construct this from the trend summary itself.
    """

    # Number of documents considered; denominator of the failure rate and of
    # the valid-document ratio.
    total_documents: int = 0
    # Documents counted as valid.
    valid_documents: int = 0
    # Documents counted as failed.
    failed_documents: int = 0
    # Mean extraction confidence; compared against
    # ``SuppressionConfig.min_avg_extraction_confidence``.
    avg_extraction_confidence: float = 0.0
    # Timestamp of the freshest evidence; a naive value is treated as UTC by
    # the staleness checks. None means no timestamp is known.
    newest_evidence_at: datetime | None = None
    # Distinct source type labels represented in the evidence.
    source_types: set[str] = field(default_factory=set)
|
||||
|
||||
|
||||
@dataclass
class SuppressionResult:
    """Output of the suppression evaluation."""

    # True when at least one suppression reason was recorded.
    suppressed: bool
    # Every data-quality check that failed; empty when not suppressed.
    reasons: list[SuppressionReason] = field(default_factory=list)
    # Overall quality score in [0, 1]; higher is better.
    data_quality_score: float = 0.0
    # The context the decision was based on (supplied or built as a fallback).
    context: DataQualityContext | None = None
|
||||
|
||||
|
||||
def build_quality_context_from_summary(
    summary: TrendSummary,
) -> DataQualityContext:
    """Derive a fallback DataQualityContext from the trend summary alone.

    Used when document-level quality metrics are unavailable. The summary's
    own figures stand in as proxies: the evidence lists give the document
    counts (all treated as valid, none failed), the trend confidence stands
    in for extraction confidence, and the summary's generation time stands
    in for the newest evidence timestamp.
    """
    evidence_total = sum(
        len(bucket)
        for bucket in (summary.top_supporting_evidence, summary.top_opposing_evidence)
    )

    # NOTE(review): source types cannot be recovered from the summary, so the
    # set is left empty. With the default ``min_source_types`` of 1,
    # ``evaluate_suppression`` therefore reports LOW_SOURCE_DIVERSITY for
    # every fallback context that has evidence — confirm this is intended.
    unknown_sources: set[str] = set()

    return DataQualityContext(
        total_documents=evidence_total,
        valid_documents=evidence_total,
        failed_documents=0,
        avg_extraction_confidence=summary.confidence,
        newest_evidence_at=summary.generated_at,
        source_types=unknown_sources,
    )
|
||||
|
||||
|
||||
def _compute_data_quality_score(
    ctx: DataQualityContext,
    config: SuppressionConfig,
    reference_time: datetime,
) -> float:
    """Compute an overall data quality score from the context.

    Returns a value in [0, 1] where higher is better quality.
    Components:
      - Extraction confidence (40% weight)
      - Evidence freshness (30% weight)
      - Document coverage (30% weight)

    Args:
        ctx: Quality metrics for the evidence behind a trend summary.
        config: Supplies ``max_evidence_staleness_hours``, the age at which
            freshness reaches zero.
        reference_time: "Now" for the age calculation. A naive value is
            interpreted as UTC, the same way naive evidence timestamps are.

    Returns:
        The weighted score, clamped to [0, 1] and rounded to 4 decimals.
    """
    # Extraction confidence component: saturates at an average of 0.8.
    conf_component = min(ctx.avg_extraction_confidence / 0.8, 1.0)

    # Freshness component
    newest = ctx.newest_evidence_at
    if newest is None:
        freshness_component = 0.0
    else:
        if newest.tzinfo is None:
            newest = newest.replace(tzinfo=timezone.utc)
        if reference_time.tzinfo is None:
            # Mixing naive and aware datetimes raises TypeError on subtraction.
            reference_time = reference_time.replace(tzinfo=timezone.utc)
        age_hours = (reference_time - newest).total_seconds() / 3600.0
        max_hours = config.max_evidence_staleness_hours
        if max_hours > 0:
            # Clamp on both sides: evidence dated after the reference time
            # has a negative age and must not score above "perfectly fresh".
            freshness_component = max(0.0, min(1.0, 1.0 - (age_hours / max_hours)))
        else:
            # A zero or negative window leaves no room for ageing: anything
            # older than the reference time counts as fully stale.
            freshness_component = 1.0 if age_hours <= 0 else 0.0

    # Document coverage component: share of valid documents, scaled by how
    # close the valid count is to 10.
    if ctx.total_documents > 0:
        valid_ratio = ctx.valid_documents / ctx.total_documents
        count_factor = min(ctx.valid_documents / 10.0, 1.0)
        coverage_component = valid_ratio * count_factor
    else:
        coverage_component = 0.0

    score = (0.4 * conf_component) + (0.3 * freshness_component) + (0.3 * coverage_component)
    return round(max(0.0, min(1.0, score)), 4)
|
||||
|
||||
|
||||
def evaluate_suppression(
    summary: TrendSummary,
    quality_ctx: DataQualityContext | None = None,
    config: SuppressionConfig = DEFAULT_SUPPRESSION_CONFIG,
    reference_time: datetime | None = None,
) -> SuppressionResult:
    """Evaluate whether a recommendation should be suppressed due to data quality.

    Checks multiple data quality dimensions and returns a SuppressionResult
    indicating whether the recommendation should be suppressed and why.

    Args:
        summary: The trend summary to evaluate.
        quality_ctx: Data quality context. If None, a minimal context is
            built from the trend summary itself.
        config: Suppression thresholds.
        reference_time: Reference time for staleness checks. Defaults to the
            current UTC time; a naive value is interpreted as UTC.

    Returns:
        SuppressionResult with suppression decision and reasons.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    elif reference_time.tzinfo is None:
        # Evidence timestamps are normalised to aware UTC below; subtracting
        # an aware value from a naive one would raise TypeError.
        reference_time = reference_time.replace(tzinfo=timezone.utc)

    ctx = quality_ctx if quality_ctx is not None else build_quality_context_from_summary(summary)
    reasons: list[SuppressionReason] = []

    # Check average extraction confidence
    if ctx.avg_extraction_confidence < config.min_avg_extraction_confidence:
        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)

    # Check evidence staleness
    if ctx.newest_evidence_at is not None:
        newest = ctx.newest_evidence_at
        if newest.tzinfo is None:
            newest = newest.replace(tzinfo=timezone.utc)
        age_hours = (reference_time - newest).total_seconds() / 3600.0
        if age_hours > config.max_evidence_staleness_hours:
            reasons.append(SuppressionReason.STALE_EVIDENCE)
    elif ctx.total_documents > 0:
        # Have documents but no timestamp — treat as stale
        reasons.append(SuppressionReason.STALE_EVIDENCE)

    # Check source diversity (only meaningful when documents exist)
    if len(ctx.source_types) < config.min_source_types and ctx.total_documents > 0:
        reasons.append(SuppressionReason.LOW_SOURCE_DIVERSITY)

    # Check extraction failure rate
    if ctx.total_documents > 0:
        failure_rate = ctx.failed_documents / ctx.total_documents
        if failure_rate > config.max_extraction_failure_rate:
            reasons.append(SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE)

    # Check minimum valid documents
    if ctx.valid_documents < config.min_valid_documents:
        reasons.append(SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS)

    # Compute overall data quality score
    quality_score = _compute_data_quality_score(ctx, config, reference_time)

    # If quality score is below threshold, add a general suppression reason
    # (unless the same reason was already recorded by the confidence check).
    if (
        quality_score < config.min_data_quality_score
        and SuppressionReason.LOW_DATA_CONFIDENCE not in reasons
    ):
        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)

    suppressed = bool(reasons)

    if suppressed:
        logger.info(
            "Recommendation suppressed for %s/%s: reasons=%s quality_score=%.3f",
            summary.entity_id, summary.window.value,
            [r.value for r in reasons], quality_score,
        )

    return SuppressionResult(
        suppressed=suppressed,
        reasons=reasons,
        data_quality_score=quality_score,
        context=ctx,
    )
|
||||
@@ -0,0 +1,175 @@
|
||||
"""Optional LLM wording layer for thesis generation.
|
||||
|
||||
Takes a deterministic thesis string (built from trend data and eligibility
|
||||
rules) and rewrites it into natural, analyst-quality prose using a local
|
||||
Ollama model. The deterministic thesis is always preserved as the fallback
|
||||
and audit reference.
|
||||
|
||||
This module is opt-in: callers must explicitly request LLM rewriting.
|
||||
If the LLM call fails or is disabled, the original deterministic thesis
|
||||
is returned unchanged.
|
||||
|
||||
Requirements: 7.1, 7.2
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from services.shared.config import OllamaConfig
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Version tag for the thesis-rewrite prompt defined below.
# NOTE(review): presumably stored alongside generated output so prompt
# changes are traceable — confirm against the callers that import it.
THESIS_PROMPT_VERSION = "thesis-rewrite-v1"

# System message used for every thesis rewrite request. The backslash after
# the opening quotes suppresses a leading newline, and the one at the end of
# the first sentence joins it with the following line.
THESIS_SYSTEM_PROMPT = """\
You are a concise financial analyst. You rewrite structured trade thesis \
summaries into clear, professional prose suitable for an internal research note.

STRICT RULES:
1. Do NOT add any information that is not present in the input.
2. Do NOT fabricate numbers, dates, company names, or analyst opinions.
3. Keep the rewrite under 150 words.
4. Preserve all factual claims, risk notes, and evidence counts from the input.
5. Use a neutral, professional tone. Avoid hype or marketing language.
6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary."""
|
||||
|
||||
|
||||
def build_thesis_rewrite_prompt(
    deterministic_thesis: str,
    summary: TrendSummary,
) -> dict[str, str]:
    """Assemble the system and user prompts for a thesis rewrite.

    The user prompt embeds the deterministic thesis verbatim, followed by a
    context section with the key trend figures. Up to three catalysts and up
    to two material risks are appended when the summary has any.

    Returns:
        A dict with the keys ``"system"`` and ``"user"``.
    """
    context_lines = [
        f"Ticker: {summary.entity_id}",
        f"Window: {summary.window.value}",
        f"Direction: {summary.trend_direction.value}",
        f"Strength: {summary.trend_strength:.2f}",
        f"Confidence: {summary.confidence:.2f}",
        f"Contradiction score: {summary.contradiction_score:.2f}",
    ]

    # Optional sections are capped to keep the prompt short.
    if summary.dominant_catalysts:
        top_catalysts = summary.dominant_catalysts[:3]
        context_lines.append("Catalysts: " + ", ".join(top_catalysts))
    if summary.material_risks:
        top_risks = summary.material_risks[:2]
        context_lines.append("Risks: " + "; ".join(top_risks))

    context_block = "\n".join(context_lines)

    user_prompt = f"""\
Rewrite the following structured thesis into clear, professional analyst prose.

--- STRUCTURED THESIS ---
{deterministic_thesis}
--- END STRUCTURED THESIS ---

--- CONTEXT ---
{context_block}
--- END CONTEXT ---

Return ONLY the rewritten thesis. No other text."""

    return dict(system=THESIS_SYSTEM_PROMPT, user=user_prompt)
|
||||
|
||||
|
||||
async def rewrite_thesis_with_llm(
    deterministic_thesis: str,
    summary: TrendSummary,
    config: OllamaConfig,
    http_client: httpx.AsyncClient | None = None,
) -> str:
    """Ask a local Ollama model to reword the deterministic thesis.

    The LLM layer is purely additive: an exception from the call or an empty
    response both result in the original thesis being returned unchanged, so
    recommendation generation is never blocked.

    Args:
        deterministic_thesis: The rule-based thesis string.
        summary: The trend summary that produced the thesis.
        config: Ollama connection and model configuration.
        http_client: Optional shared HTTP client for connection reuse. When
            omitted, a temporary client is created and closed before returning.

    Returns:
        The LLM-rewritten thesis on success, or the original on failure.
    """
    prompt_pair = build_thesis_rewrite_prompt(deterministic_thesis, summary)

    # Only a client created here is closed here; a caller-supplied one is not.
    owns_client = http_client is None
    client = http_client or httpx.AsyncClient(timeout=config.timeout)

    try:
        llm_text = await _call_ollama_thesis(client, config, prompt_pair)

        if not llm_text:
            logger.warning(
                "LLM thesis rewrite returned empty for %s — using deterministic thesis",
                summary.entity_id,
            )
            return deterministic_thesis

        logger.info(
            "LLM thesis rewrite succeeded for %s (%d chars → %d chars)",
            summary.entity_id,
            len(deterministic_thesis),
            len(llm_text),
        )
        return llm_text
    except Exception:
        # Deliberate catch-all boundary: any failure falls back to the
        # deterministic thesis after being logged with its traceback.
        logger.exception(
            "LLM thesis rewrite failed for %s — using deterministic thesis",
            summary.entity_id,
        )
        return deterministic_thesis
    finally:
        if owns_client:
            await client.aclose()
|
||||
|
||||
|
||||
async def _call_ollama_thesis(
    client: httpx.AsyncClient,
    config: OllamaConfig,
    prompts: dict[str, str],
) -> str:
    """Send one non-streaming chat request to Ollama and return its text.

    Posts the system and user prompts to ``<base_url>/api/chat``. A non-2xx
    response raises via ``raise_for_status``. The reply text is taken from
    ``message.content`` in the JSON body and stripped of surrounding
    whitespace; when ``message`` is not a dict the result is an empty string.
    """
    started_at = time.monotonic()

    request_body = {
        "model": config.model,
        "messages": [
            {"role": role, "content": prompts[role]}
            for role in ("system", "user")
        ],
        "stream": False,
    }

    response = await client.post(f"{config.base_url}/api/chat", json=request_body)
    response.raise_for_status()

    elapsed_ms = int((time.monotonic() - started_at) * 1000)

    parsed: dict[str, object] = response.json()
    message = parsed.get("message")
    if isinstance(message, dict):
        text: str = message.get("content", "")
    else:
        text = ""

    logger.debug(
        "Ollama thesis call completed in %dms, response length=%d",
        elapsed_ms,
        len(text),
    )

    return text.strip()
|
||||
@@ -1 +1,721 @@
|
||||
"""Recommendation worker - generates explainable trade recommendations from trend data."""
|
||||
"""Recommendation worker - generates explainable trade recommendations from trend data.
|
||||
|
||||
Fetches the latest trend summaries for a ticker, evaluates eligibility
|
||||
using deterministic rules, builds Recommendation objects with thesis
|
||||
and evidence citations, and persists them to PostgreSQL.
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.recommendation.eligibility import (
|
||||
EligibilityConfig,
|
||||
EligibilityResult,
|
||||
evaluate_eligibility,
|
||||
)
|
||||
from services.recommendation.suppression import (
|
||||
DataQualityContext,
|
||||
SuppressionConfig,
|
||||
SuppressionResult,
|
||||
evaluate_suppression,
|
||||
)
|
||||
from services.recommendation.thesis_llm import (
|
||||
THESIS_PROMPT_VERSION,
|
||||
rewrite_thesis_with_llm,
|
||||
)
|
||||
from minio import Minio
|
||||
|
||||
from services.lake_publisher.worker import publish_recommendation_facts
|
||||
from services.shared.config import OllamaConfig
|
||||
from services.shared.schemas import (
|
||||
ModelMetadata,
|
||||
PositionSizing,
|
||||
Recommendation,
|
||||
RecommendationMode,
|
||||
TrendDirection,
|
||||
TrendSummary,
|
||||
TrendWindow,
|
||||
)
|
||||
from services.shared.metrics import (
|
||||
RECOMMENDATION_CONFIDENCE,
|
||||
RECOMMENDATION_GENERATED,
|
||||
RECOMMENDATION_SUPPRESSED,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data access: data quality context and latest trend summary for a ticker + window
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch data quality context for suppression checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DATA_QUALITY_QUERY = """
|
||||
SELECT
|
||||
COUNT(*) AS total_documents,
|
||||
COUNT(*) FILTER (WHERE di.validation_status = 'valid') AS valid_documents,
|
||||
COUNT(*) FILTER (WHERE di.validation_status = 'failed') AS failed_documents,
|
||||
AVG(di.confidence) FILTER (WHERE di.validation_status = 'valid') AS avg_extraction_confidence,
|
||||
MAX(d.published_at) AS newest_evidence_at,
|
||||
ARRAY_AGG(DISTINCT s.source_class) FILTER (WHERE s.source_class IS NOT NULL) AS source_types
|
||||
FROM documents d
|
||||
JOIN document_intelligence di ON di.document_id = d.id
|
||||
LEFT JOIN sources s ON d.source_id = s.id
|
||||
WHERE d.id = ANY(
|
||||
SELECT UNNEST(
|
||||
COALESCE(tw.top_supporting_evidence, '[]'::jsonb)
|
||||
|| COALESCE(tw.top_opposing_evidence, '[]'::jsonb)
|
||||
)::uuid
|
||||
FROM trend_windows tw
|
||||
WHERE tw.entity_id = $1 AND tw.window = $2
|
||||
ORDER BY tw.generated_at DESC
|
||||
LIMIT 1
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
async def fetch_data_quality_context(
    pool: asyncpg.Pool,
    ticker: str,
    window: str,
) -> DataQualityContext | None:
    """Load data quality metrics for the documents behind a ticker's trend.

    Best-effort lookup: any exception raised while querying or converting
    the row is logged as a warning and reported as ``None``. ``None`` is
    also returned when no row comes back or the row counts zero documents,
    so the suppression module can fall back to summary-based estimation.
    """
    try:
        record = await pool.fetchrow(_DATA_QUALITY_QUERY, ticker, window)
        has_documents = record is not None and record["total_documents"] != 0
        if not has_documents:
            return None

        # NULL aggregates (no matching rows for a FILTER) collapse to 0 / empty.
        return DataQualityContext(
            total_documents=int(record["total_documents"]),
            valid_documents=int(record["valid_documents"] or 0),
            failed_documents=int(record["failed_documents"] or 0),
            avg_extraction_confidence=float(record["avg_extraction_confidence"] or 0.0),
            newest_evidence_at=record["newest_evidence_at"],
            source_types=set(record["source_types"] or ()),
        )
    except Exception:
        logger.warning(
            "Failed to fetch data quality context for %s/%s — will use summary fallback",
            ticker,
            window,
            exc_info=True,
        )
        return None
|
||||
|
||||
|
||||
_LATEST_TREND_QUERY = """
|
||||
SELECT
|
||||
entity_type, entity_id, window, trend_direction, trend_strength,
|
||||
confidence, top_supporting_evidence, top_opposing_evidence,
|
||||
dominant_catalysts, material_risks, contradiction_score,
|
||||
disagreement_details, market_context, generated_at
|
||||
FROM trend_windows
|
||||
WHERE entity_id = $1 AND window = $2
|
||||
ORDER BY generated_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
def _parse_trend_row(row: asyncpg.Record) -> TrendSummary:
    """Build a TrendSummary from one trend_windows record.

    The four list-valued columns may arrive either already decoded or as
    JSON text; text is decoded with ``json.loads`` and empty/NULL values
    become ``[]``. A NULL ``contradiction_score`` becomes ``0.0``.
    """

    def _as_list(column: str):
        # Decode JSON text when the driver hands the column back as a string.
        raw = row[column]
        decoded = json.loads(raw) if isinstance(raw, str) else raw
        return decoded or []

    # Decode all list columns up front, in a fixed order, before any enum or
    # float conversion below is attempted.
    supporting, opposing, catalysts, risks = (
        _as_list(column)
        for column in (
            "top_supporting_evidence",
            "top_opposing_evidence",
            "dominant_catalysts",
            "material_risks",
        )
    )

    return TrendSummary(
        entity_type=row["entity_type"],
        entity_id=row["entity_id"],
        window=TrendWindow(row["window"]),
        trend_direction=TrendDirection(row["trend_direction"]),
        trend_strength=float(row["trend_strength"]),
        confidence=float(row["confidence"]),
        top_supporting_evidence=supporting,
        top_opposing_evidence=opposing,
        dominant_catalysts=catalysts,
        material_risks=risks,
        contradiction_score=float(row["contradiction_score"] or 0.0),
        generated_at=row["generated_at"],
    )
|
||||
|
||||
|
||||
async def fetch_latest_trend(
    pool: asyncpg.Pool,
    ticker: str,
    window: str,
) -> TrendSummary | None:
    """Return the newest trend summary for ``ticker``/``window``, or None if there is none."""
    record = await pool.fetchrow(_LATEST_TREND_QUERY, ticker, window)
    return None if record is None else _parse_trend_row(record)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build thesis from trend summary (deterministic, no LLM)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_thesis(
    summary: TrendSummary,
    result: EligibilityResult,
) -> str:
    """Compose the deterministic thesis sentence-by-sentence from trend data.

    Descriptive sentences (trend, catalysts, disagreement, risks, evidence
    counts) come first; the prescriptive recommendation is always the last
    sentence, keeping the two separated (Requirement 7.2). No LLM is
    involved; the optional LLM wording layer is applied elsewhere.
    """
    trend_sentence = (
        f"{summary.entity_id} shows a {summary.trend_direction.value} trend "
        f"over the {summary.window.value} window "
        f"with strength {summary.trend_strength:.2f} "
        f"and confidence {summary.confidence:.2f}."
    )
    sentences: list[str] = [trend_sentence]

    # At most the first three catalysts are named.
    if summary.dominant_catalysts:
        sentences.append(
            "Dominant catalysts: " + ", ".join(summary.dominant_catalysts[:3]) + "."
        )

    # Only call out disagreement once it exceeds the 0.15 noise floor.
    if summary.contradiction_score > 0.15:
        sentences.append(
            f"Notable signal disagreement detected (contradiction score: {summary.contradiction_score:.2f})."
        )

    # At most the first two risks are named.
    if summary.material_risks:
        sentences.append("Key risks: " + "; ".join(summary.material_risks[:2]) + ".")

    n_supporting = len(summary.top_supporting_evidence)
    n_opposing = len(summary.top_opposing_evidence)
    sentences.append(
        f"Based on {n_supporting} supporting and {n_opposing} opposing evidence documents."
    )

    # Prescriptive part, e.g. "Recommendation: BUY (paper eligible)."
    action_label = result.action.value.upper()
    mode_label = result.mode.value.replace("_", " ")
    sentences.append(f"Recommendation: {action_label} ({mode_label}).")

    return " ".join(sentences)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build risk classification (Requirement 7.2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def classify_risk(
    summary: TrendSummary,
    result: EligibilityResult,
) -> str:
    """Label the signal's risk as low, moderate, high, or very_high.

    A penalty score is accumulated from four sources and then bucketed:
    contradiction (x2.0), missing confidence (x1.5), thin evidence
    (+1.0 below 3 documents, +0.5 below 5), and +0.5 per rejection reason.
    Buckets: >= 3.0 very_high, >= 2.0 high, >= 1.0 moderate, otherwise low.
    """
    evidence_total = len(summary.top_supporting_evidence) + len(
        summary.top_opposing_evidence
    )
    if evidence_total < 3:
        thin_evidence_penalty = 1.0
    elif evidence_total < 5:
        thin_evidence_penalty = 0.5
    else:
        thin_evidence_penalty = 0.0

    # Terms are added in a fixed order so the float result is reproducible.
    penalty = (
        0.0
        + summary.contradiction_score * 2.0
        + (1.0 - summary.confidence) * 1.5
        + thin_evidence_penalty
        + len(result.rejection_reasons) * 0.5
    )

    # Highest threshold first; the first one reached wins.
    for floor, label in ((3.0, "very_high"), (2.0, "high"), (1.0, "moderate")):
        if penalty >= floor:
            return label
    return "low"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build Recommendation from eligibility result
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_recommendation(
    summary: TrendSummary,
    result: EligibilityResult,
    reference_time: datetime | None = None,
    llm_thesis: str | None = None,
    suppression_result: SuppressionResult | None = None,
) -> Recommendation:
    """Assemble the Recommendation for a trend summary and its eligibility result.

    Evidence references are the supporting ids followed by the opposing ids,
    so the full decision trace travels with the recommendation
    (Requirement 8.3).

    The thesis is ``[risk:<class>] <body>``. The body is ``llm_thesis`` when
    one is given (non-empty), otherwise the deterministic thesis. When
    ``suppression_result`` reports suppression, a ``[SUPPRESSED: ...]`` note
    with the quality score and reasons is appended to the body for audit
    visibility (Requirement 7.4).

    ``reference_time`` defaults to the current UTC time and becomes
    ``generated_at``. Model metadata records whether the body came from the
    LLM wording layer or from the deterministic rules.
    """
    generated_at = (
        reference_time if reference_time is not None else datetime.now(timezone.utc)
    )

    # Supporting evidence first, then opposing.
    citations = [*summary.top_supporting_evidence, *summary.top_opposing_evidence]

    fallback_thesis = build_thesis(summary, result)
    risk_label = classify_risk(summary, result)

    body = llm_thesis or fallback_thesis

    if suppression_result and suppression_result.suppressed:
        reasons_text = ", ".join(reason.value for reason in suppression_result.reasons)
        body = (
            body
            + " [SUPPRESSED: data quality below threshold "
            + f"(score={suppression_result.data_quality_score:.2f}, "
            + f"reasons={reasons_text})]"
        )

    # Provenance of the thesis wording, kept for audit.
    if llm_thesis:
        metadata = ModelMetadata(
            provider="ollama",
            model_name="thesis-rewrite",
            prompt_version=THESIS_PROMPT_VERSION,
            schema_version="1.0.0",
        )
    else:
        metadata = ModelMetadata(
            provider="deterministic",
            model_name="eligibility-v1",
            prompt_version="",
            schema_version="1.0.0",
        )

    sizing = PositionSizing(
        portfolio_pct=result.position_sizing.portfolio_pct,
        max_loss_pct=result.position_sizing.max_loss_pct,
    )

    return Recommendation(
        ticker=summary.entity_id,
        action=result.action,
        mode=result.mode,
        confidence=summary.confidence,
        time_horizon=result.time_horizon,
        thesis=f"[risk:{risk_label}] {body}",
        invalidation_conditions=result.invalidation_conditions,
        position_sizing=sizing,
        evidence_refs=citations,
        model_metadata=metadata,
        generated_at=generated_at,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist recommendation to PostgreSQL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_INSERT_RECOMMENDATION = """
|
||||
INSERT INTO recommendations (
|
||||
ticker, action, mode, confidence, time_horizon,
|
||||
thesis, invalidation_conditions, portfolio_pct, max_loss_pct,
|
||||
model_version, model_provider, prompt_version, schema_version,
|
||||
risk_classification, generated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5,
|
||||
$6, $7::jsonb, $8, $9,
|
||||
$10, $11, $12, $13,
|
||||
$14, $15
|
||||
)
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
_INSERT_REC_EVIDENCE = """
|
||||
INSERT INTO recommendation_evidence (
|
||||
recommendation_id, document_id, evidence_type, weight
|
||||
) VALUES ($1, $2::uuid, $3, $4)
|
||||
"""
|
||||
|
||||
_INSERT_RISK_EVALUATION = """
|
||||
INSERT INTO risk_evaluations (
|
||||
recommendation_id, eligible, allowed_mode, rejection_reasons, risk_checks, evaluated_at
|
||||
) VALUES ($1::uuid, $2, $3, $4::jsonb, $5::jsonb, $6)
|
||||
"""
|
||||
|
||||
_FETCH_RECOMMENDATION = """
|
||||
SELECT
|
||||
id, ticker, action, mode, confidence, time_horizon,
|
||||
thesis, invalidation_conditions, portfolio_pct, max_loss_pct,
|
||||
model_version, model_provider, prompt_version, schema_version,
|
||||
risk_classification, generated_at
|
||||
FROM recommendations
|
||||
WHERE id = $1::uuid
|
||||
"""
|
||||
|
||||
_FETCH_REC_EVIDENCE = """
|
||||
SELECT document_id, evidence_type, weight
|
||||
FROM recommendation_evidence
|
||||
WHERE recommendation_id = $1::uuid
|
||||
ORDER BY evidence_type, weight DESC
|
||||
"""
|
||||
|
||||
_FETCH_LATEST_RECS_FOR_TICKER = """
|
||||
SELECT
|
||||
id, ticker, action, mode, confidence, time_horizon,
|
||||
thesis, invalidation_conditions, portfolio_pct, max_loss_pct,
|
||||
model_version, model_provider, prompt_version, schema_version,
|
||||
risk_classification, generated_at
|
||||
FROM recommendations
|
||||
WHERE ticker = $1
|
||||
ORDER BY generated_at DESC
|
||||
LIMIT $2
|
||||
"""
|
||||
|
||||
|
||||
def _extract_risk_classification(thesis: str) -> str:
|
||||
"""Extract the risk classification from the thesis prefix."""
|
||||
if thesis.startswith("[risk:"):
|
||||
end = thesis.find("]")
|
||||
if end > 6:
|
||||
return thesis[6:end]
|
||||
return "moderate"
|
||||
|
||||
|
||||
async def persist_recommendation(
    pool: asyncpg.Pool,
    rec: Recommendation,
    supporting_ids: list[str],
    opposing_ids: list[str],
    eligibility_result: EligibilityResult | None = None,
) -> str:
    """Insert a recommendation, its evidence citations, and its risk evaluation.

    Persists the full model metadata and risk classification for the audit
    trail (Requirement 8.3). When ``eligibility_result`` is provided, the
    eligibility decision is also written to the risk_evaluations table.

    All writes run on one connection inside a single transaction, so a
    failure while inserting evidence or the risk evaluation rolls back the
    recommendation row as well instead of leaving a partial audit record.

    Evidence weights decay with rank within each list:
    ``round(1 / (1 + 0.1 * position), 4)``, i.e. 1.0, 0.9091, 0.8333, ...

    Returns the recommendation UUID as a string. Database errors propagate.
    """
    risk_class = _extract_risk_classification(rec.thesis)

    # (document_id, evidence_type, weight) in insertion order: supporting
    # documents first, then opposing, each ranked independently.
    ranked_evidence: list[tuple[str, str, float]] = [
        (doc_id, evidence_type, round(1.0 / (1.0 + idx * 0.1), 4))
        for evidence_type, doc_ids in (
            ("supporting", supporting_ids),
            ("opposing", opposing_ids),
        )
        for idx, doc_id in enumerate(doc_ids)
    ]

    async with pool.acquire() as conn:
        async with conn.transaction():
            row = await conn.fetchrow(
                _INSERT_RECOMMENDATION,
                rec.ticker,
                rec.action.value,
                rec.mode.value,
                rec.confidence,
                rec.time_horizon,
                rec.thesis,
                json.dumps(rec.invalidation_conditions),
                rec.position_sizing.portfolio_pct,
                rec.position_sizing.max_loss_pct,
                rec.model_metadata.model_name,
                rec.model_metadata.provider,
                rec.model_metadata.prompt_version,
                rec.model_metadata.schema_version,
                risk_class,
                rec.generated_at,
            )
            rec_id = str(row["id"])

            if ranked_evidence:
                await conn.executemany(
                    _INSERT_REC_EVIDENCE,
                    [
                        (rec_id, doc_id, evidence_type, weight)
                        for doc_id, evidence_type, weight in ranked_evidence
                    ],
                )

            # Persist the eligibility/risk evaluation for the audit trail.
            if eligibility_result is not None:
                rejection_reasons_json = json.dumps(
                    [r.value for r in eligibility_result.rejection_reasons]
                )
                risk_checks = {
                    "time_horizon": eligibility_result.time_horizon,
                    "position_sizing": {
                        "portfolio_pct": eligibility_result.position_sizing.portfolio_pct,
                        "max_loss_pct": eligibility_result.position_sizing.max_loss_pct,
                    },
                    "invalidation_conditions": eligibility_result.invalidation_conditions,
                    "risk_classification": risk_class,
                }
                await conn.execute(
                    _INSERT_RISK_EVALUATION,
                    rec_id,
                    eligibility_result.eligible,
                    eligibility_result.mode.value,
                    rejection_reasons_json,
                    json.dumps(risk_checks),
                    rec.generated_at,
                )

    return rec_id
|
||||
|
||||
|
||||
async def fetch_recommendation_by_id(
    pool: asyncpg.Pool,
    recommendation_id: str,
) -> dict[str, object] | None:
    """Load one persisted recommendation together with its evidence citations.

    Returns the recommendation columns as a dict with an extra ``"evidence"``
    list of ``{"document_id", "evidence_type", "weight"}`` entries, or None
    when no recommendation has that id. ``invalidation_conditions`` is
    JSON-decoded when it comes back as text.
    """
    record = await pool.fetchrow(_FETCH_RECOMMENDATION, recommendation_id)
    if record is None:
        return None

    payload = dict(record)

    # The JSONB column may come back as raw JSON text.
    conditions = payload.get("invalidation_conditions")
    if isinstance(conditions, str):
        payload["invalidation_conditions"] = json.loads(conditions)

    citations = await pool.fetch(_FETCH_REC_EVIDENCE, recommendation_id)
    evidence: list[dict[str, object]] = []
    for citation in citations:
        evidence.append(
            {
                "document_id": str(citation["document_id"]),
                "evidence_type": citation["evidence_type"],
                "weight": float(citation["weight"]),
            }
        )
    payload["evidence"] = evidence

    return payload
|
||||
|
||||
|
||||
async def fetch_latest_recommendations(
    pool: asyncpg.Pool,
    ticker: str,
    limit: int = 10,
) -> list[dict[str, object]]:
    """Load up to ``limit`` of the newest recommendations for ``ticker``.

    Each entry is the recommendation row as a dict, with
    ``invalidation_conditions`` JSON-decoded when it comes back as text.
    Evidence citations are not included; use fetch_recommendation_by_id
    for the full detail of a single recommendation.
    """

    def _as_dict(record) -> dict[str, object]:
        # The JSONB column may come back as raw JSON text.
        item = dict(record)
        conditions = item.get("invalidation_conditions")
        if isinstance(conditions, str):
            item["invalidation_conditions"] = json.loads(conditions)
        return item

    records = await pool.fetch(_FETCH_LATEST_RECS_FOR_TICKER, ticker, limit)
    return [_as_dict(record) for record in records]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point: generate recommendation for a ticker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_recommendation(
    pool: asyncpg.Pool,
    ticker: str,
    window: str = TrendWindow.SEVEN_DAY.value,
    config: EligibilityConfig | None = None,
    reference_time: datetime | None = None,
    ollama_config: OllamaConfig | None = None,
    suppression_config: SuppressionConfig | None = None,
    minio_client: Minio | None = None,
) -> Recommendation | None:
    """Generate and persist a recommendation from a ticker's latest trend.

    Pipeline:
      1. Fetch the latest trend summary for the ticker + window; stop and
         return None when there is none.
      2. Evaluate data quality suppression (Requirement 7.4).
      3. Evaluate eligibility with the deterministic rules. A suppressed
         result is rebuilt as ineligible with mode forced to informational.
      4. If ``ollama_config`` is given, ask the LLM wording layer to rewrite
         the deterministic thesis; an unchanged text counts as "no rewrite".
      5. Build the Recommendation.
      6. Persist it with evidence citations and the risk evaluation.
      7. If ``minio_client`` is given, publish analytical facts
         (Requirement 9.4); a publication failure is logged, not raised.

    Finally the outcome is logged and Prometheus metrics are updated.
    Defaults: ``reference_time`` is the current UTC time; ``config`` and
    ``suppression_config`` fall back to their default-constructed configs.
    """
    as_of = reference_time if reference_time is not None else datetime.now(timezone.utc)

    eligibility_cfg = config or EligibilityConfig()
    suppression_cfg = suppression_config or SuppressionConfig()

    # 1. Latest trend
    trend = await fetch_latest_trend(pool, ticker, window)
    if trend is None:
        logger.info("No trend data for %s/%s — skipping recommendation", ticker, window)
        return None

    # 2. Data quality suppression (Requirement 7.4)
    quality = await fetch_data_quality_context(pool, ticker, window)
    suppression = evaluate_suppression(
        trend,
        quality_ctx=quality,
        config=suppression_cfg,
        reference_time=as_of,
    )

    # 3. Eligibility; suppression downgrades the decision to informational
    decision = evaluate_eligibility(trend, eligibility_cfg)
    if suppression.suppressed:
        decision = EligibilityResult(
            eligible=False,
            action=decision.action,
            mode=RecommendationMode.INFORMATIONAL,
            position_sizing=decision.position_sizing,
            rejection_reasons=decision.rejection_reasons,
            time_horizon=decision.time_horizon,
            invalidation_conditions=decision.invalidation_conditions,
        )

    # 4. Optional LLM thesis rewrite
    rewritten_thesis: str | None = None
    if ollama_config is not None:
        baseline_thesis = build_thesis(trend, decision)
        candidate = await rewrite_thesis_with_llm(
            deterministic_thesis=baseline_thesis,
            summary=trend,
            config=ollama_config,
        )
        # Getting the deterministic text back means the rewrite was a no-op
        # (the fallback was used), so it is not recorded as LLM-generated.
        rewritten_thesis = None if candidate == baseline_thesis else candidate

    # 5. Build recommendation
    rec = build_recommendation(
        trend,
        decision,
        as_of,
        llm_thesis=rewritten_thesis,
        suppression_result=suppression,
    )

    # 6. Persist recommendation, evidence citations, and risk evaluation
    rec_id = await persist_recommendation(
        pool,
        rec,
        supporting_ids=list(trend.top_supporting_evidence),
        opposing_ids=list(trend.top_opposing_evidence),
        eligibility_result=decision,
    )

    # 7. Publish prediction facts to analytical tables (Requirement 9.4)
    if minio_client is not None:
        try:
            lake_refs = publish_recommendation_facts(
                minio_client,
                rec,
                trend_direction=trend.trend_direction.value,
                trend_strength=trend.trend_strength,
            )
            logger.info("Published analytical facts for %s: %s", ticker, lake_refs)
        except Exception:
            # Best effort: the recommendation is already persisted.
            logger.warning(
                "Failed to publish analytical facts for %s/%s — recommendation "
                "persisted but lake publication failed",
                ticker,
                rec_id,
                exc_info=True,
            )

    used_llm = rewritten_thesis is not None
    logger.info(
        "Generated recommendation %s for %s: action=%s mode=%s confidence=%.3f "
        "eligible=%s suppressed=%s quality_score=%.3f llm_thesis=%s",
        rec_id,
        ticker,
        rec.action.value,
        rec.mode.value,
        rec.confidence,
        decision.eligible,
        suppression.suppressed,
        suppression.data_quality_score,
        used_llm,
    )

    # Prometheus metrics
    RECOMMENDATION_GENERATED.labels(action=rec.action.value, mode=rec.mode.value).inc()
    RECOMMENDATION_CONFIDENCE.observe(rec.confidence)
    if suppression.suppressed:
        RECOMMENDATION_SUPPRESSED.inc()

    return rec
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch: generate recommendations for multiple tickers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_recommendations_batch(
    pool: asyncpg.Pool,
    tickers: list[str],
    window: str = TrendWindow.SEVEN_DAY.value,
    config: EligibilityConfig | None = None,
    ollama_config: OllamaConfig | None = None,
    suppression_config: SuppressionConfig | None = None,
    minio_client: Minio | None = None,
) -> list[Recommendation]:
    """Generate recommendations for several tickers, one after another.

    Every ticker is evaluated against the same reference time, captured once
    at the start of the batch. Tickers for which generate_recommendation
    returns None (no trend data) are left out of the result. An exception
    raised for any ticker propagates and ends the batch.

    If ``ollama_config`` is provided, each recommendation's thesis goes
    through the LLM wording layer.
    """
    batch_time = datetime.now(timezone.utc)
    produced: list[Recommendation] = []

    for symbol in tickers:
        outcome = await generate_recommendation(
            pool,
            symbol,
            window,
            config,
            batch_time,
            ollama_config=ollama_config,
            suppression_config=suppression_config,
            minio_client=minio_client,
        )
        if outcome is None:
            continue
        produced.append(outcome)

    logger.info(
        "Batch recommendation: %d/%d tickers produced recommendations",
        len(produced),
        len(tickers),
    )
    return produced
|
||||
|
||||
Reference in New Issue
Block a user