fix: deduplicate recommendations and widen position sizing range

- Add dedup check in recommendation worker: skip generation when latest rec for same ticker+window has identical action/mode/confidence - Widen position sizing range (1-10% portfolio, 0.3-2% max loss) and factor in trend strength + evidence count for differentiated sizing - API returns only latest recommendation per ticker by default (DISTINCT ON) to eliminate duplicate rows in the frontend list view
2026-04-17 00:15:32 +00:00
parent 29f46d387c
commit f11aa0a1ee
3 changed files with 134 additions and 35 deletions
@@ -66,17 +66,17 @@ class EligibilityConfig:

    # --- Position sizing rules (Requirement 7.3) ---
    # Base portfolio allocation percentage
-    base_portfolio_pct: float = 0.02
+    base_portfolio_pct: float = 0.01
    # Maximum portfolio allocation percentage
-    max_portfolio_pct: float = 0.05
+    max_portfolio_pct: float = 0.10
    # Base max loss percentage
-    base_max_loss_pct: float = 0.005
+    base_max_loss_pct: float = 0.003
    # Maximum max loss percentage
-    max_max_loss_pct: float = 0.01
+    max_max_loss_pct: float = 0.02
    # Confidence scaling: higher confidence → larger position (linear)
-    confidence_sizing_weight: float = 0.5
+    confidence_sizing_weight: float = 0.8
    # Contradiction penalty: higher contradiction → smaller position
-    contradiction_sizing_penalty: float = 0.3
+    contradiction_sizing_penalty: float = 0.5


 DEFAULT_ELIGIBILITY_CONFIG = EligibilityConfig()
@@ -216,30 +216,40 @@ def _compute_position_sizing(
 ) -> PositionSizing:
    """Compute position sizing guidance from portfolio rules and signal quality.

-    Higher confidence → larger allocation (up to max).
+    Higher confidence and trend strength → larger allocation (up to max).
    Higher contradiction → smaller allocation (penalty).
+    Low evidence count further reduces allocation.
    """
-    # Start from base allocation
-    confidence_scale = config.base_portfolio_pct + (
-        config.confidence_sizing_weight
-        * summary.confidence
-        * (config.max_portfolio_pct - config.base_portfolio_pct)
-    )
+    # Confidence-based scaling over the full range
+    confidence_factor = config.confidence_sizing_weight * summary.confidence
+    # Trend strength multiplier — stronger trends justify larger positions
+    strength_factor = 0.5 + 0.5 * summary.trend_strength  # range [0.5, 1.0]
+
+    portfolio_range = config.max_portfolio_pct - config.base_portfolio_pct
+    raw_portfolio = config.base_portfolio_pct + confidence_factor * strength_factor * portfolio_range

    # Apply contradiction penalty
    contradiction_penalty = config.contradiction_sizing_penalty * summary.contradiction_score
-    portfolio_pct = confidence_scale * (1.0 - contradiction_penalty)
+    portfolio_pct = raw_portfolio * (1.0 - contradiction_penalty)
+
+    # Evidence count penalty — fewer sources = less confidence in sizing
+    evidence_count = len(summary.top_supporting_evidence) + len(summary.top_opposing_evidence)
+    if evidence_count < 3:
+        portfolio_pct *= 0.5
+    elif evidence_count < 5:
+        portfolio_pct *= 0.75

    # Clamp to bounds
    portfolio_pct = max(config.base_portfolio_pct * 0.5, min(portfolio_pct, config.max_portfolio_pct))

    # Max loss scales similarly
-    loss_scale = config.base_max_loss_pct + (
-        config.confidence_sizing_weight
-        * summary.confidence
-        * (config.max_max_loss_pct - config.base_max_loss_pct)
-    )
-    max_loss_pct = loss_scale * (1.0 - contradiction_penalty)
+    loss_range = config.max_max_loss_pct - config.base_max_loss_pct
+    raw_loss = config.base_max_loss_pct + confidence_factor * strength_factor * loss_range
+    max_loss_pct = raw_loss * (1.0 - contradiction_penalty)
+    if evidence_count < 3:
+        max_loss_pct *= 0.5
+    elif evidence_count < 5:
+        max_loss_pct *= 0.75
    max_loss_pct = max(config.base_max_loss_pct * 0.5, min(max_loss_pct, config.max_max_loss_pct))

    return PositionSizing(
@@ -656,6 +656,64 @@ async def fetch_latest_recommendations(
 # ---------------------------------------------------------------------------


+_DEDUP_CHECK_QUERY = """
+SELECT r.id, r.confidence, r.action, r.mode, r.generated_at
+FROM recommendations r
+WHERE r.ticker = $1
+  AND r.time_horizon LIKE $2 || '%'
+ORDER BY r.generated_at DESC
+LIMIT 1
+"""
+
+
+async def _is_duplicate_recommendation(
+    pool: asyncpg.Pool,
+    ticker: str,
+    summary: TrendSummary,
+    result: EligibilityResult,
+) -> bool:
+    """Check if the latest recommendation for this ticker+window is effectively identical.
+
+    Compares confidence, action, and mode. If they match, the underlying
+    trend data hasn't changed meaningfully and we skip regeneration.
+    """
+    horizon_prefix = _map_time_horizon_prefix(summary.window.value)
+    row = await pool.fetchrow(_DEDUP_CHECK_QUERY, ticker, horizon_prefix)
+    if row is None:
+        return False
+
+    # If the previous recommendation has the same action, mode, and confidence
+    # (within a small tolerance), it's a duplicate
+    prev_confidence = float(row["confidence"])
+    prev_action = row["action"]
+    prev_mode = row["mode"]
+
+    same_action = prev_action == result.action.value
+    same_mode = prev_mode == result.mode.value
+    same_confidence = abs(prev_confidence - summary.confidence) < 0.01
+
+    if same_action and same_mode and same_confidence:
+        logger.info(
+            "Skipping duplicate recommendation for %s — action=%s mode=%s "
+            "confidence=%.3f matches previous",
+            ticker, prev_action, prev_mode, prev_confidence,
+        )
+        return True
+    return False
+
+
+def _map_time_horizon_prefix(window: str) -> str:
+    """Map window to the time_horizon prefix for dedup matching."""
+    mapping = {
+        "intraday": "intraday",
+        "1d": "swing_1d",
+        "7d": "swing_1d",
+        "30d": "position_10d",
+        "90d": "position_30d",
+    }
+    return mapping.get(window, "window_")
+
+
 async def generate_recommendation(
    pool: asyncpg.Pool,
    ticker: str,
@@ -670,6 +728,7 @@ async def generate_recommendation(

    Steps:
    1. Fetch the latest trend summary for the ticker + window.
+    1b. Skip if the latest recommendation for this ticker is effectively identical.
    2. Fetch the latest trend projection (Requirement 12.8, 12.9).
    3. Evaluate data quality suppression (Requirement 7.4).
    4. Evaluate eligibility using deterministic rules.
@@ -678,7 +737,8 @@ async def generate_recommendation(
         rewritten into analyst-quality prose via the LLM wording layer.
    6. Persist the recommendation and evidence citations.

-    Returns the Recommendation, or None if no trend data exists.
+    Returns the Recommendation, or None if no trend data exists or recommendation
+    is a duplicate of the most recent one.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
@@ -692,6 +752,11 @@ async def generate_recommendation(
        logger.info("No trend data for %s/%s — skipping recommendation", ticker, window)
        return None

+    # 1b. Check for duplicate: evaluate eligibility early for dedup comparison
+    preliminary_result = evaluate_eligibility(summary, cfg)
+    if await _is_duplicate_recommendation(pool, ticker, summary, preliminary_result):
+        return None
+
    # 2. Fetch latest trend projection (Requirement 12.8, 12.9)
    projection = await fetch_latest_projection(pool, ticker, window)
    # Exclude low-confidence projections from influencing recommendation
@@ -708,8 +773,8 @@ async def generate_recommendation(
        summary, quality_ctx=quality_ctx, config=sup_cfg, reference_time=reference_time,
    )

-    # 4. Evaluate eligibility
-    result = evaluate_eligibility(summary, cfg)
+    # 4. Evaluate eligibility (use preliminary result, already computed for dedup)
+    result = preliminary_result

    # Apply suppression: force mode to informational if suppressed
    if suppression.suppressed: