"""Tests for extraction model performance metrics collection. Validates that collect_metrics correctly computes metrics from ExtractionResponse objects for both successful and failed extractions. Requirements: 5.2, 5.4, 12.1, 12.2 """ from __future__ import annotations from services.extractor.client import ExtractionAttempt, ExtractionResponse from services.extractor.metrics import collect_metrics from services.extractor.schemas import ExtractionResult, ValidationReport def _make_valid_result() -> ExtractionResult: return ExtractionResult.model_validate({ "summary": "Apple beat earnings expectations.", "companies": [ { "ticker": "AAPL", "company_name": "Apple Inc.", "relevance": 0.95, "sentiment": "positive", "impact_score": 0.7, "impact_horizon": "1d_30d", "catalyst_type": "earnings", "key_facts": ["Revenue up 12%"], "risks": [], "evidence_spans": ["Apple beat expectations"], } ], "macro_themes": ["ai_capex"], "novelty_score": 0.6, "confidence": 0.85, "extraction_warnings": [], }) def _make_success_response() -> ExtractionResponse: result = _make_valid_result() validation = ValidationReport(valid=True, errors=[], warnings=["low_novelty"], parsed=result) attempt = ExtractionAttempt( raw_output=result.model_dump_json(), validation=validation, error=None, duration_ms=500, model="test-model", ) return ExtractionResponse( success=True, result=result, attempts=[attempt], prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"}, model="test-model", total_duration_ms=500, ) def _make_failed_response_with_retries() -> ExtractionResponse: attempt1 = ExtractionAttempt( raw_output="bad json", validation=None, error="invalid_json", duration_ms=200, model="test-model", ) attempt2 = ExtractionAttempt( raw_output="still bad output here", validation=ValidationReport( valid=False, errors=["schema_fail", "missing_companies"], warnings=["truncated"], ), error="schema_fail; missing_companies", duration_ms=300, model="test-model", ) return ExtractionResponse( success=False, result=None, attempts=[attempt1, attempt2], prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"}, model="test-model", total_duration_ms=500, ) def test_collect_metrics_success(): """Successful extraction produces correct metrics.""" resp = _make_success_response() m = collect_metrics( resp, document_id="doc-1", ticker="AAPL", document_text_length=4000, ) assert m.document_id == "doc-1" assert m.ticker == "AAPL" assert m.model_name == "test-model" assert m.prompt_version == "document-intel-v1" assert m.schema_version == "2.0.0" assert m.success is True assert m.attempt_count == 1 assert m.total_duration_ms == 500 assert m.first_attempt_duration_ms == 500 assert m.final_attempt_duration_ms == 500 assert m.confidence == 0.85 assert m.validation_status == "valid" assert m.validation_error_count == 0 assert m.validation_warning_count == 1 assert m.retry_count == 0 assert m.input_token_estimate == 1000 # 4000 / 4 assert m.output_token_estimate > 0 assert m.company_count == 1 def test_collect_metrics_failed_with_retries(): """Failed extraction with retries produces correct metrics.""" resp = _make_failed_response_with_retries() m = collect_metrics( resp, document_id="doc-2", ticker="MSFT", document_text_length=2000, ) assert m.success is False assert m.attempt_count == 2 assert m.retry_count == 1 assert m.first_attempt_duration_ms == 200 assert m.final_attempt_duration_ms == 300 assert m.total_duration_ms == 500 assert m.validation_status == "failed" assert m.validation_error_count == 2 assert m.validation_warning_count == 1 assert "schema_fail" in m.validation_errors assert m.confidence == 0.0 assert m.company_count == 0 assert m.input_token_estimate == 500 # 2000 / 4 def test_collect_metrics_empty_attempts(): """Response with no attempts produces safe defaults.""" resp = ExtractionResponse( success=False, result=None, attempts=[], prompt_metadata={}, model="test-model", total_duration_ms=0, ) m = collect_metrics(resp, document_id="doc-3") assert m.attempt_count == 0 assert m.retry_count == 0 assert m.first_attempt_duration_ms == 0 assert m.final_attempt_duration_ms == 0 assert m.validation_status == "unknown" assert m.confidence == 0.0 def test_collect_metrics_no_document_text_length(): """Zero document text length produces zero token estimate.""" resp = _make_success_response() m = collect_metrics(resp, document_text_length=0) assert m.input_token_estimate == 0