phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
"""Tests for extraction model performance metrics collection.
|
||||
|
||||
Validates that collect_metrics correctly computes metrics from
|
||||
ExtractionResponse objects for both successful and failed extractions.
|
||||
|
||||
Requirements: 5.2, 5.4, 12.1, 12.2
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from services.extractor.client import ExtractionAttempt, ExtractionResponse
|
||||
from services.extractor.metrics import collect_metrics
|
||||
from services.extractor.schemas import ExtractionResult, ValidationReport
|
||||
|
||||
|
||||
def _make_valid_result() -> ExtractionResult:
    """Build an ExtractionResult fixture containing a single AAPL company entry.

    The payload is run through ``ExtractionResult.model_validate`` so the
    fixture is subject to the same schema validation as any other result.
    """
    apple_entry = {
        "ticker": "AAPL",
        "company_name": "Apple Inc.",
        "relevance": 0.95,
        "sentiment": "positive",
        "impact_score": 0.7,
        "impact_horizon": "1d_30d",
        "catalyst_type": "earnings",
        "key_facts": ["Revenue up 12%"],
        "risks": [],
        "evidence_spans": ["Apple beat expectations"],
    }
    payload = {
        "summary": "Apple beat earnings expectations.",
        "companies": [apple_entry],
        "macro_themes": ["ai_capex"],
        "novelty_score": 0.6,
        "confidence": 0.85,
        "extraction_warnings": [],
    }
    return ExtractionResult.model_validate(payload)
|
||||
|
||||
|
||||
def _make_success_response() -> ExtractionResponse:
    """Build a successful ExtractionResponse made of exactly one valid attempt.

    The single attempt carries a passing ValidationReport with one warning
    and no errors; the attempt and the response both report 500 ms.
    """
    model_name = "test-model"
    elapsed_ms = 500

    parsed = _make_valid_result()
    report = ValidationReport(
        valid=True,
        errors=[],
        warnings=["low_novelty"],
        parsed=parsed,
    )
    only_attempt = ExtractionAttempt(
        raw_output=parsed.model_dump_json(),
        validation=report,
        error=None,
        duration_ms=elapsed_ms,
        model=model_name,
    )
    metadata = {"prompt_version": "document-intel-v1", "schema_version": "2.0.0"}
    return ExtractionResponse(
        success=True,
        result=parsed,
        attempts=[only_attempt],
        prompt_metadata=metadata,
        model=model_name,
        total_duration_ms=elapsed_ms,
    )
|
||||
|
||||
|
||||
def _make_failed_response_with_retries() -> ExtractionResponse:
    """Build a failed ExtractionResponse consisting of two unsuccessful attempts.

    The first attempt (200 ms) has no validation report and an
    ``invalid_json`` error; the second (300 ms) carries a failing
    ValidationReport with two errors and one warning.
    """
    model_name = "test-model"

    # Attempt 1: no validation report attached, only an error string.
    json_failure = ExtractionAttempt(
        raw_output="bad json",
        validation=None,
        error="invalid_json",
        duration_ms=200,
        model=model_name,
    )

    # Attempt 2: a validation report is present but marked invalid.
    failing_report = ValidationReport(
        valid=False,
        errors=["schema_fail", "missing_companies"],
        warnings=["truncated"],
    )
    schema_failure = ExtractionAttempt(
        raw_output="still bad output here",
        validation=failing_report,
        error="schema_fail; missing_companies",
        duration_ms=300,
        model=model_name,
    )

    metadata = {"prompt_version": "document-intel-v1", "schema_version": "2.0.0"}
    return ExtractionResponse(
        success=False,
        result=None,
        attempts=[json_failure, schema_failure],
        prompt_metadata=metadata,
        model=model_name,
        total_duration_ms=500,
    )
|
||||
|
||||
|
||||
def test_collect_metrics_success():
    """Metrics for a single-attempt successful extraction match the fixture."""
    response = _make_success_response()
    metrics = collect_metrics(
        response,
        document_id="doc-1",
        ticker="AAPL",
        document_text_length=4000,
    )

    # Identity fields passed in by the caller or read from the response.
    assert metrics.document_id == "doc-1"
    assert metrics.ticker == "AAPL"
    assert metrics.model_name == "test-model"
    assert metrics.prompt_version == "document-intel-v1"
    assert metrics.schema_version == "2.0.0"

    # Outcome and timing: one attempt, so first and final durations coincide.
    assert metrics.success is True
    assert metrics.attempt_count == 1
    assert metrics.total_duration_ms == 500
    assert metrics.first_attempt_duration_ms == 500
    assert metrics.final_attempt_duration_ms == 500

    # Confidence and validation summary.
    assert metrics.confidence == 0.85
    assert metrics.validation_status == "valid"
    assert metrics.validation_error_count == 0
    assert metrics.validation_warning_count == 1
    assert metrics.retry_count == 0

    # Token estimates and company count.
    assert metrics.input_token_estimate == 1000  # 4000 / 4
    assert metrics.output_token_estimate > 0
    assert metrics.company_count == 1
|
||||
|
||||
|
||||
def test_collect_metrics_failed_with_retries():
    """Metrics for a two-attempt failed extraction reflect the retry and errors."""
    response = _make_failed_response_with_retries()
    metrics = collect_metrics(
        response,
        document_id="doc-2",
        ticker="MSFT",
        document_text_length=2000,
    )

    # Outcome and attempt accounting: two attempts means one retry.
    assert metrics.success is False
    assert metrics.attempt_count == 2
    assert metrics.retry_count == 1

    # Durations come from the first (200 ms) and last (300 ms) attempts.
    assert metrics.first_attempt_duration_ms == 200
    assert metrics.final_attempt_duration_ms == 300
    assert metrics.total_duration_ms == 500

    # Validation summary matches the second attempt's failing report.
    assert metrics.validation_status == "failed"
    assert metrics.validation_error_count == 2
    assert metrics.validation_warning_count == 1
    assert "schema_fail" in metrics.validation_errors

    # No parsed result, so result-derived values are zero.
    assert metrics.confidence == 0.0
    assert metrics.company_count == 0
    assert metrics.input_token_estimate == 500  # 2000 / 4
|
||||
|
||||
|
||||
def test_collect_metrics_empty_attempts():
    """A response with an empty attempts list yields zeroed/unknown metrics."""
    empty_response = ExtractionResponse(
        success=False,
        result=None,
        attempts=[],
        prompt_metadata={},
        model="test-model",
        total_duration_ms=0,
    )

    metrics = collect_metrics(empty_response, document_id="doc-3")

    # Nothing was attempted, so counts and durations are all zero.
    assert metrics.attempt_count == 0
    assert metrics.retry_count == 0
    assert metrics.first_attempt_duration_ms == 0
    assert metrics.final_attempt_duration_ms == 0

    # No validation report and no result to read from.
    assert metrics.validation_status == "unknown"
    assert metrics.confidence == 0.0
|
||||
|
||||
|
||||
def test_collect_metrics_no_document_text_length():
    """A document_text_length of zero gives an input token estimate of zero."""
    response = _make_success_response()

    metrics = collect_metrics(response, document_text_length=0)

    assert metrics.input_token_estimate == 0
|
||||
Reference in New Issue
Block a user