feat: comprehensive docs, unit tests, docker-compose app services

- Add scheduler and ingestion unit tests (test_scheduler_unit.py, test_ingestion_unit.py)
- Add all 13 app services + dashboard to docker-compose.yml
- Add full documentation suite: API reference, Helm reference, Docker deployment guide, 3 architecture diagrams (K8s, Docker Compose, data pipeline), AI agent guide, backup/restore guide, observability/metrics reference, per-service docs
- Add intelligence pipeline deep-dive docs with Mermaid diagrams
- Update README with documentation index and links
- Add specs for comprehensive-quality-docs, intelligence-pipeline-deep-dive, sanitized-pipeline-docs
2026-04-22 02:56:41 +00:00
parent f251c53f92
commit 88ad1e8d99
57 changed files with 13318 additions and 51 deletions
@@ -0,0 +1 @@
+
@@ -0,0 +1,94 @@
+# Decision Execution Engine Loop
+
+```mermaid
+flowchart TD
+    subgraph ENGINE["Decision Execution Engine\nservices/trading/engine.py"]
+        direction TB
+        TASKS["5 Concurrent Async Tasks"]
+        T1["_decision_loop()\n60s polling interval"]
+        T2["_risk_threshold_monitor()"]
+        T3["_performance_loop()"]
+        T4["_risk_tier_scheduler()"]
+        T5["_rebalance_scheduler()"]
+        TASKS --> T1 & T2 & T3 & T4 & T5
+    end
+
+    T1 --> POLL["Poll recommendations table\naction IN (act, defer)\nmode IN (simulation_eligible, production_eligible)\ngenerated_at > NOW() − 2h"]
+
+    POLL --> EVAL["evaluate_recommendation()"]
+
+    EVAL --> CHK_A
+
+    subgraph PRETRADE["Pre-Execution Check Sequence\n(first failure short-circuits)"]
+        direction TB
+        CHK_A["a. Circuit Breaker active?\nservices/trading/circuit_breaker.py\nTriggers: daily_loss, single_commitment, volatility"]
+        CHK_B["b. Execution Window?\nis_within_execution_window()"]
+        CHK_C["c. Confidence Gate\nconfidence ≥ risk_tier.min_confidence"]
+        CHK_D["d. Deduplication\nRec ID in processed set?\nRedis: app:dedupe:execution:*"]
+        CHK_E["e. Declining Commitments\n> 50% of open commitments\neach down > 2%"]
+        CHK_F["f. Max Open Commitments\nopen_count ≥ max (default 10)"]
+
+        CHK_A -->|"pass"| CHK_B
+        CHK_B -->|"pass"| CHK_C
+        CHK_C -->|"pass"| CHK_D
+        CHK_D -->|"pass"| CHK_E
+        CHK_E -->|"pass"| CHK_F
+    end
+
+    CHK_A & CHK_B & CHK_C & CHK_D & CHK_E & CHK_F -->|"fail"| SKIP["ExecutionDecision\ndecision = skip\n+ skip_reason"]
+
+    CHK_F -->|"pass"| SIZER
+
+    subgraph SIZER["Commitment Sizing\nservices/trading/position_sizer.py"]
+        direction TB
+        SZ1["Base sizing\nrisk_tier.max_commitment_pct × 0.5\n× (confidence / min_confidence)"]
+        SZ2["Correlation reduction\nweighted avg corr > 0.8 → reject\n> 0.5 → proportional reduction"]
+        SZ3["Sector exposure\ncap at risk_tier.max_sector_pct"]
+        SZ4["Diversification bonus\n1.2× for new sector (< 3 sectors)"]
+        SZ5["Event proximity\n≤ 1 day → reject\n≤ 3 days → 50% reduction"]
+        SZ6["Absolute commitment cap"]
+        SZ7["Pool exposure check\nmax_pool_exposure × active_pool"]
+        SZ8["Share rounding\nfloor(dollar / price)"]
+
+        SZ1 --> SZ2 --> SZ3 --> SZ4 --> SZ5 --> SZ6 --> SZ7 --> SZ8
+    end
+
+    SIZER -->|"rejected"| SKIP
+    SIZER -->|"approved"| ACT["ExecutionDecision\ndecision = act\nshares, dollar amount"]
+
+    ACT --> PERSIST_TD["Persist to\nexecution_decisions"]
+
+    ACT --> ORDER["Build execution request\n{entity, action, side,\nquantity, request_type}"]
+
+    ORDER -->|"rpush"| Q_BROKER["app:queue:execution_orders"]
+
+    Q_BROKER --> BROKER["Execution Adapter\nexternal execution API (simulation)\nservices/adapters/broker_adapter.py"]
+
+    BROKER --> AUDIT
+
+    subgraph AUDIT["Audit Trail — PostgreSQL"]
+        AU1["execution_requests"]
+        AU2["commitments"]
+        AU3["pool_snapshots"]
+    end
+
+    subgraph CB_DETAIL["Circuit Breaker Detail\nservices/trading/circuit_breaker.py"]
+        CB1["daily_loss\npool loss > 5%\ncooldown: volatility_pause_hours (2h)"]
+        CB2["single_commitment\ncommitment loss > 15%\ncooldown: entity_cooldown_hours (48h)"]
+        CB3["volatility\n≥ 3 risk thresholds in 30min\ncooldown: volatility_pause_hours (2h)"]
+        CB4["Redis state\napp:execution:circuit_breaker:*"]
+    end
+
+    subgraph RESERVE["Reserve Pool\nservices/trading/reserve_pool.py"]
+        RP1["Profit siphoning: 20%"]
+        RP2["High-water rebalance: 30%"]
+        RP3["Emergency liquidation"]
+        RP4["reserve_pool_ledger"]
+    end
+
+    subgraph RISK_TIER["Risk Tier Auto-Adjustment\nservices/trading/risk_tier_controller.py"]
+        RT1["Evaluate: risk-adjusted return ratio,\npeak-to-trough decline, success rate"]
+        RT2["conservative → moderate → aggressive"]
+        RT3["risk_tier_history"]
+    end
+```
@@ -0,0 +1,81 @@
+# Ingestion-to-Extraction Flow
+
+```mermaid
+flowchart TD
+    subgraph Scheduler["Scheduler\nservices/scheduler/app.py"]
+        S1["schedule_cycle()"]
+        S2["Cadence check\nmarket_api: 300s\nnews_api: 300s\nfilings_api: 3600s\nmacro_news: 600s"]
+        S3["Rate limit check\ncheck_rate_limit()"]
+        S1 --> S2 --> S3
+    end
+
+    S3 -->|"rpush"| Q_ING["app:queue:ingestion"]
+
+    Q_ING -->|"lpop"| ING
+
+    subgraph ING["Ingestion Worker\nservices/ingestion/worker.py"]
+        direction TB
+        AD["Adapter Dispatch\nprocess_job()"]
+        AD --> PA["ExternalDataAdapter\nservices/adapters/market_adapter.py"]
+        AD --> PB["ExternalNewsAdapter\nservices/adapters/news_adapter.py"]
+        AD --> PC["RegulatoryFilingsAdapter\nservices/adapters/filings_adapter.py"]
+        AD --> PD["MacroNewsAdapter\nservices/adapters/macro_news_adapter.py"]
+        AD --> PE["WebScrapeAdapter\nservices/adapters/web_scrape_adapter.py"]
+    end
+
+    ING -->|"Content hash check\napp:dedupe:*\nTTL 24h"| REDIS_DEDUPE[("Redis\nDedupe Markers")]
+
+    ING -->|"upload_raw_artifact()"| MINIO_RAW
+
+    subgraph MINIO_RAW["MinIO Raw Storage"]
+        B1["app-raw-data"]
+        B2["app-raw-content"]
+        B3["app-raw-filings"]
+    end
+
+    ING -->|"persist_ingestion_items()"| PG_ING
+
+    subgraph PG_ING["PostgreSQL"]
+        T1["documents"]
+        T2["ingestion_runs"]
+        T3["document_company_mentions"]
+    end
+
+    ING -->|"rpush new doc IDs"| Q_PARSE["app:queue:parsing"]
+
+    Q_PARSE -->|"lpop"| PARSER
+
+    subgraph PARSER["Parser Worker\nservices/parser/worker.py"]
+        P1["fetch_html() → parse_html()"]
+        P2["Quality scoring\nconfidence: high / medium / low"]
+        P3["Company mention detection\ndetect_company_mentions()"]
+        P4["Routing decision"]
+        P1 --> P2 --> P3 --> P4
+    end
+
+    PARSER -->|"upload_normalized_text()\nupload_parser_output()"| MINIO_NORM["MinIO\napp-normalized"]
+    PARSER -->|"update_document_parse_results()"| PG_ING
+
+    P4 -->|"doc_type = macro_event"| Q_MACRO["app:queue:macro_classification"]
+    P4 -->|"doc_type ≠ macro_event"| Q_EXT["app:queue:extraction"]
+
+    Q_EXT -->|"lpop"| EXT
+    Q_MACRO -->|"lpop"| EXT
+
+    subgraph EXT["Extractor Worker\nservices/extractor/main.py"]
+        E1["Document Intelligence\nExtractor agent\nslug: document-extractor"]
+        E2["Global Event Classifier\nslug: event-classifier\nservices/extractor/event_classifier.py"]
+        E3["persist_extraction()\nservices/extractor/worker.py"]
+    end
+
+    EXT -->|"persist to"| PG_EXT
+
+    subgraph PG_EXT["PostgreSQL"]
+        T4["document_intelligence"]
+        T5["document_impact_records"]
+        T6["global_events"]
+        T7["macro_impact_records"]
+    end
+
+    EXT -->|"rpush"| Q_AGG["app:queue:aggregation"]
+```
@@ -0,0 +1,80 @@
+# Recommendation Generation Flow
+
+```mermaid
+flowchart TD
+    Q_REC["app:queue:recommendation"] -->|"lpop"| WORKER["Recommendation Worker\nservices/recommendation/main.py"]
+
+    WORKER --> FETCH["Fetch TrendSummary\nfrom trend_windows\nfor entity + window"]
+
+    FETCH --> SUPP
+
+    subgraph SUPP["Data Quality Suppression\nservices/recommendation/suppression.py"]
+        S1["extraction confidence < 0.40?"]
+        S2["evidence staleness > 168h?"]
+        S3["source diversity < 1 type?"]
+        S4["extraction failure rate > 50%?"]
+        S5["valid documents < 2?"]
+        S6["data quality score < 0.30?"]
+        S7["Macro-only signal?\nevaluate_macro_only_suppression()"]
+        S8["Pattern-only signal?\nevaluate_pattern_only_suppression()"]
+    end
+
+    SUPP -->|"Any suppression check triggers:\nsuppressed = true\nmode → informational"| ELIG
+    SUPP -->|"No suppression check triggers"| ELIG
+
+    subgraph ELIG["Eligibility Evaluation\nservices/recommendation/eligibility.py"]
+        direction TB
+        G["Gate Checks"]
+        G1["confidence ≥ 0.35"]
+        G2["strength ≥ 0.10"]
+        G3["contradiction ≤ 0.60"]
+        G4["evidence ≥ 2"]
+        G5["direction ≠ neutral"]
+        G --> G1 & G2 & G3 & G4 & G5
+
+        G1 & G2 & G3 & G4 & G5 --> ACT["Action Mapping"]
+        ACT --> A1["ACT: positive + strength ≥ 0.25"]
+        ACT --> A2["DEFER: negative + strength ≥ 0.25"]
+        ACT --> A3["MONITOR: directional + confidence ≥ 0.50"]
+        ACT --> A4["OBSERVE: otherwise"]
+
+        A1 & A2 & A3 & A4 --> MODE["Mode Escalation"]
+        MODE --> M1["informational\n(default for MONITOR/OBSERVE)"]
+        MODE --> M2["simulation_eligible\nconfidence ≥ 0.50"]
+        MODE --> M3["production_eligible\nconfidence ≥ 0.70\ncontradiction ≤ 0.25\nevidence ≥ 5"]
+    end
+
+    ELIG --> SIZING
+
+    subgraph SIZING["Commitment Sizing\nservices/recommendation/eligibility.py"]
+        PS1["base = 1% of allocation pool"]
+        PS2["scale by confidence × strength\nup to 10% max"]
+        PS3["contradiction penalty\n−0.5 × contradiction_score"]
+        PS4["evidence count penalty\n< 3 docs → ×0.5\n< 5 docs → ×0.75"]
+    end
+
+    SIZING --> THESIS
+
+    subgraph THESIS["Thesis Generation"]
+        TH1["Deterministic thesis\nassembled from trend data"]
+        TH2["Optional LLM rewrite\nthesis-rewriter agent\nservices/recommendation/thesis_llm.py"]
+        TH1 --> TH2
+    end
+
+    THESIS --> RISK
+
+    subgraph RISK["Risk Classification"]
+        RC1["low"]
+        RC2["moderate"]
+        RC3["high"]
+        RC4["very_high"]
+    end
+
+    RISK --> PERSIST
+
+    subgraph PERSIST["Persistence — PostgreSQL"]
+        P1["recommendations"]
+        P2["recommendation_evidence"]
+        P3["risk_evaluations"]
+    end
+```
@@ -0,0 +1,52 @@
+# Three-Layer Signal Merging
+
+```mermaid
+flowchart TD
+    subgraph Layer1["Layer 1 — Entity Signals"]
+        DIR["document_impact_records\n(per-entity extraction output)"]
+        DIR -->|"build_weighted_signals()"| WS1["WeightedSignal[]\nweight = 1.0 (full)"]
+    end
+
+    subgraph Layer2["Layer 2 — Macro Signals"]
+        MIR["macro_impact_records\n(global event interpolation)"]
+        MIR -->|"build_macro_weighted_signals()"| WS2["WeightedSignal[]\nimpact × MACRO_SIGNAL_WEIGHT\n(0.3)"]
+        TOGGLE_M{"macro_enabled\nin risk_configs?"}
+        TOGGLE_M -->|"true"| MIR
+        TOGGLE_M -->|"false"| SKIP_M["Layer skipped\ngraceful degradation"]
+    end
+
+    subgraph Layer3["Layer 3 — Competitive Signals"]
+        CSR["competitive_signal_records\n(pattern mining + propagation)"]
+        CSR -->|"build_pattern_weighted_signals()\nservices/aggregation/signal_propagation.py"| WS3["WeightedSignal[]\nimpact × COMPETITIVE_SIGNAL_WEIGHT\n(0.2)"]
+        TOGGLE_C{"competitive_enabled\nin risk_configs?"}
+        TOGGLE_C -->|"true"| CSR
+        TOGGLE_C -->|"false"| SKIP_C["Layer skipped\ngraceful degradation"]
+    end
+
+    WS1 --> MERGE["Concatenate all WeightedSignal lists"]
+    WS2 --> MERGE
+    WS3 --> MERGE
+
+    MERGE --> AGG
+
+    subgraph AGG["Aggregation Engine\nservices/aggregation/worker.py"]
+        A1["weighted_sentiment_average()"]
+        A2["detect_contradictions()\nservices/aggregation/contradiction.py"]
+        A3["derive_trend_direction()"]
+        A4["compute_trend_confidence()"]
+        A5["rank_evidence()"]
+        A1 --> A2 --> A3 --> A4 --> A5
+    end
+
+    AGG -->|"assemble_trend_summary()"| TS["TrendSummary\nservices/shared/schemas.py"]
+
+    TS -->|"persist_trend_summary()"| PG_TREND
+
+    subgraph PG_TREND["PostgreSQL"]
+        TW["trend_windows\n(upserted each cycle)"]
+        TH["trend_history\n(time-series snapshots)"]
+        TE["trend_evidence\n(per-document rankings)"]
+    end
+
+    AGG -->|"rpush"| Q_REC["app:queue:recommendation"]
+```
@@ -0,0 +1,62 @@
+# Trend Accumulation and Escalation
+
+```mermaid
+flowchart TD
+    subgraph Windows["Five Time Windows\nservices/aggregation/worker.py"]
+        W1["intraday (12h)"]
+        W2["1d (1 day)"]
+        W3["7d (7 days)"]
+        W4["30d (30 days)"]
+        W5["90d (90 days)"]
+    end
+
+    W1 & W2 & W3 & W4 & W5 --> SIGNALS
+
+    SIGNALS["Fetch signals per window\nEntity + Macro + Competitive\n→ WeightedSignal[]"]
+
+    SIGNALS --> SENT["weighted_sentiment_average()\nCompute avg sentiment across signals"]
+
+    SENT --> DIR
+
+    subgraph DIR["derive_trend_direction()"]
+        D1["avg_sentiment ≥ 0.15 → POSITIVE"]
+        D2["avg_sentiment ≤ −0.15 → NEGATIVE"]
+        D3["contradiction > 0.10\nAND |avg| < 0.30 → MIXED"]
+        D4["otherwise → NEUTRAL"]
+    end
+
+    DIR --> CONF
+
+    subgraph CONF["compute_trend_confidence()"]
+        C1["Unique source count\ncaps at 15 → 0.8 contribution"]
+        C2["Avg extraction credibility"]
+        C3["Signal agreement ratio\ndampened by log₂(n+1)/log₂(8)\nsaturates ~7 unique sources"]
+        C4["Contradiction penalty\n−0.4 × contradiction_score"]
+        C5["confidence = 0.3×count + 0.3×credibility\n+ 0.4×agreement − penalty"]
+    end
+
+    CONF --> STRENGTH["trend_strength = |avg_sentiment|\nclamped to [0, 1]"]
+
+    STRENGTH --> ESC
+
+    subgraph ESC["Escalation Path\n(via eligibility thresholds)"]
+        direction TB
+        E1["NEUTRAL\nconfidence < 0.35\nOR strength < 0.10\nOR direction = neutral"]
+        E2["OBSERVE\nstrength < 0.25\nAND confidence < 0.50"]
+        E3["MONITOR\nstrength < 0.25\nAND confidence ≥ 0.50"]
+        E4["ACT / DEFER\nstrength ≥ 0.25\nAND direction = positive/negative"]
+
+        E1 -->|"More signals\nsame direction"| E2
+        E2 -->|"Confidence grows\nmore unique sources"| E3
+        E3 -->|"Strength exceeds 0.25\naccumulated evidence"| E4
+    end
+
+    ESC --> PERSIST
+
+    subgraph PERSIST["Persistence"]
+        P1["trend_windows\n(upserted each cycle)"]
+        P2["trend_history\n(time-series snapshots)"]
+        P3["trend_evidence\n(per-document rankings)"]
+        P4["trend_projections\nservices/aggregation/projection.py"]
+    end
+```
@@ -0,0 +1,58 @@
+# Weighted Signal Computation
+
+```mermaid
+flowchart TD
+    DOC["Document Signal Input\n(published_at, source_credibility,\nnovelty_score, extraction_confidence,\nmarket_ctx)"]
+
+    DOC --> GATE
+    DOC --> REC
+    DOC --> CRED
+    DOC --> NOV
+    DOC --> MKT
+
+    subgraph GATE["Confidence Gate"]
+        G1["extraction_confidence ≥ 0.2?"]
+        G1 -->|"Yes"| G2["gate = 1.0"]
+        G1 -->|"No"| G3["gate = 0.0\n(signal zeroed out)"]
+    end
+
+    subgraph REC["Recency Decay"]
+        R1["w = 2^(−age_hours / half_life)"]
+        R2["Half-lives per window:\nintraday: 2h\n1d: 12h\n7d: 72h\n30d: 240h\n90d: 720h"]
+        R3["Floor: min_recency_weight = 0.01"]
+        R1 --- R2
+        R1 --- R3
+    end
+
+    subgraph CRED["Source Credibility"]
+        C1["Clamp to [0.1, 1.0]"]
+        C2["Apply exponent\n(default 1.0)"]
+        C1 --> C2
+    end
+
+    subgraph NOV["Novelty Bonus"]
+        N1["bonus = novelty_score × 0.25"]
+        N2["Range: [0.0, 0.25]\n(up to 25% boost)"]
+        N1 --- N2
+    end
+
+    subgraph MKT["Environmental Context Multiplier"]
+        M1["Volatility boost\nlog(1 + excess) × 0.15\ncapped at 0.30"]
+        M2["Volume surge boost\nvolume_change > 50% → +0.15"]
+        M3["multiplier = 1.0 + boost\n(always ≥ 1.0)"]
+        M1 --> M3
+        M2 --> M3
+    end
+
+    GATE --> FORMULA
+    REC --> FORMULA
+    CRED --> FORMULA
+    NOV --> FORMULA
+    MKT --> FORMULA
+
+    FORMULA["combined = gate × recency × credibility\n× (1 + novelty_bonus)\n× market_context_multiplier"]
+
+    FORMULA --> SW["SignalWeight\nservices/aggregation/scoring.py"]
+
+    SW --> WS["WeightedSignal\n{ document_id, weight: SignalWeight,\nsentiment_value, impact_score }"]
+```