diff --git a/requirements.txt b/requirements.txt
index 5108fdf..6ede7b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,9 @@ minio>=7.2.0
 # HTTP client
 httpx>=0.27.0
 
+# JSON repair for LLM output
+json-repair>=0.59.0
+
 # Web scraping
 beautifulsoup4>=4.12.0
 requests>=2.31.0
diff --git a/services/extractor/client.py b/services/extractor/client.py
index 21a57c0..9f8c2bc 100644
--- a/services/extractor/client.py
+++ b/services/extractor/client.py
@@ -1,8 +1,9 @@
-"""Ollama client wrapper using structured output format.
+"""Ollama client wrapper for document intelligence extraction.
 
 Sends documents to a local Ollama instance via the /api/chat endpoint
-with the ``format`` parameter set to the extraction JSON schema, ensuring
-the model returns schema-compliant JSON.
+with think=false for speed. Uses json-repair to fix common JSON syntax
+issues in model output since the Ollama format constraint is broken
+with think=false on qwen3.5 models (Ollama bug #14645).
 
 Includes retry logic for invalid or incomplete model responses with
 exponential backoff, error classification, and full audit preservation.
@@ -12,11 +13,14 @@ Requirements: 5.1, 5.2, 5.4
 from __future__ import annotations
 
 import asyncio
+import json
 import logging
+import re
 import time
 from dataclasses import dataclass, field
 
 import httpx
+from json_repair import repair_json
 
 from services.extractor.prompts import (
     build_extraction_prompt,
@@ -45,6 +49,32 @@ def _is_retryable(error: str | None) -> bool:
     return error not in _NON_RETRYABLE_ERRORS
 
 
+_FENCE_RE = re.compile(r"^```(?:json)?\s*\n?(.*?)\n?\s*```\s*$", re.DOTALL)
+
+
+def _strip_markdown_fences(text: str) -> str:
+    """Return the payload of a whole-string ```/```json fence, else *text* unchanged."""
+    fenced = _FENCE_RE.match(text.strip())
+    return text if fenced is None else fenced.group(1)
+
+
+def _repair_json(text: str) -> str:
+    """Return *text* if it parses as JSON, else json-repair's fix, else *text* unchanged."""
+    try:
+        json.loads(text)
+        return text  # already valid
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        try:
+            repaired = repair_json(text, return_objects=False)
+        except Exception:  # best-effort: a repair failure must not abort the attempt
+            repaired = ""
+    if repaired.strip():  # json-repair returns "" when nothing is salvageable
+        logger.info("JSON repaired successfully (%d -> %d chars)", len(text), len(repaired))
+        return repaired
+    logger.warning("JSON repair failed, returning original text")
+    return text
+
+
 @dataclass
 class ExtractionAttempt:
     """Record of a single extraction attempt for audit."""
@@ -209,12 +239,13 @@ class OllamaClient:
         json_schema: dict[str, object],
         document_text: str = "",
     ) -> ExtractionAttempt:
-        """Make a streaming call to Ollama with early-termination guardrails.
+        """Call Ollama with think=false for speed, then repair any malformed JSON.
 
-        Aborts the stream if:
-        - Total generated tokens exceed ``max_tokens``
-        - No new chunk arrives within ``stall_timeout`` seconds
-        - Repetition loop detected in the last ``loop_window`` tokens
+        Uses think=false to avoid the 2-4 minute thinking overhead.
+        Does NOT use the format parameter (Ollama bug #14645 silently
+        ignores format when think=false on qwen3.5 models).
+        Instead, relies on the prompt to produce JSON and repairs
+        common syntax issues with json-repair.
         """
         attempt = ExtractionAttempt(model=self._config.model)
         start = time.monotonic()
@@ -225,11 +256,8 @@ class OllamaClient:
                 {"role": "system", "content": prompts["system"]},
                 {"role": "user", "content": prompts["user"]},
             ],
-            "format": json_schema,
             "stream": False,
-            "options": {
-                "num_predict": 16384,
-            },
+            "think": False,
         }
 
         url = f"{self._config.base_url}/api/chat"
@@ -271,6 +299,12 @@ class OllamaClient:
             attempt.error = "empty_model_response"
             return attempt
 
+        # Strip markdown fences if present (model sometimes wraps in ```json ... ```)
+        content = _strip_markdown_fences(content)
+
+        # Try json.loads first; if it fails, attempt repair
+        content = _repair_json(content)
+
         # Validate against extraction schema
         attempt.validation = validate_extraction(content, document_text=document_text)
         if not attempt.validation.valid: