From 007189c0a55be3a8322c95571be9b12306495519 Mon Sep 17 00:00:00 2001
From: Celes Renata <celes@frameshift.net>
Date: Wed, 29 Apr 2026 15:50:49 +0000
Subject: [PATCH] fix: handle plain-text thinking blocks and disable think mode

The model outputs 'Thinking Process:' as plain text (not in <think> tags).
Updated _strip_thinking_block to handle both XML tags and plain-text
reasoning patterns. Also:
- Added rule 7 to system prompt: 'Do NOT show your thinking process'
- Set think=False in Ollama payload to disable Qwen3 thinking mode
- Added fallback regex to extract thesis from after thinking blocks
- Always send options.num_predict (512) to Ollama to cap response length
---
 services/recommendation/thesis_llm.py | 44 ++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 7 deletions(-)
diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py
index 1865e86..218e32e 100644
--- a/services/recommendation/thesis_llm.py
+++ b/services/recommendation/thesis_llm.py
@@ -37,7 +37,8 @@ STRICT RULES:
 3. Keep the rewrite under 150 words.
 4. Preserve all factual claims, risk notes, and evidence counts from the input.
 5. Use a neutral, professional tone. Avoid hype or marketing language.
-6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary."""
+6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.
+7. Do NOT show your thinking process. Do NOT include any reasoning steps. Output ONLY the final rewritten text."""
 
 
 def build_thesis_rewrite_prompt(
@@ -315,8 +316,16 @@ async def _call_ollama_thesis(
     }
 
     # Support context_window override via num_ctx (Requirement 10.4)
+    options: dict[str, object] = {}
     if config.context_window > 0:
-        payload["options"] = {"num_ctx": config.context_window}
+        options["num_ctx"] = config.context_window
+    # Cap response length: num_predict is Ollama's max-tokens-to-generate option (always 512 here)
+    options["num_predict"] = options.get("num_predict", 512)
+    if options:
+        payload["options"] = options
+
+    # Disable thinking/reasoning mode (e.g. Qwen3) by sending think=False at the top level of the payload
+    payload["think"] = False
 
     resp = await client.post(
         f"{config.base_url}/api/chat",
@@ -340,17 +349,38 @@ async def _call_ollama_thesis(
 
 
 def _strip_thinking_block(text: str) -> str:
-    """Remove <think>...</think> reasoning blocks from model output.
+    """Remove thinking/reasoning blocks from model output.
 
-    Some models (e.g. Qwen) emit chain-of-thought in <think> tags before
-    the actual response. This strips that prefix to return only the final
-    thesis text.
+    Some models (e.g. Qwen) emit chain-of-thought either in <think> XML tags
+    or as plain-text "Thinking Process:" blocks before the actual response.
+    This strips both patterns to return only the final thesis text.
     """
     import re
     # Remove <think>...</think> blocks (greedy, handles multiline)
     cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
-    # Also handle unclosed <think> tag (model cut off mid-thought)
+    # Handle unclosed <think> tag (model cut off mid-thought)
     cleaned = re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)
+    # Remove plain-text "Thinking Process:" / "Thought Process:" / "Chain of Thought:" blocks.
+    # Pattern: everything from the marker up to (not including) the first newline that is followed
+    # by a ticker-like token (2-5 letters; IGNORECASE applies to it too) and a verb such as "shows"
+    cleaned = re.sub(
+        r"(?:Thinking Process:|Thought Process:|Chain of Thought:).*?(?=\n[A-Z]{2,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?))",
+        "",
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # Fallback: if a "Thinking Process:" marker is still present after the substitution above,
+    # keep only the thesis-looking tail of the text (heuristic: a line led by a ticker and a verb)
+    if "thinking process:" in cleaned.lower():
+        # re.search takes the FIRST newline-preceded line of the form "<1-5 uppercase letters> <verb> ...";
+        # with DOTALL the captured group runs from that line through the end of the text
+        match = re.search(
+            r"\n([A-Z]{1,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?)\s.+)",
+            cleaned,
+            flags=re.DOTALL,
+        )
+        if match:
+            cleaned = match.group(1)
     return cleaned.strip()