diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py
index 1865e86..218e32e 100644
--- a/services/recommendation/thesis_llm.py
+++ b/services/recommendation/thesis_llm.py
@@ -37,7 +37,8 @@ STRICT RULES:
3. Keep the rewrite under 150 words.
4. Preserve all factual claims, risk notes, and evidence counts from the input.
5. Use a neutral, professional tone. Avoid hype or marketing language.
-6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary."""
+6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.
+7. Do NOT show your thinking process. Do NOT include any reasoning steps. Output ONLY the final rewritten text."""
def build_thesis_rewrite_prompt(
@@ -315,8 +316,16 @@ async def _call_ollama_thesis(
}
# Support context_window override via num_ctx (Requirement 10.4)
+ options: dict[str, object] = {}
if config.context_window > 0:
- payload["options"] = {"num_ctx": config.context_window}
+ options["num_ctx"] = config.context_window
+ # Cap generated tokens via num_predict so a runaway reasoning trace cannot grow unbounded (e.g. Qwen3)
+ options["num_predict"] = options.get("num_predict", 512)
+ if options:
+ payload["options"] = options
+
+ # Qwen3 thinking-mode control: set the request-level "think" parameter to False (the alternative is a /no_think prompt suffix)
+ payload["think"] = False
resp = await client.post(
f"{config.base_url}/api/chat",
@@ -340,17 +349,38 @@ async def _call_ollama_thesis(
def _strip_thinking_block(text: str) -> str:
- """Remove ... reasoning blocks from model output.
+ """Remove thinking/reasoning blocks from model output.
- Some models (e.g. Qwen) emit chain-of-thought in tags before
- the actual response. This strips that prefix to return only the final
- thesis text.
+ Some models (e.g. Qwen) emit chain-of-thought either in XML tags
+ or as plain-text "Thinking Process:" blocks before the actual response.
+ This strips both patterns to return only the final thesis text.
"""
import re
# Remove ... blocks (greedy, handles multiline)
cleaned = re.sub(r".*?\s*", "", text, flags=re.DOTALL)
- # Also handle unclosed tag (model cut off mid-thought)
+ # Handle unclosed tag (model cut off mid-thought)
cleaned = re.sub(r".*", "", cleaned, flags=re.DOTALL)
+ # Remove plain-text "Thinking Process:" blocks that precede the actual thesis.
+ # Pattern: everything from the marker up to (not including) the first line that
+ # opens with a ticker-like token followed by a verb such as "shows" or "maintains".
+ cleaned = re.sub(
+ r"(?:Thinking Process:|Thought Process:|Chain of Thought:).*?(?=\n[A-Z]{2,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?))",
+ "",
+ cleaned,
+ flags=re.DOTALL | re.IGNORECASE,
+ )
+ # Fallback: if "Thinking Process:" is still present, keep only the text from the
+ # first line that opens with an uppercase ticker and a verb through the end of the output.
+ if "thinking process:" in cleaned.lower():
+ # Locate the actual thesis: re.search returns the first line that starts with
+ # a ticker symbol pattern, and DOTALL extends the match to the end of the text.
+ match = re.search(
+ r"\n([A-Z]{1,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?)\s.+)",
+ cleaned,
+ flags=re.DOTALL,
+ )
+ if match:
+ cleaned = match.group(1)
return cleaned.strip()