From 007189c0a55be3a8322c95571be9b12306495519 Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Wed, 29 Apr 2026 15:50:49 +0000 Subject: [PATCH] fix: handle plain-text thinking blocks and disable think mode The model outputs 'Thinking Process:' as plain text (not in tags). Updated _strip_thinking_block to handle both XML tags and plain-text reasoning patterns. Also: - Added rule 7 to system prompt: 'Do NOT show your thinking process' - Set think=False in Ollama payload to disable Qwen3 thinking mode - Added fallback regex to extract thesis from after thinking blocks --- services/recommendation/thesis_llm.py | 44 ++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py index 1865e86..218e32e 100644 --- a/services/recommendation/thesis_llm.py +++ b/services/recommendation/thesis_llm.py @@ -37,7 +37,8 @@ STRICT RULES: 3. Keep the rewrite under 150 words. 4. Preserve all factual claims, risk notes, and evidence counts from the input. 5. Use a neutral, professional tone. Avoid hype or marketing language. -6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.""" +6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary. +7. Do NOT show your thinking process. Do NOT include any reasoning steps. Output ONLY the final rewritten text.""" def build_thesis_rewrite_prompt( @@ -315,8 +316,16 @@ async def _call_ollama_thesis( } # Support context_window override via num_ctx (Requirement 10.4) + options: dict[str, object] = {} if config.context_window > 0: - payload["options"] = {"num_ctx": config.context_window} + options["num_ctx"] = config.context_window + # Disable thinking/reasoning mode for models that support it (e.g. Qwen3) + options["num_predict"] = options.get("num_predict", 512) + if options: + payload["options"] = options + + # Qwen3 thinking mode control: /no_think suffix or think parameter + payload["think"] = False resp = await client.post( f"{config.base_url}/api/chat", @@ -340,17 +349,38 @@ async def _call_ollama_thesis( def _strip_thinking_block(text: str) -> str: - """Remove ... reasoning blocks from model output. + """Remove thinking/reasoning blocks from model output. - Some models (e.g. Qwen) emit chain-of-thought in tags before - the actual response. This strips that prefix to return only the final - thesis text. + Some models (e.g. Qwen) emit chain-of-thought either in XML tags + or as plain-text "Thinking Process:" blocks before the actual response. + This strips both patterns to return only the final thesis text. """ import re # Remove ... blocks (greedy, handles multiline) cleaned = re.sub(r".*?\s*", "", text, flags=re.DOTALL) - # Also handle unclosed tag (model cut off mid-thought) + # Handle unclosed tag (model cut off mid-thought) cleaned = re.sub(r".*", "", cleaned, flags=re.DOTALL) + # Remove plain-text "Thinking Process:" blocks followed by the actual thesis + # Pattern: everything from "Thinking Process:" up to "" or the final + # clean thesis (identified by the last paragraph that doesn't start with numbering/bullets) + cleaned = re.sub( + r"(?:Thinking Process:|Thought Process:|Chain of Thought:).*?(?=\n[A-Z]{2,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?))", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Fallback: if "Thinking Process:" still present, take only text after last "" or + # after the thinking block ends (heuristic: last substantial paragraph) + if "thinking process:" in cleaned.lower(): + # Find the actual thesis — it's typically the last coherent paragraph + # that starts with a ticker symbol pattern + match = re.search( + r"\n([A-Z]{1,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?)\s.+)", + cleaned, + flags=re.DOTALL, + ) + if match: + cleaned = match.group(1) return cleaned.strip()