fix: handle plain-text thinking blocks and disable think mode

The model outputs 'Thinking Process:' as plain text (not in <think> tags).
Updated _strip_thinking_block to handle both XML tags and plain-text
reasoning patterns.

Also:
- Added rule 7 to the system prompt: 'Do NOT show your thinking process'
- Set think=False in the Ollama payload to disable Qwen3 thinking mode
- Added a fallback regex to extract the thesis from after thinking blocks
2026-04-29 15:50:49 +00:00
parent f9ee1532dc
commit 007189c0a5
1 changed files with 37 additions and 7 deletions
@@ -37,7 +37,8 @@ STRICT RULES:
 3. Keep the rewrite under 150 words.
 4. Preserve all factual claims, risk notes, and evidence counts from the input.
 5. Use a neutral, professional tone. Avoid hype or marketing language.
-6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary."""
+6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.
+7. Do NOT show your thinking process. Do NOT include any reasoning steps. Output ONLY the final rewritten text."""


 def build_thesis_rewrite_prompt(
@@ -315,8 +316,16 @@ async def _call_ollama_thesis(
    }

    # Support context_window override via num_ctx (Requirement 10.4)
+    options: dict[str, object] = {}
    if config.context_window > 0:
-        payload["options"] = {"num_ctx": config.context_window}
+        options["num_ctx"] = config.context_window
+    # Cap generation at 512 tokens via num_predict (thinking mode itself is disabled via "think" below)
+    options["num_predict"] = options.get("num_predict", 512)
+    if options:
+        payload["options"] = options
+
+    # Qwen3 thinking mode control: /no_think suffix or think parameter
+    payload["think"] = False

    resp = await client.post(
        f"{config.base_url}/api/chat",
@@ -340,17 +349,38 @@ async def _call_ollama_thesis(


 def _strip_thinking_block(text: str) -> str:
-    """Remove <think>...</think> reasoning blocks from model output.
+    """Remove thinking/reasoning blocks from model output.

-    Some models (e.g. Qwen) emit chain-of-thought in <think> tags before
-    the actual response. This strips that prefix to return only the final
-    thesis text.
+    Some models (e.g. Qwen) emit chain-of-thought either in <think> XML tags
+    or as plain-text "Thinking Process:" blocks before the actual response.
+    This strips both patterns to return only the final thesis text.
    """
    import re
    # Remove <think>...</think> blocks (greedy, handles multiline)
    cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
-    # Also handle unclosed <think> tag (model cut off mid-thought)
+    # Handle unclosed <think> tag (model cut off mid-thought)
    cleaned = re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)
+    # Remove plain-text "Thinking Process:" style blocks that precede the actual thesis.
+    # Pattern: lazily match from the marker up to (not including) the first line that begins
+    # with a short ticker-like word followed by a thesis verb (e.g. "shows", "maintains").
+    cleaned = re.sub(
+        r"(?:Thinking Process:|Thought Process:|Chain of Thought:).*?(?=\n[A-Z]{2,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?))",
+        "",
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # Fallback: if "Thinking Process:" is still present, keep only the text from the first
+    # line that starts with an uppercase ticker followed by a thesis verb, through the end
+    if "thinking process:" in cleaned.lower():
+        # Find the actual thesis — it's typically the last coherent paragraph
+        # that starts with a ticker symbol pattern
+        match = re.search(
+            r"\n([A-Z]{1,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?)\s.+)",
+            cleaned,
+            flags=re.DOTALL,
+        )
+        if match:
+            cleaned = match.group(1)
    return cleaned.strip()