diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py
index fc55917..1865e86 100644
--- a/services/recommendation/thesis_llm.py
+++ b/services/recommendation/thesis_llm.py
@@ -336,7 +336,22 @@ async def _call_ollama_thesis(
         len(content),
     )
 
-    return content.strip()
+    return _strip_thinking_block(content.strip())
+
+
+def _strip_thinking_block(text: str) -> str:
+    """Return model output with <think> reasoning removed.
+
+    Drops closed <think>...</think> blocks, any text up to an orphan
+    </think> (opening tag missing from the output), and an unclosed
+    trailing <think> (generation cut off mid-thought).
+    """
+    import re
+    # Non-greedy so each closed block goes separately; DOTALL spans newlines.
+    cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
+    # Orphan </think>: keep only what follows it. Unclosed <think>: keep only
+    # what precedes it.
+    return cleaned.rpartition("</think>")[2].partition("<think>")[0].strip()
 
 
 async def _call_vllm_thesis(
@@ -388,4 +403,4 @@ async def _call_vllm_thesis(
         len(content),
     )
 
-    return content.strip()
+    return _strip_thinking_block(content.strip())