diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py
index fc55917..1865e86 100644
--- a/services/recommendation/thesis_llm.py
+++ b/services/recommendation/thesis_llm.py
@@ -336,7 +336,22 @@ async def _call_ollama_thesis(
len(content),
)
- return content.strip()
+ return _strip_thinking_block(content.strip())
+
+
+def _strip_thinking_block(text: str) -> str:
+    """Return *text* with ``<think>...</think>`` reasoning blocks removed.
+
+    Some models (e.g. Qwen) emit chain-of-thought inside ``<think>`` tags
+    before the answer. Closed blocks are dropped; an unclosed ``<think>``
+    (generation cut off mid-thought) drops everything from the tag onward.
+    """
+    import re
+    # Non-greedy so each closed block is removed on its own (DOTALL: multiline).
+    cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
+    # Whatever <think> remains is unclosed: drop it and all text after it.
+    cleaned = re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)
+    return cleaned.strip()
async def _call_vllm_thesis(
@@ -388,4 +403,4 @@ async def _call_vllm_thesis(
len(content),
)
- return content.strip()
+ return _strip_thinking_block(content.strip())