diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py index fc55917..1865e86 100644 --- a/services/recommendation/thesis_llm.py +++ b/services/recommendation/thesis_llm.py @@ -336,7 +336,22 @@ async def _call_ollama_thesis( len(content), ) - return content.strip() + return _strip_thinking_block(content.strip()) + + +def _strip_thinking_block(text: str) -> str: + """Remove ... reasoning blocks from model output. + + Some models (e.g. Qwen) emit chain-of-thought in tags before + the actual response. This strips that prefix to return only the final + thesis text. + """ + import re + # Remove ... blocks (greedy, handles multiline) + cleaned = re.sub(r".*?\s*", "", text, flags=re.DOTALL) + # Also handle unclosed tag (model cut off mid-thought) + cleaned = re.sub(r".*", "", cleaned, flags=re.DOTALL) + return cleaned.strip() async def _call_vllm_thesis( @@ -388,4 +403,4 @@ async def _call_vllm_thesis( len(content), ) - return content.strip() + return _strip_thinking_block(content.strip())