From f9ee1532dcd97911eb3fec79e6744691f615a694 Mon Sep 17 00:00:00 2001
From: Celes Renata
Date: Wed, 29 Apr 2026 15:25:04 +0000
Subject: [PATCH] fix: strip reasoning blocks from thesis LLM output

Qwen3.5 in thinking mode emits <think>...</think> chain-of-thought
before the actual response. The thesis rewriter was returning the raw
output including the entire reasoning block. Now strips thinking tags
from both Ollama and vLLM response paths.
---
 services/recommendation/thesis_llm.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py
index fc55917..1865e86 100644
--- a/services/recommendation/thesis_llm.py
+++ b/services/recommendation/thesis_llm.py
@@ -336,7 +336,22 @@ async def _call_ollama_thesis(
         len(content),
     )
 
-    return content.strip()
+    return _strip_thinking_block(content.strip())
+
+
+def _strip_thinking_block(text: str) -> str:
+    """Remove <think>...</think> reasoning blocks from model output.
+
+    Some models (e.g. Qwen) emit chain-of-thought in <think> tags before
+    the actual response. This strips that prefix to return only the final
+    thesis text.
+    """
+    import re
+    # Remove <think>...</think> blocks (non-greedy, handles multiline)
+    cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
+    # Also handle unclosed <think> tag (model cut off mid-thought)
+    cleaned = re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)
+    return cleaned.strip()
 
 
 async def _call_vllm_thesis(
@@ -388,4 +403,4 @@ async def _call_vllm_thesis(
         len(content),
     )
 
-    return content.strip()
+    return _strip_thinking_block(content.strip())