1 mês atrás · 6d9a376407
--- a/python-services/ner-service/app/services/ollama_service.py
+++ b/python-services/ner-service/app/services/ollama_service.py
@@ -108,9 +108,7 @@ JSON结果:
 
				             }
			
 
				         }
			
 
				         
			
 
				-        # Qwen3 特殊处理：使用 /no_think 标签禁用思考模式
			
 
				-        if "qwen3" in self.model.lower():
			
 
				-            payload["prompt"] = "/no_think\n" + prompt
			
 
				+        # Qwen3 保留思考模式以提高推理质量，解析时提取最终结果
			
 
				         
			
 
				         try:
			
 
				             async with httpx.AsyncClient(timeout=self.timeout) as client:
			
@@ -136,35 +134,36 @@ JSON结果:
 
				         entities = []
			
 
				         
			
 
				         try:
			
 
				-            # Qwen3 可能有 thinking 模式，需要移除 <think>...</think> 部分
			
 
				-            response = re.sub(r'<think>[\s\S]*?</think>', '', response)
			
 
				+            # Qwen3 思考模式：提取 </think> 之后的内容（最终输出）
			
 
				+            think_end = response.find('</think>')
			
 
				+            if think_end != -1:
			
 
				+                # 只保留思考结束后的内容
			
 
				+                response = response[think_end + len('</think>'):]
			
 
				+                logger.debug(f"提取思考后内容: {response[:200]}...")
			
 
				             
			
 
				             # 移除 markdown code block 标记
			
 
				             response = re.sub(r'```json\s*', '', response)
			
 
				             response = re.sub(r'```\s*', '', response)
			
 
				+            response = response.strip()
			
 
				             
			
 
				-            # 查找所有 JSON 对象，取最后一个（通常是实际结果）
			
 
				-            json_matches = re.findall(r'\{[^{}]*"entities"[^{}]*\[[\s\S]*?\]\s*\}', response)
			
 
				-            if not json_matches:
			
 
				-                # 回退：尝试匹配任意 JSON
			
 
				-                json_matches = re.findall(r'\{[\s\S]*?\}', response)
			
 
				-            
			
 
				-            if not json_matches:
			
 
				-                logger.warning(f"LLM 响应中未找到 JSON, response={response[:300]}...")
			
 
				-                return entities
			
 
				-            
			
 
				-            # 尝试解析每个匹配，使用第一个有效的
			
 
				+            # 方法1：直接尝试解析整个响应（如果是纯 JSON）
			
 
				             data = None
			
 
				-            for json_str in json_matches:
			
 
				-                try:
			
 
				-                    parsed = json.loads(json_str)
			
 
				-                    if "entities" in parsed:
			
 
				-                        data = parsed
			
 
				-                        break
			
 
				-                except json.JSONDecodeError:
			
 
				-                    continue
			
 
				+            try:
			
 
				+                data = json.loads(response)
			
 
				+            except json.JSONDecodeError:
			
 
				+                pass
			
 
				+            
			
 
				+            # 方法2：查找包含 entities 的 JSON 对象
			
 
				+            if not data or "entities" not in data:
			
 
				+                # 匹配 {"entities": [...]} 格式
			
 
				+                json_match = re.search(r'\{\s*"entities"\s*:\s*\[[\s\S]*?\]\s*\}', response)
			
 
				+                if json_match:
			
 
				+                    try:
			
 
				+                        data = json.loads(json_match.group())
			
 
				+                    except json.JSONDecodeError:
			
 
				+                        pass
			
 
				             
			
 
				-            if not data:
			
 
				+            if not data or "entities" not in data:
			
 
				                 logger.warning(f"未找到有效的 entities JSON, response={response[:300]}...")
			
 
				                 return entities