1 月之前 · 9a19d3b577
--- a/python-services/ner-service/app/services/ollama_service.py
+++ b/python-services/ner-service/app/services/ollama_service.py
@@ -79,23 +79,31 @@ class OllamaService:
 
				         # 示例帮助模型理解格式
			
 
				         example = '{"entities": [{"name": "成都市", "type": "LOC", "charStart": 10, "charEnd": 13}, {"name": "2024年5月", "type": "DATE", "charStart": 0, "charEnd": 7}]}'
			
 
				         
			
 
				-        prompt = f"""从文本中提取命名实体，只输出JSON。
			
 
				+        # /no_think 指令用于禁用 Qwen3 的思考模式
			
 
				+        prompt = f"""/no_think
			
 
				+你是一个命名实体识别(NER)专家。请从以下文本中提取命名实体。
			
 
				 
			
 
				-实体类型: {types_desc}
			
 
				+【任务要求】
			
 
				+1. 只输出JSON格式，不要输出任何解释或思考过程
			
 
				+2. 实体类型: {types_desc}
			
 
				+3. charStart和charEnd是实体在文本中的字符位置索引（从0开始）
			
 
				 
			
 
				-输出格式示例:
			
 
				+【输出格式】
			
 
				 {example}
			
 
				 
			
 
				-文本内容:
			
 
				+【待处理文本】
			
 
				 {text}
			
 
				 
			
 
				-JSON结果:
			
 
				-```json"""
			
 
				+【JSON输出】"""
			
 
				         return prompt
			
 
				     
			
 
				-    async def _call_ollama(self, prompt: str) -> Optional[str]:
			
 
				+    async def _call_ollama(self, prompt: str, disable_thinking: bool = True) -> Optional[str]:
			
 
				         """
			
 
				         调用 Ollama API
			
 
				+        
			
 
				+        Args:
			
 
				+            prompt: 输入提示词
			
 
				+            disable_thinking: 是否禁用思考模式（适用于 Qwen3 等支持思考的模型）
			
 
				         """
			
 
				         url = f"{self.base_url}/api/generate"
			
 
				         payload = {
			
@@ -108,7 +116,8 @@ JSON结果:
 
				             }
			
 
				         }
			
 
				         
			
 
				-        # Qwen3 保留思考模式以提高推理质量，解析时提取最终结果
			
 
				+        # Qwen3 思考模式：保留思考能力，解析时提取最终结果
			
 
				+        # NOTE(review): disable_thinking is not wired into the request in this change; to honor it, set payload["think"] = False when it is True
			
 
				         
			
 
				         try:
			
 
				             async with httpx.AsyncClient(timeout=self.timeout) as client:
			
@@ -134,12 +143,25 @@ JSON结果:
 
				         entities = []
			
 
				         
			
 
				         try:
			
 
				-            # Qwen3 思考模式：提取 </think> 之后的内容（最终输出）
			
 
				+            # Qwen3 思考模式处理：提取 </think> 之后的内容
			
 
				             think_end = response.find('</think>')
			
 
				             if think_end != -1:
			
 
				                 # 只保留思考结束后的内容
			
 
				                 response = response[think_end + len('</think>'):]
			
 
				                 logger.debug(f"提取思考后内容: {response[:200]}...")
			
 
				+            else:
			
 
				+                # 检查是否存在 <think> 但没有 </think>（思考未完成或被截断）
			
 
				+                think_start = response.find('<think>')
			
 
				+                if think_start != -1:
			
 
				+                    # 尝试从 <think> 之前的内容或整个响应中查找 JSON
			
 
				+                    # 有些情况下 JSON 可能在思考标签之前
			
 
				+                    pre_think = response[:think_start].strip()
			
 
				+                    if pre_think:
			
 
				+                        response = pre_think
			
 
				+                        logger.debug(f"使用思考前内容: {response[:200]}...")
			
 
				+                    else:
			
 
				+                        # 思考内容中可能包含 JSON，尝试直接从响应中提取
			
 
				+                        logger.debug("检测到不完整的思考模式，尝试直接提取JSON")
			
 
				             
			
 
				             # 移除 markdown code block 标记
			
 
				             response = re.sub(r'```json\s*', '', response)
			
@@ -153,10 +175,23 @@ JSON结果:
 
				             except json.JSONDecodeError:
			
 
				                 pass
			
 
				             
			
 
				-            # 方法2：查找包含 entities 的 JSON 对象
			
 
				+            # 方法2：查找包含 entities 的 JSON 对象（使用更宽松的匹配）
			
 
				+            if not data or "entities" not in data:
			
 
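				+                # Match a single-level {"entities": [...]} object; the array part stops at the first ']', so nested arrays or a ']' inside a string value are not captured by this pattern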
			
 
				+                # 先尝试找到所有可能的 JSON 对象
			
 
				+                json_matches = re.findall(r'\{[^{}]*"entities"\s*:\s*\[[^\]]*\][^{}]*\}', response)
			
 
				+                for json_str in json_matches:
			
 
				+                    try:
			
 
				+                        data = json.loads(json_str)
			
 
				+                        if "entities" in data:
			
 
				+                            break
			
 
				+                    except json.JSONDecodeError:
			
 
				+                        continue
			
 
				+            
			
 
				+            # 方法3：尝试更宽松的正则匹配（处理多行和嵌套）
			
 
				             if not data or "entities" not in data:
			
 
				-                # 匹配 {"entities": [...]} 格式
			
 
				-                json_match = re.search(r'\{\s*"entities"\s*:\s*\[[\s\S]*?\]\s*\}', response)
			
 
				+                # 匹配从 {"entities" 开始到最后一个 ]} 的内容
			
 
				+                json_match = re.search(r'\{\s*"entities"\s*:\s*\[[\s\S]*\]\s*\}', response)
			
 
				                 if json_match:
			
 
				                     try:
			
 
				                         data = json.loads(json_match.group())
			
@@ -250,8 +285,8 @@ JSON结果:
 
				                 logger.warning(f"分块 {i+1} Ollama 返回为空")
			
 
				                 continue
			
 
				             
			
 
				-            # 打印前 500 字符用于调试
			
 
				-            logger.debug(f"分块 {i+1} LLM 响应: {response[:500]}...")
			
 
				+            # 打印完整响应用于调试
			
 
				+            logger.debug(f"分块 {i+1} LLM 完整响应:\n{response}\n{'='*50}")
			
 
				             
			
 
				             # 解析结果
			
 
				             entities = self._parse_llm_response(response, chunk["start_pos"])