hace 1 mes · 339a82b359
--- a/python-services/ner-service/app/services/ollama_service.py
+++ b/python-services/ner-service/app/services/ollama_service.py
@@ -76,25 +76,22 @@ class OllamaService:
 
				         types = entity_types or settings.entity_types
			
 
				         types_desc = ", ".join(types)
			
 
				         
			
 
				-        prompt = f"""/no_think
			
 
				-你是一个专业的命名实体识别(NER)系统。请从以下文本中提取实体，直接输出JSON，不要解释。
			
 
				+        # 示例帮助模型理解格式
			
 
				+        example = '{"entities": [{"name": "成都市", "type": "LOC", "charStart": 10, "charEnd": 13}, {"name": "2024年5月", "type": "DATE", "charStart": 0, "charEnd": 7}]}'
			
 
				+        
			
 
				+        prompt = f"""从文本中提取命名实体，只输出JSON。
			
 
				 
			
 
				-## 实体类型: {types_desc}
			
 
				-- PERSON: 人名
			
 
				-- ORG: 机构/公司
			
 
				-- LOC: 地点
			
 
				-- DATE: 日期
			
 
				-- NUMBER: 数值（带单位）
			
 
				-- DEVICE: 设备仪器
			
 
				-- PROJECT: 项目/工程
			
 
				-- METHOD: 方法/标准
			
 
				+实体类型: {types_desc}
			
 
				 
			
 
				-## 输出格式（严格JSON，不要其他内容）:
			
 
				-{{"entities": [{{"name": "实体名", "type": "类型", "charStart": 0, "charEnd": 0}}]}}
			
 
				+输出格式示例:
			
 
				+{example}
			
 
				 
			
 
				-## 文本:
			
 
				+文本内容:
			
 
				 {text}
			
 
				 
			
 
				+JSON结果:
			
 
				+```json"""
			
 
				+
			
 
				 ## JSON结果:
			
 
				 """
			
 
				         return prompt
			
@@ -141,10 +138,14 @@ class OllamaService:
 
				             # Qwen3 可能有 thinking 模式，需要移除 <think>...</think> 部分
			
 
				             response = re.sub(r'<think>[\s\S]*?</think>', '', response)
			
 
				             
			
 
				+            # 移除 markdown code block 标记
			
 
				+            response = re.sub(r'```json\s*', '', response)
			
 
				+            response = re.sub(r'```\s*$', '', response)
			
 
				+            
			
 
				             # 尝试提取 JSON 部分
			
 
				             json_match = re.search(r'\{[\s\S]*\}', response)
			
 
				             if not json_match:
			
 
				-                logger.warning("LLM 响应中未找到 JSON")
			
 
				+                logger.warning(f"LLM 响应中未找到 JSON, response={response[:300]}...")
			
 
				                 return entities
			
 
				             
			
 
				             json_str = json_match.group()
			
@@ -233,6 +234,9 @@ class OllamaService:
 
				                 logger.warning(f"分块 {i+1} Ollama 返回为空")
			
 
				                 continue
			
 
				             
			
 
				+            # 打印前 500 字符用于调试
			
 
				+            logger.debug(f"分块 {i+1} LLM 响应: {response[:500]}...")
			
 
				+            
			
 
				             # 解析结果
			
 
				             entities = self._parse_llm_response(response, chunk["start_pos"])