Ver Fonte

fix: 修复 Pydantic v2 序列化问题,确保 position 正确保存

- 将所有模型从 Pydantic v1 Config 类迁移到 v2 ConfigDict
- 添加 serialize_by_alias=True 确保序列化时使用 alias
- 将 .dict() 调用替换为 .model_dump(by_alias=True)
- 增强调试日志输出 position 详情
何文松 há 1 mês atrás
pai
commit
373e96fcca

+ 16 - 18
python-services/ner-service/app/models/request.py

@@ -1,28 +1,32 @@
 """
 请求模型定义
 """
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ConfigDict
 from typing import Optional, List
 
 
 class PositionInfo(BaseModel):
     """位置信息"""
+    model_config = ConfigDict(
+        populate_by_name=True,
+        # Pydantic v2.11+: 序列化时默认使用 alias(serialize_by_alias 自 v2.11 起才支持)
+        serialize_by_alias=True
+    )
+    
     char_start: int = Field(..., alias="charStart", description="字符起始位置")
     char_end: int = Field(..., alias="charEnd", description="字符结束位置")
     line: Optional[int] = Field(None, description="所在行号")
     page: Optional[int] = Field(None, description="所在页码")
     file_id: Optional[str] = Field(None, alias="fileId", description="文件ID")
-    
-    class Config:
-        populate_by_name = True
-        # 序列化时使用 alias (charStart, charEnd)
-        by_alias = True
-        # 确保嵌套对象也使用 alias
-        json_encoders = {}
 
 
 class EntityInfo(BaseModel):
     """实体信息"""
+    model_config = ConfigDict(
+        populate_by_name=True,
+        serialize_by_alias=True
+    )
+    
     name: str = Field(..., description="实体名称")
     type: str = Field(..., description="实体类型")
     value: Optional[str] = Field(None, description="实体值")
@@ -30,29 +34,23 @@ class EntityInfo(BaseModel):
     context: Optional[str] = Field(None, description="上下文片段")
     confidence: Optional[float] = Field(None, description="置信度")
     temp_id: Optional[str] = Field(None, alias="tempId", description="临时ID")
-    
-    class Config:
-        populate_by_name = True
-        by_alias = True
 
 
 class NerRequest(BaseModel):
     """NER 请求"""
+    model_config = ConfigDict(populate_by_name=True)
+    
     document_id: Optional[str] = Field(None, alias="documentId", description="文档ID")
     text: str = Field(..., description="待提取的文本内容")
     entity_types: Optional[List[str]] = Field(None, alias="entityTypes", description="指定提取的实体类型")
     extract_relations: bool = Field(True, alias="extractRelations", description="是否提取关系")
     user_id: Optional[str] = Field(None, alias="userId", description="用户ID")
-    
-    class Config:
-        populate_by_name = True
 
 
 class RelationRequest(BaseModel):
     """关系抽取请求"""
+    model_config = ConfigDict(populate_by_name=True)
+    
     document_id: Optional[str] = Field(None, alias="documentId", description="文档ID")
     text: str = Field(..., description="原始文本内容")
     entities: List[EntityInfo] = Field(..., description="已提取的实体列表")
-    
-    class Config:
-        populate_by_name = True

+ 16 - 14
python-services/ner-service/app/models/response.py

@@ -1,28 +1,33 @@
 """
 响应模型定义
 """
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ConfigDict
 from typing import Optional, List
 from .request import EntityInfo, PositionInfo
 
 
 class RelationInfo(BaseModel):
     """关系信息"""
+    model_config = ConfigDict(
+        populate_by_name=True,
+        serialize_by_alias=True
+    )
+    
     from_entity: str = Field(..., alias="fromEntity", description="源实体名称")
     from_entity_id: Optional[str] = Field(None, alias="fromEntityId", description="源实体临时ID")
     to_entity: str = Field(..., alias="toEntity", description="目标实体名称")
     to_entity_id: Optional[str] = Field(None, alias="toEntityId", description="目标实体临时ID")
     relation_type: str = Field(..., alias="relationType", description="关系类型")
     confidence: Optional[float] = Field(None, description="置信度")
-    
-    class Config:
-        populate_by_name = True
-        # 序列化时默认使用 alias (camelCase)
-        by_alias = True
 
 
 class NerResponse(BaseModel):
     """NER 响应"""
+    model_config = ConfigDict(
+        populate_by_name=True,
+        serialize_by_alias=True
+    )
+    
     document_id: Optional[str] = Field(None, alias="documentId", description="文档ID")
     entities: List[EntityInfo] = Field(default_factory=list, description="提取的实体列表")
     relations: List[RelationInfo] = Field(default_factory=list, description="提取的关系列表")
@@ -31,11 +36,6 @@ class NerResponse(BaseModel):
     relation_count: int = Field(0, alias="relationCount", description="关系数量")
     success: bool = Field(True, description="是否成功")
     error_message: Optional[str] = Field(None, alias="errorMessage", description="错误信息")
-    
-    class Config:
-        populate_by_name = True
-        # 序列化时默认使用 alias (camelCase)
-        by_alias = True
 
     @classmethod
     def success_response(cls, document_id: str, entities: List[EntityInfo], 
@@ -61,15 +61,17 @@ class NerResponse(BaseModel):
 
 class RelationResponse(BaseModel):
     """关系抽取响应"""
+    model_config = ConfigDict(
+        populate_by_name=True,
+        serialize_by_alias=True
+    )
+    
     document_id: Optional[str] = Field(None, alias="documentId", description="文档ID")
     relations: List[RelationInfo] = Field(default_factory=list, description="提取的关系列表")
     processing_time: Optional[int] = Field(None, alias="processingTime", description="处理耗时(毫秒)")
     relation_count: int = Field(0, alias="relationCount", description="关系数量")
     success: bool = Field(True, description="是否成功")
     error_message: Optional[str] = Field(None, alias="errorMessage", description="错误信息")
-    
-    class Config:
-        populate_by_name = True
 
     @classmethod
     def success_response(cls, document_id: str, relations: List[RelationInfo], processing_time: int):

+ 7 - 3
python-services/ner-service/app/routers/ner.py

@@ -270,14 +270,18 @@ async def _process_ner_task(
             _tasks[task_id]["status"] = "completed"
             _tasks[task_id]["progress"] = 100
             _tasks[task_id]["message"] = f"处理完成: {len(all_entities)} 个实体, {len(all_relations)} 个关系"
-            result_dict = response.dict(by_alias=True)
+            # Pydantic v2: 使用 model_dump();此处显式传入 by_alias=True,不依赖 model_config 中的 serialize_by_alias
+            result_dict = response.model_dump(by_alias=True)
             _tasks[task_id]["result"] = result_dict
             _tasks[task_id]["updated_at"] = time.time()
             
-            # 调试:输出第一个实体的序列化结果
+            # 调试:输出第一个实体的序列化结果(包括 position)
             if result_dict.get("entities") and len(result_dict["entities"]) > 0:
                 first_entity = result_dict["entities"][0]
-                logger.debug(f"实体序列化示例: {first_entity}")
+                logger.info(f"实体序列化示例: name={first_entity.get('name')}, position={first_entity.get('position')}")
+                # 确保 position 被正确序列化
+                if first_entity.get('position'):
+                    logger.info(f"Position 详情: {first_entity['position']}")
         
         logger.info(f"异步 NER 任务完成: task_id={task_id}, document_id={document_id}, "
                    f"entities={len(all_entities)}, relations={len(all_relations)}, time={processing_time}ms")

+ 1 - 1
python-services/ner-service/app/services/deepseek_service.py

@@ -320,7 +320,7 @@ class DeepSeekService:
         
         # 发送实体数据事件(供调用方获取实体列表)
         yield await sse_event("entities_data", {
-            "entities": [entity.dict() for entity in all_entities],
+            "entities": [entity.model_dump(by_alias=True) for entity in all_entities],
             "total_entities": len(all_entities)
         })
     

+ 1 - 1
python-services/ner-service/app/services/ner_service.py

@@ -83,7 +83,7 @@ class NerService:
             entities = await self.extract_entities(text, entity_types)
             yield f"event: chunk_complete\ndata: {json.dumps({'total_entities': len(entities), 'progress_percent': 100}, ensure_ascii=False)}\n\n"
             # 发送实体数据事件
-            yield f"event: entities_data\ndata: {json.dumps({'entities': [e.dict() for e in entities], 'total_entities': len(entities)}, ensure_ascii=False)}\n\n"
+            yield f"event: entities_data\ndata: {json.dumps({'entities': [e.model_dump(by_alias=True) for e in entities], 'total_entities': len(entities)}, ensure_ascii=False)}\n\n"
     
     async def _extract_by_rules(
         self,