| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- """
- NER 路由
- """
- from fastapi import APIRouter, HTTPException
- from loguru import logger
- import time
- from ..models import NerRequest, NerResponse, EntityInfo
- from ..services.ner_service import ner_service
- router = APIRouter()
# Maximum accepted length of ``request.text``, in characters.
_MAX_TEXT_LENGTH = 50000


@router.post("/extract", response_model=NerResponse)
async def extract_entities(request: NerRequest):
    """Extract named entities, and optionally relations, from the request text.

    Args:
        request: Request payload. This handler reads ``document_id``,
            ``text``, ``entity_types`` and ``extract_relations`` from it.

    Returns:
        The result of ``NerResponse.success_response(...)`` carrying the
        entities, the relations (an empty list when relation extraction was
        not requested or at most one entity was found) and the processing
        time in milliseconds. If an unexpected exception occurs, the result
        of ``NerResponse.error_response(...)`` carrying ``str(exc)``.

    Raises:
        HTTPException: With status 400 when the text is longer than
            ``_MAX_TEXT_LENGTH`` characters. Any ``HTTPException`` raised
            while processing is propagated unchanged.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    logger.info(f"开始提取实体: document_id={request.document_id}, text_length={len(request.text)}")

    # Reject oversized input before doing any work.
    if len(request.text) > _MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=f"文本长度超过限制(最大{_MAX_TEXT_LENGTH}字符)",
        )

    try:
        # Run the NER service on the text.
        entities = await ner_service.extract_entities(
            text=request.text,
            entity_types=request.entity_types,
        )

        # Relations are only extracted on request and when there is more than
        # one entity to relate.
        relations = []
        if request.extract_relations and len(entities) > 1:
            # NOTE(review): imported here rather than at module level,
            # presumably to defer loading the relation service until it is
            # actually needed; confirm before hoisting.
            from ..services.relation_service import relation_service

            relations = await relation_service.extract_relations(
                text=request.text,
                entities=entities,
            )

        processing_time = int((time.perf_counter() - start_time) * 1000)

        logger.info(f"实体提取完成: document_id={request.document_id}, "
                    f"entity_count={len(entities)}, relation_count={len(relations)}, "
                    f"processing_time={processing_time}ms")

        # Log the full entity list.
        logger.info(f"========== 实体列表 ({len(entities)} 个) ==========")
        for i, entity in enumerate(entities, 1):
            logger.info(f" [{i}] {entity.type}: {entity.name}")

        # Log the full relation list, if any.
        if relations:
            logger.info(f"========== 关系列表 ({len(relations)} 个) ==========")
            for i, rel in enumerate(relations, 1):
                logger.info(f" [{i}] {rel.source_entity} --[{rel.relation_type}]--> {rel.target_entity}")

        return NerResponse.success_response(
            document_id=request.document_id,
            entities=entities,
            relations=relations,
            processing_time=processing_time,
        )

    except HTTPException:
        # Let HTTP errors raised during processing reach FastAPI unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message, so
        # the failure can be diagnosed from the logs.
        logger.exception(f"实体提取失败: document_id={request.document_id}, error={str(e)}")
        return NerResponse.error_response(
            document_id=request.document_id,
            error_message=str(e),
        )
|