| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- """
- NER 路由
- """
- from fastapi import APIRouter, HTTPException
- from loguru import logger
- import time
- from ..models import NerRequest, NerResponse, EntityInfo
- from ..services.ner_service import ner_service
- router = APIRouter()
@router.post("/extract", response_model=NerResponse)
async def extract_entities(request: NerRequest):
    """Extract named entities, and optionally relations, from the request text.

    The handler reads ``document_id``, ``text``, ``entity_types`` and
    ``extract_relations`` from ``request``.

    Returns:
        ``NerResponse.success_response(...)`` carrying the entities, the
        relations (an empty list when relation extraction was skipped) and the
        elapsed processing time in whole milliseconds. If any exception other
        than ``HTTPException`` is raised while processing, the result of
        ``NerResponse.error_response(...)`` with ``str(e)`` as the error
        message is returned instead of propagating the exception.

    Raises:
        HTTPException: with status 400 when the text is longer than 50000
            characters.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    try:
        logger.info(f"开始提取实体: document_id={request.document_id}, text_length={len(request.text)}")

        # Reject over-long input before doing any extraction work.
        if len(request.text) > 50000:
            raise HTTPException(status_code=400, detail="文本长度超过限制(最大50000字符)")

        # Delegate entity extraction to the NER service.
        entities = await ner_service.extract_entities(
            text=request.text,
            entity_types=request.entity_types
        )

        # Relations are only extracted on request and when there is more than
        # one entity to relate.
        relations = []
        if request.extract_relations and len(entities) > 1:
            # NOTE(review): imported lazily here rather than at module level —
            # presumably to avoid loading the relation service unless needed
            # (or an import cycle); confirm before hoisting.
            from ..services.relation_service import relation_service
            relations = await relation_service.extract_relations(
                text=request.text,
                entities=entities
            )

        # Elapsed time truncated to whole milliseconds.
        processing_time = int((time.perf_counter() - start_time) * 1000)

        logger.info(f"实体提取完成: document_id={request.document_id}, "
                    f"entity_count={len(entities)}, relation_count={len(relations)}, "
                    f"processing_time={processing_time}ms")

        return NerResponse.success_response(
            document_id=request.document_id,
            entities=entities,
            relations=relations,
            processing_time=processing_time
        )

    except HTTPException:
        # Let FastAPI turn deliberate HTTP errors (e.g. the 400 above) into
        # the proper HTTP response instead of wrapping them below.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level like logger.error did, but also
        # attaches the traceback so the failure can actually be diagnosed.
        logger.exception(f"实体提取失败: document_id={request.document_id}, error={str(e)}")
        return NerResponse.error_response(
            document_id=request.document_id,
            error_message=str(e)
        )
|