ner.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
"""
NER routes: HTTP endpoints for named-entity extraction.
"""
  4. from fastapi import APIRouter, HTTPException
  5. from loguru import logger
  6. import time
  7. from ..models import NerRequest, NerResponse, EntityInfo
  8. from ..services.ner_service import ner_service
  9. router = APIRouter()
  10. @router.post("/extract", response_model=NerResponse)
  11. async def extract_entities(request: NerRequest):
  12. """
  13. 从文本中提取命名实体
  14. """
  15. start_time = time.time()
  16. try:
  17. logger.info(f"开始提取实体: document_id={request.document_id}, text_length={len(request.text)}")
  18. # 验证文本长度
  19. if len(request.text) > 50000:
  20. raise HTTPException(status_code=400, detail="文本长度超过限制(最大50000字符)")
  21. # 调用 NER 服务
  22. entities = await ner_service.extract_entities(
  23. text=request.text,
  24. entity_types=request.entity_types
  25. )
  26. # 如果需要提取关系
  27. relations = []
  28. if request.extract_relations and len(entities) > 1:
  29. from ..services.relation_service import relation_service
  30. relations = await relation_service.extract_relations(
  31. text=request.text,
  32. entities=entities
  33. )
  34. processing_time = int((time.time() - start_time) * 1000)
  35. logger.info(f"实体提取完成: document_id={request.document_id}, "
  36. f"entity_count={len(entities)}, relation_count={len(relations)}, "
  37. f"processing_time={processing_time}ms")
  38. return NerResponse.success_response(
  39. document_id=request.document_id,
  40. entities=entities,
  41. relations=relations,
  42. processing_time=processing_time
  43. )
  44. except HTTPException:
  45. raise
  46. except Exception as e:
  47. logger.error(f"实体提取失败: document_id={request.document_id}, error={str(e)}")
  48. return NerResponse.error_response(
  49. document_id=request.document_id,
  50. error_message=str(e)
  51. )