| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- """
- NER 服务配置
- """
- import os
- from pydantic_settings import BaseSettings
- from typing import Optional, List
class Settings(BaseSettings):
    """Application settings for the NER service.

    Field values are loaded by pydantic-settings; the inner ``Config`` points
    it at a UTF-8 encoded ``.env`` file, and the defaults below apply when no
    override is supplied.
    """

    # --- Service ---
    app_name: str = "NER Service"
    app_version: str = "1.0.0"
    debug: bool = False
    host: str = "0.0.0.0"
    port: int = 8001

    # --- NER backend selection ---
    #   rule:     simple rule-based NER (for development / testing)
    #   ollama:   local Ollama LLM
    #   deepseek: DeepSeek API on Alibaba Cloud Bailian (recommended)
    ner_model: str = "deepseek"
    use_gpu: bool = False
    max_text_length: int = 50000

    # --- Ollama (used when ner_model == "ollama") ---
    ollama_url: str = "http://localhost:11434"
    # qwen3 "no-think" variant: emits the result directly, without reasoning.
    ollama_model: str = "hoangquan456/qwen3-nothink:4b"
    # Seconds; CPU-only inference needs a generous timeout.
    ollama_timeout: int = 180

    # --- UniversalNER ---
    # Per the original note, enabled automatically when ollama_model contains
    # 'universal-ner' (that switch lives outside this class).
    universal_ner_model: str = "zeffmuks/universal-ner"

    # --- Text chunking (long-document handling) ---
    chunk_size: int = 2000  # maximum characters per chunk
    chunk_overlap: int = 200  # characters shared between adjacent chunks

    # --- DeepSeek API (Alibaba Cloud Bailian platform) ---
    # SECURITY: never commit a real credential. The key must be supplied via
    # the DEEPSEEK_API_KEY environment variable or the .env file. The key that
    # was previously hard-coded here must be treated as leaked and rotated.
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://dashscope.aliyuncs.com/compatible-mode"
    deepseek_model: str = "deepseek-v3.2-exp"
    deepseek_timeout: int = 600  # presumably seconds, like ollama_timeout
    deepseek_temperature: float = 0.1  # low temperature suits NER extraction
    deepseek_max_tokens: int = 4096
    deepseek_max_retries: int = 3

    # --- Entity types ---
    entity_types: List[str] = [
        "PERSON",   # person name
        "ORG",      # institution / organisation
        "LOC",      # location
        "DATE",     # date
        "NUMBER",   # numeric value
        "DEVICE",   # device / equipment
        "PROJECT",  # project
        "METHOD",   # method / standard
    ]

    # --- Logging ---
    log_level: str = "INFO"

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
# Module-level settings instance, constructed once when this module is
# imported.
settings: Settings = Settings()
|