|
|
@@ -195,8 +195,16 @@ class DeepSeekService:
|
|
|
# OpenAI 格式响应
|
|
|
choices = result.get("choices", [])
|
|
|
if choices:
|
|
|
- message = choices[0].get("message", {})
|
|
|
- return message.get("content", "")
|
|
|
+ choice = choices[0]
|
|
|
+ message = choice.get("message", {})
|
|
|
+ content = message.get("content", "")
|
|
|
+
|
|
|
+ # 检查是否因为 max_tokens 被截断
|
|
|
+ finish_reason = choice.get("finish_reason", "")
|
|
|
+ if finish_reason == "length":
|
|
|
+ logger.warning(f"API 响应被截断 (finish_reason=length), 考虑增加 max_tokens 或减小分块大小")
|
|
|
+
|
|
|
+ return content
|
|
|
return None
|
|
|
|
|
|
except httpx.TimeoutException:
|
|
|
@@ -221,8 +229,9 @@ class DeepSeekService:
|
|
|
entities = []
|
|
|
|
|
|
try:
|
|
|
- # 移除 markdown code block 标记
|
|
|
- response = re.sub(r'```json\s*', '', response)
|
|
|
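+ # Strip markdown code-fence markers: the opening ```json is matched case-insensitively, and a closing ``` is removed only at the end of the string (re.MULTILINE is not used)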
|
|
|
+ response = re.sub(r'```json\s*\n?', '', response, flags=re.IGNORECASE)
|
|
|
+ response = re.sub(r'\n?```\s*$', '', response)
|
|
|
response = re.sub(r'```\s*', '', response)
|
|
|
response = response.strip()
|
|
|
|
|
|
@@ -230,20 +239,47 @@ class DeepSeekService:
|
|
|
data = None
|
|
|
try:
|
|
|
data = json.loads(response)
|
|
|
- except json.JSONDecodeError:
|
|
|
- pass
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
+ logger.debug(f"直接解析 JSON 失败: {e}")
|
|
|
|
|
|
- # 方法2:查找 JSON 对象
|
|
|
+ # 方法2:查找 JSON 对象(使用更宽松的正则)
|
|
|
if not data or "entities" not in data:
|
|
|
- json_match = re.search(r'\{\s*"entities"\s*:\s*\[[\s\S]*\]\s*\}', response)
|
|
|
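+ # Match from a `{` (with no nested braces before "entities") up to the first `]` followed by `}`; the array body is matched non-greedily, not up to the last `}`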
|
|
|
+ json_match = re.search(r'\{[^{}]*"entities"\s*:\s*\[[\s\S]*?\]\s*\}', response)
|
|
|
if json_match:
|
|
|
try:
|
|
|
data = json.loads(json_match.group())
|
|
|
- except json.JSONDecodeError:
|
|
|
- pass
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
+ logger.debug(f"正则匹配 JSON 解析失败: {e}")
|
|
|
|
|
|
+ # 方法3:尝试提取 entities 数组
|
|
|
if not data or "entities" not in data:
|
|
|
- logger.warning(f"未找到有效的 entities JSON, response={response[:300]}...")
|
|
|
+ array_match = re.search(r'"entities"\s*:\s*(\[[\s\S]*\])', response)
|
|
|
+ if array_match:
|
|
|
+ try:
|
|
|
+ entity_list = json.loads(array_match.group(1))
|
|
|
+ data = {"entities": entity_list}
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
+ logger.debug(f"提取 entities 数组失败: {e}")
|
|
|
+
|
|
|
+ # 方法4:处理被截断的 JSON,尝试逐个解析完整的实体对象
|
|
|
+ if not data or "entities" not in data:
|
|
|
+ logger.debug("尝试从截断的 JSON 中提取完整实体...")
|
|
|
+ entity_pattern = r'\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"type"\s*:\s*"([^"]+)"\s*,\s*"charStart"\s*:\s*(\d+)\s*,\s*"charEnd"\s*:\s*(\d+)\s*\}'
|
|
|
+ matches = re.findall(entity_pattern, response)
|
|
|
+ if matches:
|
|
|
+ data = {"entities": []}
|
|
|
+ for match in matches:
|
|
|
+ data["entities"].append({
|
|
|
+ "name": match[0],
|
|
|
+ "type": match[1],
|
|
|
+ "charStart": int(match[2]),
|
|
|
+ "charEnd": int(match[3])
|
|
|
+ })
|
|
|
+ logger.info(f"从截断 JSON 中恢复了 {len(matches)} 个实体")
|
|
|
+
|
|
|
+ if not data or "entities" not in data:
|
|
|
+ logger.warning(f"未找到有效的 entities JSON, response_length={len(response)}, response_preview={response[:500]}...")
|
|
|
return entities
|
|
|
|
|
|
entity_list = data.get("entities", [])
|