|
@@ -1512,7 +1512,7 @@ main() {
|
|
|
check_service
|
|
check_service
|
|
|
register_test_user
|
|
register_test_user
|
|
|
|
|
|
|
|
- print_header "步骤 1/2: 文件上传"
|
|
|
|
|
|
|
+ print_header "步骤 1/3: 文件上传"
|
|
|
print_info "上传后将自动触发: RAG向量化 + 结构化解析 + NER提取"
|
|
print_info "上传后将自动触发: RAG向量化 + 结构化解析 + NER提取"
|
|
|
test_upload
|
|
test_upload
|
|
|
|
|
|
|
@@ -1525,30 +1525,67 @@ main() {
|
|
|
exit 1
|
|
exit 1
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
- print_header "步骤 2/2: 轮询任务进度直到完成"
|
|
|
|
|
- print_info "正在等待所有处理阶段完成..."
|
|
|
|
|
- print_info "阶段: 文本解析 → RAG向量化 → 结构化解析 → NER实体提取 → 图构建"
|
|
|
|
|
|
|
+ print_header "步骤 2/3: 等待基础解析完成"
|
|
|
|
|
+ if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
|
|
|
|
|
+ print_error "基础解析未完成,终止测试"
|
|
|
|
|
+ exit 1
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ print_header "步骤 3/3: 等待后台自动处理"
|
|
|
|
|
+ print_info "阶段: RAG向量化 → 结构化解析 → NER实体提取 → 图构建"
|
|
|
print_info "NER 提取可能需要 5-10 分钟,请耐心等待..."
|
|
print_info "NER 提取可能需要 5-10 分钟,请耐心等待..."
|
|
|
echo ""
|
|
echo ""
|
|
|
|
|
|
|
|
- # 轮询任务进度直到完成(最多等待15分钟)
|
|
|
|
|
- if poll_task_progress "$DOCUMENT_ID" 300 3; then
|
|
|
|
|
- echo ""
|
|
|
|
|
- print_header "查看最终结果"
|
|
|
|
|
|
|
+ # 尝试使用任务中心轮询(如果数据库支持)
|
|
|
|
|
+ # 否则使用简单的轮询等待 GraphNode 数量变化
|
|
|
|
|
+ print_info "轮询等待 NER 完成..."
|
|
|
|
|
+
|
|
|
|
|
+ local MAX_WAIT=180 # 最多等待 6 分钟(180 * 2秒)
|
|
|
|
|
+ local LAST_COUNT=0
|
|
|
|
|
+ local STABLE_COUNT=0
|
|
|
|
|
+
|
|
|
|
|
+ for ((i=1; i<=MAX_WAIT; i++)); do
|
|
|
|
|
+ # 获取 GraphNode 数量
|
|
|
|
|
+ NODE_RESPONSE=$(curl -s "${GRAPH_URL}/documents/${DOCUMENT_ID}/nodes" --connect-timeout 5)
|
|
|
|
|
|
|
|
- # 查看结构化解析结果
|
|
|
|
|
- print_step "结构化解析结果"
|
|
|
|
|
- test_get_images "$DOCUMENT_ID"
|
|
|
|
|
- test_get_tables "$DOCUMENT_ID"
|
|
|
|
|
|
|
+ if [ "$JQ_AVAILABLE" = true ]; then
|
|
|
|
|
+ CURRENT_COUNT=$(echo "$NODE_RESPONSE" | jq -r '.data | length' 2>/dev/null || echo "0")
|
|
|
|
|
+ else
|
|
|
|
|
+ CURRENT_COUNT="?"
|
|
|
|
|
+ fi
|
|
|
|
|
|
|
|
- # 查看 GraphNode
|
|
|
|
|
- print_step "NER 提取结果 (GraphNode)"
|
|
|
|
|
- test_get_graph_nodes "$DOCUMENT_ID"
|
|
|
|
|
- else
|
|
|
|
|
- print_error "任务未在预期时间内完成"
|
|
|
|
|
- print_info "可以稍后使用以下命令查看进度:"
|
|
|
|
|
- print_info " $0 --task --doc-id $DOCUMENT_ID"
|
|
|
|
|
- fi
|
|
|
|
|
|
|
+ # 显示进度
|
|
|
|
|
+ echo -ne "\r\033[K[${i}/${MAX_WAIT}] GraphNode 数量: ${CURRENT_COUNT} "
|
|
|
|
|
+
|
|
|
|
|
+ # 检查是否稳定(连续 5 次数量不变且大于 0)
|
|
|
|
|
+ if [ "$CURRENT_COUNT" != "?" ] && [ "$CURRENT_COUNT" -gt 0 ]; then
|
|
|
|
|
+ if [ "$CURRENT_COUNT" = "$LAST_COUNT" ]; then
|
|
|
|
|
+ STABLE_COUNT=$((STABLE_COUNT + 1))
|
|
|
|
|
+ if [ $STABLE_COUNT -ge 5 ]; then
|
|
|
|
|
+ echo ""
|
|
|
|
|
+ print_success "NER 处理完成! 共提取 $CURRENT_COUNT 个实体"
|
|
|
|
|
+ break
|
|
|
|
|
+ fi
|
|
|
|
|
+ else
|
|
|
|
|
+ STABLE_COUNT=0
|
|
|
|
|
+ fi
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ LAST_COUNT=$CURRENT_COUNT
|
|
|
|
|
+ sleep 2
|
|
|
|
|
+ done
|
|
|
|
|
+
|
|
|
|
|
+ echo ""
|
|
|
|
|
+ print_header "查看最终结果"
|
|
|
|
|
+
|
|
|
|
|
+ # 查看结构化解析结果
|
|
|
|
|
+ print_step "结构化解析结果"
|
|
|
|
|
+ test_get_images "$DOCUMENT_ID"
|
|
|
|
|
+ test_get_tables "$DOCUMENT_ID"
|
|
|
|
|
+
|
|
|
|
|
+ # 查看 GraphNode
|
|
|
|
|
+ print_step "NER 提取结果 (GraphNode)"
|
|
|
|
|
+ test_get_graph_nodes "$DOCUMENT_ID"
|
|
|
;;
|
|
;;
|
|
|
esac
|
|
esac
|
|
|
|
|
|