|
|
@@ -1,8 +1,9 @@
|
|
|
#!/bin/bash
|
|
|
|
|
|
# ============================================
|
|
|
-# 文件上传接口测试脚本
|
|
|
+# 文件上传端到端测试脚本
|
|
|
# ============================================
|
|
|
+# 测试流程: 上传 -> 解析等待 -> 向量提取 -> NER提取
|
|
|
# 使用方法: ./test_upload_api.sh [host] [port]
|
|
|
# 示例: ./test_upload_api.sh localhost 5232
|
|
|
# ============================================
|
|
|
@@ -14,6 +15,9 @@ BASE_URL="http://${HOST}:${PORT}"
|
|
|
UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
|
|
|
STATUS_URL="${BASE_URL}/parse/status"
|
|
|
REGISTER_URL="${BASE_URL}/auth/register"
|
|
|
+TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
|
|
|
+RAG_INDEX_URL="${BASE_URL}/api/rag/index"
|
|
|
+NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
|
|
|
|
|
|
# 测试文件路径(相对于脚本所在目录)
|
|
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
@@ -25,12 +29,15 @@ TEST_USERNAME="testuser_${TIMESTAMP}"
|
|
|
TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
|
|
|
TEST_PASSWORD="Test123456!"
|
|
|
USER_ID=""
|
|
|
+DOCUMENT_ID=""
|
|
|
+DOCUMENT_TEXT=""
|
|
|
|
|
|
# 颜色定义
|
|
|
RED='\033[0;31m'
|
|
|
GREEN='\033[0;32m'
|
|
|
YELLOW='\033[1;33m'
|
|
|
BLUE='\033[0;34m'
|
|
|
+CYAN='\033[0;36m'
|
|
|
NC='\033[0m' # No Color
|
|
|
|
|
|
# 输出函数
|
|
|
@@ -40,6 +47,10 @@ print_header() {
|
|
|
echo -e "${BLUE}============================================${NC}"
|
|
|
}
|
|
|
|
|
|
# Print a step banner: a blank line followed by ">>> <title>" in cyan.
# Globals:   CYAN, NC (read) - colour codes stored as literal "\033[..." text
# Arguments: $1 - step title
# Outputs:   the banner on stdout
print_step() {
    # %b expands the backslash escapes held in the colour variables, while %s
    # prints the title verbatim so backslashes inside it are not reinterpreted.
    printf '\n%b>>> %s%b\n' "$CYAN" "$1" "$NC"
}
|
|
|
+
|
|
|
print_success() {
|
|
|
echo -e "${GREEN}✓ $1${NC}"
|
|
|
}
|
|
|
@@ -158,7 +169,7 @@ register_test_user() {
|
|
|
|
|
|
# 测试文件上传
|
|
|
test_upload() {
|
|
|
- print_header "测试文件上传接口"
|
|
|
+ print_step "文件上传"
|
|
|
|
|
|
print_info "上传URL: $UPLOAD_URL"
|
|
|
print_info "用户ID: $USER_ID"
|
|
|
@@ -190,34 +201,31 @@ test_upload() {
|
|
|
if [ "$HTTP_CODE" = "200" ]; then
|
|
|
print_success "文件上传成功!"
|
|
|
|
|
|
- # 提取documentId用于后续状态查询
|
|
|
+ # 提取documentId用于后续操作
|
|
|
if [ "$JQ_AVAILABLE" = true ]; then
|
|
|
DOCUMENT_ID=$(echo "$BODY" | jq -r '.data.documentId // .documentId // empty' 2>/dev/null)
|
|
|
if [ -n "$DOCUMENT_ID" ] && [ "$DOCUMENT_ID" != "null" ]; then
|
|
|
print_info "文档ID: $DOCUMENT_ID"
|
|
|
echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
|
|
|
-
|
|
|
- # 查询解析状态
|
|
|
- test_parse_status "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "无法从响应中获取文档ID"
|
|
|
+ return 1
|
|
|
fi
|
|
|
fi
|
|
|
+ return 0
|
|
|
else
|
|
|
print_error "文件上传失败 (HTTP $HTTP_CODE)"
|
|
|
+ return 1
|
|
|
fi
|
|
|
}
|
|
|
|
|
|
-# 测试解析状态查询
|
|
|
+# 测试解析状态查询(单次)
|
|
|
test_parse_status() {
|
|
|
local DOC_ID=$1
|
|
|
|
|
|
- print_header "查询解析状态"
|
|
|
-
|
|
|
print_info "文档ID: $DOC_ID"
|
|
|
print_info "状态URL: ${STATUS_URL}/${DOC_ID}"
|
|
|
|
|
|
- # 等待一会儿让解析任务开始
|
|
|
- sleep 2
|
|
|
-
|
|
|
RESPONSE=$(curl -s -w "\n%{http_code}" \
|
|
|
-X GET "${STATUS_URL}/${DOC_ID}" \
|
|
|
--connect-timeout 10)
|
|
|
@@ -244,25 +252,24 @@ test_parse_status() {
|
|
|
# 轮询解析状态直到完成
|
|
|
poll_parse_status() {
|
|
|
local DOC_ID=$1
|
|
|
- local MAX_ATTEMPTS=${2:-30}
|
|
|
- local INTERVAL=${3:-5}
|
|
|
+ local MAX_ATTEMPTS=${2:-60}
|
|
|
+ local INTERVAL=${3:-3}
|
|
|
|
|
|
- print_header "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"
|
|
|
+ print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"
|
|
|
|
|
|
for ((i=1; i<=MAX_ATTEMPTS; i++)); do
|
|
|
- print_info "第 $i 次查询..."
|
|
|
-
|
|
|
RESPONSE=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10)
|
|
|
|
|
|
if [ "$JQ_AVAILABLE" = true ]; then
|
|
|
STATUS=$(echo "$RESPONSE" | jq -r '.data.parseStatus // .parseStatus // empty' 2>/dev/null)
|
|
|
- echo "当前状态: $STATUS"
|
|
|
+ echo -ne "\r第 $i 次查询... 状态: $STATUS "
|
|
|
|
|
|
if [ "$STATUS" = "2" ] || [ "$STATUS" = "COMPLETED" ]; then
|
|
|
+ echo ""
|
|
|
print_success "解析完成!"
|
|
|
- echo "$RESPONSE" | jq .
|
|
|
return 0
|
|
|
elif [ "$STATUS" = "3" ] || [ "$STATUS" = "FAILED" ]; then
|
|
|
+ echo ""
|
|
|
print_error "解析失败!"
|
|
|
echo "$RESPONSE" | jq .
|
|
|
return 1
|
|
|
@@ -274,30 +281,196 @@ poll_parse_status() {
|
|
|
sleep $INTERVAL
|
|
|
done
|
|
|
|
|
|
- print_error "轮询超时"
|
|
|
+ echo ""
|
|
|
+ print_error "轮询超时,解析未完成"
|
|
|
return 1
|
|
|
}
|
|
|
|
|
|
# Fetch the text-storage record of a parsed document and load its text.
# Globals:   TEXT_STORAGE_URL, JQ_AVAILABLE, YELLOW, NC (read)
#            DOCUMENT_TEXT (written: cleared first, filled only on success)
# Arguments: $1 - document ID
# Outputs:   progress messages, the HTTP status and a 200-character preview
# Returns:   0 when the text was loaded into DOCUMENT_TEXT, 1 otherwise
# Note:      the path returned by the service is opened on the local
#            filesystem, so the script must see the same files as the service.
get_document_text() {
    local doc_id=$1
    local response http_code body file_path

    # Never leave text from an earlier call behind when this call fails.
    DOCUMENT_TEXT=""

    print_step "获取文档解析文本"

    if [ -z "$doc_id" ]; then
        print_error "文档ID为空"
        return 1
    fi

    print_info "文档ID: $doc_id"
    print_info "请求URL: ${TEXT_STORAGE_URL}/${doc_id}"

    # curl appends "\n<status>" after the body; split on the last newline.
    response=$(curl -s -w "\n%{http_code}" \
        -X GET "${TEXT_STORAGE_URL}/${doc_id}" \
        --connect-timeout 10)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"

    if [ "$http_code" != "200" ]; then
        print_error "获取文本存储失败 (HTTP $http_code)"
        printf '%s\n' "$body"
        return 1
    fi

    # Without jq the file path cannot be extracted; report that explicitly
    # instead of falling through with a success status and no text.
    if [ "$JQ_AVAILABLE" != true ]; then
        print_error "未安装 jq,无法解析响应中的文件路径"
        return 1
    fi

    file_path=$(jq -r '.data.filePath // empty' <<<"$body" 2>/dev/null)
    if [ -z "$file_path" ] || [ "$file_path" = "null" ]; then
        print_error "响应中无文件路径"
        jq . <<<"$body" 2>/dev/null
        return 1
    fi

    print_success "获取文本存储记录成功!"
    print_info "文件路径: $file_path"

    if [ ! -f "$file_path" ]; then
        print_error "文件不存在: $file_path"
        return 1
    fi

    # A failed read (e.g. permission denied) must not be reported as success.
    if ! DOCUMENT_TEXT=$(cat -- "$file_path"); then
        DOCUMENT_TEXT=""
        print_error "无法读取文件: $file_path"
        return 1
    fi

    print_success "读取文本成功 (长度: ${#DOCUMENT_TEXT} 字符)"

    # Show the first 200 characters as a preview.
    printf '%b文本预览:%b\n' "$YELLOW" "$NC"
    printf '%s...\n' "${DOCUMENT_TEXT:0:200}"
    return 0
}
|
|
|
+
|
|
|
# Escape a string for use inside a JSON string literal (fallback without jq).
# Handles backslash, double quote, newline, carriage return, tab, backspace
# and form feed; any other control character is passed through unchanged.
_json_escape_string() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    s=${s//$'\b'/\\b}
    s=${s//$'\f'/\\f}
    printf '%s' "$s"
}

# Vector extraction: POST the document text to the RAG index endpoint.
# Globals:   RAG_INDEX_URL, JQ_AVAILABLE, YELLOW, NC (read)
# Arguments: $1 - document ID
#            $2 - document text to index
# Outputs:   progress messages, the HTTP status and the response body
# Returns:   0 on HTTP 200, 1 otherwise
test_vector_extraction() {
    local doc_id=$1
    local text=$2
    local request_body response http_code body chunk_count

    print_step "向量提取 (RAG 索引)"

    if [ -z "$doc_id" ]; then
        print_error "文档ID为空"
        return 1
    fi

    print_info "文档ID: $doc_id"
    print_info "文本长度: ${#text} 字符"
    print_info "请求URL: $RAG_INDEX_URL"

    # Build the request JSON. The text is streamed through stdin rather than
    # passed as a command-line argument, because a large document would exceed
    # the kernel's argument-size limit ("Argument list too long").
    if [ "$JQ_AVAILABLE" = true ]; then
        request_body=$(printf '%s' "$text" \
            | jq -Rs -c --arg docId "$doc_id" '{documentId: $docId, text: .}')
    else
        request_body="{\"documentId\":\"$(_json_escape_string "$doc_id")\",\"text\":\"$(_json_escape_string "$text")\"}"
    fi

    if [ -z "$request_body" ]; then
        print_error "构建请求体失败"
        return 1
    fi

    # --data-binary @- reads the body from stdin and sends it byte-for-byte.
    response=$(printf '%s' "$request_body" | curl -s -w "\n%{http_code}" \
        -X POST "$RAG_INDEX_URL" \
        -H "Content-Type: application/json" \
        --data-binary @- \
        --connect-timeout 30 \
        --max-time 300)

    # curl appends "\n<status>" after the body; split on the last newline.
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    # Pretty-print when possible, otherwise show the raw body.
    if [ "$JQ_AVAILABLE" = true ]; then
        jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
    else
        printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ]; then
        print_error "向量提取失败 (HTTP $http_code)"
        return 1
    fi

    chunk_count=""
    if [ "$JQ_AVAILABLE" = true ]; then
        chunk_count=$(jq -r '.data.chunkCount // empty' <<<"$body" 2>/dev/null)
    fi

    if [ -n "$chunk_count" ] && [ "$chunk_count" != "null" ]; then
        print_success "向量提取成功! 生成 $chunk_count 个分块"
    else
        print_success "向量提取成功!"
    fi
    return 0
}
|
|
|
+
|
|
|
# NER extraction: trigger named-entity recognition for a document.
# Globals:   NER_DOCUMENT_URL, JQ_AVAILABLE, YELLOW, NC (read)
# Arguments: $1 - document ID
# Outputs:   progress messages, the HTTP status and the response body
# Returns:   0 on HTTP 200, 1 otherwise (including an empty document ID)
test_ner_extraction() {
    local doc_id=$1
    local response http_code body entity_count relation_count

    print_step "NER 提取 (命名实体识别)"

    # An empty ID would POST to the bare collection URL; refuse it up front.
    if [ -z "$doc_id" ]; then
        print_error "文档ID为空"
        return 1
    fi

    print_info "文档ID: $doc_id"
    print_info "请求URL: ${NER_DOCUMENT_URL}/${doc_id}"

    response=$(curl -s -w "\n%{http_code}" \
        -X POST "${NER_DOCUMENT_URL}/${doc_id}" \
        -H "Content-Type: application/json" \
        --connect-timeout 30 \
        --max-time 300)

    # curl appends "\n<status>" after the body; split on the last newline.
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    # Pretty-print when possible, otherwise show the raw body.
    if [ "$JQ_AVAILABLE" = true ]; then
        jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
    else
        printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ]; then
        print_error "NER 提取失败 (HTTP $http_code)"
        return 1
    fi

    entity_count=""
    relation_count=""
    if [ "$JQ_AVAILABLE" = true ]; then
        entity_count=$(jq -r '.data.entityCount // empty' <<<"$body" 2>/dev/null)
        relation_count=$(jq -r '.data.relationCount // empty' <<<"$body" 2>/dev/null)
    fi

    if [ -n "$entity_count" ] && [ "$entity_count" != "null" ]; then
        # The relation count may be absent from the response; show N/A then
        # instead of leaving a dangling label.
        print_success "NER 提取成功! 实体: $entity_count, 关系: ${relation_count:-N/A}"
    else
        print_success "NER 提取成功!"
    fi
    return 0
}
|
|
|
+
|
|
|
# 显示使用帮助
|
|
|
show_help() {
|
|
|
echo "使用方法: $0 [选项] [host] [port]"
|
|
|
echo ""
|
|
|
+ echo "端到端测试流程: 上传文件 -> 等待解析 -> 向量提取 -> NER提取"
|
|
|
+ echo ""
|
|
|
echo "选项:"
|
|
|
- echo " -h, --help 显示帮助信息"
|
|
|
- echo " -p, --poll 上传后轮询解析状态直到完成"
|
|
|
- echo " -s, --status 仅查询上次上传的文档状态"
|
|
|
+ echo " -h, --help 显示帮助信息"
|
|
|
+ echo " -e, --e2e 执行完整端到端测试 (默认)"
|
|
|
+ echo " -u, --upload-only 仅执行上传测试"
|
|
|
+ echo " -s, --status 仅查询上次上传的文档状态"
|
|
|
+ echo " -v, --vector 仅执行向量提取(使用上次的文档)"
|
|
|
+ echo " -n, --ner 仅执行NER提取(使用上次的文档)"
|
|
|
echo ""
|
|
|
echo "示例:"
|
|
|
- echo " $0 # 使用默认配置 (localhost:5232)"
|
|
|
+ echo " $0 # 使用默认配置执行完整端到端测试"
|
|
|
echo " $0 192.168.1.100 5232 # 指定服务器地址"
|
|
|
- echo " $0 -p # 上传并轮询状态"
|
|
|
+ echo " $0 -u # 仅上传文件"
|
|
|
echo " $0 -s # 查询上次上传的状态"
|
|
|
+ echo " $0 -v # 对上次文档执行向量提取"
|
|
|
+ echo " $0 -n # 对上次文档执行NER提取"
|
|
|
}
|
|
|
|
|
|
# 主函数
|
|
|
main() {
|
|
|
- local POLL_STATUS=false
|
|
|
- local STATUS_ONLY=false
|
|
|
+ local MODE="e2e" # 默认执行完整端到端测试
|
|
|
|
|
|
# 解析参数
|
|
|
while [[ $# -gt 0 ]]; do
|
|
|
@@ -306,19 +479,39 @@ main() {
|
|
|
show_help
|
|
|
exit 0
|
|
|
;;
|
|
|
- -p|--poll)
|
|
|
- POLL_STATUS=true
|
|
|
+ -e|--e2e)
|
|
|
+ MODE="e2e"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ -u|--upload-only)
|
|
|
+ MODE="upload"
|
|
|
shift
|
|
|
;;
|
|
|
-s|--status)
|
|
|
- STATUS_ONLY=true
|
|
|
+ MODE="status"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ -v|--vector)
|
|
|
+ MODE="vector"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ -n|--ner)
|
|
|
+ MODE="ner"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ -p|--poll)
|
|
|
+ # 兼容旧参数,等同于e2e
|
|
|
+ MODE="e2e"
|
|
|
shift
|
|
|
;;
|
|
|
*)
|
|
|
- if [[ -z "$HOST_ARG" ]]; then
|
|
|
- HOST=$1
|
|
|
- elif [[ -z "$PORT_ARG" ]]; then
|
|
|
- PORT=$1
|
|
|
+ if [[ ! "$1" =~ ^- ]]; then
|
|
|
+ if [[ -z "$HOST_SET" ]]; then
|
|
|
+ HOST=$1
|
|
|
+ HOST_SET=true
|
|
|
+ else
|
|
|
+ PORT=$1
|
|
|
+ fi
|
|
|
fi
|
|
|
shift
|
|
|
;;
|
|
|
@@ -330,35 +523,107 @@ main() {
|
|
|
UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
|
|
|
STATUS_URL="${BASE_URL}/parse/status"
|
|
|
REGISTER_URL="${BASE_URL}/auth/register"
|
|
|
+ TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
|
|
|
+ RAG_INDEX_URL="${BASE_URL}/api/rag/index"
|
|
|
+ NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
|
|
|
|
|
|
- print_header "文件上传接口测试"
|
|
|
+ print_header "文件上传端到端测试"
|
|
|
echo "目标服务: $BASE_URL"
|
|
|
+ echo "测试模式: $MODE"
|
|
|
echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"
|
|
|
|
|
|
check_dependencies
|
|
|
|
|
|
- if [ "$STATUS_ONLY" = true ]; then
|
|
|
- if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
- DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
- test_parse_status "$DOCUMENT_ID"
|
|
|
- else
|
|
|
- print_error "未找到上次上传的文档ID"
|
|
|
- exit 1
|
|
|
- fi
|
|
|
- exit 0
|
|
|
- fi
|
|
|
-
|
|
|
- check_test_file
|
|
|
- check_service
|
|
|
- register_test_user
|
|
|
- test_upload
|
|
|
-
|
|
|
- if [ "$POLL_STATUS" = true ] && [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
- DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
- poll_parse_status "$DOCUMENT_ID"
|
|
|
- fi
|
|
|
+ # 根据模式执行不同操作
|
|
|
+ case $MODE in
|
|
|
+ status)
|
|
|
+ # 仅查询状态
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "查询解析状态"
|
|
|
+ test_parse_status "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ vector)
|
|
|
+ # 仅执行向量提取
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "向量提取测试"
|
|
|
+ if get_document_text "$DOCUMENT_ID"; then
|
|
|
+ test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
|
|
|
+ fi
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ ner)
|
|
|
+ # 仅执行NER提取
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "NER 提取测试"
|
|
|
+ test_ner_extraction "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ upload)
|
|
|
+ # 仅上传
|
|
|
+ check_test_file
|
|
|
+ check_service
|
|
|
+ register_test_user
|
|
|
+ test_upload
|
|
|
+ ;;
|
|
|
+
|
|
|
+ e2e)
|
|
|
+ # 完整端到端测试
|
|
|
+ check_test_file
|
|
|
+ check_service
|
|
|
+ register_test_user
|
|
|
+
|
|
|
+ print_header "步骤 1/4: 文件上传"
|
|
|
+ test_upload
|
|
|
+
|
|
|
+ if [ -z "$DOCUMENT_ID" ] && [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ fi
|
|
|
+
|
|
|
+ if [ -z "$DOCUMENT_ID" ]; then
|
|
|
+ print_error "无法获取文档ID,终止测试"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+
|
|
|
+ print_header "步骤 2/4: 等待解析完成"
|
|
|
+ if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
|
|
|
+ print_error "解析未完成,终止测试"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+
|
|
|
+ print_header "步骤 3/4: 向量提取"
|
|
|
+ if get_document_text "$DOCUMENT_ID"; then
|
|
|
+ test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
|
|
|
+ else
|
|
|
+ print_info "跳过向量提取(无法获取文本)"
|
|
|
+ fi
|
|
|
+
|
|
|
+ print_header "步骤 4/4: NER 提取"
|
|
|
+ test_ner_extraction "$DOCUMENT_ID"
|
|
|
+ ;;
|
|
|
+ esac
|
|
|
|
|
|
print_header "测试完成"
|
|
|
+ echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
|
|
|
+ echo "可使用以下命令进行后续操作:"
|
|
|
+ echo " $0 -s # 查询状态"
|
|
|
+ echo " $0 -v # 重新向量提取"
|
|
|
+ echo " $0 -n # 重新NER提取"
|
|
|
}
|
|
|
|
|
|
# 运行主函数
|