| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
#!/bin/bash
# ============================================
# End-to-end test script for the file upload API
# ============================================
# Flow:    upload -> wait for parsing -> vector extraction -> NER extraction
# Usage:   ./test_upload_api.sh [options] [host] [port]
# Example: ./test_upload_api.sh localhost 5232
# ============================================

# Connection defaults. main() parses the command line and overrides HOST/PORT
# with the positional host/port arguments, then rebuilds the URLs. The defaults
# are deliberately not seeded from $1/$2: $1 may be an option flag such as -u,
# which would otherwise be used verbatim as the host name.
HOST="localhost"
PORT="5232"
BASE_URL="http://${HOST}:${PORT}"
UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
STATUS_URL="${BASE_URL}/parse/status"
REGISTER_URL="${BASE_URL}/auth/register"
TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
RAG_INDEX_URL="${BASE_URL}/api/rag/index"
NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"

# Test file path (relative to the directory that contains this script)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_FILE="${SCRIPT_DIR}/test.docx"

# Test user details; the epoch timestamp gives every run a distinct user name
TIMESTAMP=$(date +%s)
TEST_USERNAME="testuser_${TIMESTAMP}"
TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
TEST_PASSWORD="Test123456!"

# State filled in by the test steps
USER_ID=""
DOCUMENT_ID=""
DOCUMENT_TEXT=""

# ANSI colour escape sequences (stored unexpanded; the print helpers expand them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Output helpers

# Print a blank line followed by a three-line blue banner with $1 in the middle.
# Globals:   BLUE, NC (read)
# Arguments: $1 - banner title
# Outputs:   the banner on stdout
print_header() {
    local rule="============================================"
    # %b expands the escape sequences stored in the colour variables, while
    # %s prints the title verbatim so backslashes in it are not interpreted.
    printf '\n%b%s%b\n' "$BLUE" "$rule" "$NC"
    printf '%b%s%b\n' "$BLUE" "$1" "$NC"
    printf '%b%s%b\n' "$BLUE" "$rule" "$NC"
}
# Print a blank line followed by a cyan ">>> <message>" step marker.
# Globals:   CYAN, NC (read)
# Arguments: $1 - step description
# Outputs:   the marker on stdout
print_step() {
    # The message goes through %s so backslashes in it are printed literally.
    printf '\n%b>>> %s%b\n' "$CYAN" "$1" "$NC"
}
# Print a green success line prefixed with a check mark.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message
# Outputs:   the line on stdout
print_success() {
    # The message goes through %s so backslashes in it are printed literally.
    printf '%b✓ %s%b\n' "$GREEN" "$1" "$NC"
}
# Print a red error line prefixed with a cross mark.
# Globals:   RED, NC (read)
# Arguments: $1 - message
# Outputs:   the line on stdout
print_error() {
    # The message goes through %s so backslashes in it are printed literally.
    printf '%b✗ %s%b\n' "$RED" "$1" "$NC"
}
# Print a yellow informational line prefixed with an arrow.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message
# Outputs:   the line on stdout
print_info() {
    # The message goes through %s so backslashes in it (for example JSON
    # escapes in a response body) are printed literally.
    printf '%b➤ %s%b\n' "$YELLOW" "$1" "$NC"
}
# Check the external tools the script relies on.
# curl is mandatory: the script exits with status 1 when it is missing.
# jq is optional: its presence is recorded in JQ_AVAILABLE.
# Globals:   JQ_AVAILABLE (written: "true" or "false")
# Outputs:   progress messages via the print helpers
check_dependencies() {
    print_header "检查依赖"

    if ! command -v curl >/dev/null 2>&1; then
        print_error "curl 未安装"
        exit 1
    fi
    print_success "curl 已安装"

    if command -v jq >/dev/null 2>&1; then
        print_success "jq 已安装"
        JQ_AVAILABLE=true
    else
        print_info "jq 未安装,JSON格式化将不可用"
        JQ_AVAILABLE=false
    fi
}
# Verify that the test file exists and report its size.
# Exits with status 1 when TEST_FILE is not a regular file.
# Globals:   TEST_FILE (read), FILE_SIZE (written, size in bytes)
# Outputs:   progress messages via the print helpers
check_test_file() {
    print_header "检查测试文件"

    if [[ ! -f "$TEST_FILE" ]]; then
        print_error "测试文件不存在: $TEST_FILE"
        exit 1
    fi

    # Try the `stat -c%s` form first and fall back to `stat -f%z`, since the
    # two stat implementations in common use accept different format flags.
    FILE_SIZE=$(stat -c%s "$TEST_FILE" 2>/dev/null || stat -f%z "$TEST_FILE" 2>/dev/null)
    print_success "测试文件存在: $TEST_FILE"
    print_info "文件大小: $FILE_SIZE bytes"
}
# Probe ${BASE_URL}/actuator/health to see whether the service is reachable.
# HTTP 200 is reported as healthy; code 000 (curl could not get a response)
# aborts the script with status 1; any other code is reported and testing
# continues.
# Globals:   BASE_URL (read), HTTP_CODE (written)
# Outputs:   progress messages via the print helpers
check_service() {
    print_header "检查服务状态"

    print_info "测试服务: $BASE_URL"

    # --max-time bounds the whole request so a server that accepts the
    # connection but never answers cannot hang the script indefinitely.
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
        --connect-timeout 5 \
        --max-time 10 \
        "${BASE_URL}/actuator/health" 2>/dev/null)

    case "$HTTP_CODE" in
        200)
            print_success "服务正常运行 (HTTP $HTTP_CODE)"
            ;;
        000)
            print_error "无法连接到服务 $BASE_URL"
            print_info "请确保 parse-service 正在运行"
            exit 1
            ;;
        *)
            print_info "健康检查返回 HTTP $HTTP_CODE,继续测试..."
            ;;
    esac
}
# Register a fresh test user through REGISTER_URL.
# On HTTP 200/201 the user ID is extracted from the response with jq, stored
# in USER_ID and persisted to ${SCRIPT_DIR}/.last_user_id. The script exits
# with status 1 when registration fails or when jq is available but no ID can
# be found in the response.
# Globals:   REGISTER_URL, TEST_USERNAME, TEST_EMAIL, TEST_PASSWORD,
#            JQ_AVAILABLE, SCRIPT_DIR, YELLOW, NC (read);
#            RESPONSE, HTTP_CODE, BODY, USER_ID (written)
# Outputs:   request details and the response on stdout
register_test_user() {
    print_header "注册测试用户"

    print_info "用户名: $TEST_USERNAME"
    print_info "邮箱: $TEST_EMAIL"
    print_info "注册URL: $REGISTER_URL"

    local payload
    payload="{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}"

    # -w appends the status code on its own final line so that body and code
    # can be separated below.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$REGISTER_URL" \
        -H "Content-Type: application/json" \
        -d "$payload" \
        --connect-timeout 10)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"

    # Pretty-print with jq when possible; fall back to the raw body.
    if [[ "$JQ_AVAILABLE" == true ]]; then
        printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
    else
        printf '%s\n' "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" && "$HTTP_CODE" != "201" ]]; then
        print_error "用户注册失败 (HTTP $HTTP_CODE)"
        print_info "响应: $BODY"
        exit 1
    fi

    print_success "用户注册成功!"

    if [[ "$JQ_AVAILABLE" != true ]]; then
        # Without jq the ID cannot be parsed; say so instead of silently
        # continuing with an empty USER_ID.
        print_info "jq 未安装,无法解析用户ID"
        return 0
    fi

    # Try each field the ID is looked up under, in order of preference.
    USER_ID=$(printf '%s\n' "$BODY" \
        | jq -r '.data.user.id // .data.userId // .userId // .data.id // .id // empty' 2>/dev/null)

    if [[ -n "$USER_ID" && "$USER_ID" != "null" ]]; then
        print_info "用户ID: $USER_ID"
        printf '%s\n' "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
    else
        print_error "无法从响应中获取用户ID"
        printf '%s\n' "响应内容: $BODY"
        exit 1
    fi
}
# Upload TEST_FILE to UPLOAD_URL as a multipart form together with USER_ID.
# On HTTP 200 the document ID is extracted with jq (when available), stored in
# DOCUMENT_ID and persisted to ${SCRIPT_DIR}/.last_document_id.
# Globals:   UPLOAD_URL, USER_ID, TEST_FILE, JQ_AVAILABLE, SCRIPT_DIR,
#            YELLOW, NC (read); RESPONSE, HTTP_CODE, BODY, DOCUMENT_ID (written)
# Outputs:   request details and the response on stdout
# Returns:   0 on HTTP 200 (and, with jq, a document ID in the response);
#            1 otherwise
test_upload() {
    print_step "文件上传"

    print_info "上传URL: $UPLOAD_URL"
    print_info "用户ID: $USER_ID"
    print_info "文件: $TEST_FILE"

    printf '\n发送请求...\n'

    # No explicit Content-Type header: -F makes curl generate the
    # multipart/form-data header itself, including the boundary parameter.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$UPLOAD_URL" \
        -F "file=@${TEST_FILE}" \
        -F "userId=${USER_ID}" \
        --connect-timeout 10 \
        --max-time 300)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    if [[ "$JQ_AVAILABLE" == true ]]; then
        printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
    else
        printf '%s\n' "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "文件上传失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    print_success "文件上传成功!"

    # Extract the document ID for the follow-up steps (requires jq).
    if [[ "$JQ_AVAILABLE" == true ]]; then
        DOCUMENT_ID=$(printf '%s\n' "$BODY" \
            | jq -r '.data.documentId // .documentId // empty' 2>/dev/null)
        if [[ -z "$DOCUMENT_ID" || "$DOCUMENT_ID" == "null" ]]; then
            print_error "无法从响应中获取文档ID"
            return 1
        fi
        print_info "文档ID: $DOCUMENT_ID"
        printf '%s\n' "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
    fi
    return 0
}
# Query the parse status of one document once and print the result.
# Globals:   STATUS_URL, JQ_AVAILABLE, YELLOW, NC (read);
#            RESPONSE, HTTP_CODE, BODY, STATUS, PROGRESS, CURRENT_STEP (written)
# Arguments: $1 - document ID
# Outputs:   request details and the response on stdout
# Returns:   0 when the query answered HTTP 200, 1 otherwise (matching the
#            other test_* functions)
test_parse_status() {
    local doc_id=$1

    print_info "文档ID: $doc_id"
    print_info "状态URL: ${STATUS_URL}/${doc_id}"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${STATUS_URL}/${doc_id}" \
        --connect-timeout 10)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    if [[ "$JQ_AVAILABLE" == true ]]; then
        printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"

        # Summarise the key status fields on one line.
        if [[ "$HTTP_CODE" == "200" ]]; then
            STATUS=$(printf '%s\n' "$BODY" | jq -r '.data.status // empty' 2>/dev/null)
            PROGRESS=$(printf '%s\n' "$BODY" | jq -r '.data.progress // 0' 2>/dev/null)
            CURRENT_STEP=$(printf '%s\n' "$BODY" | jq -r '.data.currentStep // empty' 2>/dev/null)
            print_info "状态: $STATUS, 进度: ${PROGRESS}%, 当前步骤: $CURRENT_STEP"
        fi
    else
        printf '%s\n' "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "状态查询失败 (HTTP $HTTP_CODE)"
        return 1
    fi
    print_success "状态查询成功!"
    return 0
}
# Poll the parse status of a document until it completes, fails, or the
# attempt budget is used up.
# Without jq the status cannot be read, so each raw response is printed and
# the loop runs until the budget is exhausted.
# Globals:   STATUS_URL, JQ_AVAILABLE (read); RESPONSE, STATUS, PROGRESS (written)
# Arguments: $1 - document ID
#            $2 - maximum number of attempts (default 60)
#            $3 - seconds to wait between attempts (default 3)
# Outputs:   a progress line per attempt on stdout
# Returns:   0 when the status becomes completed/COMPLETED;
#            1 on failed/FAILED or when all attempts are used up
poll_parse_status() {
    local doc_id=$1
    local max_attempts=${2:-60}
    local interval=${3:-3}
    local attempt

    print_step "轮询解析状态 (最多${max_attempts}次, 间隔${interval}秒)"

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        RESPONSE=$(curl -s "${STATUS_URL}/${doc_id}" --connect-timeout 10)

        if [[ "$JQ_AVAILABLE" == true ]]; then
            # The status field holds pending/processing/completed/failed.
            STATUS=$(printf '%s\n' "$RESPONSE" | jq -r '.data.status // .status // empty' 2>/dev/null)
            PROGRESS=$(printf '%s\n' "$RESPONSE" | jq -r '.data.progress // .progress // 0' 2>/dev/null)
            # \r rewrites the same terminal line on every attempt.
            printf '\r第 %s 次查询... 状态: %s, 进度: %s%% ' "$attempt" "$STATUS" "$PROGRESS"

            case "$STATUS" in
                completed | COMPLETED)
                    echo ""
                    print_success "解析完成!"
                    return 0
                    ;;
                failed | FAILED)
                    echo ""
                    print_error "解析失败!"
                    printf '%s\n' "$RESPONSE" | jq .
                    return 1
                    ;;
            esac
        else
            printf '%s\n' "$RESPONSE"
        fi

        # Wait only when another attempt follows; sleeping after the last
        # one would just delay the timeout report.
        if ((attempt < max_attempts)); then
            sleep "$interval"
        fi
    done

    echo ""
    print_error "轮询超时,解析未完成"
    return 1
}
# Fetch the text-storage record of a document and load the parsed text.
# The record's .data.filePath is read with jq and the file at that path is
# then read from the local filesystem into DOCUMENT_TEXT.
# Globals:   TEXT_STORAGE_URL, JQ_AVAILABLE, YELLOW, NC (read);
#            RESPONSE, HTTP_CODE, BODY, FILE_PATH, DOCUMENT_TEXT,
#            TEXT_LENGTH (written)
# Arguments: $1 - document ID
# Outputs:   request details and a 200-character preview on stdout
# Returns:   0 when the text was loaded; 1 on a non-200 response, when jq is
#            unavailable, when the response carries no file path, or when the
#            file does not exist
get_document_text() {
    local doc_id=$1

    print_step "获取文档解析文本"

    print_info "文档ID: $doc_id"
    print_info "请求URL: ${TEXT_STORAGE_URL}/${doc_id}"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${TEXT_STORAGE_URL}/${doc_id}" \
        --connect-timeout 10)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取文本存储失败 (HTTP $HTTP_CODE)"
        printf '%s\n' "$BODY"
        return 1
    fi

    if [[ "$JQ_AVAILABLE" != true ]]; then
        # The file path can only be extracted with jq. Fail explicitly rather
        # than report success with no text loaded.
        print_error "jq 未安装,无法解析文件路径"
        return 1
    fi

    FILE_PATH=$(printf '%s\n' "$BODY" | jq -r '.data.filePath // empty' 2>/dev/null)
    if [[ -z "$FILE_PATH" || "$FILE_PATH" == "null" ]]; then
        print_error "响应中无文件路径"
        printf '%s\n' "$BODY" | jq . 2>/dev/null
        return 1
    fi

    print_success "获取文本存储记录成功!"
    print_info "文件路径: $FILE_PATH"

    if [[ ! -f "$FILE_PATH" ]]; then
        print_error "文件不存在: $FILE_PATH"
        return 1
    fi

    DOCUMENT_TEXT=$(cat "$FILE_PATH" 2>/dev/null)
    TEXT_LENGTH=${#DOCUMENT_TEXT}
    print_success "读取文本成功 (长度: $TEXT_LENGTH 字符)"

    # Show the first 200 characters as a preview.
    printf '%b文本预览:%b\n' "$YELLOW" "$NC"
    printf '%s...\n' "${DOCUMENT_TEXT:0:200}"
    return 0
}
# Vector extraction: POST {documentId, text} as JSON to RAG_INDEX_URL.
# Globals:   RAG_INDEX_URL, JQ_AVAILABLE, YELLOW, NC (read);
#            RESPONSE, HTTP_CODE, BODY, CHUNK_COUNT (written)
# Arguments: $1 - document ID
#            $2 - document text to index
# Outputs:   request details and the response on stdout
# Returns:   0 on HTTP 200, 1 otherwise
test_vector_extraction() {
    local doc_id=$1
    local text=$2
    local request_body escaped_text

    print_step "向量提取 (RAG 索引)"

    print_info "文档ID: $doc_id"
    print_info "文本长度: ${#text} 字符"
    print_info "请求URL: $RAG_INDEX_URL"

    # Build the request JSON. The text is streamed over stdin rather than
    # passed as a command-line argument: a whole document can exceed the
    # operating system's per-argument size limit.
    if [[ "$JQ_AVAILABLE" == true ]]; then
        # -R reads raw text and -s slurps it into one string, so jq performs
        # all JSON escaping.
        request_body=$(printf '%s' "$text" \
            | jq -Rs --arg docId "$doc_id" '{documentId: $docId, text: .}')
    else
        # Minimal escaping without jq: escape backslashes and double quotes,
        # flatten newline/CR/tab to spaces, and drop the remaining control
        # characters, which are not allowed raw inside a JSON string.
        escaped_text=$(printf '%s\n' "$text" \
            | sed 's/\\/\\\\/g; s/"/\\"/g' \
            | tr '\n\r\t' '   ' \
            | tr -d '\000-\037')
        request_body="{\"documentId\":\"${doc_id}\",\"text\":\"${escaped_text}\"}"
    fi

    # --data-binary @- sends stdin unmodified as the request body.
    RESPONSE=$(printf '%s' "$request_body" \
        | curl -s -w "\n%{http_code}" \
            -X POST "$RAG_INDEX_URL" \
            -H "Content-Type: application/json" \
            --data-binary @- \
            --connect-timeout 30 \
            --max-time 300)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    if [[ "$JQ_AVAILABLE" == true ]]; then
        printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
    else
        printf '%s\n' "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "向量提取失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    # Report the chunk count when the response provides one.
    CHUNK_COUNT=""
    if [[ "$JQ_AVAILABLE" == true ]]; then
        CHUNK_COUNT=$(printf '%s\n' "$BODY" | jq -r '.data.chunkCount // empty' 2>/dev/null)
    fi
    if [[ -n "$CHUNK_COUNT" && "$CHUNK_COUNT" != "null" ]]; then
        print_success "向量提取成功! 生成 $CHUNK_COUNT 个分块"
    else
        print_success "向量提取成功!"
    fi
    return 0
}
# NER extraction: POST to ${NER_DOCUMENT_URL}/<document ID> and report the
# entity and relation counts when the response provides them.
# Globals:   NER_DOCUMENT_URL, JQ_AVAILABLE, YELLOW, NC (read);
#            RESPONSE, HTTP_CODE, BODY, ENTITY_COUNT, RELATION_COUNT (written)
# Arguments: $1 - document ID
# Outputs:   request details and the response on stdout
# Returns:   0 on HTTP 200, 1 otherwise
test_ner_extraction() {
    local doc_id=$1

    print_step "NER 提取 (命名实体识别)"

    print_info "文档ID: $doc_id"
    print_info "请求URL: ${NER_DOCUMENT_URL}/${doc_id}"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "${NER_DOCUMENT_URL}/${doc_id}" \
        -H "Content-Type: application/json" \
        --connect-timeout 30 \
        --max-time 300)

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$HTTP_CODE"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    if [[ "$JQ_AVAILABLE" == true ]]; then
        printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
    else
        printf '%s\n' "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "NER 提取失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [[ "$JQ_AVAILABLE" == true ]]; then
        ENTITY_COUNT=$(printf '%s\n' "$BODY" | jq -r '.data.entityCount // empty' 2>/dev/null)
        RELATION_COUNT=$(printf '%s\n' "$BODY" | jq -r '.data.relationCount // empty' 2>/dev/null)
        if [[ -n "$ENTITY_COUNT" && "$ENTITY_COUNT" != "null" ]]; then
            print_success "NER 提取成功! 实体: $ENTITY_COUNT, 关系: $RELATION_COUNT"
            return 0
        fi
    fi
    print_success "NER 提取成功!"
    return 0
}
# Print the usage help text.
# Outputs: usage, options and examples on stdout; $0 is expanded to the name
#          the script was invoked with
show_help() {
    # Unquoted here-doc delimiter so that $0 is expanded inside the text.
    cat <<EOF
使用方法: $0 [选项] [host] [port]

端到端测试流程: 上传文件 -> 等待解析 -> 向量提取 -> NER提取

选项:
 -h, --help 显示帮助信息
 -e, --e2e 执行完整端到端测试 (默认)
 -u, --upload-only 仅执行上传测试
 -s, --status 仅查询上次上传的文档状态
 -v, --vector 仅执行向量提取(使用上次的文档)
 -n, --ner 仅执行NER提取(使用上次的文档)

示例:
 $0 # 使用默认配置执行完整端到端测试
 $0 192.168.1.100 5232 # 指定服务器地址
 $0 -u # 仅上传文件
 $0 -s # 查询上次上传的状态
 $0 -v # 对上次文档执行向量提取
 $0 -n # 对上次文档执行NER提取
EOF
}
# Main entry point: parse the command line, rebuild the endpoint URLs and run
# the selected test mode.
# Globals:   HOST, PORT and all *_URL variables (written); SCRIPT_DIR, GREEN,
#            NC (read); DOCUMENT_ID (read/written)
# Arguments: [option] [host] [port] - see show_help; the first non-option
#            argument is the host, any later one the port
# Outputs:   progress of every step on stdout
# Returns:   0 when every executed step succeeded, 1 when a step failed.
#            Fatal conditions (missing document ID, failed upload or parsing in
#            e2e mode) exit the script with status 1.
main() {
    local mode="e2e" # run the full end-to-end test by default
    local host_set=""
    local rc=0
    local id_file="${SCRIPT_DIR}/.last_document_id"

    # Parse arguments
    while (($# > 0)); do
        case "$1" in
            -h | --help)
                show_help
                exit 0
                ;;
            -e | --e2e)
                mode="e2e"
                ;;
            -p | --poll)
                # Legacy option, equivalent to --e2e
                mode="e2e"
                ;;
            -u | --upload-only)
                mode="upload"
                ;;
            -s | --status)
                mode="status"
                ;;
            -v | --vector)
                mode="vector"
                ;;
            -n | --ner)
                mode="ner"
                ;;
            -*)
                # Unrecognised options are ignored.
                ;;
            *)
                if [[ -z "$host_set" ]]; then
                    HOST=$1
                    host_set=true
                else
                    PORT=$1
                fi
                ;;
        esac
        shift
    done

    # Rebuild the URLs from the (possibly overridden) host and port
    BASE_URL="http://${HOST}:${PORT}"
    UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
    STATUS_URL="${BASE_URL}/parse/status"
    REGISTER_URL="${BASE_URL}/auth/register"
    TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
    RAG_INDEX_URL="${BASE_URL}/api/rag/index"
    NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"

    print_header "文件上传端到端测试"
    echo "目标服务: $BASE_URL"
    echo "测试模式: $mode"
    echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"

    check_dependencies

    # Run the selected mode
    case "$mode" in
        status | vector | ner)
            # These modes operate on the document recorded by the last upload.
            if [[ ! -f "$id_file" ]]; then
                print_error "未找到上次上传的文档ID"
                exit 1
            fi
            DOCUMENT_ID=$(cat "$id_file")

            case "$mode" in
                status)
                    print_header "查询解析状态"
                    test_parse_status "$DOCUMENT_ID" || rc=1
                    ;;
                vector)
                    print_header "向量提取测试"
                    if get_document_text "$DOCUMENT_ID"; then
                        test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT" || rc=1
                    else
                        rc=1
                    fi
                    ;;
                ner)
                    print_header "NER 提取测试"
                    test_ner_extraction "$DOCUMENT_ID" || rc=1
                    ;;
            esac
            ;;

        upload)
            # Upload only
            check_test_file
            check_service
            register_test_user
            test_upload || rc=1
            ;;

        e2e)
            # Full end-to-end test
            check_test_file
            check_service
            register_test_user

            print_header "步骤 1/4: 文件上传"
            # Stop on a failed upload. Continuing with the ID stored by an
            # earlier run would test a stale document, not this upload.
            if ! test_upload; then
                print_error "文件上传失败,终止测试"
                exit 1
            fi

            if [[ -z "$DOCUMENT_ID" ]]; then
                print_error "无法获取文档ID,终止测试"
                exit 1
            fi

            print_header "步骤 2/4: 等待解析完成"
            if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
                print_error "解析未完成,终止测试"
                exit 1
            fi

            print_header "步骤 3/4: 向量提取"
            if get_document_text "$DOCUMENT_ID"; then
                test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT" || rc=1
            else
                # Best effort: an unreadable text file skips this step
                # without failing the run.
                print_info "跳过向量提取(无法获取文本)"
            fi

            print_header "步骤 4/4: NER 提取"
            test_ner_extraction "$DOCUMENT_ID" || rc=1
            ;;
    esac

    print_header "测试完成"
    printf '%b文档ID: %s%b\n' "$GREEN" "$DOCUMENT_ID" "$NC"
    echo "可使用以下命令进行后续操作:"
    echo " $0 -s # 查询状态"
    echo " $0 -v # 重新向量提取"
    echo " $0 -n # 重新NER提取"

    # Propagate step failures through the exit status.
    return "$rc"
}
# Run main with all command-line arguments; its return status becomes the
# script's exit status.
main "$@"
|