#!/bin/bash
# ============================================
# End-to-end test script for the file upload API
# ============================================
# Flow:    upload -> wait for parsing -> vector extraction -> NER extraction
# Usage:   ./test_upload_api.sh [options] [host] [port]
# Example: ./test_upload_api.sh localhost 5232
# ============================================

# Connection defaults. Positional host/port overrides are applied by main(),
# which skips option flags; reading $1/$2 here would mistake a flag such as
# "-u" for the host name.
HOST=localhost
PORT=5232
BASE_URL="http://${HOST}:${PORT}"
UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
STATUS_URL="${BASE_URL}/parse/status"
REGISTER_URL="${BASE_URL}/auth/register"
TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
RAG_INDEX_URL="${BASE_URL}/api/rag/index"
NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"

# Test document, expected next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_FILE="${SCRIPT_DIR}/test.docx"

# Throw-away test account; the epoch suffix keeps every run's user unique.
TIMESTAMP=$(date +%s)
TEST_USERNAME="testuser_${TIMESTAMP}"
TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
TEST_PASSWORD="Test123456!"

# State shared between steps (filled in by the functions below).
USER_ID=""
DOCUMENT_ID=""
DOCUMENT_TEXT=""

# ANSI colour escapes used by the print_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
  # Output helpers.
  #
  # print_header TITLE
  #   Writes a blank line, then TITLE framed by two rules, all in blue.
  #   Reads globals BLUE and NC. TITLE is printed literally (%s) so that
  #   backslashes in it are not interpreted as escapes.
  print_header() {
    local rule='============================================'
    printf '\n%b%s%b\n' "$BLUE" "$rule" "$NC"
    printf '%b%s%b\n' "$BLUE" "$1" "$NC"
    printf '%b%s%b\n' "$BLUE" "$rule" "$NC"
  }
  # print_step MESSAGE
  #   Writes a blank line, then ">>> MESSAGE" in cyan (globals CYAN, NC).
  #   MESSAGE is printed literally; backslashes in it are not interpreted.
  print_step() {
    printf '\n%b>>> %s%b\n' "$CYAN" "$1" "$NC"
  }
  # print_success MESSAGE
  #   Writes "✓ MESSAGE" in green (globals GREEN, NC).
  #   MESSAGE is printed literally; backslashes in it are not interpreted.
  print_success() {
    printf '%b✓ %s%b\n' "$GREEN" "$1" "$NC"
  }
  # print_error MESSAGE
  #   Writes "✗ MESSAGE" in red to stdout (globals RED, NC).
  #   MESSAGE is printed literally; backslashes in it are not interpreted.
  print_error() {
    printf '%b✗ %s%b\n' "$RED" "$1" "$NC"
  }
  # print_info MESSAGE
  #   Writes "➤ MESSAGE" in yellow (globals YELLOW, NC).
  #   MESSAGE is printed literally, so response bodies containing backslash
  #   sequences are shown unchanged.
  print_info() {
    printf '%b➤ %s%b\n' "$YELLOW" "$1" "$NC"
  }
  # check_dependencies
  #   Verifies the external tools the script relies on.
  #   - curl is mandatory: the script exits with status 1 when it is missing.
  #   - jq is optional: its presence is recorded in the global JQ_AVAILABLE
  #     ("true"/"false"), which the other steps consult before parsing JSON.
  check_dependencies() {
    print_header "检查依赖"

    if ! command -v curl >/dev/null 2>&1; then
      print_error "curl 未安装"
      exit 1
    fi
    print_success "curl 已安装"

    if command -v jq >/dev/null 2>&1; then
      print_success "jq 已安装"
      JQ_AVAILABLE=true
    else
      print_info "jq 未安装,JSON格式化将不可用"
      JQ_AVAILABLE=false
    fi
  }
  # check_test_file
  #   Ensures the upload fixture named by the global TEST_FILE exists and
  #   reports its size. Exits with status 1 when the file is missing.
  #   Sets the global FILE_SIZE (bytes).
  check_test_file() {
    print_header "检查测试文件"

    if [ ! -f "$TEST_FILE" ]; then
      print_error "测试文件不存在: $TEST_FILE"
      exit 1
    fi

    # wc -c is POSIX, so no GNU/BSD stat distinction is needed; the arithmetic
    # expansion strips the padding some wc implementations emit.
    FILE_SIZE=$(( $(wc -c <"$TEST_FILE") ))

    print_success "测试文件存在: $TEST_FILE"
    print_info "文件大小: $FILE_SIZE bytes"
  }
  # check_service
  #   Probes ${BASE_URL}/actuator/health.
  #   - HTTP 200: reported as healthy.
  #   - "000" or no output at all (curl could not connect): exits with status 1.
  #   - any other status: reported, and the run continues.
  check_service() {
    local http_code

    print_header "检查服务状态"
    print_info "测试服务: $BASE_URL"

    # --max-time bounds the whole probe so a hung service cannot stall the run.
    http_code=$(curl -s -o /dev/null -w "%{http_code}" \
      --connect-timeout 5 --max-time 10 \
      "${BASE_URL}/actuator/health" 2>/dev/null)

    case "$http_code" in
      200)
        print_success "服务正常运行 (HTTP $http_code)"
        ;;
      000|"")
        print_error "无法连接到服务 $BASE_URL"
        print_info "请确保 parse-service 正在运行"
        exit 1
        ;;
      *)
        print_info "健康检查返回 HTTP $http_code,继续测试..."
        ;;
    esac
  }
  # register_test_user
  #   Registers the throw-away account described by TEST_USERNAME, TEST_EMAIL
  #   and TEST_PASSWORD via POST to REGISTER_URL.
  #   On HTTP 200/201 with jq available, the user ID is extracted into the
  #   global USER_ID and saved to ${SCRIPT_DIR}/.last_user_id.
  #   Exits with status 1 when registration fails or (with jq) no ID is found.
  #   Without jq the ID cannot be extracted; a warning is printed and USER_ID
  #   is left unchanged.
  register_test_user() {
    local payload response http_code body

    print_header "注册测试用户"
    print_info "用户名: $TEST_USERNAME"
    print_info "邮箱: $TEST_EMAIL"
    print_info "注册URL: $REGISTER_URL"

    payload="{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}"

    # curl appends "\n<status>" after the body; split on the last newline.
    response=$(curl -s -w "\n%{http_code}" \
      -X POST "$REGISTER_URL" \
      -H "Content-Type: application/json" \
      -d "$payload" \
      --connect-timeout 10)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    if [ "$JQ_AVAILABLE" = true ]; then
      jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
    else
      printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ] && [ "$http_code" != "201" ]; then
      print_error "用户注册失败 (HTTP $http_code)"
      print_info "响应: $body"
      exit 1
    fi
    print_success "用户注册成功!"

    if [ "$JQ_AVAILABLE" != true ]; then
      # Previously this case was silent and the upload went out with an
      # empty userId.
      print_info "jq 未安装,无法解析用户ID"
      return 0
    fi

    # The response layout is not fixed; try the known candidate fields.
    USER_ID=$(jq -r '.data.user.id // .data.userId // .userId // empty' <<<"$body" 2>/dev/null)
    if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
      USER_ID=$(jq -r '.data.id // .id // empty' <<<"$body" 2>/dev/null)
    fi

    if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
      print_error "无法从响应中获取用户ID"
      echo "响应内容: $body"
      exit 1
    fi

    print_info "用户ID: $USER_ID"
    echo "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
  }
  # test_upload
  #   Uploads TEST_FILE for USER_ID as multipart/form-data to UPLOAD_URL.
  #   On HTTP 200 the document ID is extracted into the global DOCUMENT_ID and
  #   saved to ${SCRIPT_DIR}/.last_document_id.
  # Returns: 0 when the upload succeeded and a document ID was obtained,
  #          1 otherwise.
  test_upload() {
    local response http_code body
    # Fallback for hosts without jq: first "documentId" key, quoted or bare.
    local doc_id_re='"documentId"[[:space:]]*:[[:space:]]*"?([^",}[:space:]]+)'

    print_step "文件上传"
    print_info "上传URL: $UPLOAD_URL"
    print_info "用户ID: $USER_ID"
    print_info "文件: $TEST_FILE"
    printf '\n发送请求...\n'

    # No explicit Content-Type header: -F makes curl generate the
    # multipart/form-data header itself, including the boundary parameter.
    response=$(curl -s -w "\n%{http_code}" \
      -X POST "$UPLOAD_URL" \
      -F "file=@${TEST_FILE}" \
      -F "userId=${USER_ID}" \
      --connect-timeout 10 \
      --max-time 300)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"
    if [ "$JQ_AVAILABLE" = true ]; then
      jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
    else
      printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ]; then
      print_error "文件上传失败 (HTTP $http_code)"
      return 1
    fi
    print_success "文件上传成功!"

    # Extract the document ID for the follow-up steps.
    DOCUMENT_ID=""
    if [ "$JQ_AVAILABLE" = true ]; then
      DOCUMENT_ID=$(jq -r '.data.documentId // .documentId // empty' <<<"$body" 2>/dev/null)
    elif [[ $body =~ $doc_id_re ]]; then
      DOCUMENT_ID=${BASH_REMATCH[1]}
    fi

    if [ -z "$DOCUMENT_ID" ] || [ "$DOCUMENT_ID" = "null" ]; then
      DOCUMENT_ID=""
      print_error "无法从响应中获取文档ID"
      return 1
    fi

    print_info "文档ID: $DOCUMENT_ID"
    echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
    return 0
  }
  # test_parse_status DOC_ID
  #   Queries ${STATUS_URL}/DOC_ID once and prints the response; with jq
  #   available it also summarises status, progress and current step.
  # Returns: 0 on HTTP 200, 1 otherwise.
  test_parse_status() {
    local DOC_ID=$1
    local response http_code body status progress current_step

    print_info "文档ID: $DOC_ID"
    print_info "状态URL: ${STATUS_URL}/${DOC_ID}"

    response=$(curl -s -w "\n%{http_code}" \
      -X GET "${STATUS_URL}/${DOC_ID}" \
      --connect-timeout 10)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '\n%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"

    if [ "$JQ_AVAILABLE" = true ]; then
      jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
      # Summarise the key status fields.
      if [ "$http_code" = "200" ]; then
        status=$(jq -r '.data.status // empty' <<<"$body" 2>/dev/null)
        progress=$(jq -r '.data.progress // 0' <<<"$body" 2>/dev/null)
        current_step=$(jq -r '.data.currentStep // empty' <<<"$body" 2>/dev/null)
        print_info "状态: $status, 进度: ${progress}%, 当前步骤: $current_step"
      fi
    else
      printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ]; then
      print_error "状态查询失败 (HTTP $http_code)"
      return 1
    fi
    print_success "状态查询成功!"
    return 0
  }
  # poll_parse_status DOC_ID [MAX_ATTEMPTS=60] [INTERVAL=3]
  #   Polls ${STATUS_URL}/DOC_ID until the reported status is completed or
  #   failed (either letter case), or until MAX_ATTEMPTS queries have been made,
  #   sleeping INTERVAL seconds between queries.
  #   With jq the status is read from .data.status / .status. Without jq a
  #   regex picks the first "status"/"progress" keys in the response; this is a
  #   heuristic, but without it completion could never be observed.
  # Returns: 0 when parsing completed, 1 on failure or timeout.
  poll_parse_status() {
    local DOC_ID=$1
    local MAX_ATTEMPTS=${2:-60}
    local INTERVAL=${3:-3}
    local attempt response status progress
    local status_re='"status"[[:space:]]*:[[:space:]]*"([A-Za-z_]+)"'
    local progress_re='"progress"[[:space:]]*:[[:space:]]*([0-9]+(\.[0-9]+)?)'

    print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"

    for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
      # --max-time keeps one stuck request from blocking the whole poll.
      response=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10 --max-time 30)

      status=""
      progress=0
      if [ "$JQ_AVAILABLE" = true ]; then
        # Status values: pending/processing/completed/failed.
        status=$(jq -r '.data.status // .status // empty' <<<"$response" 2>/dev/null)
        progress=$(jq -r '.data.progress // .progress // 0' <<<"$response" 2>/dev/null)
      else
        if [[ $response =~ $status_re ]]; then
          status=${BASH_REMATCH[1]}
        fi
        if [[ $response =~ $progress_re ]]; then
          progress=${BASH_REMATCH[1]}
        fi
      fi

      # \r rewrites the same terminal line on every attempt.
      printf '\r第 %s 次查询... 状态: %s, 进度: %s%% ' "$attempt" "$status" "$progress"

      case "$status" in
        completed|COMPLETED)
          echo ""
          print_success "解析完成!"
          return 0
          ;;
        failed|FAILED)
          echo ""
          print_error "解析失败!"
          if [ "$JQ_AVAILABLE" = true ]; then
            jq . <<<"$response"
          else
            printf '%s\n' "$response"
          fi
          return 1
          ;;
      esac

      # No point in sleeping after the final attempt.
      if (( attempt < MAX_ATTEMPTS )); then
        sleep "$INTERVAL"
      fi
    done

    echo ""
    print_error "轮询超时,解析未完成"
    return 1
  }
  # get_document_text DOC_ID
  #   Fetches the text-storage record from ${TEXT_STORAGE_URL}/DOC_ID, takes
  #   .data.filePath from it and loads that file into the global DOCUMENT_TEXT.
  #   The path is opened on the local filesystem, so this only works where the
  #   stored file is readable from the machine running the script.
  #   Requires jq to read the file path.
  # Returns: 0 when DOCUMENT_TEXT was loaded, 1 otherwise.
  get_document_text() {
    local DOC_ID=$1
    local response http_code body file_path

    print_step "获取文档解析文本"
    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${TEXT_STORAGE_URL}/${DOC_ID}"

    response=$(curl -s -w "\n%{http_code}" \
      -X GET "${TEXT_STORAGE_URL}/${DOC_ID}" \
      --connect-timeout 10)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"

    if [ "$http_code" != "200" ]; then
      print_error "获取文本存储失败 (HTTP $http_code)"
      printf '%s\n' "$body"
      return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
      # Previously this path fell through with status 0 and an empty
      # DOCUMENT_TEXT, so the caller indexed an empty document.
      print_error "jq 未安装,无法解析文本存储响应"
      return 1
    fi

    file_path=$(jq -r '.data.filePath // empty' <<<"$body" 2>/dev/null)
    if [ -z "$file_path" ] || [ "$file_path" = "null" ]; then
      print_error "响应中无文件路径"
      jq . <<<"$body" 2>/dev/null
      return 1
    fi

    print_success "获取文本存储记录成功!"
    print_info "文件路径: $file_path"

    if [ ! -f "$file_path" ]; then
      print_error "文件不存在: $file_path"
      return 1
    fi

    DOCUMENT_TEXT=$(cat "$file_path" 2>/dev/null)
    print_success "读取文本成功 (长度: ${#DOCUMENT_TEXT} 字符)"

    # Show the first 200 characters as a preview.
    printf '%b文本预览:%b\n' "$YELLOW" "$NC"
    printf '%s\n' "${DOCUMENT_TEXT:0:200}..."
    return 0
  }
  289. # 向量提取(RAG 索引)
  290. test_vector_extraction() {
  291. local DOC_ID=$1
  292. local TEXT=$2
  293. print_step "向量提取 (RAG 索引)"
  294. print_info "文档ID: $DOC_ID"
  295. print_info "文本长度: ${#TEXT} 字符"
  296. print_info "请求URL: $RAG_INDEX_URL"
  297. # 构建请求JSON(需要转义文本中的特殊字符)
  298. if [ "$JQ_AVAILABLE" = true ]; then
  299. REQUEST_BODY=$(jq -n \
  300. --arg docId "$DOC_ID" \
  301. --arg text "$TEXT" \
  302. '{documentId: $docId, text: $text}')
  303. else
  304. # 简单转义
  305. ESCAPED_TEXT=$(echo "$TEXT" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
  306. REQUEST_BODY="{\"documentId\":\"${DOC_ID}\",\"text\":\"${ESCAPED_TEXT}\"}"
  307. fi
  308. RESPONSE=$(curl -s -w "\n%{http_code}" \
  309. -X POST "$RAG_INDEX_URL" \
  310. -H "Content-Type: application/json" \
  311. -d "$REQUEST_BODY" \
  312. --connect-timeout 30 \
  313. --max-time 300)
  314. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  315. BODY=$(echo "$RESPONSE" | sed '$d')
  316. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  317. echo -e "${YELLOW}响应内容:${NC}"
  318. if [ "$JQ_AVAILABLE" = true ]; then
  319. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  320. else
  321. echo "$BODY"
  322. fi
  323. if [ "$HTTP_CODE" = "200" ]; then
  324. if [ "$JQ_AVAILABLE" = true ]; then
  325. CHUNK_COUNT=$(echo "$BODY" | jq -r '.data.chunkCount // empty' 2>/dev/null)
  326. if [ -n "$CHUNK_COUNT" ] && [ "$CHUNK_COUNT" != "null" ]; then
  327. print_success "向量提取成功! 生成 $CHUNK_COUNT 个分块"
  328. else
  329. print_success "向量提取成功!"
  330. fi
  331. else
  332. print_success "向量提取成功!"
  333. fi
  334. return 0
  335. else
  336. print_error "向量提取失败 (HTTP $HTTP_CODE)"
  337. return 1
  338. fi
  339. }
  # test_ner_extraction DOC_ID
  #   Triggers named-entity extraction with a body-less POST to
  #   ${NER_DOCUMENT_URL}/DOC_ID and prints the response. With jq available,
  #   entity and relation counts are shown when the response carries
  #   .data.entityCount.
  # Returns: 0 on HTTP 200, 1 otherwise.
  test_ner_extraction() {
    local DOC_ID=$1
    local response http_code body entity_count relation_count

    print_step "NER 提取 (命名实体识别)"
    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${NER_DOCUMENT_URL}/${DOC_ID}"

    response=$(curl -s -w "\n%{http_code}" \
      -X POST "${NER_DOCUMENT_URL}/${DOC_ID}" \
      -H "Content-Type: application/json" \
      --connect-timeout 30 \
      --max-time 300)
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}

    printf '%b响应状态码:%b %s\n' "$YELLOW" "$NC" "$http_code"
    printf '%b响应内容:%b\n' "$YELLOW" "$NC"
    if [ "$JQ_AVAILABLE" = true ]; then
      jq . <<<"$body" 2>/dev/null || printf '%s\n' "$body"
    else
      printf '%s\n' "$body"
    fi

    if [ "$http_code" != "200" ]; then
      print_error "NER 提取失败 (HTTP $http_code)"
      return 1
    fi

    entity_count=""
    relation_count=""
    if [ "$JQ_AVAILABLE" = true ]; then
      entity_count=$(jq -r '.data.entityCount // empty' <<<"$body" 2>/dev/null)
      relation_count=$(jq -r '.data.relationCount // empty' <<<"$body" 2>/dev/null)
    fi
    if [ -n "$entity_count" ] && [ "$entity_count" != "null" ]; then
      print_success "NER 提取成功! 实体: $entity_count, 关系: $relation_count"
    else
      print_success "NER 提取成功!"
    fi
    return 0
  }
  # show_help
  #   Prints the usage text (options and examples) to stdout. $0 is expanded
  #   so the examples show the name the script was invoked with.
  show_help() {
    printf '%s\n' \
      "使用方法: $0 [选项] [host] [port]" \
      "" \
      "端到端测试流程: 上传文件 -> 等待解析 -> 向量提取 -> NER提取" \
      "" \
      "选项:" \
      " -h, --help 显示帮助信息" \
      " -e, --e2e 执行完整端到端测试 (默认)" \
      " -u, --upload-only 仅执行上传测试" \
      " -s, --status 仅查询上次上传的文档状态" \
      " -v, --vector 仅执行向量提取(使用上次的文档)" \
      " -n, --ner 仅执行NER提取(使用上次的文档)" \
      "" \
      "示例:" \
      " $0 # 使用默认配置执行完整端到端测试" \
      " $0 192.168.1.100 5232 # 指定服务器地址" \
      " $0 -u # 仅上传文件" \
      " $0 -s # 查询上次上传的状态" \
      " $0 -v # 对上次文档执行向量提取" \
      " $0 -n # 对上次文档执行NER提取"
  }
  # main [options] [host] [port]
  #   Entry point: parses the command line, rebuilds the endpoint URLs for the
  #   selected host/port and runs the requested mode.
  #   Modes: e2e (default; -e, or the legacy -p/--poll), upload (-u),
  #          status (-s), vector (-v), ner (-n). -h prints help and exits 0.
  #   The first non-option argument is the host, any later one the port.
  #   status/vector/ner operate on the ID stored in
  #   ${SCRIPT_DIR}/.last_document_id by a previous upload.
  # Returns: 0 when every executed step succeeded, 1 otherwise. Fatal
  #          conditions (no stored document ID, failed upload in e2e mode,
  #          parsing not completed) exit with status 1 immediately.
  main() {
    local mode="e2e"  # full end-to-end run unless an option says otherwise
    local host_set=""
    local rc=0
    local id_file="${SCRIPT_DIR}/.last_document_id"

    # Parse arguments.
    while [[ $# -gt 0 ]]; do
      case "$1" in
        -h|--help)
          show_help
          exit 0
          ;;
        -e|--e2e|-p|--poll)
          # -p/--poll is a legacy alias for the end-to-end run.
          mode="e2e"
          ;;
        -u|--upload-only)
          mode="upload"
          ;;
        -s|--status)
          mode="status"
          ;;
        -v|--vector)
          mode="vector"
          ;;
        -n|--ner)
          mode="ner"
          ;;
        -*)
          # Unknown options are skipped, as before, but no longer silently.
          print_info "忽略未知选项: $1"
          ;;
        *)
          if [[ -z "$host_set" ]]; then
            HOST=$1
            host_set=true
          else
            PORT=$1
          fi
          ;;
      esac
      shift
    done

    # Rebuild the URLs now that HOST/PORT are final.
    BASE_URL="http://${HOST}:${PORT}"
    UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
    STATUS_URL="${BASE_URL}/parse/status"
    REGISTER_URL="${BASE_URL}/auth/register"
    TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
    RAG_INDEX_URL="${BASE_URL}/api/rag/index"
    NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"

    print_header "文件上传端到端测试"
    echo "目标服务: $BASE_URL"
    echo "测试模式: $mode"
    echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"

    check_dependencies

    # Modes that re-use the previous upload need its stored document ID.
    case "$mode" in
      status|vector|ner)
        if [ ! -f "$id_file" ]; then
          print_error "未找到上次上传的文档ID"
          exit 1
        fi
        DOCUMENT_ID=$(cat "$id_file")
        ;;
    esac

    case "$mode" in
      status)
        print_header "查询解析状态"
        test_parse_status "$DOCUMENT_ID" || rc=1
        ;;
      vector)
        print_header "向量提取测试"
        if get_document_text "$DOCUMENT_ID"; then
          test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT" || rc=1
        else
          rc=1
        fi
        ;;
      ner)
        print_header "NER 提取测试"
        test_ner_extraction "$DOCUMENT_ID" || rc=1
        ;;
      upload)
        check_test_file
        check_service
        register_test_user
        test_upload || rc=1
        ;;
      e2e)
        check_test_file
        check_service
        register_test_user

        print_header "步骤 1/4: 文件上传"
        # Abort when the upload fails or yields no ID. Falling back to the ID
        # file here could only pick up a document from an earlier run, and the
        # remaining steps would then test the wrong document.
        if ! test_upload || [ -z "$DOCUMENT_ID" ]; then
          print_error "无法获取文档ID,终止测试"
          exit 1
        fi

        print_header "步骤 2/4: 等待解析完成"
        if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
          print_error "解析未完成,终止测试"
          exit 1
        fi

        print_header "步骤 3/4: 向量提取"
        if get_document_text "$DOCUMENT_ID"; then
          test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT" || rc=1
        else
          # Best effort: missing text skips this step without failing the run.
          print_info "跳过向量提取(无法获取文本)"
        fi

        print_header "步骤 4/4: NER 提取"
        test_ner_extraction "$DOCUMENT_ID" || rc=1
        ;;
    esac

    print_header "测试完成"
    echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
    echo "可使用以下命令进行后续操作:"
    echo " $0 -s # 查询状态"
    echo " $0 -v # 重新向量提取"
    echo " $0 -n # 重新NER提取"

    return "$rc"
  }
  # Run the entry point with the script's arguments; its status becomes the
  # script's exit status.
  main "$@"