#!/bin/bash
# ============================================
# test_upload_api.sh - end-to-end test script for file upload
# ============================================
# Test flow: upload -> wait for parsing -> automatic processing
#            (vector / NER / structured) -> datasource operations
# Usage:   ./test_upload_api.sh [host] [port]
# Example: ./test_upload_api.sh localhost 5232
# ============================================
  9. # 配置参数
  10. HOST=${1:-localhost}
  11. PORT=${2:-5232}
  12. BASE_URL="http://${HOST}:${PORT}"
  13. UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
  14. STATUS_URL="${BASE_URL}/parse/status"
  15. REGISTER_URL="${BASE_URL}/auth/register"
  16. TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
  17. RAG_INDEX_URL="${BASE_URL}/api/rag/index"
  18. NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
  19. STRUCTURED_URL="${BASE_URL}/parse/structured"
  20. ELEMENTS_URL="${BASE_URL}/parse/elements"
  21. DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
  22. GRAPH_URL="${BASE_URL}/api/graph"
  23. TASK_CENTER_URL="${BASE_URL}/api/v1/tasks"
  24. # 测试文件路径(相对于脚本所在目录)
  25. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  26. TEST_FILE="${SCRIPT_DIR}/test.docx"
  27. # 测试用户信息
  28. TIMESTAMP=$(date +%s)
  29. TEST_USERNAME="testuser_${TIMESTAMP}"
  30. TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
  31. TEST_PASSWORD="Test123456!"
  32. USER_ID=""
  33. DOCUMENT_ID=""
  34. DOCUMENT_TEXT=""
  35. # 颜色定义
  36. RED='\033[0;31m'
  37. GREEN='\033[0;32m'
  38. YELLOW='\033[1;33m'
  39. BLUE='\033[0;34m'
  40. CYAN='\033[0;36m'
  41. NC='\033[0m' # No Color
  42. # 输出函数
  43. print_header() {
  44. echo -e "\n${BLUE}============================================${NC}"
  45. echo -e "${BLUE}$1${NC}"
  46. echo -e "${BLUE}============================================${NC}"
  47. }
  48. print_step() {
  49. echo -e "\n${CYAN}>>> $1${NC}"
  50. }
  51. print_success() {
  52. echo -e "${GREEN}✓ $1${NC}"
  53. }
  54. print_error() {
  55. echo -e "${RED}✗ $1${NC}"
  56. }
  57. print_info() {
  58. echo -e "${YELLOW}➤ $1${NC}"
  59. }
  60. # 检查依赖
  61. check_dependencies() {
  62. print_header "检查依赖"
  63. if ! command -v curl &> /dev/null; then
  64. print_error "curl 未安装"
  65. exit 1
  66. fi
  67. print_success "curl 已安装"
  68. if ! command -v jq &> /dev/null; then
  69. print_info "jq 未安装,JSON格式化将不可用"
  70. JQ_AVAILABLE=false
  71. else
  72. print_success "jq 已安装"
  73. JQ_AVAILABLE=true
  74. fi
  75. }
  76. # 检查测试文件
  77. check_test_file() {
  78. print_header "检查测试文件"
  79. if [ ! -f "$TEST_FILE" ]; then
  80. print_error "测试文件不存在: $TEST_FILE"
  81. exit 1
  82. fi
  83. FILE_SIZE=$(stat -c%s "$TEST_FILE" 2>/dev/null || stat -f%z "$TEST_FILE" 2>/dev/null)
  84. print_success "测试文件存在: $TEST_FILE"
  85. print_info "文件大小: $FILE_SIZE bytes"
  86. }
  87. # 检查服务是否可用
  88. check_service() {
  89. print_header "检查服务状态"
  90. print_info "测试服务: $BASE_URL"
  91. HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${BASE_URL}/actuator/health" 2>/dev/null)
  92. if [ "$HTTP_CODE" = "200" ]; then
  93. print_success "服务正常运行 (HTTP $HTTP_CODE)"
  94. elif [ "$HTTP_CODE" = "000" ]; then
  95. print_error "无法连接到服务 $BASE_URL"
  96. print_info "请确保 parse-service 正在运行"
  97. exit 1
  98. else
  99. print_info "健康检查返回 HTTP $HTTP_CODE,继续测试..."
  100. fi
  101. }
  102. # 注册测试用户
  103. register_test_user() {
  104. print_header "注册测试用户"
  105. print_info "用户名: $TEST_USERNAME"
  106. print_info "邮箱: $TEST_EMAIL"
  107. print_info "注册URL: $REGISTER_URL"
  108. RESPONSE=$(curl -s -w "\n%{http_code}" \
  109. -X POST "$REGISTER_URL" \
  110. -H "Content-Type: application/json" \
  111. -d "{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}" \
  112. --connect-timeout 10)
  113. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  114. BODY=$(echo "$RESPONSE" | sed '$d')
  115. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  116. if [ "$JQ_AVAILABLE" = true ]; then
  117. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  118. else
  119. echo "$BODY"
  120. fi
  121. # 解析用户ID
  122. if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
  123. print_success "用户注册成功!"
  124. if [ "$JQ_AVAILABLE" = true ]; then
  125. USER_ID=$(echo "$BODY" | jq -r '.data.user.id // .data.userId // .userId // empty' 2>/dev/null)
  126. if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
  127. # 尝试其他可能的字段
  128. USER_ID=$(echo "$BODY" | jq -r '.data.id // .id // empty' 2>/dev/null)
  129. fi
  130. if [ -n "$USER_ID" ] && [ "$USER_ID" != "null" ]; then
  131. print_info "用户ID: $USER_ID"
  132. echo "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
  133. else
  134. print_error "无法从响应中获取用户ID"
  135. echo "响应内容: $BODY"
  136. exit 1
  137. fi
  138. fi
  139. else
  140. print_error "用户注册失败 (HTTP $HTTP_CODE)"
  141. print_info "响应: $BODY"
  142. exit 1
  143. fi
  144. }
  145. # 测试文件上传
  146. test_upload() {
  147. print_step "文件上传"
  148. print_info "上传URL: $UPLOAD_URL"
  149. print_info "用户ID: $USER_ID"
  150. print_info "文件: $TEST_FILE"
  151. echo -e "\n发送请求..."
  152. RESPONSE=$(curl -s -w "\n%{http_code}" \
  153. -X POST "$UPLOAD_URL" \
  154. -H "Content-Type: multipart/form-data" \
  155. -F "file=@${TEST_FILE}" \
  156. -F "userId=${USER_ID}" \
  157. --connect-timeout 10 \
  158. --max-time 300)
  159. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  160. BODY=$(echo "$RESPONSE" | sed '$d')
  161. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  162. echo -e "${YELLOW}响应内容:${NC}"
  163. if [ "$JQ_AVAILABLE" = true ]; then
  164. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  165. else
  166. echo "$BODY"
  167. fi
  168. # 解析响应
  169. if [ "$HTTP_CODE" = "200" ]; then
  170. print_success "文件上传成功!"
  171. # 提取documentId用于后续操作
  172. if [ "$JQ_AVAILABLE" = true ]; then
  173. DOCUMENT_ID=$(echo "$BODY" | jq -r '.data.documentId // .documentId // empty' 2>/dev/null)
  174. if [ -n "$DOCUMENT_ID" ] && [ "$DOCUMENT_ID" != "null" ]; then
  175. print_info "文档ID: $DOCUMENT_ID"
  176. echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
  177. else
  178. print_error "无法从响应中获取文档ID"
  179. return 1
  180. fi
  181. fi
  182. return 0
  183. else
  184. print_error "文件上传失败 (HTTP $HTTP_CODE)"
  185. return 1
  186. fi
  187. }
  188. # 测试解析状态查询(单次)
  189. test_parse_status() {
  190. local DOC_ID=$1
  191. print_info "文档ID: $DOC_ID"
  192. print_info "状态URL: ${STATUS_URL}/${DOC_ID}"
  193. RESPONSE=$(curl -s -w "\n%{http_code}" \
  194. -X GET "${STATUS_URL}/${DOC_ID}" \
  195. --connect-timeout 10)
  196. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  197. BODY=$(echo "$RESPONSE" | sed '$d')
  198. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  199. echo -e "${YELLOW}响应内容:${NC}"
  200. if [ "$JQ_AVAILABLE" = true ]; then
  201. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  202. # 显示关键状态信息
  203. if [ "$HTTP_CODE" = "200" ]; then
  204. STATUS=$(echo "$BODY" | jq -r '.data.status // empty' 2>/dev/null)
  205. PROGRESS=$(echo "$BODY" | jq -r '.data.progress // 0' 2>/dev/null)
  206. CURRENT_STEP=$(echo "$BODY" | jq -r '.data.currentStep // empty' 2>/dev/null)
  207. print_info "状态: $STATUS, 进度: ${PROGRESS}%, 当前步骤: $CURRENT_STEP"
  208. fi
  209. else
  210. echo "$BODY"
  211. fi
  212. if [ "$HTTP_CODE" = "200" ]; then
  213. print_success "状态查询成功!"
  214. else
  215. print_error "状态查询失败 (HTTP $HTTP_CODE)"
  216. fi
  217. }
  218. # 轮询解析状态直到完成
  219. poll_parse_status() {
  220. local DOC_ID=$1
  221. local MAX_ATTEMPTS=${2:-60}
  222. local INTERVAL=${3:-3}
  223. print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"
  224. for ((i=1; i<=MAX_ATTEMPTS; i++)); do
  225. RESPONSE=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10)
  226. if [ "$JQ_AVAILABLE" = true ]; then
  227. # 状态字段为 status,值为: pending/processing/completed/failed
  228. STATUS=$(echo "$RESPONSE" | jq -r '.data.status // .status // empty' 2>/dev/null)
  229. PROGRESS=$(echo "$RESPONSE" | jq -r '.data.progress // .progress // 0' 2>/dev/null)
  230. echo -ne "\r第 $i 次查询... 状态: $STATUS, 进度: ${PROGRESS}% "
  231. if [ "$STATUS" = "completed" ] || [ "$STATUS" = "COMPLETED" ]; then
  232. echo ""
  233. print_success "解析完成!"
  234. return 0
  235. elif [ "$STATUS" = "failed" ] || [ "$STATUS" = "FAILED" ]; then
  236. echo ""
  237. print_error "解析失败!"
  238. echo "$RESPONSE" | jq .
  239. return 1
  240. fi
  241. else
  242. echo "$RESPONSE"
  243. fi
  244. sleep $INTERVAL
  245. done
  246. echo ""
  247. print_error "轮询超时,解析未完成"
  248. return 1
  249. }
  250. # 获取解析后的文本内容
  251. get_document_text() {
  252. local DOC_ID=$1
  253. print_step "获取文档解析文本"
  254. print_info "文档ID: $DOC_ID"
  255. print_info "请求URL: ${TEXT_STORAGE_URL}/${DOC_ID}"
  256. RESPONSE=$(curl -s -w "\n%{http_code}" \
  257. -X GET "${TEXT_STORAGE_URL}/${DOC_ID}" \
  258. --connect-timeout 10)
  259. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  260. BODY=$(echo "$RESPONSE" | sed '$d')
  261. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  262. if [ "$HTTP_CODE" = "200" ]; then
  263. if [ "$JQ_AVAILABLE" = true ]; then
  264. # 获取文件路径
  265. FILE_PATH=$(echo "$BODY" | jq -r '.data.filePath // empty' 2>/dev/null)
  266. if [ -n "$FILE_PATH" ] && [ "$FILE_PATH" != "null" ]; then
  267. print_success "获取文本存储记录成功!"
  268. print_info "文件路径: $FILE_PATH"
  269. # 读取文件内容
  270. if [ -f "$FILE_PATH" ]; then
  271. DOCUMENT_TEXT=$(cat "$FILE_PATH" 2>/dev/null)
  272. TEXT_LENGTH=${#DOCUMENT_TEXT}
  273. print_success "读取文本成功 (长度: $TEXT_LENGTH 字符)"
  274. # 显示前200个字符
  275. echo -e "${YELLOW}文本预览:${NC}"
  276. echo "${DOCUMENT_TEXT:0:200}..."
  277. return 0
  278. else
  279. print_error "文件不存在: $FILE_PATH"
  280. return 1
  281. fi
  282. else
  283. print_error "响应中无文件路径"
  284. echo "$BODY" | jq . 2>/dev/null
  285. return 1
  286. fi
  287. fi
  288. else
  289. print_error "获取文本存储失败 (HTTP $HTTP_CODE)"
  290. echo "$BODY"
  291. return 1
  292. fi
  293. }
  294. # 向量提取(RAG 索引)
  295. test_vector_extraction() {
  296. local DOC_ID=$1
  297. local TEXT=$2
  298. print_step "向量提取 (RAG 索引)"
  299. print_info "文档ID: $DOC_ID"
  300. print_info "文本长度: ${#TEXT} 字符"
  301. print_info "请求URL: $RAG_INDEX_URL"
  302. # 构建请求JSON(需要转义文本中的特殊字符)
  303. if [ "$JQ_AVAILABLE" = true ]; then
  304. REQUEST_BODY=$(jq -n \
  305. --arg docId "$DOC_ID" \
  306. --arg text "$TEXT" \
  307. '{documentId: $docId, text: $text}')
  308. else
  309. # 简单转义
  310. ESCAPED_TEXT=$(echo "$TEXT" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
  311. REQUEST_BODY="{\"documentId\":\"${DOC_ID}\",\"text\":\"${ESCAPED_TEXT}\"}"
  312. fi
  313. RESPONSE=$(curl -s -w "\n%{http_code}" \
  314. -X POST "$RAG_INDEX_URL" \
  315. -H "Content-Type: application/json" \
  316. -d "$REQUEST_BODY" \
  317. --connect-timeout 30 \
  318. --max-time 300)
  319. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  320. BODY=$(echo "$RESPONSE" | sed '$d')
  321. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  322. echo -e "${YELLOW}响应内容:${NC}"
  323. if [ "$JQ_AVAILABLE" = true ]; then
  324. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  325. else
  326. echo "$BODY"
  327. fi
  328. if [ "$HTTP_CODE" = "200" ]; then
  329. if [ "$JQ_AVAILABLE" = true ]; then
  330. CHUNK_COUNT=$(echo "$BODY" | jq -r '.data.chunkCount // empty' 2>/dev/null)
  331. if [ -n "$CHUNK_COUNT" ] && [ "$CHUNK_COUNT" != "null" ]; then
  332. print_success "向量提取成功! 生成 $CHUNK_COUNT 个分块"
  333. else
  334. print_success "向量提取成功!"
  335. fi
  336. else
  337. print_success "向量提取成功!"
  338. fi
  339. return 0
  340. else
  341. print_error "向量提取失败 (HTTP $HTTP_CODE)"
  342. return 1
  343. fi
  344. }
  345. # NER 提取
  346. test_ner_extraction() {
  347. local DOC_ID=$1
  348. print_step "NER 提取 (命名实体识别)"
  349. print_info "文档ID: $DOC_ID"
  350. print_info "请求URL: ${NER_DOCUMENT_URL}/${DOC_ID}"
  351. RESPONSE=$(curl -s -w "\n%{http_code}" \
  352. -X POST "${NER_DOCUMENT_URL}/${DOC_ID}" \
  353. -H "Content-Type: application/json" \
  354. --connect-timeout 30 \
  355. --max-time 300)
  356. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  357. BODY=$(echo "$RESPONSE" | sed '$d')
  358. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  359. echo -e "${YELLOW}响应内容:${NC}"
  360. if [ "$JQ_AVAILABLE" = true ]; then
  361. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  362. else
  363. echo "$BODY"
  364. fi
  365. if [ "$HTTP_CODE" = "200" ]; then
  366. if [ "$JQ_AVAILABLE" = true ]; then
  367. ENTITY_COUNT=$(echo "$BODY" | jq -r '.data.entityCount // empty' 2>/dev/null)
  368. RELATION_COUNT=$(echo "$BODY" | jq -r '.data.relationCount // empty' 2>/dev/null)
  369. if [ -n "$ENTITY_COUNT" ] && [ "$ENTITY_COUNT" != "null" ]; then
  370. print_success "NER 提取成功! 实体: $ENTITY_COUNT, 关系: $RELATION_COUNT"
  371. else
  372. print_success "NER 提取成功!"
  373. fi
  374. else
  375. print_success "NER 提取成功!"
  376. fi
  377. return 0
  378. else
  379. print_error "NER 提取失败 (HTTP $HTTP_CODE)"
  380. return 1
  381. fi
  382. }
  383. # 结构化解析(提取图片和表格)
  384. test_structured_extraction() {
  385. local DOC_ID=$1
  386. print_step "结构化解析 (提取段落、图片、表格)"
  387. print_info "文档ID: $DOC_ID"
  388. print_info "请求URL: ${STRUCTURED_URL}/${DOC_ID}"
  389. RESPONSE=$(curl -s -w "\n%{http_code}" \
  390. -X GET "${STRUCTURED_URL}/${DOC_ID}" \
  391. --connect-timeout 30 \
  392. --max-time 300)
  393. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  394. BODY=$(echo "$RESPONSE" | sed '$d')
  395. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  396. if [ "$HTTP_CODE" = "200" ]; then
  397. if [ "$JQ_AVAILABLE" = true ]; then
  398. TOTAL=$(echo "$BODY" | jq -r '.data.totalElements // 0' 2>/dev/null)
  399. IMAGE_COUNT=$(echo "$BODY" | jq -r '.data.imageCount // 0' 2>/dev/null)
  400. TABLE_COUNT=$(echo "$BODY" | jq -r '.data.tableCount // 0' 2>/dev/null)
  401. print_success "结构化解析成功!"
  402. print_info "总元素: $TOTAL, 图片: $IMAGE_COUNT, 表格: $TABLE_COUNT"
  403. # 显示图片列表
  404. if [ "$IMAGE_COUNT" -gt 0 ]; then
  405. echo -e "\n${YELLOW}图片列表:${NC}"
  406. echo "$BODY" | jq -r '.data.elements[] | select(.type == "image") | " - \(.imageUrl) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
  407. fi
  408. # 显示表格摘要
  409. if [ "$TABLE_COUNT" -gt 0 ]; then
  410. echo -e "\n${YELLOW}表格列表:${NC}"
  411. echo "$BODY" | jq -r '.data.elements[] | select(.type == "table") | " - 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
  412. fi
  413. else
  414. print_success "结构化解析成功!"
  415. echo "$BODY"
  416. fi
  417. return 0
  418. else
  419. print_error "结构化解析失败 (HTTP $HTTP_CODE)"
  420. if [ "$JQ_AVAILABLE" = true ]; then
  421. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  422. else
  423. echo "$BODY"
  424. fi
  425. return 1
  426. fi
  427. }
  428. # 获取图片列表
  429. test_get_images() {
  430. local DOC_ID=$1
  431. print_step "获取文档图片"
  432. print_info "文档ID: $DOC_ID"
  433. print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/images"
  434. RESPONSE=$(curl -s -w "\n%{http_code}" \
  435. -X GET "${ELEMENTS_URL}/${DOC_ID}/images" \
  436. --connect-timeout 10)
  437. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  438. BODY=$(echo "$RESPONSE" | sed '$d')
  439. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  440. if [ "$HTTP_CODE" = "200" ]; then
  441. if [ "$JQ_AVAILABLE" = true ]; then
  442. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  443. print_success "获取图片成功! 共 $COUNT 张"
  444. if [ "$COUNT" -gt 0 ]; then
  445. echo -e "${YELLOW}图片详情:${NC}"
  446. echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] \(.imageUrl) - \(.imageFormat) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
  447. fi
  448. else
  449. print_success "获取图片成功!"
  450. echo "$BODY"
  451. fi
  452. return 0
  453. else
  454. print_error "获取图片失败 (HTTP $HTTP_CODE)"
  455. return 1
  456. fi
  457. }
  458. # 获取表格列表
  459. test_get_tables() {
  460. local DOC_ID=$1
  461. print_step "获取文档表格"
  462. print_info "文档ID: $DOC_ID"
  463. print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/tables"
  464. RESPONSE=$(curl -s -w "\n%{http_code}" \
  465. -X GET "${ELEMENTS_URL}/${DOC_ID}/tables" \
  466. --connect-timeout 10)
  467. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  468. BODY=$(echo "$RESPONSE" | sed '$d')
  469. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  470. if [ "$HTTP_CODE" = "200" ]; then
  471. if [ "$JQ_AVAILABLE" = true ]; then
  472. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  473. print_success "获取表格成功! 共 $COUNT 个"
  474. if [ "$COUNT" -gt 0 ]; then
  475. echo -e "${YELLOW}表格详情:${NC}"
  476. echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
  477. fi
  478. else
  479. print_success "获取表格成功!"
  480. echo "$BODY"
  481. fi
  482. return 0
  483. else
  484. print_error "获取表格失败 (HTTP $HTTP_CODE)"
  485. return 1
  486. fi
  487. }
  488. # ============================================
  489. # 数据源相关测试函数
  490. # ============================================
  491. # 获取文档的 GraphNode 列表
  492. test_get_graph_nodes() {
  493. local DOC_ID=$1
  494. print_step "获取文档 GraphNode 列表"
  495. print_info "文档ID: $DOC_ID"
  496. print_info "请求URL: ${GRAPH_URL}/documents/${DOC_ID}/nodes"
  497. RESPONSE=$(curl -s -w "\n%{http_code}" \
  498. -X GET "${GRAPH_URL}/documents/${DOC_ID}/nodes" \
  499. --connect-timeout 10)
  500. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  501. BODY=$(echo "$RESPONSE" | sed '$d')
  502. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  503. if [ "$HTTP_CODE" = "200" ]; then
  504. if [ "$JQ_AVAILABLE" = true ]; then
  505. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  506. print_success "获取 GraphNode 成功! 共 $COUNT 个"
  507. if [ "$COUNT" -gt 0 ]; then
  508. echo -e "${YELLOW}节点列表 (前10个):${NC}"
  509. echo "$BODY" | jq -r '.data[:10][] | " [\(.id)] \(.nodeType): \(.name)"' 2>/dev/null
  510. # 保存第一个节点ID供后续测试使用
  511. FIRST_NODE_ID=$(echo "$BODY" | jq -r '.data[0].id // empty' 2>/dev/null)
  512. if [ -n "$FIRST_NODE_ID" ] && [ "$FIRST_NODE_ID" != "null" ]; then
  513. echo "$FIRST_NODE_ID" > "${SCRIPT_DIR}/.last_node_id"
  514. print_info "已保存第一个节点ID: $FIRST_NODE_ID"
  515. fi
  516. fi
  517. else
  518. print_success "获取 GraphNode 成功!"
  519. echo "$BODY"
  520. fi
  521. return 0
  522. else
  523. print_error "获取 GraphNode 失败 (HTTP $HTTP_CODE)"
  524. return 1
  525. fi
  526. }
  527. # 获取文档的数据源列表
  528. test_get_datasources() {
  529. local DOC_ID=$1
  530. print_step "获取文档数据源列表"
  531. print_info "文档ID: $DOC_ID"
  532. print_info "请求URL: ${DATASOURCE_URL}/document/${DOC_ID}"
  533. RESPONSE=$(curl -s -w "\n%{http_code}" \
  534. -X GET "${DATASOURCE_URL}/document/${DOC_ID}" \
  535. --connect-timeout 10)
  536. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  537. BODY=$(echo "$RESPONSE" | sed '$d')
  538. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  539. if [ "$HTTP_CODE" = "200" ]; then
  540. if [ "$JQ_AVAILABLE" = true ]; then
  541. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  542. print_success "获取数据源成功! 共 $COUNT 个"
  543. if [ "$COUNT" -gt 0 ]; then
  544. echo -e "${YELLOW}数据源列表:${NC}"
  545. echo "$BODY" | jq -r '.data[] | " [\(.id)] \(.name) (\(.type)) - 值类型: \(.valueType), 聚合: \(.aggregateType)"' 2>/dev/null
  546. fi
  547. else
  548. print_success "获取数据源成功!"
  549. echo "$BODY"
  550. fi
  551. return 0
  552. else
  553. print_error "获取数据源失败 (HTTP $HTTP_CODE)"
  554. return 1
  555. fi
  556. }
  557. # 创建数据源
  558. test_create_datasource() {
  559. local DOC_ID=$1
  560. local NAME=$2
  561. local TYPE=$3
  562. local VALUE_TYPE=${4:-text}
  563. local AGGREGATE_TYPE=${5:-first}
  564. print_step "创建数据源"
  565. print_info "文档ID: $DOC_ID"
  566. print_info "名称: $NAME"
  567. print_info "类型: $TYPE"
  568. print_info "值类型: $VALUE_TYPE"
  569. print_info "聚合方式: $AGGREGATE_TYPE"
  570. # 获取用户ID
  571. local ACTUAL_USER_ID="default-user"
  572. if [ -f "${SCRIPT_DIR}/.last_user_id" ]; then
  573. ACTUAL_USER_ID=$(cat "${SCRIPT_DIR}/.last_user_id")
  574. fi
  575. REQUEST_BODY=$(cat <<EOF
  576. {
  577. "documentId": "$DOC_ID",
  578. "name": "$NAME",
  579. "type": "$TYPE",
  580. "valueType": "$VALUE_TYPE",
  581. "aggregateType": "$AGGREGATE_TYPE"
  582. }
  583. EOF
  584. )
  585. RESPONSE=$(curl -s -w "\n%{http_code}" \
  586. -X POST "$DATASOURCE_URL" \
  587. -H "Content-Type: application/json" \
  588. -H "X-User-Id: $ACTUAL_USER_ID" \
  589. -d "$REQUEST_BODY" \
  590. --connect-timeout 10)
  591. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  592. BODY=$(echo "$RESPONSE" | sed '$d')
  593. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  594. if [ "$HTTP_CODE" = "200" ]; then
  595. if [ "$JQ_AVAILABLE" = true ]; then
  596. DS_ID=$(echo "$BODY" | jq -r '.data.id // empty' 2>/dev/null)
  597. print_success "创建数据源成功!"
  598. print_info "数据源ID: $DS_ID"
  599. echo "$DS_ID" > "${SCRIPT_DIR}/.last_datasource_id"
  600. echo "$BODY" | jq '.data' 2>/dev/null
  601. else
  602. print_success "创建数据源成功!"
  603. echo "$BODY"
  604. fi
  605. return 0
  606. else
  607. print_error "创建数据源失败 (HTTP $HTTP_CODE)"
  608. if [ "$JQ_AVAILABLE" = true ]; then
  609. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  610. else
  611. echo "$BODY"
  612. fi
  613. return 1
  614. fi
  615. }
  616. # 绑定节点到数据源
  617. test_bind_nodes_to_datasource() {
  618. local DS_ID=$1
  619. local NODE_TYPE=$2 # graph_node 或 document_element
  620. local NODE_IDS=$3 # 逗号分隔的节点ID列表
  621. local MODE=${4:-append} # replace/append/remove
  622. print_step "绑定节点到数据源"
  623. print_info "数据源ID: $DS_ID"
  624. print_info "节点类型: $NODE_TYPE"
  625. print_info "节点IDs: $NODE_IDS"
  626. print_info "模式: $MODE"
  627. # 构建 refs 数组
  628. local REFS_ARRAY="["
  629. local FIRST=true
  630. IFS=',' read -ra IDS <<< "$NODE_IDS"
  631. for id in "${IDS[@]}"; do
  632. if [ "$FIRST" = true ]; then
  633. FIRST=false
  634. else
  635. REFS_ARRAY+=","
  636. fi
  637. REFS_ARRAY+="{\"type\":\"$NODE_TYPE\",\"id\":\"$id\"}"
  638. done
  639. REFS_ARRAY+="]"
  640. REQUEST_BODY=$(cat <<EOF
  641. {
  642. "mode": "$MODE",
  643. "refs": $REFS_ARRAY
  644. }
  645. EOF
  646. )
  647. RESPONSE=$(curl -s -w "\n%{http_code}" \
  648. -X PUT "${DATASOURCE_URL}/${DS_ID}/refs" \
  649. -H "Content-Type: application/json" \
  650. -d "$REQUEST_BODY" \
  651. --connect-timeout 10)
  652. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  653. BODY=$(echo "$RESPONSE" | sed '$d')
  654. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  655. if [ "$HTTP_CODE" = "200" ]; then
  656. print_success "绑定节点成功!"
  657. if [ "$JQ_AVAILABLE" = true ]; then
  658. echo "$BODY" | jq '.data' 2>/dev/null
  659. else
  660. echo "$BODY"
  661. fi
  662. return 0
  663. else
  664. print_error "绑定节点失败 (HTTP $HTTP_CODE)"
  665. if [ "$JQ_AVAILABLE" = true ]; then
  666. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  667. else
  668. echo "$BODY"
  669. fi
  670. return 1
  671. fi
  672. }
  673. # 获取数据源的值
  674. test_get_datasource_value() {
  675. local DS_ID=$1
  676. print_step "获取数据源值"
  677. print_info "数据源ID: $DS_ID"
  678. print_info "请求URL: ${DATASOURCE_URL}/${DS_ID}/value"
  679. RESPONSE=$(curl -s -w "\n%{http_code}" \
  680. -X GET "${DATASOURCE_URL}/${DS_ID}/value" \
  681. --connect-timeout 10)
  682. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  683. BODY=$(echo "$RESPONSE" | sed '$d')
  684. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  685. if [ "$HTTP_CODE" = "200" ]; then
  686. print_success "获取数据源值成功!"
  687. if [ "$JQ_AVAILABLE" = true ]; then
  688. echo "$BODY" | jq '.data' 2>/dev/null
  689. else
  690. echo "$BODY"
  691. fi
  692. return 0
  693. else
  694. print_error "获取数据源值失败 (HTTP $HTTP_CODE)"
  695. if [ "$JQ_AVAILABLE" = true ]; then
  696. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  697. else
  698. echo "$BODY"
  699. fi
  700. return 1
  701. fi
  702. }
  703. # 数据源完整测试流程
  704. test_datasource_flow() {
  705. local DOC_ID=$1
  706. print_header "数据源完整测试流程"
  707. # 1. 获取 GraphNode 列表
  708. test_get_graph_nodes "$DOC_ID"
  709. # 2. 获取现有数据源
  710. test_get_datasources "$DOC_ID"
  711. # 3. 创建一个测试数据源
  712. print_step "创建测试数据源"
  713. if test_create_datasource "$DOC_ID" "测试数据源_$(date +%s)" "entity" "text" "concat"; then
  714. DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id" 2>/dev/null)
  715. # 4. 如果有节点,绑定到数据源
  716. if [ -f "${SCRIPT_DIR}/.last_node_id" ]; then
  717. NODE_ID=$(cat "${SCRIPT_DIR}/.last_node_id")
  718. test_bind_nodes_to_datasource "$DS_ID" "graph_node" "$NODE_ID" "append"
  719. # 5. 获取数据源值
  720. test_get_datasource_value "$DS_ID"
  721. else
  722. print_info "没有可用的节点ID,跳过绑定测试"
  723. fi
  724. fi
  725. }
  726. # ============================================
  727. # 任务中心相关测试函数
  728. # ============================================
  729. # 获取任务详情(按文档ID)
  730. test_get_task_detail() {
  731. local DOC_ID=$1
  732. print_step "获取任务详情"
  733. print_info "文档ID: $DOC_ID"
  734. print_info "请求URL: ${TASK_CENTER_URL}/by-document/${DOC_ID}"
  735. RESPONSE=$(curl -s -w "\n%{http_code}" \
  736. -X GET "${TASK_CENTER_URL}/by-document/${DOC_ID}" \
  737. --connect-timeout 10)
  738. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  739. BODY=$(echo "$RESPONSE" | sed '$d')
  740. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  741. if [ "$HTTP_CODE" = "200" ]; then
  742. if [ "$JQ_AVAILABLE" = true ]; then
  743. # 提取关键信息
  744. STATUS=$(echo "$BODY" | jq -r '.data.status // "unknown"' 2>/dev/null)
  745. PROGRESS=$(echo "$BODY" | jq -r '.data.progress // 0' 2>/dev/null)
  746. CURRENT_STEP=$(echo "$BODY" | jq -r '.data.currentStep // "-"' 2>/dev/null)
  747. print_success "获取任务详情成功!"
  748. echo -e "${CYAN}══════════════════════════════════════════${NC}"
  749. echo -e " 状态: ${GREEN}$STATUS${NC}"
  750. echo -e " 总进度: ${GREEN}${PROGRESS}%${NC}"
  751. echo -e " 当前步骤: ${GREEN}$CURRENT_STEP${NC}"
  752. echo -e "${CYAN}══════════════════════════════════════════${NC}"
  753. # 显示各阶段详情
  754. echo -e "\n${YELLOW}阶段详情:${NC}"
  755. echo "$BODY" | jq -r '.data.stages[] | " [\(.status | if . == "completed" then "✓" elif . == "in_progress" then "●" elif . == "failed" then "✗" else "○" end)] \(.displayName) - \(.progress)%" + (if .resultSummary then " (\(.resultSummary))" else "" end)' 2>/dev/null
  756. else
  757. print_success "获取任务详情成功!"
  758. echo "$BODY"
  759. fi
  760. return 0
  761. else
  762. print_error "获取任务详情失败 (HTTP $HTTP_CODE)"
  763. if [ "$JQ_AVAILABLE" = true ]; then
  764. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  765. else
  766. echo "$BODY"
  767. fi
  768. return 1
  769. fi
  770. }
  #######################################
  # Fetch one page of the task list from the task center and print it.
  # Globals:
  #   TASK_CENTER_URL, JQ_AVAILABLE, YELLOW, NC (read only)
  # Arguments:
  #   $1 - optional status filter; when non-empty it is sent as the
  #        `status` query parameter
  #   $2 - page number (default 1)
  #   $3 - page size (default 10)
  # Outputs:
  #   The request URL, the HTTP status code and either a one-line summary
  #   per task (when jq is available) or the raw response body. On failure
  #   the response body is printed as well.
  # Returns:
  #   0 if the server answered HTTP 200, 1 otherwise.
  #######################################
  test_get_task_list() {
    local STATUS_FILTER=${1:-}
    local PAGE_NUM=${2:-1}
    local PAGE_SIZE=${3:-10}
    # Scratch variables are local so a call never clobbers same-named globals.
    local URL RESPONSE HTTP_CODE BODY TOTAL

    print_step "获取任务列表"

    URL="${TASK_CENTER_URL}/list?pageNum=${PAGE_NUM}&pageSize=${PAGE_SIZE}"
    if [[ -n "$STATUS_FILTER" ]]; then
      URL="${URL}&status=${STATUS_FILTER}"
    fi
    print_info "请求URL: $URL"

    # -w appends the HTTP status code as a final extra line, so the last line
    # is the code and everything before it is the response body.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
      -X GET "$URL" \
      --connect-timeout 10)
    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')
    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
      print_error "获取任务列表失败 (HTTP $HTTP_CODE)"
      # Show the server's error payload so the failure can be diagnosed;
      # fall back to the raw text when it is not valid JSON.
      if [[ -n "$BODY" ]]; then
        if [[ "$JQ_AVAILABLE" == true ]]; then
          printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
        else
          printf '%s\n' "$BODY"
        fi
      fi
      return 1
    fi

    if [[ "$JQ_AVAILABLE" == true ]]; then
      TOTAL=$(printf '%s\n' "$BODY" | jq -r '.data.total // 0' 2>/dev/null)
      print_success "获取任务列表成功! 共 $TOTAL 条"
      # One summary line per task record.
      echo -e "\n${YELLOW}任务列表:${NC}"
      printf '%s\n' "$BODY" | jq -r '.data.records[] | " [\(.status)] \(.documentId) - 进度: \(.progress)% - \(.currentStep // "-")"' 2>/dev/null
    else
      print_success "获取任务列表成功!"
      printf '%s\n' "$BODY"
    fi
    return 0
  }
  #######################################
  # Fetch the task statistics from the task center and print them.
  # Globals:
  #   TASK_CENTER_URL, JQ_AVAILABLE, YELLOW, CYAN, NC (read only)
  # Arguments:
  #   None
  # Outputs:
  #   The request URL, the HTTP status code and either the counters
  #   total / pending / processing / completed / failed (when jq is
  #   available) or the raw response body. On failure the response body is
  #   printed as well.
  # Returns:
  #   0 if the server answered HTTP 200, 1 otherwise.
  #######################################
  test_get_task_statistics() {
    # Scratch variables are local so a call never clobbers same-named globals.
    local RESPONSE HTTP_CODE BODY

    print_step "获取任务统计"
    print_info "请求URL: ${TASK_CENTER_URL}/statistics"

    # -w appends the HTTP status code as a final extra line, so the last line
    # is the code and everything before it is the response body.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
      -X GET "${TASK_CENTER_URL}/statistics" \
      --connect-timeout 10)
    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')
    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
      print_error "获取任务统计失败 (HTTP $HTTP_CODE)"
      # Show the server's error payload so the failure can be diagnosed;
      # fall back to the raw text when it is not valid JSON.
      if [[ -n "$BODY" ]]; then
        if [[ "$JQ_AVAILABLE" == true ]]; then
          printf '%s\n' "$BODY" | jq . 2>/dev/null || printf '%s\n' "$BODY"
        else
          printf '%s\n' "$BODY"
        fi
      fi
      return 1
    fi

    print_success "获取任务统计成功!"
    if [[ "$JQ_AVAILABLE" == true ]]; then
      echo -e "${CYAN}══════════════════════════════════════════${NC}"
      printf '%s\n' "$BODY" | jq -r '.data | " 总数: \(.total)\n 等待中: \(.pending)\n 处理中: \(.processing)\n 已完成: \(.completed)\n 失败: \(.failed)"' 2>/dev/null
      echo -e "${CYAN}══════════════════════════════════════════${NC}"
    else
      printf '%s\n' "$BODY"
    fi
    return 0
  }
  #######################################
  # Poll the task record of a document and show its progress until the task
  # reaches a final state.
  # Globals:
  #   TASK_CENTER_URL, JQ_AVAILABLE, CYAN, GREEN, YELLOW, RED, NC (read only)
  # Arguments:
  #   $1 - document ID whose task is polled
  #   $2 - maximum number of polls (default 120)
  #   $3 - seconds to sleep between two polls (default 3)
  # Outputs:
  #   A progress line whenever the overall progress or the current step
  #   changes, then the stage summaries (completed) or the error message
  #   (failed).
  # Returns:
  #   0 when the task status becomes "completed";
  #   1 when it becomes "failed" or all attempts are used up.
  # Notes:
  #   Without jq the status cannot be parsed: every raw response is printed
  #   and the function always ends with the timeout result.
  #######################################
  poll_task_progress() {
    local DOC_ID=$1
    local MAX_ATTEMPTS=${2:-120} # default: wait at most 6 minutes (120 * 3 s)
    local INTERVAL=${3:-3}
    local LAST_PROGRESS=-1
    local LAST_STAGE=""
    # Loop counter and scratch variables are local so the caller's variables
    # (notably a global `i`) are left alone.
    local i RESPONSE STATUS PROGRESS CURRENT_STEP STAGE_INFO ERROR_MSG

    print_step "轮询任务进度 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"

    for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
      RESPONSE=$(curl -s "${TASK_CENTER_URL}/by-document/${DOC_ID}" --connect-timeout 10)

      if [[ "$JQ_AVAILABLE" == true ]]; then
        STATUS=$(printf '%s\n' "$RESPONSE" | jq -r '.data.status // "unknown"' 2>/dev/null)
        PROGRESS=$(printf '%s\n' "$RESPONSE" | jq -r '.data.progress // 0' 2>/dev/null)
        CURRENT_STEP=$(printf '%s\n' "$RESPONSE" | jq -r '.data.currentStep // "-"' 2>/dev/null)

        # Only print when the overall progress or the current step changed.
        if [[ "$PROGRESS" != "$LAST_PROGRESS" || "$CURRENT_STEP" != "$LAST_STAGE" ]]; then
          # Detailed progress of the first stage that is currently running.
          STAGE_INFO=$(printf '%s\n' "$RESPONSE" | jq -r '.data.stages[] | select(.status == "in_progress") | "\(.displayName): \(.progress)%"' 2>/dev/null | head -1)
          echo -e "\r\033[K${CYAN}[${i}/${MAX_ATTEMPTS}]${NC} 总进度: ${GREEN}${PROGRESS}%${NC} | 阶段: ${YELLOW}${CURRENT_STEP}${NC} | ${STAGE_INFO:-等待中...}"
          LAST_PROGRESS=$PROGRESS
          LAST_STAGE=$CURRENT_STEP
        fi

        case "$STATUS" in
          completed)
            echo ""
            print_success "任务完成!"
            # Final result: the summary of every stage that reported one.
            echo -e "\n${YELLOW}最终结果:${NC}"
            printf '%s\n' "$RESPONSE" | jq -r '.data.stages[] | select(.resultSummary != null) | " \(.displayName): \(.resultSummary)"' 2>/dev/null
            return 0
            ;;
          failed)
            echo ""
            print_error "任务失败!"
            ERROR_MSG=$(printf '%s\n' "$RESPONSE" | jq -r '.data.errorMessage // "未知错误"' 2>/dev/null)
            echo -e "${RED}错误信息: $ERROR_MSG${NC}"
            return 1
            ;;
        esac
      else
        echo "第 $i 次查询..."
        printf '%s\n' "$RESPONSE"
      fi

      # No point in waiting after the last attempt; report the timeout at once.
      if ((i < MAX_ATTEMPTS)); then
        sleep "$INTERVAL"
      fi
    done

    echo ""
    print_error "轮询超时,任务未完成"
    return 1
  }
  #######################################
  # Print the usage text: options, data-source parameters and examples.
  # The legacy -p/--poll switch accepted by the argument parser is listed
  # too, so every accepted option is documented.
  # Globals:
  #   None ($0 is used as the program name in the text)
  # Arguments:
  #   None
  # Outputs:
  #   Writes the help text to stdout.
  #######################################
  show_help() {
    printf '%s\n' \
      "使用方法: $0 [选项] [host] [port]" \
      "" \
      "端到端测试流程: 上传文件 -> 等待解析 -> 自动处理 -> 数据源操作" \
      "" \
      "选项:" \
      " -h, --help 显示帮助信息" \
      " -e, --e2e 执行完整端到端测试 (默认)" \
      " -p, --poll 兼容旧参数, 等同于 --e2e" \
      " -u, --upload-only 仅执行上传测试" \
      " -s, --status 仅查询上次上传的文档状态" \
      " -v, --vector 仅执行向量提取(使用上次的文档)" \
      " -n, --ner 仅执行NER提取(使用上次的文档)" \
      " -x, --structured 仅执行结构化解析(提取图片和表格)" \
      " -i, --images 仅获取文档图片列表" \
      " -t, --tables 仅获取文档表格列表" \
      " -g, --nodes 获取文档的 GraphNode 列表" \
      " -d, --datasource 获取文档的数据源列表" \
      " --ds-create 创建数据源 (需要 --name 和 --type)" \
      " --ds-bind 绑定节点到数据源" \
      " --ds-value 获取数据源的值" \
      " --ds-flow 执行数据源完整测试流程" \
      "" \
      " === 任务中心 ===" \
      " --task 获取任务详情(按文档ID)" \
      " --task-list 获取任务列表" \
      " --task-stats 获取任务统计" \
      " --task-poll 轮询任务进度直到完成" \
      "" \
      "数据源相关参数:" \
      " --name NAME 数据源名称" \
      " --type TYPE 数据源类型 (entity/paragraph/image/table)" \
      " --value-type TYPE 值类型 (text/image/table/mixed)" \
      " --aggregate TYPE 聚合方式 (first/last/concat/sum/avg/list)" \
      " --ds-id ID 数据源ID" \
      " --node-type TYPE 节点类型 (graph_node/document_element)" \
      " --node-ids IDS 节点ID列表 (逗号分隔)" \
      "" \
      "示例:" \
      " $0 # 完整端到端测试" \
      " $0 192.168.1.100 5232 # 指定服务器地址" \
      " $0 -u # 仅上传文件" \
      " $0 -s # 查询上次上传的状态" \
      " $0 -g # 获取文档的 GraphNode 列表" \
      " $0 -d # 获取文档的数据源列表" \
      " $0 --ds-flow # 执行数据源完整测试流程" \
      " $0 --ds-create --name '报告编号' --type entity" \
      " $0 --ds-bind --ds-id xxx --node-type graph_node --node-ids 'id1,id2'" \
      " $0 --ds-value --ds-id xxx" \
      "" \
      " === 任务中心示例 ===" \
      " $0 --task # 获取上次文档的任务详情" \
      " $0 --task-list # 获取任务列表" \
      " $0 --task-stats # 获取任务统计" \
      " $0 --task-poll # 轮询任务进度直到完成"
  }
  #######################################
  # Load the document ID stored in ${SCRIPT_DIR}/.last_document_id into the
  # global DOCUMENT_ID.
  # Globals:
  #   SCRIPT_DIR (read), DOCUMENT_ID (written)
  # Arguments:
  #   $1 - optional error message used when the file does not exist
  # Returns:
  #   Does not return when the file is missing: prints the error and exits
  #   the script with status 1.
  #######################################
  _load_last_document_id() {
    local missing_msg=${1:-"未找到上次上传的文档ID"}
    if [[ ! -f "${SCRIPT_DIR}/.last_document_id" ]]; then
      print_error "$missing_msg"
      exit 1
    fi
    DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  }

  #######################################
  # Script entry point: parse the command line, derive the service URLs from
  # HOST/PORT, run the selected test mode and print follow-up hints.
  # Globals:
  #   HOST, PORT (read; overwritten by the first/second positional argument)
  #   SCRIPT_DIR, GREEN, NC (read)
  #   BASE_URL, UPLOAD_URL, STATUS_URL, REGISTER_URL, TEXT_STORAGE_URL,
  #   RAG_INDEX_URL, NER_DOCUMENT_URL, STRUCTURED_URL, ELEMENTS_URL,
  #   DATASOURCE_URL, GRAPH_URL, TASK_CENTER_URL, DOCUMENT_ID (written)
  # Arguments:
  #   "$@" - mode switches, data-source parameters and optional [host] [port]
  # Returns:
  #   Exits 0 after printing help; exits 1 when a required value, document ID
  #   or data-source ID is missing or the e2e flow cannot continue.
  #######################################
  main() {
    local MODE="e2e" # default: run the full end-to-end test
    # Data-source related parameters
    local DS_NAME=""
    local DS_TYPE=""
    local DS_VALUE_TYPE="text"
    local DS_AGGREGATE="first"
    local DS_ID=""
    local NODE_TYPE="graph_node"
    local NODE_IDS=""
    # Tracks whether the first positional argument (host) was consumed; kept
    # local so parsing does not leak state, while honouring an inherited value.
    local HOST_SET=${HOST_SET:-}

    # Parse arguments
    while [[ $# -gt 0 ]]; do
      case "$1" in
        -h | --help)
          show_help
          exit 0
          ;;
        -e | --e2e)
          MODE="e2e"
          shift
          ;;
        -u | --upload-only)
          MODE="upload"
          shift
          ;;
        -s | --status)
          MODE="status"
          shift
          ;;
        -v | --vector)
          MODE="vector"
          shift
          ;;
        -n | --ner)
          MODE="ner"
          shift
          ;;
        -x | --structured)
          MODE="structured"
          shift
          ;;
        -i | --images)
          MODE="images"
          shift
          ;;
        -t | --tables)
          MODE="tables"
          shift
          ;;
        -g | --nodes)
          MODE="nodes"
          shift
          ;;
        -d | --datasource)
          MODE="datasource"
          shift
          ;;
        --ds-create | --ds-bind | --ds-value | --ds-flow | --task | --task-list | --task-stats | --task-poll)
          # For these switches the mode name is the option without its "--".
          MODE=${1#--}
          shift
          ;;
        --name | --type | --value-type | --aggregate | --ds-id | --node-type | --node-ids)
          # `shift 2` fails and shifts nothing when the value is missing,
          # which would make this loop spin forever; reject it explicitly.
          if [[ $# -lt 2 ]]; then
            print_error "选项 $1 缺少参数值"
            exit 1
          fi
          case "$1" in
            --name) DS_NAME=$2 ;;
            --type) DS_TYPE=$2 ;;
            --value-type) DS_VALUE_TYPE=$2 ;;
            --aggregate) DS_AGGREGATE=$2 ;;
            --ds-id) DS_ID=$2 ;;
            --node-type) NODE_TYPE=$2 ;;
            --node-ids) NODE_IDS=$2 ;;
          esac
          shift 2
          ;;
        -p | --poll)
          # Legacy switch kept for compatibility; same as e2e.
          MODE="e2e"
          shift
          ;;
        *)
          if [[ "$1" == -* ]]; then
            # Unknown options are still ignored, but no longer silently.
            echo "警告: 忽略未知选项: $1" >&2
          elif [[ -z "$HOST_SET" ]]; then
            HOST=$1
            HOST_SET=true
          else
            PORT=$1
          fi
          shift
          ;;
      esac
    done

    # Rebuild the endpoint URLs from the (possibly overridden) host and port.
    BASE_URL="http://${HOST}:${PORT}"
    UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
    STATUS_URL="${BASE_URL}/parse/status"
    REGISTER_URL="${BASE_URL}/auth/register"
    TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
    RAG_INDEX_URL="${BASE_URL}/api/rag/index"
    NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
    STRUCTURED_URL="${BASE_URL}/parse/structured"
    ELEMENTS_URL="${BASE_URL}/parse/elements"
    DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
    GRAPH_URL="${BASE_URL}/api/graph"
    TASK_CENTER_URL="${BASE_URL}/api/v1/tasks"

    print_header "文件上传端到端测试"
    echo "目标服务: $BASE_URL"
    echo "测试模式: $MODE"
    echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"
    check_dependencies

    # Dispatch on the selected mode.
    case "$MODE" in
      status)
        # Query the parse status only
        _load_last_document_id
        print_header "查询解析状态"
        test_parse_status "$DOCUMENT_ID"
        ;;
      vector)
        # Vector extraction only
        _load_last_document_id
        print_header "向量提取测试"
        if get_document_text "$DOCUMENT_ID"; then
          test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
        fi
        ;;
      ner)
        # NER extraction only
        _load_last_document_id
        print_header "NER 提取测试"
        test_ner_extraction "$DOCUMENT_ID"
        ;;
      structured)
        # Structured parsing only
        _load_last_document_id
        print_header "结构化解析测试"
        test_structured_extraction "$DOCUMENT_ID"
        ;;
      images)
        # List the document's images only
        _load_last_document_id
        print_header "获取文档图片"
        test_get_images "$DOCUMENT_ID"
        ;;
      tables)
        # List the document's tables only
        _load_last_document_id
        print_header "获取文档表格"
        test_get_tables "$DOCUMENT_ID"
        ;;
      nodes)
        # List the document's GraphNodes
        _load_last_document_id
        print_header "获取文档 GraphNode"
        test_get_graph_nodes "$DOCUMENT_ID"
        ;;
      datasource)
        # List the document's data sources
        _load_last_document_id
        print_header "获取文档数据源"
        test_get_datasources "$DOCUMENT_ID"
        ;;
      ds-create)
        # Create a data source; name and type are mandatory
        if [[ -z "$DS_NAME" ]]; then
          print_error "请指定数据源名称 (--name)"
          exit 1
        fi
        if [[ -z "$DS_TYPE" ]]; then
          print_error "请指定数据源类型 (--type)"
          exit 1
        fi
        _load_last_document_id
        print_header "创建数据源"
        test_create_datasource "$DOCUMENT_ID" "$DS_NAME" "$DS_TYPE" "$DS_VALUE_TYPE" "$DS_AGGREGATE"
        ;;
      ds-bind)
        # Bind nodes to a data source; IDs fall back to the saved state files
        if [[ -z "$DS_ID" && -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
          DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
        fi
        if [[ -z "$DS_ID" ]]; then
          print_error "请指定数据源ID (--ds-id)"
          exit 1
        fi
        if [[ -z "$NODE_IDS" && -f "${SCRIPT_DIR}/.last_node_id" ]]; then
          NODE_IDS=$(cat "${SCRIPT_DIR}/.last_node_id")
        fi
        if [[ -z "$NODE_IDS" ]]; then
          print_error "请指定节点ID (--node-ids)"
          exit 1
        fi
        print_header "绑定节点到数据源"
        test_bind_nodes_to_datasource "$DS_ID" "$NODE_TYPE" "$NODE_IDS" "append"
        ;;
      ds-value)
        # Read the value of a data source
        if [[ -z "$DS_ID" && -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
          DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
        fi
        if [[ -z "$DS_ID" ]]; then
          print_error "请指定数据源ID (--ds-id)"
          exit 1
        fi
        print_header "获取数据源值"
        test_get_datasource_value "$DS_ID"
        ;;
      ds-flow)
        # Full data-source test flow
        _load_last_document_id "未找到上次上传的文档ID,请先上传文档"
        test_datasource_flow "$DOCUMENT_ID"
        ;;
      task)
        # Task detail for the last document
        _load_last_document_id
        print_header "任务详情"
        test_get_task_detail "$DOCUMENT_ID"
        ;;
      task-list)
        # Task list
        print_header "任务列表"
        test_get_task_list
        ;;
      task-stats)
        # Task statistics
        print_header "任务统计"
        test_get_task_statistics
        ;;
      task-poll)
        # Poll the task progress
        _load_last_document_id
        print_header "轮询任务进度"
        poll_task_progress "$DOCUMENT_ID"
        ;;
      upload)
        # Upload only
        check_test_file
        check_service
        register_test_user
        test_upload
        ;;
      e2e)
        # Full end-to-end test.
        # Note: the upload automatically triggers RAG vectorization,
        # structured parsing and NER extraction.
        check_test_file
        check_service
        register_test_user

        print_header "步骤 1/3: 文件上传"
        print_info "上传后将自动触发: RAG向量化 + 结构化解析 + NER提取"
        test_upload
        # Fall back to the saved ID when DOCUMENT_ID is still empty after the upload.
        if [[ -z "$DOCUMENT_ID" && -f "${SCRIPT_DIR}/.last_document_id" ]]; then
          DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
        fi
        if [[ -z "$DOCUMENT_ID" ]]; then
          print_error "无法获取文档ID,终止测试"
          exit 1
        fi

        print_header "步骤 2/3: 等待解析完成"
        if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
          print_error "解析未完成,终止测试"
          exit 1
        fi

        # Wait for the automatic processing (RAG + structured parsing + NER).
        # NER may take a while (about 4-5 minutes), so only wait briefly and
        # show the initial results.
        print_header "步骤 3/3: 等待后台自动处理并查看结果"
        print_info "后台正在自动执行: RAG向量化、结构化解析、NER提取"
        print_info "NER 提取可能需要几分钟,可稍后使用 -g 查看 GraphNode 列表"
        print_info "等待 10 秒后查看初始结果..."
        sleep 10

        # Structured parsing results (usually finished quickly)
        print_step "查看结构化解析结果"
        test_get_images "$DOCUMENT_ID"
        test_get_tables "$DOCUMENT_ID"

        # GraphNodes (may be empty while NER is still running)
        print_step "查看 GraphNode 列表 (NER 结果)"
        test_get_graph_nodes "$DOCUMENT_ID"
        print_info ""
        print_info "提示: NER 提取需要几分钟,可稍后运行 '$0 -g' 查看完整结果"
        ;;
    esac

    print_header "测试完成"
    if [[ -n "$DOCUMENT_ID" ]]; then
      echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
    fi
    if [[ -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
      # The path is quoted so a SCRIPT_DIR containing spaces still works.
      echo -e "${GREEN}最后数据源ID: $(cat "${SCRIPT_DIR}/.last_datasource_id")${NC}"
    fi

    printf '%s\n' \
      "" \
      "可使用以下命令进行后续操作:" \
      "" \
      " === 文档处理 ===" \
      " $0 -s # 查询解析状态" \
      " $0 -v # 重新向量提取" \
      " $0 -n # 重新NER提取" \
      " $0 -x # 结构化解析" \
      " $0 -i # 获取图片列表" \
      " $0 -t # 获取表格列表" \
      "" \
      " === 数据源操作 ===" \
      " $0 -g # 获取 GraphNode 列表" \
      " $0 -d # 获取数据源列表" \
      " $0 --ds-flow # 执行数据源完整测试" \
      " $0 --ds-create --name '名称' --type entity" \
      " $0 --ds-bind --node-ids 'id1,id2'" \
      " $0 --ds-value" \
      "" \
      " === 任务中心 ===" \
      " $0 --task # 获取任务详情" \
      " $0 --task-list # 获取任务列表" \
      " $0 --task-stats # 获取任务统计" \
      " $0 --task-poll # 轮询任务进度直到完成"
  }
  1349. # 运行主函数
  1350. main "$@"