test_upload_api.sh 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. #!/bin/bash
  2. # ============================================
  3. # 文件上传端到端测试脚本
  4. # ============================================
  5. # 测试流程: 上传 -> 解析等待 -> 自动处理(向量/NER/结构化)-> 数据源操作
  6. # 使用方法: ./test_upload_api.sh [host] [port]
  7. # 示例: ./test_upload_api.sh localhost 5232
  8. # ============================================
  9. # 配置参数
  10. HOST=${1:-localhost}
  11. PORT=${2:-5232}
  12. BASE_URL="http://${HOST}:${PORT}"
  13. UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
  14. STATUS_URL="${BASE_URL}/parse/status"
  15. REGISTER_URL="${BASE_URL}/auth/register"
  16. TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
  17. RAG_INDEX_URL="${BASE_URL}/api/rag/index"
  18. NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
  19. STRUCTURED_URL="${BASE_URL}/parse/structured"
  20. ELEMENTS_URL="${BASE_URL}/parse/elements"
  21. DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
  22. GRAPH_URL="${BASE_URL}/api/graph"
  23. # 测试文件路径(相对于脚本所在目录)
  24. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  25. TEST_FILE="${SCRIPT_DIR}/test.docx"
  26. # 测试用户信息
  27. TIMESTAMP=$(date +%s)
  28. TEST_USERNAME="testuser_${TIMESTAMP}"
  29. TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
  30. TEST_PASSWORD="Test123456!"
  31. USER_ID=""
  32. DOCUMENT_ID=""
  33. DOCUMENT_TEXT=""
  34. # 颜色定义
  35. RED='\033[0;31m'
  36. GREEN='\033[0;32m'
  37. YELLOW='\033[1;33m'
  38. BLUE='\033[0;34m'
  39. CYAN='\033[0;36m'
  40. NC='\033[0m' # No Color
  41. # 输出函数
  42. print_header() {
  43. echo -e "\n${BLUE}============================================${NC}"
  44. echo -e "${BLUE}$1${NC}"
  45. echo -e "${BLUE}============================================${NC}"
  46. }
  47. print_step() {
  48. echo -e "\n${CYAN}>>> $1${NC}"
  49. }
  50. print_success() {
  51. echo -e "${GREEN}✓ $1${NC}"
  52. }
  53. print_error() {
  54. echo -e "${RED}✗ $1${NC}"
  55. }
  56. print_info() {
  57. echo -e "${YELLOW}➤ $1${NC}"
  58. }
  59. # 检查依赖
  60. check_dependencies() {
  61. print_header "检查依赖"
  62. if ! command -v curl &> /dev/null; then
  63. print_error "curl 未安装"
  64. exit 1
  65. fi
  66. print_success "curl 已安装"
  67. if ! command -v jq &> /dev/null; then
  68. print_info "jq 未安装,JSON格式化将不可用"
  69. JQ_AVAILABLE=false
  70. else
  71. print_success "jq 已安装"
  72. JQ_AVAILABLE=true
  73. fi
  74. }
  75. # 检查测试文件
  76. check_test_file() {
  77. print_header "检查测试文件"
  78. if [ ! -f "$TEST_FILE" ]; then
  79. print_error "测试文件不存在: $TEST_FILE"
  80. exit 1
  81. fi
  82. FILE_SIZE=$(stat -c%s "$TEST_FILE" 2>/dev/null || stat -f%z "$TEST_FILE" 2>/dev/null)
  83. print_success "测试文件存在: $TEST_FILE"
  84. print_info "文件大小: $FILE_SIZE bytes"
  85. }
  86. # 检查服务是否可用
  87. check_service() {
  88. print_header "检查服务状态"
  89. print_info "测试服务: $BASE_URL"
  90. HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${BASE_URL}/actuator/health" 2>/dev/null)
  91. if [ "$HTTP_CODE" = "200" ]; then
  92. print_success "服务正常运行 (HTTP $HTTP_CODE)"
  93. elif [ "$HTTP_CODE" = "000" ]; then
  94. print_error "无法连接到服务 $BASE_URL"
  95. print_info "请确保 parse-service 正在运行"
  96. exit 1
  97. else
  98. print_info "健康检查返回 HTTP $HTTP_CODE,继续测试..."
  99. fi
  100. }
  101. # 注册测试用户
  102. register_test_user() {
  103. print_header "注册测试用户"
  104. print_info "用户名: $TEST_USERNAME"
  105. print_info "邮箱: $TEST_EMAIL"
  106. print_info "注册URL: $REGISTER_URL"
  107. RESPONSE=$(curl -s -w "\n%{http_code}" \
  108. -X POST "$REGISTER_URL" \
  109. -H "Content-Type: application/json" \
  110. -d "{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}" \
  111. --connect-timeout 10)
  112. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  113. BODY=$(echo "$RESPONSE" | sed '$d')
  114. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  115. if [ "$JQ_AVAILABLE" = true ]; then
  116. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  117. else
  118. echo "$BODY"
  119. fi
  120. # 解析用户ID
  121. if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
  122. print_success "用户注册成功!"
  123. if [ "$JQ_AVAILABLE" = true ]; then
  124. USER_ID=$(echo "$BODY" | jq -r '.data.user.id // .data.userId // .userId // empty' 2>/dev/null)
  125. if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
  126. # 尝试其他可能的字段
  127. USER_ID=$(echo "$BODY" | jq -r '.data.id // .id // empty' 2>/dev/null)
  128. fi
  129. if [ -n "$USER_ID" ] && [ "$USER_ID" != "null" ]; then
  130. print_info "用户ID: $USER_ID"
  131. echo "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
  132. else
  133. print_error "无法从响应中获取用户ID"
  134. echo "响应内容: $BODY"
  135. exit 1
  136. fi
  137. fi
  138. else
  139. print_error "用户注册失败 (HTTP $HTTP_CODE)"
  140. print_info "响应: $BODY"
  141. exit 1
  142. fi
  143. }
  144. # 测试文件上传
  145. test_upload() {
  146. print_step "文件上传"
  147. print_info "上传URL: $UPLOAD_URL"
  148. print_info "用户ID: $USER_ID"
  149. print_info "文件: $TEST_FILE"
  150. echo -e "\n发送请求..."
  151. RESPONSE=$(curl -s -w "\n%{http_code}" \
  152. -X POST "$UPLOAD_URL" \
  153. -H "Content-Type: multipart/form-data" \
  154. -F "file=@${TEST_FILE}" \
  155. -F "userId=${USER_ID}" \
  156. --connect-timeout 10 \
  157. --max-time 300)
  158. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  159. BODY=$(echo "$RESPONSE" | sed '$d')
  160. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  161. echo -e "${YELLOW}响应内容:${NC}"
  162. if [ "$JQ_AVAILABLE" = true ]; then
  163. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  164. else
  165. echo "$BODY"
  166. fi
  167. # 解析响应
  168. if [ "$HTTP_CODE" = "200" ]; then
  169. print_success "文件上传成功!"
  170. # 提取documentId用于后续操作
  171. if [ "$JQ_AVAILABLE" = true ]; then
  172. DOCUMENT_ID=$(echo "$BODY" | jq -r '.data.documentId // .documentId // empty' 2>/dev/null)
  173. if [ -n "$DOCUMENT_ID" ] && [ "$DOCUMENT_ID" != "null" ]; then
  174. print_info "文档ID: $DOCUMENT_ID"
  175. echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
  176. else
  177. print_error "无法从响应中获取文档ID"
  178. return 1
  179. fi
  180. fi
  181. return 0
  182. else
  183. print_error "文件上传失败 (HTTP $HTTP_CODE)"
  184. return 1
  185. fi
  186. }
  187. # 测试解析状态查询(单次)
  188. test_parse_status() {
  189. local DOC_ID=$1
  190. print_info "文档ID: $DOC_ID"
  191. print_info "状态URL: ${STATUS_URL}/${DOC_ID}"
  192. RESPONSE=$(curl -s -w "\n%{http_code}" \
  193. -X GET "${STATUS_URL}/${DOC_ID}" \
  194. --connect-timeout 10)
  195. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  196. BODY=$(echo "$RESPONSE" | sed '$d')
  197. echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  198. echo -e "${YELLOW}响应内容:${NC}"
  199. if [ "$JQ_AVAILABLE" = true ]; then
  200. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  201. # 显示关键状态信息
  202. if [ "$HTTP_CODE" = "200" ]; then
  203. STATUS=$(echo "$BODY" | jq -r '.data.status // empty' 2>/dev/null)
  204. PROGRESS=$(echo "$BODY" | jq -r '.data.progress // 0' 2>/dev/null)
  205. CURRENT_STEP=$(echo "$BODY" | jq -r '.data.currentStep // empty' 2>/dev/null)
  206. print_info "状态: $STATUS, 进度: ${PROGRESS}%, 当前步骤: $CURRENT_STEP"
  207. fi
  208. else
  209. echo "$BODY"
  210. fi
  211. if [ "$HTTP_CODE" = "200" ]; then
  212. print_success "状态查询成功!"
  213. else
  214. print_error "状态查询失败 (HTTP $HTTP_CODE)"
  215. fi
  216. }
  217. # 轮询解析状态直到完成
  218. poll_parse_status() {
  219. local DOC_ID=$1
  220. local MAX_ATTEMPTS=${2:-60}
  221. local INTERVAL=${3:-3}
  222. print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"
  223. for ((i=1; i<=MAX_ATTEMPTS; i++)); do
  224. RESPONSE=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10)
  225. if [ "$JQ_AVAILABLE" = true ]; then
  226. # 状态字段为 status,值为: pending/processing/completed/failed
  227. STATUS=$(echo "$RESPONSE" | jq -r '.data.status // .status // empty' 2>/dev/null)
  228. PROGRESS=$(echo "$RESPONSE" | jq -r '.data.progress // .progress // 0' 2>/dev/null)
  229. echo -ne "\r第 $i 次查询... 状态: $STATUS, 进度: ${PROGRESS}% "
  230. if [ "$STATUS" = "completed" ] || [ "$STATUS" = "COMPLETED" ]; then
  231. echo ""
  232. print_success "解析完成!"
  233. return 0
  234. elif [ "$STATUS" = "failed" ] || [ "$STATUS" = "FAILED" ]; then
  235. echo ""
  236. print_error "解析失败!"
  237. echo "$RESPONSE" | jq .
  238. return 1
  239. fi
  240. else
  241. echo "$RESPONSE"
  242. fi
  243. sleep $INTERVAL
  244. done
  245. echo ""
  246. print_error "轮询超时,解析未完成"
  247. return 1
  248. }
  249. # 获取解析后的文本内容
  250. get_document_text() {
  251. local DOC_ID=$1
  252. print_step "获取文档解析文本"
  253. print_info "文档ID: $DOC_ID"
  254. print_info "请求URL: ${TEXT_STORAGE_URL}/${DOC_ID}"
  255. RESPONSE=$(curl -s -w "\n%{http_code}" \
  256. -X GET "${TEXT_STORAGE_URL}/${DOC_ID}" \
  257. --connect-timeout 10)
  258. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  259. BODY=$(echo "$RESPONSE" | sed '$d')
  260. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  261. if [ "$HTTP_CODE" = "200" ]; then
  262. if [ "$JQ_AVAILABLE" = true ]; then
  263. # 获取文件路径
  264. FILE_PATH=$(echo "$BODY" | jq -r '.data.filePath // empty' 2>/dev/null)
  265. if [ -n "$FILE_PATH" ] && [ "$FILE_PATH" != "null" ]; then
  266. print_success "获取文本存储记录成功!"
  267. print_info "文件路径: $FILE_PATH"
  268. # 读取文件内容
  269. if [ -f "$FILE_PATH" ]; then
  270. DOCUMENT_TEXT=$(cat "$FILE_PATH" 2>/dev/null)
  271. TEXT_LENGTH=${#DOCUMENT_TEXT}
  272. print_success "读取文本成功 (长度: $TEXT_LENGTH 字符)"
  273. # 显示前200个字符
  274. echo -e "${YELLOW}文本预览:${NC}"
  275. echo "${DOCUMENT_TEXT:0:200}..."
  276. return 0
  277. else
  278. print_error "文件不存在: $FILE_PATH"
  279. return 1
  280. fi
  281. else
  282. print_error "响应中无文件路径"
  283. echo "$BODY" | jq . 2>/dev/null
  284. return 1
  285. fi
  286. fi
  287. else
  288. print_error "获取文本存储失败 (HTTP $HTTP_CODE)"
  289. echo "$BODY"
  290. return 1
  291. fi
  292. }
  293. # 向量提取(RAG 索引)
  294. test_vector_extraction() {
  295. local DOC_ID=$1
  296. local TEXT=$2
  297. print_step "向量提取 (RAG 索引)"
  298. print_info "文档ID: $DOC_ID"
  299. print_info "文本长度: ${#TEXT} 字符"
  300. print_info "请求URL: $RAG_INDEX_URL"
  301. # 构建请求JSON(需要转义文本中的特殊字符)
  302. if [ "$JQ_AVAILABLE" = true ]; then
  303. REQUEST_BODY=$(jq -n \
  304. --arg docId "$DOC_ID" \
  305. --arg text "$TEXT" \
  306. '{documentId: $docId, text: $text}')
  307. else
  308. # 简单转义
  309. ESCAPED_TEXT=$(echo "$TEXT" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
  310. REQUEST_BODY="{\"documentId\":\"${DOC_ID}\",\"text\":\"${ESCAPED_TEXT}\"}"
  311. fi
  312. RESPONSE=$(curl -s -w "\n%{http_code}" \
  313. -X POST "$RAG_INDEX_URL" \
  314. -H "Content-Type: application/json" \
  315. -d "$REQUEST_BODY" \
  316. --connect-timeout 30 \
  317. --max-time 300)
  318. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  319. BODY=$(echo "$RESPONSE" | sed '$d')
  320. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  321. echo -e "${YELLOW}响应内容:${NC}"
  322. if [ "$JQ_AVAILABLE" = true ]; then
  323. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  324. else
  325. echo "$BODY"
  326. fi
  327. if [ "$HTTP_CODE" = "200" ]; then
  328. if [ "$JQ_AVAILABLE" = true ]; then
  329. CHUNK_COUNT=$(echo "$BODY" | jq -r '.data.chunkCount // empty' 2>/dev/null)
  330. if [ -n "$CHUNK_COUNT" ] && [ "$CHUNK_COUNT" != "null" ]; then
  331. print_success "向量提取成功! 生成 $CHUNK_COUNT 个分块"
  332. else
  333. print_success "向量提取成功!"
  334. fi
  335. else
  336. print_success "向量提取成功!"
  337. fi
  338. return 0
  339. else
  340. print_error "向量提取失败 (HTTP $HTTP_CODE)"
  341. return 1
  342. fi
  343. }
  344. # NER 提取
  345. test_ner_extraction() {
  346. local DOC_ID=$1
  347. print_step "NER 提取 (命名实体识别)"
  348. print_info "文档ID: $DOC_ID"
  349. print_info "请求URL: ${NER_DOCUMENT_URL}/${DOC_ID}"
  350. RESPONSE=$(curl -s -w "\n%{http_code}" \
  351. -X POST "${NER_DOCUMENT_URL}/${DOC_ID}" \
  352. -H "Content-Type: application/json" \
  353. --connect-timeout 30 \
  354. --max-time 300)
  355. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  356. BODY=$(echo "$RESPONSE" | sed '$d')
  357. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  358. echo -e "${YELLOW}响应内容:${NC}"
  359. if [ "$JQ_AVAILABLE" = true ]; then
  360. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  361. else
  362. echo "$BODY"
  363. fi
  364. if [ "$HTTP_CODE" = "200" ]; then
  365. if [ "$JQ_AVAILABLE" = true ]; then
  366. ENTITY_COUNT=$(echo "$BODY" | jq -r '.data.entityCount // empty' 2>/dev/null)
  367. RELATION_COUNT=$(echo "$BODY" | jq -r '.data.relationCount // empty' 2>/dev/null)
  368. if [ -n "$ENTITY_COUNT" ] && [ "$ENTITY_COUNT" != "null" ]; then
  369. print_success "NER 提取成功! 实体: $ENTITY_COUNT, 关系: $RELATION_COUNT"
  370. else
  371. print_success "NER 提取成功!"
  372. fi
  373. else
  374. print_success "NER 提取成功!"
  375. fi
  376. return 0
  377. else
  378. print_error "NER 提取失败 (HTTP $HTTP_CODE)"
  379. return 1
  380. fi
  381. }
  382. # 结构化解析(提取图片和表格)
  383. test_structured_extraction() {
  384. local DOC_ID=$1
  385. print_step "结构化解析 (提取段落、图片、表格)"
  386. print_info "文档ID: $DOC_ID"
  387. print_info "请求URL: ${STRUCTURED_URL}/${DOC_ID}"
  388. RESPONSE=$(curl -s -w "\n%{http_code}" \
  389. -X GET "${STRUCTURED_URL}/${DOC_ID}" \
  390. --connect-timeout 30 \
  391. --max-time 300)
  392. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  393. BODY=$(echo "$RESPONSE" | sed '$d')
  394. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  395. if [ "$HTTP_CODE" = "200" ]; then
  396. if [ "$JQ_AVAILABLE" = true ]; then
  397. TOTAL=$(echo "$BODY" | jq -r '.data.totalElements // 0' 2>/dev/null)
  398. IMAGE_COUNT=$(echo "$BODY" | jq -r '.data.imageCount // 0' 2>/dev/null)
  399. TABLE_COUNT=$(echo "$BODY" | jq -r '.data.tableCount // 0' 2>/dev/null)
  400. print_success "结构化解析成功!"
  401. print_info "总元素: $TOTAL, 图片: $IMAGE_COUNT, 表格: $TABLE_COUNT"
  402. # 显示图片列表
  403. if [ "$IMAGE_COUNT" -gt 0 ]; then
  404. echo -e "\n${YELLOW}图片列表:${NC}"
  405. echo "$BODY" | jq -r '.data.elements[] | select(.type == "image") | " - \(.imageUrl) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
  406. fi
  407. # 显示表格摘要
  408. if [ "$TABLE_COUNT" -gt 0 ]; then
  409. echo -e "\n${YELLOW}表格列表:${NC}"
  410. echo "$BODY" | jq -r '.data.elements[] | select(.type == "table") | " - 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
  411. fi
  412. else
  413. print_success "结构化解析成功!"
  414. echo "$BODY"
  415. fi
  416. return 0
  417. else
  418. print_error "结构化解析失败 (HTTP $HTTP_CODE)"
  419. if [ "$JQ_AVAILABLE" = true ]; then
  420. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  421. else
  422. echo "$BODY"
  423. fi
  424. return 1
  425. fi
  426. }
  427. # 获取图片列表
  428. test_get_images() {
  429. local DOC_ID=$1
  430. print_step "获取文档图片"
  431. print_info "文档ID: $DOC_ID"
  432. print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/images"
  433. RESPONSE=$(curl -s -w "\n%{http_code}" \
  434. -X GET "${ELEMENTS_URL}/${DOC_ID}/images" \
  435. --connect-timeout 10)
  436. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  437. BODY=$(echo "$RESPONSE" | sed '$d')
  438. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  439. if [ "$HTTP_CODE" = "200" ]; then
  440. if [ "$JQ_AVAILABLE" = true ]; then
  441. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  442. print_success "获取图片成功! 共 $COUNT 张"
  443. if [ "$COUNT" -gt 0 ]; then
  444. echo -e "${YELLOW}图片详情:${NC}"
  445. echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] \(.imageUrl) - \(.imageFormat) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
  446. fi
  447. else
  448. print_success "获取图片成功!"
  449. echo "$BODY"
  450. fi
  451. return 0
  452. else
  453. print_error "获取图片失败 (HTTP $HTTP_CODE)"
  454. return 1
  455. fi
  456. }
  457. # 获取表格列表
  458. test_get_tables() {
  459. local DOC_ID=$1
  460. print_step "获取文档表格"
  461. print_info "文档ID: $DOC_ID"
  462. print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/tables"
  463. RESPONSE=$(curl -s -w "\n%{http_code}" \
  464. -X GET "${ELEMENTS_URL}/${DOC_ID}/tables" \
  465. --connect-timeout 10)
  466. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  467. BODY=$(echo "$RESPONSE" | sed '$d')
  468. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  469. if [ "$HTTP_CODE" = "200" ]; then
  470. if [ "$JQ_AVAILABLE" = true ]; then
  471. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  472. print_success "获取表格成功! 共 $COUNT 个"
  473. if [ "$COUNT" -gt 0 ]; then
  474. echo -e "${YELLOW}表格详情:${NC}"
  475. echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
  476. fi
  477. else
  478. print_success "获取表格成功!"
  479. echo "$BODY"
  480. fi
  481. return 0
  482. else
  483. print_error "获取表格失败 (HTTP $HTTP_CODE)"
  484. return 1
  485. fi
  486. }
  487. # ============================================
  488. # 数据源相关测试函数
  489. # ============================================
  490. # 获取文档的 GraphNode 列表
  491. test_get_graph_nodes() {
  492. local DOC_ID=$1
  493. print_step "获取文档 GraphNode 列表"
  494. print_info "文档ID: $DOC_ID"
  495. print_info "请求URL: ${GRAPH_URL}/documents/${DOC_ID}/nodes"
  496. RESPONSE=$(curl -s -w "\n%{http_code}" \
  497. -X GET "${GRAPH_URL}/documents/${DOC_ID}/nodes" \
  498. --connect-timeout 10)
  499. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  500. BODY=$(echo "$RESPONSE" | sed '$d')
  501. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  502. if [ "$HTTP_CODE" = "200" ]; then
  503. if [ "$JQ_AVAILABLE" = true ]; then
  504. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  505. print_success "获取 GraphNode 成功! 共 $COUNT 个"
  506. if [ "$COUNT" -gt 0 ]; then
  507. echo -e "${YELLOW}节点列表 (前10个):${NC}"
  508. echo "$BODY" | jq -r '.data[:10][] | " [\(.id)] \(.nodeType): \(.name)"' 2>/dev/null
  509. # 保存第一个节点ID供后续测试使用
  510. FIRST_NODE_ID=$(echo "$BODY" | jq -r '.data[0].id // empty' 2>/dev/null)
  511. if [ -n "$FIRST_NODE_ID" ] && [ "$FIRST_NODE_ID" != "null" ]; then
  512. echo "$FIRST_NODE_ID" > "${SCRIPT_DIR}/.last_node_id"
  513. print_info "已保存第一个节点ID: $FIRST_NODE_ID"
  514. fi
  515. fi
  516. else
  517. print_success "获取 GraphNode 成功!"
  518. echo "$BODY"
  519. fi
  520. return 0
  521. else
  522. print_error "获取 GraphNode 失败 (HTTP $HTTP_CODE)"
  523. return 1
  524. fi
  525. }
  526. # 获取文档的数据源列表
  527. test_get_datasources() {
  528. local DOC_ID=$1
  529. print_step "获取文档数据源列表"
  530. print_info "文档ID: $DOC_ID"
  531. print_info "请求URL: ${DATASOURCE_URL}/document/${DOC_ID}"
  532. RESPONSE=$(curl -s -w "\n%{http_code}" \
  533. -X GET "${DATASOURCE_URL}/document/${DOC_ID}" \
  534. --connect-timeout 10)
  535. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  536. BODY=$(echo "$RESPONSE" | sed '$d')
  537. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  538. if [ "$HTTP_CODE" = "200" ]; then
  539. if [ "$JQ_AVAILABLE" = true ]; then
  540. COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
  541. print_success "获取数据源成功! 共 $COUNT 个"
  542. if [ "$COUNT" -gt 0 ]; then
  543. echo -e "${YELLOW}数据源列表:${NC}"
  544. echo "$BODY" | jq -r '.data[] | " [\(.id)] \(.name) (\(.type)) - 值类型: \(.valueType), 聚合: \(.aggregateType)"' 2>/dev/null
  545. fi
  546. else
  547. print_success "获取数据源成功!"
  548. echo "$BODY"
  549. fi
  550. return 0
  551. else
  552. print_error "获取数据源失败 (HTTP $HTTP_CODE)"
  553. return 1
  554. fi
  555. }
  556. # 创建数据源
  557. test_create_datasource() {
  558. local DOC_ID=$1
  559. local NAME=$2
  560. local TYPE=$3
  561. local VALUE_TYPE=${4:-text}
  562. local AGGREGATE_TYPE=${5:-first}
  563. print_step "创建数据源"
  564. print_info "文档ID: $DOC_ID"
  565. print_info "名称: $NAME"
  566. print_info "类型: $TYPE"
  567. print_info "值类型: $VALUE_TYPE"
  568. print_info "聚合方式: $AGGREGATE_TYPE"
  569. # 获取用户ID
  570. local ACTUAL_USER_ID="default-user"
  571. if [ -f "${SCRIPT_DIR}/.last_user_id" ]; then
  572. ACTUAL_USER_ID=$(cat "${SCRIPT_DIR}/.last_user_id")
  573. fi
  574. REQUEST_BODY=$(cat <<EOF
  575. {
  576. "documentId": "$DOC_ID",
  577. "name": "$NAME",
  578. "type": "$TYPE",
  579. "valueType": "$VALUE_TYPE",
  580. "aggregateType": "$AGGREGATE_TYPE"
  581. }
  582. EOF
  583. )
  584. RESPONSE=$(curl -s -w "\n%{http_code}" \
  585. -X POST "$DATASOURCE_URL" \
  586. -H "Content-Type: application/json" \
  587. -H "X-User-Id: $ACTUAL_USER_ID" \
  588. -d "$REQUEST_BODY" \
  589. --connect-timeout 10)
  590. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  591. BODY=$(echo "$RESPONSE" | sed '$d')
  592. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  593. if [ "$HTTP_CODE" = "200" ]; then
  594. if [ "$JQ_AVAILABLE" = true ]; then
  595. DS_ID=$(echo "$BODY" | jq -r '.data.id // empty' 2>/dev/null)
  596. print_success "创建数据源成功!"
  597. print_info "数据源ID: $DS_ID"
  598. echo "$DS_ID" > "${SCRIPT_DIR}/.last_datasource_id"
  599. echo "$BODY" | jq '.data' 2>/dev/null
  600. else
  601. print_success "创建数据源成功!"
  602. echo "$BODY"
  603. fi
  604. return 0
  605. else
  606. print_error "创建数据源失败 (HTTP $HTTP_CODE)"
  607. if [ "$JQ_AVAILABLE" = true ]; then
  608. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  609. else
  610. echo "$BODY"
  611. fi
  612. return 1
  613. fi
  614. }
  615. # 绑定节点到数据源
  616. test_bind_nodes_to_datasource() {
  617. local DS_ID=$1
  618. local NODE_TYPE=$2 # graph_node 或 document_element
  619. local NODE_IDS=$3 # 逗号分隔的节点ID列表
  620. local MODE=${4:-append} # replace/append/remove
  621. print_step "绑定节点到数据源"
  622. print_info "数据源ID: $DS_ID"
  623. print_info "节点类型: $NODE_TYPE"
  624. print_info "节点IDs: $NODE_IDS"
  625. print_info "模式: $MODE"
  626. # 构建 refs 数组
  627. local REFS_ARRAY="["
  628. local FIRST=true
  629. IFS=',' read -ra IDS <<< "$NODE_IDS"
  630. for id in "${IDS[@]}"; do
  631. if [ "$FIRST" = true ]; then
  632. FIRST=false
  633. else
  634. REFS_ARRAY+=","
  635. fi
  636. REFS_ARRAY+="{\"type\":\"$NODE_TYPE\",\"id\":\"$id\"}"
  637. done
  638. REFS_ARRAY+="]"
  639. REQUEST_BODY=$(cat <<EOF
  640. {
  641. "mode": "$MODE",
  642. "refs": $REFS_ARRAY
  643. }
  644. EOF
  645. )
  646. RESPONSE=$(curl -s -w "\n%{http_code}" \
  647. -X PUT "${DATASOURCE_URL}/${DS_ID}/refs" \
  648. -H "Content-Type: application/json" \
  649. -d "$REQUEST_BODY" \
  650. --connect-timeout 10)
  651. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  652. BODY=$(echo "$RESPONSE" | sed '$d')
  653. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  654. if [ "$HTTP_CODE" = "200" ]; then
  655. print_success "绑定节点成功!"
  656. if [ "$JQ_AVAILABLE" = true ]; then
  657. echo "$BODY" | jq '.data' 2>/dev/null
  658. else
  659. echo "$BODY"
  660. fi
  661. return 0
  662. else
  663. print_error "绑定节点失败 (HTTP $HTTP_CODE)"
  664. if [ "$JQ_AVAILABLE" = true ]; then
  665. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  666. else
  667. echo "$BODY"
  668. fi
  669. return 1
  670. fi
  671. }
  672. # 获取数据源的值
  673. test_get_datasource_value() {
  674. local DS_ID=$1
  675. print_step "获取数据源值"
  676. print_info "数据源ID: $DS_ID"
  677. print_info "请求URL: ${DATASOURCE_URL}/${DS_ID}/value"
  678. RESPONSE=$(curl -s -w "\n%{http_code}" \
  679. -X GET "${DATASOURCE_URL}/${DS_ID}/value" \
  680. --connect-timeout 10)
  681. HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  682. BODY=$(echo "$RESPONSE" | sed '$d')
  683. echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  684. if [ "$HTTP_CODE" = "200" ]; then
  685. print_success "获取数据源值成功!"
  686. if [ "$JQ_AVAILABLE" = true ]; then
  687. echo "$BODY" | jq '.data' 2>/dev/null
  688. else
  689. echo "$BODY"
  690. fi
  691. return 0
  692. else
  693. print_error "获取数据源值失败 (HTTP $HTTP_CODE)"
  694. if [ "$JQ_AVAILABLE" = true ]; then
  695. echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
  696. else
  697. echo "$BODY"
  698. fi
  699. return 1
  700. fi
  701. }
  702. # 数据源完整测试流程
  703. test_datasource_flow() {
  704. local DOC_ID=$1
  705. print_header "数据源完整测试流程"
  706. # 1. 获取 GraphNode 列表
  707. test_get_graph_nodes "$DOC_ID"
  708. # 2. 获取现有数据源
  709. test_get_datasources "$DOC_ID"
  710. # 3. 创建一个测试数据源
  711. print_step "创建测试数据源"
  712. if test_create_datasource "$DOC_ID" "测试数据源_$(date +%s)" "entity" "text" "concat"; then
  713. DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id" 2>/dev/null)
  714. # 4. 如果有节点,绑定到数据源
  715. if [ -f "${SCRIPT_DIR}/.last_node_id" ]; then
  716. NODE_ID=$(cat "${SCRIPT_DIR}/.last_node_id")
  717. test_bind_nodes_to_datasource "$DS_ID" "graph_node" "$NODE_ID" "append"
  718. # 5. 获取数据源值
  719. test_get_datasource_value "$DS_ID"
  720. else
  721. print_info "没有可用的节点ID,跳过绑定测试"
  722. fi
  723. fi
  724. }
# Print CLI usage: options, datasource parameters, and invocation examples.
# Help text is user-facing (Chinese) and left untouched.
show_help() {
  echo "使用方法: $0 [选项] [host] [port]"
  echo ""
  echo "端到端测试流程: 上传文件 -> 等待解析 -> 自动处理 -> 数据源操作"
  echo ""
  echo "选项:"
  echo " -h, --help 显示帮助信息"
  echo " -e, --e2e 执行完整端到端测试 (默认)"
  echo " -u, --upload-only 仅执行上传测试"
  echo " -s, --status 仅查询上次上传的文档状态"
  echo " -v, --vector 仅执行向量提取(使用上次的文档)"
  echo " -n, --ner 仅执行NER提取(使用上次的文档)"
  echo " -x, --structured 仅执行结构化解析(提取图片和表格)"
  echo " -i, --images 仅获取文档图片列表"
  echo " -t, --tables 仅获取文档表格列表"
  echo " -g, --nodes 获取文档的 GraphNode 列表"
  echo " -d, --datasource 获取文档的数据源列表"
  echo " --ds-create 创建数据源 (需要 --name 和 --type)"
  echo " --ds-bind 绑定节点到数据源"
  echo " --ds-value 获取数据源的值"
  echo " --ds-flow 执行数据源完整测试流程"
  echo ""
  echo "数据源相关参数:"
  echo " --name NAME 数据源名称"
  echo " --type TYPE 数据源类型 (entity/paragraph/image/table)"
  echo " --value-type TYPE 值类型 (text/image/table/mixed)"
  echo " --aggregate TYPE 聚合方式 (first/last/concat/sum/avg/list)"
  echo " --ds-id ID 数据源ID"
  echo " --node-type TYPE 节点类型 (graph_node/document_element)"
  echo " --node-ids IDS 节点ID列表 (逗号分隔)"
  echo ""
  echo "示例:"
  echo " $0 # 完整端到端测试"
  echo " $0 192.168.1.100 5232 # 指定服务器地址"
  echo " $0 -u # 仅上传文件"
  echo " $0 -s # 查询上次上传的状态"
  echo " $0 -g # 获取文档的 GraphNode 列表"
  echo " $0 -d # 获取文档的数据源列表"
  echo " $0 --ds-flow # 执行数据源完整测试流程"
  echo " $0 --ds-create --name '报告编号' --type entity"
  echo " $0 --ds-bind --ds-id xxx --node-type graph_node --node-ids 'id1,id2'"
  echo " $0 --ds-value --ds-id xxx"
}
  769. # 主函数
  770. main() {
  771. local MODE="e2e" # 默认执行完整端到端测试
  772. # 数据源相关参数
  773. local DS_NAME=""
  774. local DS_TYPE=""
  775. local DS_VALUE_TYPE="text"
  776. local DS_AGGREGATE="first"
  777. local DS_ID=""
  778. local NODE_TYPE="graph_node"
  779. local NODE_IDS=""
  780. # 解析参数
  781. while [[ $# -gt 0 ]]; do
  782. case $1 in
  783. -h|--help)
  784. show_help
  785. exit 0
  786. ;;
  787. -e|--e2e)
  788. MODE="e2e"
  789. shift
  790. ;;
  791. -u|--upload-only)
  792. MODE="upload"
  793. shift
  794. ;;
  795. -s|--status)
  796. MODE="status"
  797. shift
  798. ;;
  799. -v|--vector)
  800. MODE="vector"
  801. shift
  802. ;;
  803. -n|--ner)
  804. MODE="ner"
  805. shift
  806. ;;
  807. -x|--structured)
  808. MODE="structured"
  809. shift
  810. ;;
  811. -i|--images)
  812. MODE="images"
  813. shift
  814. ;;
  815. -t|--tables)
  816. MODE="tables"
  817. shift
  818. ;;
  819. -g|--nodes)
  820. MODE="nodes"
  821. shift
  822. ;;
  823. -d|--datasource)
  824. MODE="datasource"
  825. shift
  826. ;;
  827. --ds-create)
  828. MODE="ds-create"
  829. shift
  830. ;;
  831. --ds-bind)
  832. MODE="ds-bind"
  833. shift
  834. ;;
  835. --ds-value)
  836. MODE="ds-value"
  837. shift
  838. ;;
  839. --ds-flow)
  840. MODE="ds-flow"
  841. shift
  842. ;;
  843. --name)
  844. DS_NAME="$2"
  845. shift 2
  846. ;;
  847. --type)
  848. DS_TYPE="$2"
  849. shift 2
  850. ;;
  851. --value-type)
  852. DS_VALUE_TYPE="$2"
  853. shift 2
  854. ;;
  855. --aggregate)
  856. DS_AGGREGATE="$2"
  857. shift 2
  858. ;;
  859. --ds-id)
  860. DS_ID="$2"
  861. shift 2
  862. ;;
  863. --node-type)
  864. NODE_TYPE="$2"
  865. shift 2
  866. ;;
  867. --node-ids)
  868. NODE_IDS="$2"
  869. shift 2
  870. ;;
  871. -p|--poll)
  872. # 兼容旧参数,等同于e2e
  873. MODE="e2e"
  874. shift
  875. ;;
  876. *)
  877. if [[ ! "$1" =~ ^- ]]; then
  878. if [[ -z "$HOST_SET" ]]; then
  879. HOST=$1
  880. HOST_SET=true
  881. else
  882. PORT=$1
  883. fi
  884. fi
  885. shift
  886. ;;
  887. esac
  888. done
  889. # 更新URL
  890. BASE_URL="http://${HOST}:${PORT}"
  891. UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
  892. STATUS_URL="${BASE_URL}/parse/status"
  893. REGISTER_URL="${BASE_URL}/auth/register"
  894. TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
  895. RAG_INDEX_URL="${BASE_URL}/api/rag/index"
  896. NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
  897. STRUCTURED_URL="${BASE_URL}/parse/structured"
  898. ELEMENTS_URL="${BASE_URL}/parse/elements"
  899. DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
  900. GRAPH_URL="${BASE_URL}/api/graph"
  901. print_header "文件上传端到端测试"
  902. echo "目标服务: $BASE_URL"
  903. echo "测试模式: $MODE"
  904. echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"
  905. check_dependencies
  906. # 根据模式执行不同操作
  907. case $MODE in
  908. status)
  909. # 仅查询状态
  910. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  911. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  912. print_header "查询解析状态"
  913. test_parse_status "$DOCUMENT_ID"
  914. else
  915. print_error "未找到上次上传的文档ID"
  916. exit 1
  917. fi
  918. ;;
  919. vector)
  920. # 仅执行向量提取
  921. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  922. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  923. print_header "向量提取测试"
  924. if get_document_text "$DOCUMENT_ID"; then
  925. test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
  926. fi
  927. else
  928. print_error "未找到上次上传的文档ID"
  929. exit 1
  930. fi
  931. ;;
  932. ner)
  933. # 仅执行NER提取
  934. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  935. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  936. print_header "NER 提取测试"
  937. test_ner_extraction "$DOCUMENT_ID"
  938. else
  939. print_error "未找到上次上传的文档ID"
  940. exit 1
  941. fi
  942. ;;
  943. structured)
  944. # 仅执行结构化解析
  945. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  946. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  947. print_header "结构化解析测试"
  948. test_structured_extraction "$DOCUMENT_ID"
  949. else
  950. print_error "未找到上次上传的文档ID"
  951. exit 1
  952. fi
  953. ;;
  954. images)
  955. # 仅获取图片列表
  956. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  957. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  958. print_header "获取文档图片"
  959. test_get_images "$DOCUMENT_ID"
  960. else
  961. print_error "未找到上次上传的文档ID"
  962. exit 1
  963. fi
  964. ;;
  965. tables)
  966. # 仅获取表格列表
  967. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  968. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  969. print_header "获取文档表格"
  970. test_get_tables "$DOCUMENT_ID"
  971. else
  972. print_error "未找到上次上传的文档ID"
  973. exit 1
  974. fi
  975. ;;
  976. nodes)
  977. # 获取 GraphNode 列表
  978. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  979. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  980. print_header "获取文档 GraphNode"
  981. test_get_graph_nodes "$DOCUMENT_ID"
  982. else
  983. print_error "未找到上次上传的文档ID"
  984. exit 1
  985. fi
  986. ;;
  987. datasource)
  988. # 获取数据源列表
  989. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  990. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  991. print_header "获取文档数据源"
  992. test_get_datasources "$DOCUMENT_ID"
  993. else
  994. print_error "未找到上次上传的文档ID"
  995. exit 1
  996. fi
  997. ;;
  998. ds-create)
  999. # 创建数据源
  1000. if [ -z "$DS_NAME" ]; then
  1001. print_error "请指定数据源名称 (--name)"
  1002. exit 1
  1003. fi
  1004. if [ -z "$DS_TYPE" ]; then
  1005. print_error "请指定数据源类型 (--type)"
  1006. exit 1
  1007. fi
  1008. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  1009. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  1010. print_header "创建数据源"
  1011. test_create_datasource "$DOCUMENT_ID" "$DS_NAME" "$DS_TYPE" "$DS_VALUE_TYPE" "$DS_AGGREGATE"
  1012. else
  1013. print_error "未找到上次上传的文档ID"
  1014. exit 1
  1015. fi
  1016. ;;
  1017. ds-bind)
  1018. # 绑定节点到数据源
  1019. if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
  1020. DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
  1021. fi
  1022. if [ -z "$DS_ID" ]; then
  1023. print_error "请指定数据源ID (--ds-id)"
  1024. exit 1
  1025. fi
  1026. if [ -z "$NODE_IDS" ] && [ -f "${SCRIPT_DIR}/.last_node_id" ]; then
  1027. NODE_IDS=$(cat "${SCRIPT_DIR}/.last_node_id")
  1028. fi
  1029. if [ -z "$NODE_IDS" ]; then
  1030. print_error "请指定节点ID (--node-ids)"
  1031. exit 1
  1032. fi
  1033. print_header "绑定节点到数据源"
  1034. test_bind_nodes_to_datasource "$DS_ID" "$NODE_TYPE" "$NODE_IDS" "append"
  1035. ;;
  1036. ds-value)
  1037. # 获取数据源值
  1038. if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
  1039. DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
  1040. fi
  1041. if [ -z "$DS_ID" ]; then
  1042. print_error "请指定数据源ID (--ds-id)"
  1043. exit 1
  1044. fi
  1045. print_header "获取数据源值"
  1046. test_get_datasource_value "$DS_ID"
  1047. ;;
  1048. ds-flow)
  1049. # 数据源完整测试流程
  1050. if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  1051. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  1052. test_datasource_flow "$DOCUMENT_ID"
  1053. else
  1054. print_error "未找到上次上传的文档ID,请先上传文档"
  1055. exit 1
  1056. fi
  1057. ;;
  1058. upload)
  1059. # 仅上传
  1060. check_test_file
  1061. check_service
  1062. register_test_user
  1063. test_upload
  1064. ;;
  1065. e2e)
  1066. # 完整端到端测试
  1067. # 注意:上传后会自动触发 RAG向量化、结构化解析、NER提取
  1068. check_test_file
  1069. check_service
  1070. register_test_user
  1071. print_header "步骤 1/3: 文件上传"
  1072. print_info "上传后将自动触发: RAG向量化 + 结构化解析 + NER提取"
  1073. test_upload
  1074. if [ -z "$DOCUMENT_ID" ] && [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
  1075. DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
  1076. fi
  1077. if [ -z "$DOCUMENT_ID" ]; then
  1078. print_error "无法获取文档ID,终止测试"
  1079. exit 1
  1080. fi
  1081. print_header "步骤 2/3: 等待解析完成"
  1082. if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
  1083. print_error "解析未完成,终止测试"
  1084. exit 1
  1085. fi
  1086. # 等待自动处理完成(RAG + 结构化解析 + NER)
  1087. # NER 可能需要较长时间(约4-5分钟),这里先等待一小段时间查看初始结果
  1088. print_header "步骤 3/3: 等待后台自动处理并查看结果"
  1089. print_info "后台正在自动执行: RAG向量化、结构化解析、NER提取"
  1090. print_info "NER 提取可能需要几分钟,可稍后使用 -g 查看 GraphNode 列表"
  1091. print_info "等待 10 秒后查看初始结果..."
  1092. sleep 10
  1093. # 查看结构化解析结果(通常很快完成)
  1094. print_step "查看结构化解析结果"
  1095. test_get_images "$DOCUMENT_ID"
  1096. test_get_tables "$DOCUMENT_ID"
  1097. # 查看 GraphNode(如果 NER 还未完成,可能为空)
  1098. print_step "查看 GraphNode 列表 (NER 结果)"
  1099. test_get_graph_nodes "$DOCUMENT_ID"
  1100. print_info ""
  1101. print_info "提示: NER 提取需要几分钟,可稍后运行 '$0 -g' 查看完整结果"
  1102. ;;
  1103. esac
  1104. print_header "测试完成"
  1105. if [ -n "$DOCUMENT_ID" ]; then
  1106. echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
  1107. fi
  1108. if [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
  1109. echo -e "${GREEN}最后数据源ID: $(cat ${SCRIPT_DIR}/.last_datasource_id)${NC}"
  1110. fi
  1111. echo ""
  1112. echo "可使用以下命令进行后续操作:"
  1113. echo ""
  1114. echo " === 文档处理 ==="
  1115. echo " $0 -s # 查询解析状态"
  1116. echo " $0 -v # 重新向量提取"
  1117. echo " $0 -n # 重新NER提取"
  1118. echo " $0 -x # 结构化解析"
  1119. echo " $0 -i # 获取图片列表"
  1120. echo " $0 -t # 获取表格列表"
  1121. echo ""
  1122. echo " === 数据源操作 ==="
  1123. echo " $0 -g # 获取 GraphNode 列表"
  1124. echo " $0 -d # 获取数据源列表"
  1125. echo " $0 --ds-flow # 执行数据源完整测试"
  1126. echo " $0 --ds-create --name '名称' --type entity"
  1127. echo " $0 --ds-bind --node-ids 'id1,id2'"
  1128. echo " $0 --ds-value"
  1129. }
  1130. # 运行主函数
  1131. main "$@"