| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589 |
- #!/bin/bash
- # ============================================
- # 文件上传端到端测试脚本
- # ============================================
- # 测试流程: 上传 -> 解析等待 -> 自动处理(向量/NER/结构化)-> 数据源操作
- # 使用方法: ./test_upload_api.sh [host] [port]
- # 示例: ./test_upload_api.sh localhost 5232
- # ============================================
# Connection defaults. Per the original note, main() replaces these with the
# values given on the command line.
HOST='localhost'
PORT='5232'

# Service endpoint URLs. They are declared empty here; nothing in this block
# fills them in.
BASE_URL='' UPLOAD_URL='' STATUS_URL='' REGISTER_URL=''
TEXT_STORAGE_URL='' RAG_INDEX_URL='' NER_DOCUMENT_URL='' STRUCTURED_URL=''
ELEMENTS_URL='' DATASOURCE_URL='' GRAPH_URL='' TASK_CENTER_URL=''

# The test document is expected next to this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
TEST_FILE="${SCRIPT_DIR}/test.docx"

# Test account; the epoch timestamp makes the user name and e-mail differ
# from run to run.
TIMESTAMP="$(date +%s)"
TEST_USERNAME="testuser_${TIMESTAMP}"
TEST_EMAIL="${TEST_USERNAME}@test.com"
TEST_PASSWORD='Test123456!'

# State filled in while the script runs.
USER_ID='' DOCUMENT_ID='' DOCUMENT_TEXT=''

# ANSI colour escapes (expanded later by echo -e / printf %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # reset / no colour
# Output helpers.
#
# print_header TITLE
#   Prints a blank line, then TITLE between two rules, all wrapped in the
#   BLUE/NC colour codes. Backslash escapes in TITLE are interpreted.
print_header() {
    local rule="${BLUE}============================================${NC}"

    printf '%b\n' "\n${rule}"
    printf '%b\n' "${BLUE}$1${NC}"
    printf '%b\n' "${rule}"
}
# print_step TEXT
#   Prints a blank line followed by ">>> TEXT" wrapped in CYAN/NC.
print_step() {
    printf '%b\n' "\n${CYAN}>>> $1${NC}"
}
# print_success TEXT
#   Prints "✓ TEXT" wrapped in GREEN/NC.
print_success() {
    printf '%b\n' "${GREEN}✓ $1${NC}"
}
# print_error TEXT
#   Prints "✗ TEXT" wrapped in RED/NC (to stdout, like the other helpers).
print_error() {
    printf '%b\n' "${RED}✗ $1${NC}"
}
# print_info TEXT
#   Prints "➤ TEXT" wrapped in YELLOW/NC.
print_info() {
    printf '%b\n' "${YELLOW}➤ $1${NC}"
}
# Check the external tools the script relies on.
#   - curl is mandatory: the script exits with status 1 when it is missing.
#   - jq is optional: its presence is recorded in the global JQ_AVAILABLE
#     (true/false).
check_dependencies() {
    print_header "检查依赖"

    command -v curl > /dev/null 2>&1 || {
        print_error "curl 未安装"
        exit 1
    }
    print_success "curl 已安装"

    if command -v jq > /dev/null 2>&1; then
        print_success "jq 已安装"
        JQ_AVAILABLE=true
    else
        print_info "jq 未安装,JSON格式化将不可用"
        JQ_AVAILABLE=false
    fi
}
# Verify that TEST_FILE exists and report its size.
# Exits with status 1 when the file is missing. The size in bytes is stored
# in the global FILE_SIZE.
check_test_file() {
    print_header "检查测试文件"

    [[ -f "$TEST_FILE" ]] || {
        print_error "测试文件不存在: $TEST_FILE"
        exit 1
    }

    # Try the `stat -c%s` form first and fall back to `stat -f%z` if it fails.
    FILE_SIZE=$(stat -c%s "$TEST_FILE" 2>/dev/null || stat -f%z "$TEST_FILE" 2>/dev/null)

    print_success "测试文件存在: $TEST_FILE"
    print_info "文件大小: $FILE_SIZE bytes"
}
# Probe ${BASE_URL}/actuator/health.
#   200   -> reported as healthy
#   000   -> curl could not connect; the script exits with status 1
#   other -> reported, and the run continues
# The status code is stored in the global HTTP_CODE.
check_service() {
    print_header "检查服务状态"

    print_info "测试服务: $BASE_URL"

    HTTP_CODE=$(
        curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 \
            "${BASE_URL}/actuator/health" 2>/dev/null
    )

    case "$HTTP_CODE" in
        200)
            print_success "服务正常运行 (HTTP $HTTP_CODE)"
            ;;
        000)
            print_error "无法连接到服务 $BASE_URL"
            print_info "请确保 parse-service 正在运行"
            exit 1
            ;;
        *)
            print_info "健康检查返回 HTTP $HTTP_CODE,继续测试..."
            ;;
    esac
}
# Register the throw-away test account via POST to REGISTER_URL.
# Globals written: RESPONSE, HTTP_CODE, BODY and, when jq is available, USER_ID.
# Side effect: the user id is saved to ${SCRIPT_DIR}/.last_user_id.
# Exits with status 1 when the HTTP status is neither 200 nor 201, or when jq
# is available but no user id can be found in the response.
register_test_user() {
    print_header "注册测试用户"

    print_info "用户名: $TEST_USERNAME"
    print_info "邮箱: $TEST_EMAIL"
    print_info "注册URL: $REGISTER_URL"

    local payload
    payload="{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X POST "$REGISTER_URL" \
            -H "Content-Type: application/json" \
            -d "$payload" \
            --connect-timeout 10
    )

    # curl appends the status code on its own last line; everything above is the body.
    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"

    # Pretty-print with jq when possible, otherwise show the raw body.
    if [[ "$JQ_AVAILABLE" != true ]] || ! jq . <<< "$BODY" 2>/dev/null; then
        echo "$BODY"
    fi

    case "$HTTP_CODE" in
        200 | 201) ;;
        *)
            print_error "用户注册失败 (HTTP $HTTP_CODE)"
            print_info "响应: $BODY"
            exit 1
            ;;
    esac
    print_success "用户注册成功!"

    # Without jq the user id is not parsed; USER_ID keeps its previous value.
    [[ "$JQ_AVAILABLE" = true ]] || return 0

    USER_ID=$(jq -r '.data.user.id // .data.userId // .userId // empty' <<< "$BODY" 2>/dev/null)
    if [[ -z "$USER_ID" || "$USER_ID" = "null" ]]; then
        # Try the other field names the id may be returned under.
        USER_ID=$(jq -r '.data.id // .id // empty' <<< "$BODY" 2>/dev/null)
    fi

    if [[ -z "$USER_ID" || "$USER_ID" = "null" ]]; then
        print_error "无法从响应中获取用户ID"
        echo "响应内容: $BODY"
        exit 1
    fi

    print_info "用户ID: $USER_ID"
    echo "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
}
# Upload TEST_FILE on behalf of USER_ID as a multipart POST to UPLOAD_URL.
# Globals written: RESPONSE, HTTP_CODE, BODY and, when jq is available,
# DOCUMENT_ID.
# Side effect: the document id is saved to ${SCRIPT_DIR}/.last_document_id.
# Returns: 0 on HTTP 200 (with jq, only if a document id was found), else 1.
test_upload() {
    print_step "文件上传"

    print_info "上传URL: $UPLOAD_URL"
    print_info "用户ID: $USER_ID"
    print_info "文件: $TEST_FILE"

    printf '%b\n' "\n发送请求..."

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X POST "$UPLOAD_URL" \
            -H "Content-Type: multipart/form-data" \
            -F "file=@${TEST_FILE}" \
            -F "userId=${USER_ID}" \
            --connect-timeout 10 \
            --max-time 300
    )

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
    printf '%b\n' "${YELLOW}响应内容:${NC}"

    if [[ "$JQ_AVAILABLE" != true ]] || ! jq . <<< "$BODY" 2>/dev/null; then
        echo "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "文件上传失败 (HTTP $HTTP_CODE)"
        return 1
    fi
    print_success "文件上传成功!"

    # The document id is only extracted when jq is available.
    [[ "$JQ_AVAILABLE" = true ]] || return 0

    DOCUMENT_ID=$(jq -r '.data.documentId // .documentId // empty' <<< "$BODY" 2>/dev/null)
    if [[ -z "$DOCUMENT_ID" || "$DOCUMENT_ID" = "null" ]]; then
        print_error "无法从响应中获取文档ID"
        return 1
    fi

    print_info "文档ID: $DOCUMENT_ID"
    echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
    return 0
}
# Query the parse status of one document once (no polling).
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq and HTTP 200 also
# STATUS, PROGRESS and CURRENT_STEP.
test_parse_status() {
    local DOC_ID=$1

    print_info "文档ID: $DOC_ID"
    print_info "状态URL: ${STATUS_URL}/${DOC_ID}"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "${STATUS_URL}/${DOC_ID}" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
    printf '%b\n' "${YELLOW}响应内容:${NC}"

    if [[ "$JQ_AVAILABLE" = true ]]; then
        jq . <<< "$BODY" 2>/dev/null || echo "$BODY"

        # Summarise the key status fields on a single line.
        if [[ "$HTTP_CODE" = "200" ]]; then
            STATUS=$(jq -r '.data.status // empty' <<< "$BODY" 2>/dev/null)
            PROGRESS=$(jq -r '.data.progress // 0' <<< "$BODY" 2>/dev/null)
            CURRENT_STEP=$(jq -r '.data.currentStep // empty' <<< "$BODY" 2>/dev/null)
            print_info "状态: $STATUS, 进度: ${PROGRESS}%, 当前步骤: $CURRENT_STEP"
        fi
    else
        echo "$BODY"
    fi

    case "$HTTP_CODE" in
        200) print_success "状态查询成功!" ;;
        *) print_error "状态查询失败 (HTTP $HTTP_CODE)" ;;
    esac
}
# Poll the parse status of a document until it completes, fails or times out.
# Arguments:
#   $1 - document id
#   $2 - maximum number of attempts (default 60)
#   $3 - seconds to wait between attempts (default 3)
# Globals written: RESPONSE, STATUS; with jq also PROGRESS.
# Returns: 0 when the status becomes completed; 1 on failed or timeout.
#
# Changes from the previous version:
#   - without jq the status is now read with a best-effort grep, so the poll
#     can finish instead of always running to the timeout;
#   - no sleep after the final attempt;
#   - the loop counter is local.
poll_parse_status() {
    local DOC_ID=$1
    local MAX_ATTEMPTS=${2:-60}
    local INTERVAL=${3:-3}
    local i

    print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"

    for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
        RESPONSE=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10)

        if [ "$JQ_AVAILABLE" = true ]; then
            # The status field holds pending/processing/completed/failed.
            STATUS=$(echo "$RESPONSE" | jq -r '.data.status // .status // empty' 2>/dev/null)
            PROGRESS=$(echo "$RESPONSE" | jq -r '.data.progress // .progress // 0' 2>/dev/null)
            echo -ne "\r第 $i 次查询... 状态: $STATUS, 进度: ${PROGRESS}% "
        else
            echo "$RESPONSE"
            # Best-effort fallback: take the first string-valued "status"
            # field in the raw response. NOTE(review): assumes that first
            # field is the task status - confirm against the API's envelope.
            STATUS=$(printf '%s' "$RESPONSE" \
                | grep -oE '"status"[[:space:]]*:[[:space:]]*"[^"]*"' \
                | head -n1 \
                | sed 's/.*"\([^"]*\)"$/\1/')
        fi

        case "$STATUS" in
            completed | COMPLETED)
                echo ""
                print_success "解析完成!"
                return 0
                ;;
            failed | FAILED)
                echo ""
                print_error "解析失败!"
                # Without jq the raw response was already printed above.
                if [ "$JQ_AVAILABLE" = true ]; then
                    echo "$RESPONSE" | jq .
                fi
                return 1
                ;;
        esac

        # Only wait when another attempt will follow.
        if ((i < MAX_ATTEMPTS)); then
            sleep "$INTERVAL"
        fi
    done

    echo ""
    print_error "轮询超时,解析未完成"
    return 1
}
# Fetch the text-storage record of a document and load the parsed text from
# the file path it reports. The path is read from the local filesystem.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also FILE_PATH,
# DOCUMENT_TEXT and TEXT_LENGTH.
# Returns: 1 on a non-200 response, a missing path or a missing file;
# 0 otherwise (including HTTP 200 without jq, where no text is loaded).
get_document_text() {
    local DOC_ID=$1

    print_step "获取文档解析文本"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${TEXT_STORAGE_URL}/${DOC_ID}"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "${TEXT_STORAGE_URL}/${DOC_ID}" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取文本存储失败 (HTTP $HTTP_CODE)"
        echo "$BODY"
        return 1
    fi

    # Nothing further can be extracted without jq.
    [[ "$JQ_AVAILABLE" = true ]] || return 0

    # Path of the stored text file, as reported by the service.
    FILE_PATH=$(jq -r '.data.filePath // empty' <<< "$BODY" 2>/dev/null)
    if [[ -z "$FILE_PATH" || "$FILE_PATH" = "null" ]]; then
        print_error "响应中无文件路径"
        jq . <<< "$BODY" 2>/dev/null
        return 1
    fi

    print_success "获取文本存储记录成功!"
    print_info "文件路径: $FILE_PATH"

    if [[ ! -f "$FILE_PATH" ]]; then
        print_error "文件不存在: $FILE_PATH"
        return 1
    fi

    DOCUMENT_TEXT=$(cat "$FILE_PATH" 2>/dev/null)
    TEXT_LENGTH=${#DOCUMENT_TEXT}
    print_success "读取文本成功 (长度: $TEXT_LENGTH 字符)"

    # Preview: the first 200 characters.
    printf '%b\n' "${YELLOW}文本预览:${NC}"
    echo "${DOCUMENT_TEXT:0:200}..."
    return 0
}
# Vector extraction: POST the document text to the RAG index endpoint.
# Arguments:
#   $1 - document id
#   $2 - document text
# Globals written: REQUEST_BODY, RESPONSE, HTTP_CODE, BODY; with jq also
# CHUNK_COUNT; without jq ESCAPED_TEXT.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: the no-jq fallback used to escape only backslashes and double quotes
# and to flatten newlines, so text containing tabs, carriage returns or other
# control characters produced an invalid JSON string. Those characters are
# now replaced or removed before escaping, and printf replaces echo so no
# stray trailing space is appended.
test_vector_extraction() {
    local DOC_ID=$1
    local TEXT=$2

    print_step "向量提取 (RAG 索引)"

    print_info "文档ID: $DOC_ID"
    print_info "文本长度: ${#TEXT} 字符"
    print_info "请求URL: $RAG_INDEX_URL"

    # Build the JSON request; special characters in the text must be escaped.
    if [ "$JQ_AVAILABLE" = true ]; then
        REQUEST_BODY=$(jq -n \
            --arg docId "$DOC_ID" \
            --arg text "$TEXT" \
            '{documentId: $docId, text: $text}')
    else
        # Manual fallback, lossy by design:
        #   1. newline, CR and tab become spaces
        #   2. the remaining control characters are dropped
        #   3. backslashes, then double quotes, are escaped
        # The document id is inserted as-is, as before.
        ESCAPED_TEXT=$(printf '%s' "$TEXT" \
            | tr '\n\r\t' '   ' \
            | tr -d '\000-\010\013\014\016-\037' \
            | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')
        REQUEST_BODY="{\"documentId\":\"${DOC_ID}\",\"text\":\"${ESCAPED_TEXT}\"}"
    fi

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$RAG_INDEX_URL" \
        -H "Content-Type: application/json" \
        -d "$REQUEST_BODY" \
        --connect-timeout 30 \
        --max-time 300)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
    echo -e "${YELLOW}响应内容:${NC}"

    if [ "$JQ_AVAILABLE" = true ]; then
        echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
    else
        echo "$BODY"
    fi

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "向量提取失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    # Report the chunk count when the response carries one.
    CHUNK_COUNT=""
    if [ "$JQ_AVAILABLE" = true ]; then
        CHUNK_COUNT=$(echo "$BODY" | jq -r '.data.chunkCount // empty' 2>/dev/null)
    fi
    if [ -n "$CHUNK_COUNT" ] && [ "$CHUNK_COUNT" != "null" ]; then
        print_success "向量提取成功! 生成 $CHUNK_COUNT 个分块"
    else
        print_success "向量提取成功!"
    fi
    return 0
}
# NER extraction: POST to ${NER_DOCUMENT_URL}/<document id> and report the
# entity and relation counts when the response carries them.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also ENTITY_COUNT and
# RELATION_COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_ner_extraction() {
    local DOC_ID=$1

    print_step "NER 提取 (命名实体识别)"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${NER_DOCUMENT_URL}/${DOC_ID}"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X POST "${NER_DOCUMENT_URL}/${DOC_ID}" \
            -H "Content-Type: application/json" \
            --connect-timeout 30 \
            --max-time 300
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"
    printf '%b\n' "${YELLOW}响应内容:${NC}"

    if [[ "$JQ_AVAILABLE" != true ]] || ! jq . <<< "$BODY" 2>/dev/null; then
        echo "$BODY"
    fi

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "NER 提取失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    # Default message; upgraded below when an entity count is available.
    local summary="NER 提取成功!"
    if [[ "$JQ_AVAILABLE" = true ]]; then
        ENTITY_COUNT=$(jq -r '.data.entityCount // empty' <<< "$BODY" 2>/dev/null)
        RELATION_COUNT=$(jq -r '.data.relationCount // empty' <<< "$BODY" 2>/dev/null)
        if [[ -n "$ENTITY_COUNT" && "$ENTITY_COUNT" != "null" ]]; then
            summary="NER 提取成功! 实体: $ENTITY_COUNT, 关系: $RELATION_COUNT"
        fi
    fi
    print_success "$summary"
    return 0
}
# Structured parsing: GET ${STRUCTURED_URL}/<document id> and summarise the
# elements, images and tables it reports.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also TOTAL, IMAGE_COUNT
# and TABLE_COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: when jq cannot read the body (for example a non-JSON payload returned
# with HTTP 200) the counters came back empty and `[ "" -gt 0 ]` printed
# "integer expression expected". Anything that is not a plain non-negative
# integer is now treated as 0.
test_structured_extraction() {
    local DOC_ID=$1

    print_step "结构化解析 (提取段落、图片、表格)"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${STRUCTURED_URL}/${DOC_ID}"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${STRUCTURED_URL}/${DOC_ID}" \
        --connect-timeout 30 \
        --max-time 300)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "结构化解析失败 (HTTP $HTTP_CODE)"
        if [ "$JQ_AVAILABLE" = true ]; then
            echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
        else
            echo "$BODY"
        fi
        return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
        print_success "结构化解析成功!"
        echo "$BODY"
        return 0
    fi

    TOTAL=$(echo "$BODY" | jq -r '.data.totalElements // 0' 2>/dev/null)
    IMAGE_COUNT=$(echo "$BODY" | jq -r '.data.imageCount // 0' 2>/dev/null)
    TABLE_COUNT=$(echo "$BODY" | jq -r '.data.tableCount // 0' 2>/dev/null)

    # Normalise empty or non-numeric values so the comparisons below are safe.
    case "$TOTAL" in '' | *[!0-9]*) TOTAL=0 ;; esac
    case "$IMAGE_COUNT" in '' | *[!0-9]*) IMAGE_COUNT=0 ;; esac
    case "$TABLE_COUNT" in '' | *[!0-9]*) TABLE_COUNT=0 ;; esac

    print_success "结构化解析成功!"
    print_info "总元素: $TOTAL, 图片: $IMAGE_COUNT, 表格: $TABLE_COUNT"

    # List the images.
    if [ "$IMAGE_COUNT" -gt 0 ]; then
        echo -e "\n${YELLOW}图片列表:${NC}"
        echo "$BODY" | jq -r '.data.elements[] | select(.type == "image") | " - \(.imageUrl) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
    fi

    # Summarise the tables.
    if [ "$TABLE_COUNT" -gt 0 ]; then
        echo -e "\n${YELLOW}表格列表:${NC}"
        echo "$BODY" | jq -r '.data.elements[] | select(.type == "table") | " - 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
    fi
    return 0
}
# List the images of a document via ${ELEMENTS_URL}/<document id>/images.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: COUNT is normalised to 0 when jq yields nothing or a non-number
# (for example a non-JSON body), so the `-gt` test can no longer fail with
# "integer expression expected".
test_get_images() {
    local DOC_ID=$1

    print_step "获取文档图片"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/images"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${ELEMENTS_URL}/${DOC_ID}/images" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取图片失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
        print_success "获取图片成功!"
        echo "$BODY"
        return 0
    fi

    COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
    case "$COUNT" in '' | *[!0-9]*) COUNT=0 ;; esac
    print_success "获取图片成功! 共 $COUNT 张"

    if [ "$COUNT" -gt 0 ]; then
        echo -e "${YELLOW}图片详情:${NC}"
        echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] \(.imageUrl) - \(.imageFormat) (\(.imageWidth)x\(.imageHeight))"' 2>/dev/null
    fi
    return 0
}
# List the tables of a document via ${ELEMENTS_URL}/<document id>/tables.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: COUNT is normalised to 0 when jq yields nothing or a non-number
# (for example a non-JSON body), so the `-gt` test can no longer fail with
# "integer expression expected".
test_get_tables() {
    local DOC_ID=$1

    print_step "获取文档表格"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${ELEMENTS_URL}/${DOC_ID}/tables"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${ELEMENTS_URL}/${DOC_ID}/tables" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取表格失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
        print_success "获取表格成功!"
        echo "$BODY"
        return 0
    fi

    COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
    case "$COUNT" in '' | *[!0-9]*) COUNT=0 ;; esac
    print_success "获取表格成功! 共 $COUNT 个"

    if [ "$COUNT" -gt 0 ]; then
        echo -e "${YELLOW}表格详情:${NC}"
        echo "$BODY" | jq -r '.data[] | " [\(.elementIndex)] 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' 2>/dev/null
    fi
    return 0
}
- # ============================================
- # 数据源相关测试函数
- # ============================================
# List the GraphNodes of a document via ${GRAPH_URL}/documents/<id>/nodes.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also COUNT and
# FIRST_NODE_ID.
# Side effect: when at least one node is returned, the first node id is saved
# to ${SCRIPT_DIR}/.last_node_id for later steps.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: COUNT is normalised to 0 when jq yields nothing or a non-number
# (for example a non-JSON body), so the `-gt` test can no longer fail with
# "integer expression expected".
test_get_graph_nodes() {
    local DOC_ID=$1

    print_step "获取文档 GraphNode 列表"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${GRAPH_URL}/documents/${DOC_ID}/nodes"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${GRAPH_URL}/documents/${DOC_ID}/nodes" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取 GraphNode 失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
        print_success "获取 GraphNode 成功!"
        echo "$BODY"
        return 0
    fi

    COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
    case "$COUNT" in '' | *[!0-9]*) COUNT=0 ;; esac
    print_success "获取 GraphNode 成功! 共 $COUNT 个"

    if [ "$COUNT" -gt 0 ]; then
        echo -e "${YELLOW}节点列表 (前10个):${NC}"
        echo "$BODY" | jq -r '.data[:10][] | " [\(.id)] \(.nodeType): \(.name)"' 2>/dev/null

        # Remember the first node id so later steps can use it.
        FIRST_NODE_ID=$(echo "$BODY" | jq -r '.data[0].id // empty' 2>/dev/null)
        if [ -n "$FIRST_NODE_ID" ] && [ "$FIRST_NODE_ID" != "null" ]; then
            echo "$FIRST_NODE_ID" > "${SCRIPT_DIR}/.last_node_id"
            print_info "已保存第一个节点ID: $FIRST_NODE_ID"
        fi
    fi
    return 0
}
# List the datasources of a document via ${DATASOURCE_URL}/document/<id>.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: COUNT is normalised to 0 when jq yields nothing or a non-number
# (for example a non-JSON body), so the `-gt` test can no longer fail with
# "integer expression expected".
test_get_datasources() {
    local DOC_ID=$1

    print_step "获取文档数据源列表"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${DATASOURCE_URL}/document/${DOC_ID}"

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${DATASOURCE_URL}/document/${DOC_ID}" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取数据源失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" != true ]; then
        print_success "获取数据源成功!"
        echo "$BODY"
        return 0
    fi

    COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
    case "$COUNT" in '' | *[!0-9]*) COUNT=0 ;; esac
    print_success "获取数据源成功! 共 $COUNT 个"

    if [ "$COUNT" -gt 0 ]; then
        echo -e "${YELLOW}数据源列表:${NC}"
        echo "$BODY" | jq -r '.data[] | " [\(.id)] \(.name) (\(.type)) - 值类型: \(.valueType), 聚合: \(.aggregateType)"' 2>/dev/null
    fi
    return 0
}
# Escape a value for use inside a JSON string literal when jq is unavailable:
# newline/CR/tab become spaces, then backslashes and double quotes are escaped.
_ds_json_escape() {
    printf '%s' "$1" | tr '\n\r\t' '   ' | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# Create a datasource by POSTing to DATASOURCE_URL.
# Arguments:
#   $1 - document id
#   $2 - datasource name
#   $3 - datasource type
#   $4 - value type (default: text)
#   $5 - aggregate type (default: first)
# The X-User-Id header comes from ${SCRIPT_DIR}/.last_user_id when that file
# exists, otherwise "default-user".
# Globals written: REQUEST_BODY, RESPONSE, HTTP_CODE, BODY; with jq also DS_ID.
# Side effect: with jq, the new id is saved to ${SCRIPT_DIR}/.last_datasource_id.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fix: the JSON body used to be built by raw interpolation, so a name
# containing a double quote or backslash produced invalid JSON. It is now
# built with `jq -n --arg` when jq is available and with escaped values
# otherwise.
test_create_datasource() {
    local DOC_ID=$1
    local NAME=$2
    local TYPE=$3
    local VALUE_TYPE=${4:-text}
    local AGGREGATE_TYPE=${5:-first}

    print_step "创建数据源"

    print_info "文档ID: $DOC_ID"
    print_info "名称: $NAME"
    print_info "类型: $TYPE"
    print_info "值类型: $VALUE_TYPE"
    print_info "聚合方式: $AGGREGATE_TYPE"

    # Resolve the user id to send with the request.
    local ACTUAL_USER_ID="default-user"
    if [ -f "${SCRIPT_DIR}/.last_user_id" ]; then
        ACTUAL_USER_ID=$(cat "${SCRIPT_DIR}/.last_user_id")
    fi

    if [ "$JQ_AVAILABLE" = true ]; then
        REQUEST_BODY=$(jq -n \
            --arg documentId "$DOC_ID" \
            --arg name "$NAME" \
            --arg type "$TYPE" \
            --arg valueType "$VALUE_TYPE" \
            --arg aggregateType "$AGGREGATE_TYPE" \
            '{documentId: $documentId, name: $name, type: $type, valueType: $valueType, aggregateType: $aggregateType}')
    else
        REQUEST_BODY=$(
            cat <<EOF
{
  "documentId": "$(_ds_json_escape "$DOC_ID")",
  "name": "$(_ds_json_escape "$NAME")",
  "type": "$(_ds_json_escape "$TYPE")",
  "valueType": "$(_ds_json_escape "$VALUE_TYPE")",
  "aggregateType": "$(_ds_json_escape "$AGGREGATE_TYPE")"
}
EOF
        )
    fi

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$DATASOURCE_URL" \
        -H "Content-Type: application/json" \
        -H "X-User-Id: $ACTUAL_USER_ID" \
        -d "$REQUEST_BODY" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "创建数据源失败 (HTTP $HTTP_CODE)"
        if [ "$JQ_AVAILABLE" = true ]; then
            echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
        else
            echo "$BODY"
        fi
        return 1
    fi

    if [ "$JQ_AVAILABLE" = true ]; then
        DS_ID=$(echo "$BODY" | jq -r '.data.id // empty' 2>/dev/null)
        print_success "创建数据源成功!"
        print_info "数据源ID: $DS_ID"
        echo "$DS_ID" > "${SCRIPT_DIR}/.last_datasource_id"
        echo "$BODY" | jq '.data' 2>/dev/null
    else
        print_success "创建数据源成功!"
        echo "$BODY"
    fi
    return 0
}
# Bind nodes to a datasource via PUT ${DATASOURCE_URL}/<datasource id>/refs.
# Arguments:
#   $1 - datasource id
#   $2 - node type (graph_node or document_element)
#   $3 - comma-separated list of node ids
#   $4 - mode: replace/append/remove (default: append)
# Globals written: REQUEST_BODY, RESPONSE, HTTP_CODE, BODY.
# Returns: 0 on HTTP 200, 1 otherwise.
#
# Fixes:
#   - the id array and the loop variable are now local (they used to leak
#     into the global scope as IDS and id);
#   - whitespace around each id is trimmed and empty segments are skipped,
#     so "a, b,,c" yields refs for a, b and c only.
test_bind_nodes_to_datasource() {
    local DS_ID=$1
    local NODE_TYPE=$2
    local NODE_IDS=$3
    local MODE=${4:-append}
    local -a IDS=()
    local id
    local REFS_ARRAY=""

    print_step "绑定节点到数据源"

    print_info "数据源ID: $DS_ID"
    print_info "节点类型: $NODE_TYPE"
    print_info "节点IDs: $NODE_IDS"
    print_info "模式: $MODE"

    # Build the refs array from the comma-separated id list.
    IFS=',' read -ra IDS <<< "$NODE_IDS"
    for id in "${IDS[@]}"; do
        # Trim leading, then trailing, whitespace.
        id="${id#"${id%%[![:space:]]*}"}"
        id="${id%"${id##*[![:space:]]}"}"
        [ -n "$id" ] || continue

        if [ -n "$REFS_ARRAY" ]; then
            REFS_ARRAY+=","
        fi
        REFS_ARRAY+="{\"type\":\"$NODE_TYPE\",\"id\":\"$id\"}"
    done
    REFS_ARRAY="[${REFS_ARRAY}]"

    REQUEST_BODY=$(printf '{\n  "mode": "%s",\n  "refs": %s\n}' "$MODE" "$REFS_ARRAY")

    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X PUT "${DATASOURCE_URL}/${DS_ID}/refs" \
        -H "Content-Type: application/json" \
        -d "$REQUEST_BODY" \
        --connect-timeout 10)

    # Last line is the status code, the rest is the response body.
    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" = "200" ]; then
        print_success "绑定节点成功!"
        if [ "$JQ_AVAILABLE" = true ]; then
            echo "$BODY" | jq '.data' 2>/dev/null
        else
            echo "$BODY"
        fi
        return 0
    fi

    print_error "绑定节点失败 (HTTP $HTTP_CODE)"
    if [ "$JQ_AVAILABLE" = true ]; then
        echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
    else
        echo "$BODY"
    fi
    return 1
}
# Fetch the value of a datasource via ${DATASOURCE_URL}/<datasource id>/value.
# Arguments: $1 - datasource id
# Globals written: RESPONSE, HTTP_CODE, BODY.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_datasource_value() {
    local DS_ID=$1

    print_step "获取数据源值"

    print_info "数据源ID: $DS_ID"
    print_info "请求URL: ${DATASOURCE_URL}/${DS_ID}/value"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "${DATASOURCE_URL}/${DS_ID}/value" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取数据源值失败 (HTTP $HTTP_CODE)"
        # Pretty-print the error body when possible, raw otherwise.
        if [[ "$JQ_AVAILABLE" != true ]] || ! jq . <<< "$BODY" 2>/dev/null; then
            echo "$BODY"
        fi
        return 1
    fi

    print_success "获取数据源值成功!"
    if [[ "$JQ_AVAILABLE" = true ]]; then
        jq '.data' <<< "$BODY" 2>/dev/null
    else
        echo "$BODY"
    fi
    return 0
}
# End-to-end datasource flow for one document:
#   1. list its GraphNodes
#   2. list its existing datasources
#   3. create a test datasource
#   4. bind the first GraphNode to it (when one was found)
#   5. read the datasource value back
# Arguments: $1 - document id
# Returns: 1 when the datasource cannot be created or its id is unknown;
# otherwise the status of the last step that ran.
#
# Fixes:
#   - the cached .last_node_id and .last_datasource_id files are removed
#     first, so ids left by an earlier run (possibly for another document)
#     are never bound or queried by mistake;
#   - an empty datasource id (for example when jq is unavailable and the id
#     was never saved) now stops the flow instead of calling the API with an
#     empty id;
#   - a failed creation now returns 1 instead of reporting success;
#   - DS_ID and NODE_ID are local.
test_datasource_flow() {
    local DOC_ID=$1
    local DS_ID=""
    local NODE_ID=""
    local node_file="${SCRIPT_DIR}/.last_node_id"
    local ds_file="${SCRIPT_DIR}/.last_datasource_id"

    print_header "数据源完整测试流程"

    # Start from a clean slate: only ids produced by this run may be used.
    rm -f "$node_file" "$ds_file"

    # 1. List the GraphNodes; this saves the first node id when there is one.
    test_get_graph_nodes "$DOC_ID"

    # 2. List the existing datasources.
    test_get_datasources "$DOC_ID"

    # 3. Create a test datasource.
    print_step "创建测试数据源"
    if ! test_create_datasource "$DOC_ID" "测试数据源_$(date +%s)" "entity" "text" "concat"; then
        return 1
    fi

    if [ -f "$ds_file" ]; then
        DS_ID=$(cat "$ds_file" 2>/dev/null)
    fi
    if [ -z "$DS_ID" ]; then
        print_error "无法获取数据源ID,跳过绑定测试"
        return 1
    fi

    # 4. Bind the node found in step 1, if any.
    if [ ! -f "$node_file" ]; then
        print_info "没有可用的节点ID,跳过绑定测试"
        return 0
    fi
    NODE_ID=$(cat "$node_file")
    test_bind_nodes_to_datasource "$DS_ID" "graph_node" "$NODE_ID" "append"

    # 5. Read the datasource value back.
    test_get_datasource_value "$DS_ID"
}
- # ============================================
- # 任务中心相关测试函数
- # ============================================
# Fetch the task detail for a document via
# ${TASK_CENTER_URL}/by-document/<document id>.
# Arguments: $1 - document id
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also STATUS, PROGRESS
# and CURRENT_STEP.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_task_detail() {
    local DOC_ID=$1
    local rule="${CYAN}══════════════════════════════════════════${NC}"

    print_step "获取任务详情"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${TASK_CENTER_URL}/by-document/${DOC_ID}"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "${TASK_CENTER_URL}/by-document/${DOC_ID}" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取任务详情失败 (HTTP $HTTP_CODE)"
        if [[ "$JQ_AVAILABLE" != true ]] || ! jq . <<< "$BODY" 2>/dev/null; then
            echo "$BODY"
        fi
        return 1
    fi

    if [[ "$JQ_AVAILABLE" != true ]]; then
        print_success "获取任务详情成功!"
        echo "$BODY"
        return 0
    fi

    # Headline fields.
    STATUS=$(jq -r '.data.status // "unknown"' <<< "$BODY" 2>/dev/null)
    PROGRESS=$(jq -r '.data.progress // 0' <<< "$BODY" 2>/dev/null)
    CURRENT_STEP=$(jq -r '.data.currentStep // "-"' <<< "$BODY" 2>/dev/null)

    print_success "获取任务详情成功!"
    printf '%b\n' "$rule"
    printf '%b\n' " 状态: ${GREEN}$STATUS${NC}"
    printf '%b\n' " 总进度: ${GREEN}${PROGRESS}%${NC}"
    printf '%b\n' " 当前步骤: ${GREEN}$CURRENT_STEP${NC}"
    printf '%b\n' "$rule"

    # One line per stage: status marker, display name, progress and, when
    # present, the result summary.
    printf '%b\n' "\n${YELLOW}阶段详情:${NC}"
    jq -r '.data.stages[] | " [\(.status | if . == "completed" then "✓" elif . == "in_progress" then "●" elif . == "failed" then "✗" else "○" end)] \(.displayName) - \(.progress)%" + (if .resultSummary then " (\(.resultSummary))" else "" end)' <<< "$BODY" 2>/dev/null

    return 0
}
# Fetch one page of the task list from ${TASK_CENTER_URL}/list.
# Arguments:
#   $1 - optional status filter (appended as &status=... when non-empty)
#   $2 - page number (default 1)
#   $3 - page size (default 10)
# Globals written: RESPONSE, HTTP_CODE, BODY; with jq also TOTAL.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_task_list() {
    local STATUS_FILTER=${1:-}
    local PAGE_NUM=${2:-1}
    local PAGE_SIZE=${3:-10}

    print_step "获取任务列表"

    local URL="${TASK_CENTER_URL}/list?pageNum=${PAGE_NUM}&pageSize=${PAGE_SIZE}"
    [[ -z "$STATUS_FILTER" ]] || URL+="&status=${STATUS_FILTER}"

    print_info "请求URL: $URL"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "$URL" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取任务列表失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [[ "$JQ_AVAILABLE" != true ]]; then
        print_success "获取任务列表成功!"
        echo "$BODY"
        return 0
    fi

    TOTAL=$(jq -r '.data.total // 0' <<< "$BODY" 2>/dev/null)
    print_success "获取任务列表成功! 共 $TOTAL 条"

    # One line per task record.
    printf '%b\n' "\n${YELLOW}任务列表:${NC}"
    jq -r '.data.records[] | " [\(.status)] \(.documentId) - 进度: \(.progress)% - \(.currentStep // "-")"' <<< "$BODY" 2>/dev/null
    return 0
}
# Fetch task statistics from ${TASK_CENTER_URL}/statistics.
# Globals written: RESPONSE, HTTP_CODE, BODY.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_task_statistics() {
    local rule="${CYAN}══════════════════════════════════════════${NC}"

    print_step "获取任务统计"

    print_info "请求URL: ${TASK_CENTER_URL}/statistics"

    RESPONSE=$(
        curl -s -w "\n%{http_code}" \
            -X GET "${TASK_CENTER_URL}/statistics" \
            --connect-timeout 10
    )

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    printf '%b\n' "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
        print_error "获取任务统计失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    print_success "获取任务统计成功!"
    if [[ "$JQ_AVAILABLE" = true ]]; then
        # Counters, one per line, framed by two rules.
        printf '%b\n' "$rule"
        jq -r '.data | " 总数: \(.total)\n 等待中: \(.pending)\n 处理中: \(.processing)\n 已完成: \(.completed)\n 失败: \(.failed)"' <<< "$BODY" 2>/dev/null
        printf '%b\n' "$rule"
    else
        echo "$BODY"
    fi
    return 0
}
# Poll the task-centre progress of a document, printing a line whenever the
# overall progress or the current step changes.
# Arguments:
#   $1 - document id
#   $2 - maximum number of attempts (default 120, i.e. about 6 minutes at
#        the default interval)
#   $3 - seconds to wait between attempts (default 3)
# Globals written: RESPONSE, STATUS; with jq also PROGRESS, CURRENT_STEP,
# STAGE_INFO and, on failure, ERROR_MSG.
# Returns: 0 when the status becomes completed; 1 on failed or timeout.
#
# Changes from the previous version:
#   - without jq the status is now read with a best-effort grep, so the poll
#     can finish instead of always running to the timeout;
#   - no sleep after the final attempt;
#   - the loop counter is local.
poll_task_progress() {
    local DOC_ID=$1
    local MAX_ATTEMPTS=${2:-120}
    local INTERVAL=${3:-3}
    local LAST_PROGRESS=-1
    local LAST_STAGE=""
    local i

    print_step "轮询任务进度 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"

    for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
        RESPONSE=$(curl -s "${TASK_CENTER_URL}/by-document/${DOC_ID}" --connect-timeout 10)

        if [ "$JQ_AVAILABLE" = true ]; then
            STATUS=$(echo "$RESPONSE" | jq -r '.data.status // "unknown"' 2>/dev/null)
            PROGRESS=$(echo "$RESPONSE" | jq -r '.data.progress // 0' 2>/dev/null)
            CURRENT_STEP=$(echo "$RESPONSE" | jq -r '.data.currentStep // "-"' 2>/dev/null)

            # Only print when the progress or the step has changed.
            if [ "$PROGRESS" != "$LAST_PROGRESS" ] || [ "$CURRENT_STEP" != "$LAST_STAGE" ]; then
                # Detail of the first stage currently in progress.
                STAGE_INFO=$(echo "$RESPONSE" | jq -r '.data.stages[] | select(.status == "in_progress") | "\(.displayName): \(.progress)%"' 2>/dev/null | head -1)

                echo -e "\r\033[K${CYAN}[${i}/${MAX_ATTEMPTS}]${NC} 总进度: ${GREEN}${PROGRESS}%${NC} | 阶段: ${YELLOW}${CURRENT_STEP}${NC} | ${STAGE_INFO:-等待中...}"

                LAST_PROGRESS=$PROGRESS
                LAST_STAGE=$CURRENT_STEP
            fi
        else
            echo "第 $i 次查询..."
            echo "$RESPONSE"
            # Best-effort fallback: take the first string-valued "status"
            # field in the raw response. NOTE(review): assumes that first
            # field is the task status - confirm against the API's envelope.
            STATUS=$(printf '%s' "$RESPONSE" \
                | grep -oE '"status"[[:space:]]*:[[:space:]]*"[^"]*"' \
                | head -n1 \
                | sed 's/.*"\([^"]*\)"$/\1/')
        fi

        if [ "$STATUS" = "completed" ]; then
            echo ""
            print_success "任务完成!"

            # Final per-stage summaries (needs jq).
            if [ "$JQ_AVAILABLE" = true ]; then
                echo -e "\n${YELLOW}最终结果:${NC}"
                echo "$RESPONSE" | jq -r '.data.stages[] | select(.resultSummary != null) | " \(.displayName): \(.resultSummary)"' 2>/dev/null
            fi
            return 0
        elif [ "$STATUS" = "failed" ]; then
            echo ""
            print_error "任务失败!"
            # Without jq the raw response was already printed above.
            if [ "$JQ_AVAILABLE" = true ]; then
                ERROR_MSG=$(echo "$RESPONSE" | jq -r '.data.errorMessage // "未知错误"' 2>/dev/null)
                echo -e "${RED}错误信息: $ERROR_MSG${NC}"
            fi
            return 1
        fi

        # Only wait when another attempt will follow.
        if ((i < MAX_ATTEMPTS)); then
            sleep "$INTERVAL"
        fi
    done

    echo ""
    print_error "轮询超时,任务未完成"
    return 1
}
# Print the usage text (options, datasource parameters and examples) to
# stdout. $0 is expanded so the examples show the name the script was run as.
show_help() {
    cat <<EOF
使用方法: $0 [选项] [host] [port]

端到端测试流程: 上传文件 -> 等待解析 -> 自动处理 -> 数据源操作

选项:
 -h, --help 显示帮助信息
 -e, --e2e 执行完整端到端测试 (默认)
 -u, --upload-only 仅执行上传测试
 -s, --status 仅查询上次上传的文档状态
 -v, --vector 仅执行向量提取(使用上次的文档)
 -n, --ner 仅执行NER提取(使用上次的文档)
 -x, --structured 仅执行结构化解析(提取图片和表格)
 -i, --images 仅获取文档图片列表
 -t, --tables 仅获取文档表格列表
 -g, --nodes 获取文档的 GraphNode 列表
 -d, --datasource 获取文档的数据源列表
 --ds-create 创建数据源 (需要 --name 和 --type)
 --ds-bind 绑定节点到数据源
 --ds-value 获取数据源的值
 --ds-flow 执行数据源完整测试流程

 === 任务中心 ===
 --task 获取任务详情(按文档ID)
 --task-list 获取任务列表
 --task-stats 获取任务统计
 --task-poll 轮询任务进度直到完成
 --doc-id ID 指定文档ID(用于 --task/--task-poll 等)

数据源相关参数:
 --name NAME 数据源名称
 --type TYPE 数据源类型 (entity/paragraph/image/table)
 --value-type TYPE 值类型 (text/image/table/mixed)
 --aggregate TYPE 聚合方式 (first/last/concat/sum/avg/list)
 --ds-id ID 数据源ID
 --node-type TYPE 节点类型 (graph_node/document_element)
 --node-ids IDS 节点ID列表 (逗号分隔)

示例:
 $0 # 完整端到端测试
 $0 192.168.1.100 5232 # 指定服务器地址
 $0 -u # 仅上传文件
 $0 -s # 查询上次上传的状态
 $0 -g # 获取文档的 GraphNode 列表
 $0 -d # 获取文档的数据源列表
 $0 --ds-flow # 执行数据源完整测试流程
 $0 --ds-create --name '报告编号' --type entity
 $0 --ds-bind --ds-id xxx --node-type graph_node --node-ids 'id1,id2'
 $0 --ds-value --ds-id xxx

 === 任务中心示例 ===
 $0 --task # 获取上次文档的任务详情
 $0 --task-list # 获取任务列表
 $0 --task-stats # 获取任务统计
 $0 --task-poll # 轮询任务进度直到完成
EOF
}
#######################################
# Resolve DOCUMENT_ID for modes that act on an already-uploaded document.
# Globals:   SCRIPT_DIR (read), DOCUMENT_ID (written)
# Arguments: $1 - error message shown when no document ID can be found
#            $2 - optional explicit document ID; takes priority over the
#                 ID saved in ${SCRIPT_DIR}/.last_document_id
# Exits:     status 1 (after print_error) when neither source yields an ID
#######################################
_main_resolve_document_id() {
  local missing_msg=$1
  local explicit_id=${2:-}
  local id_file="${SCRIPT_DIR}/.last_document_id"

  if [[ -n "$explicit_id" ]]; then
    DOCUMENT_ID="$explicit_id"
  elif [[ -f "$id_file" ]]; then
    DOCUMENT_ID=$(cat "$id_file")
  else
    print_error "$missing_msg"
    exit 1
  fi
}

#######################################
# Script entry point: parse the command line, rebuild the service URLs from
# HOST/PORT, run the selected test mode and print a follow-up command summary.
# Globals:   HOST, PORT, HOST_SET (read/written), SCRIPT_DIR, GREEN, NC (read),
#            DOCUMENT_ID and every *_URL variable (written)
# Arguments: the script's command-line arguments ("$@")
# Exits:     0 after --help; 1 when a required option/ID is missing or an
#            option that needs a value is given without one
#######################################
main() {
  local MODE="e2e" # full end-to-end run is the default mode

  # Data-source related parameters
  local DS_NAME=""
  local DS_TYPE=""
  local DS_VALUE_TYPE="text"
  local DS_AGGREGATE="first"
  local DS_ID=""
  local NODE_TYPE="graph_node"
  local NODE_IDS=""

  # Document ID given explicitly via --doc-id
  local SPECIFIED_DOC_ID=""

  local no_last_doc_msg="未找到上次上传的文档ID"
  local no_doc_id_msg="未找到文档ID,请使用 --doc-id 指定"

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -e|--e2e|-p|--poll)
        # -p/--poll is a legacy flag kept for compatibility; same as e2e.
        MODE="e2e"
        shift
        ;;
      -u|--upload-only) MODE="upload"; shift ;;
      -s|--status)      MODE="status"; shift ;;
      -v|--vector)      MODE="vector"; shift ;;
      -n|--ner)         MODE="ner"; shift ;;
      -x|--structured)  MODE="structured"; shift ;;
      -i|--images)      MODE="images"; shift ;;
      -t|--tables)      MODE="tables"; shift ;;
      -g|--nodes)       MODE="nodes"; shift ;;
      -d|--datasource)  MODE="datasource"; shift ;;
      --ds-create|--ds-bind|--ds-value|--ds-flow|--task|--task-list|--task-stats|--task-poll)
        # For these long flags the mode name is the flag without its "--".
        MODE="${1#--}"
        shift
        ;;
      --doc-id|--name|--type|--value-type|--aggregate|--ds-id|--node-type|--node-ids)
        # "shift 2" with a single remaining argument fails without shifting,
        # which would spin this loop forever; reject the missing value instead.
        if [[ $# -lt 2 ]]; then
          print_error "选项 $1 缺少参数值"
          exit 1
        fi
        case "$1" in
          --doc-id)     SPECIFIED_DOC_ID="$2" ;;
          --name)       DS_NAME="$2" ;;
          --type)       DS_TYPE="$2" ;;
          --value-type) DS_VALUE_TYPE="$2" ;;
          --aggregate)  DS_AGGREGATE="$2" ;;
          --ds-id)      DS_ID="$2" ;;
          --node-type)  NODE_TYPE="$2" ;;
          --node-ids)   NODE_IDS="$2" ;;
        esac
        shift 2
        ;;
      *)
        # Bare words are positional: the first is HOST, any later one PORT.
        # Unrecognised dash-options are dropped silently (unchanged behavior).
        if [[ ! "$1" =~ ^- ]]; then
          if [[ -z "${HOST_SET:-}" ]]; then
            HOST=$1
            HOST_SET=true
          else
            PORT=$1
          fi
        fi
        shift
        ;;
    esac
  done

  # Rebuild the endpoint URLs now that HOST/PORT are final
  BASE_URL="http://${HOST}:${PORT}"
  UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
  STATUS_URL="${BASE_URL}/parse/status"
  REGISTER_URL="${BASE_URL}/auth/register"
  TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
  RAG_INDEX_URL="${BASE_URL}/api/rag/index"
  NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
  STRUCTURED_URL="${BASE_URL}/parse/structured"
  ELEMENTS_URL="${BASE_URL}/parse/elements"
  DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
  GRAPH_URL="${BASE_URL}/api/graph"
  TASK_CENTER_URL="${BASE_URL}/api/v1/tasks"

  print_header "文件上传端到端测试"
  echo "目标服务: $BASE_URL"
  echo "测试模式: $MODE"
  echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"

  check_dependencies

  # Dispatch on the selected mode
  case "$MODE" in
    status)
      # Query parse status only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "查询解析状态"
      test_parse_status "$DOCUMENT_ID"
      ;;

    vector)
      # Vector extraction only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "向量提取测试"
      if get_document_text "$DOCUMENT_ID"; then
        test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
      fi
      ;;

    ner)
      # NER extraction only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "NER 提取测试"
      test_ner_extraction "$DOCUMENT_ID"
      ;;

    structured)
      # Structured parsing only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "结构化解析测试"
      test_structured_extraction "$DOCUMENT_ID"
      ;;

    images)
      # List document images only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "获取文档图片"
      test_get_images "$DOCUMENT_ID"
      ;;

    tables)
      # List document tables only
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "获取文档表格"
      test_get_tables "$DOCUMENT_ID"
      ;;

    nodes)
      # List GraphNodes
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "获取文档 GraphNode"
      test_get_graph_nodes "$DOCUMENT_ID"
      ;;

    datasource)
      # List data sources
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "获取文档数据源"
      test_get_datasources "$DOCUMENT_ID"
      ;;

    ds-create)
      # Create a data source; name and type are validated before the doc ID
      if [[ -z "$DS_NAME" ]]; then
        print_error "请指定数据源名称 (--name)"
        exit 1
      fi
      if [[ -z "$DS_TYPE" ]]; then
        print_error "请指定数据源类型 (--type)"
        exit 1
      fi
      _main_resolve_document_id "$no_last_doc_msg"
      print_header "创建数据源"
      test_create_datasource "$DOCUMENT_ID" "$DS_NAME" "$DS_TYPE" "$DS_VALUE_TYPE" "$DS_AGGREGATE"
      ;;

    ds-bind)
      # Bind nodes to a data source; fall back to the saved IDs when omitted
      if [[ -z "$DS_ID" && -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
        DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
      fi
      if [[ -z "$DS_ID" ]]; then
        print_error "请指定数据源ID (--ds-id)"
        exit 1
      fi
      if [[ -z "$NODE_IDS" && -f "${SCRIPT_DIR}/.last_node_id" ]]; then
        NODE_IDS=$(cat "${SCRIPT_DIR}/.last_node_id")
      fi
      if [[ -z "$NODE_IDS" ]]; then
        print_error "请指定节点ID (--node-ids)"
        exit 1
      fi
      print_header "绑定节点到数据源"
      test_bind_nodes_to_datasource "$DS_ID" "$NODE_TYPE" "$NODE_IDS" "append"
      ;;

    ds-value)
      # Fetch a data source value; fall back to the saved data source ID
      if [[ -z "$DS_ID" && -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
        DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
      fi
      if [[ -z "$DS_ID" ]]; then
        print_error "请指定数据源ID (--ds-id)"
        exit 1
      fi
      print_header "获取数据源值"
      test_get_datasource_value "$DS_ID"
      ;;

    ds-flow)
      # Full data source test flow
      _main_resolve_document_id "未找到上次上传的文档ID,请先上传文档"
      test_datasource_flow "$DOCUMENT_ID"
      ;;

    task)
      # Task detail; --doc-id wins over the saved document ID
      _main_resolve_document_id "$no_doc_id_msg" "$SPECIFIED_DOC_ID"
      print_header "任务详情"
      test_get_task_detail "$DOCUMENT_ID"
      ;;

    task-list)
      print_header "任务列表"
      test_get_task_list
      ;;

    task-stats)
      print_header "任务统计"
      test_get_task_statistics
      ;;

    task-poll)
      # Poll task progress; --doc-id wins over the saved document ID
      _main_resolve_document_id "$no_doc_id_msg" "$SPECIFIED_DOC_ID"
      print_header "轮询任务进度"
      poll_task_progress "$DOCUMENT_ID"
      ;;

    upload)
      # Upload only
      check_test_file
      check_service
      register_test_user
      test_upload
      ;;

    e2e)
      # Full end-to-end run: upload, then poll the task until it finishes
      check_test_file
      check_service
      register_test_user

      print_header "步骤 1/2: 文件上传"
      print_info "上传后将自动触发: RAG向量化 + 结构化解析 + NER提取"
      test_upload

      # Fall back to the saved ID when DOCUMENT_ID is still empty after upload
      if [[ -z "$DOCUMENT_ID" && -f "${SCRIPT_DIR}/.last_document_id" ]]; then
        DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
      fi

      if [[ -z "$DOCUMENT_ID" ]]; then
        print_error "无法获取文档ID,终止测试"
        exit 1
      fi

      print_header "步骤 2/2: 轮询任务进度直到完成"
      print_info "正在等待所有处理阶段完成..."
      print_info "阶段: 文本解析 → RAG向量化 → 结构化解析 → NER实体提取 → 图构建"
      print_info "NER 提取可能需要 5-10 分钟,请耐心等待..."
      echo ""

      # Poll until done: 300 attempts x 3 second interval = at most 15 minutes
      if poll_task_progress "$DOCUMENT_ID" 300 3; then
        echo ""
        print_header "查看最终结果"

        # Structured parsing results
        print_step "结构化解析结果"
        test_get_images "$DOCUMENT_ID"
        test_get_tables "$DOCUMENT_ID"

        # GraphNodes produced by NER
        print_step "NER 提取结果 (GraphNode)"
        test_get_graph_nodes "$DOCUMENT_ID"
      else
        print_error "任务未在预期时间内完成"
        print_info "可以稍后使用以下命令查看进度:"
        print_info " $0 --task --doc-id $DOCUMENT_ID"
      fi
      ;;
  esac

  print_header "测试完成"
  if [[ -n "$DOCUMENT_ID" ]]; then
    echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
  fi
  if [[ -f "${SCRIPT_DIR}/.last_datasource_id" ]]; then
    # Path is quoted so a script directory containing spaces still works.
    echo -e "${GREEN}最后数据源ID: $(cat "${SCRIPT_DIR}/.last_datasource_id")${NC}"
  fi
  echo ""
  echo "可使用以下命令进行后续操作:"
  echo ""
  echo " === 文档处理 ==="
  echo " $0 -s # 查询解析状态"
  echo " $0 -v # 重新向量提取"
  echo " $0 -n # 重新NER提取"
  echo " $0 -x # 结构化解析"
  echo " $0 -i # 获取图片列表"
  echo " $0 -t # 获取表格列表"
  echo ""
  echo " === 数据源操作 ==="
  echo " $0 -g # 获取 GraphNode 列表"
  echo " $0 -d # 获取数据源列表"
  echo " $0 --ds-flow # 执行数据源完整测试"
  echo " $0 --ds-create --name '名称' --type entity"
  echo " $0 --ds-bind --node-ids 'id1,id2'"
  echo " $0 --ds-value"
  echo ""
  echo " === 任务中心 ==="
  echo " $0 --task # 获取任务详情"
  echo " $0 --task-list # 获取任务列表"
  echo " $0 --task-stats # 获取任务统计"
  echo " $0 --task-poll # 轮询任务进度直到完成"
}
- # Run the main function, passing the script's command-line arguments through unchanged.
- main "$@"
|