| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317 |
- #!/bin/bash
# ============================================
# End-to-end test script for the file upload API
# ============================================
# Flow: upload -> wait for parsing -> processing (vector / NER / structured) -> datasource operations
# Usage: ./test_upload_api.sh [options] [host] [port]   (run with -h to list the options)
# Example: ./test_upload_api.sh localhost 5232
# ============================================
# Configuration.
# HOST and PORT start from fixed defaults; main() is the only place that reads
# the command line. Seeding them from $1/$2 here turned option flags into the
# host name (e.g. "./test_upload_api.sh -u" produced http://-u:5232), because
# main() only overrides HOST/PORT when it sees a positional argument.
HOST="localhost"
PORT="5232"
BASE_URL="http://${HOST}:${PORT}"
UPLOAD_URL="${BASE_URL}/api/v1/files/upload"
STATUS_URL="${BASE_URL}/parse/status"
REGISTER_URL="${BASE_URL}/auth/register"
TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
RAG_INDEX_URL="${BASE_URL}/api/rag/index"
NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
STRUCTURED_URL="${BASE_URL}/parse/structured"
ELEMENTS_URL="${BASE_URL}/parse/elements"
DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
GRAPH_URL="${BASE_URL}/api/graph"

# Test file path (relative to the directory that contains this script).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_FILE="${SCRIPT_DIR}/test.docx"

# Test user; the timestamp makes the user name and e-mail differ between runs.
TIMESTAMP=$(date +%s)
TEST_USERNAME="testuser_${TIMESTAMP}"
TEST_EMAIL="testuser_${TIMESTAMP}@test.com"
TEST_PASSWORD="Test123456!"

# Run state filled in by the test functions.
USER_ID=""
DOCUMENT_ID=""
DOCUMENT_TEXT=""

# ANSI colour escapes (expanded by echo -e in the print_* helpers).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Output helpers.
# print_header: print a blank line, then the title ($1) framed by two blue rules.
print_header() {
  local title=$1
  local rule="${BLUE}============================================${NC}"

  printf '%b\n' "\n${rule}"
  printf '%b\n' "${BLUE}${title}${NC}"
  printf '%b\n' "${rule}"
}
# print_step: print a blank line followed by ">>> $1" in cyan.
print_step() {
  local label=$1
  printf '%b\n' "\n${CYAN}>>> ${label}${NC}"
}
# print_success: print "✓ $1" in green.
print_success() {
  local message=$1
  printf '%b\n' "${GREEN}✓ ${message}${NC}"
}
# print_error: print "✗ $1" in red (to stdout).
print_error() {
  local message=$1
  printf '%b\n' "${RED}✗ ${message}${NC}"
}
# print_info: print "➤ $1" in yellow.
print_info() {
  local message=$1
  printf '%b\n' "${YELLOW}➤ ${message}${NC}"
}
# Check the external tools the script relies on.
# curl is mandatory: the script exits with status 1 when it is not found.
# jq is optional: the result is recorded in the global JQ_AVAILABLE (true/false).
check_dependencies() {
  print_header "检查依赖"

  command -v curl >/dev/null 2>&1 || {
    print_error "curl 未安装"
    exit 1
  }
  print_success "curl 已安装"

  if command -v jq >/dev/null 2>&1; then
    print_success "jq 已安装"
    JQ_AVAILABLE=true
  else
    print_info "jq 未安装,JSON格式化将不可用"
    JQ_AVAILABLE=false
  fi
}
# Make sure the upload fixture exists and report its size.
# Exits with status 1 when TEST_FILE is not a regular file.
# Writes the size in bytes to the global FILE_SIZE.
check_test_file() {
  print_header "检查测试文件"

  [ -f "$TEST_FILE" ] || {
    print_error "测试文件不存在: $TEST_FILE"
    exit 1
  }

  # Try the GNU stat syntax first, then the BSD/macOS one.
  FILE_SIZE=$(stat -c%s "$TEST_FILE" 2>/dev/null || stat -f%z "$TEST_FILE" 2>/dev/null)

  print_success "测试文件存在: $TEST_FILE"
  print_info "文件大小: $FILE_SIZE bytes"
}
# Probe ${BASE_URL}/actuator/health.
# HTTP 200 -> reported as healthy.
# "000"    -> curl got no HTTP response; the script exits with status 1.
# other    -> reported, and the run continues.
# Writes the probe result to the global HTTP_CODE.
check_service() {
  print_header "检查服务状态"
  print_info "测试服务: $BASE_URL"

  HTTP_CODE=$(
    curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 \
      "${BASE_URL}/actuator/health" 2>/dev/null
  )

  case "$HTTP_CODE" in
    200)
      print_success "服务正常运行 (HTTP $HTTP_CODE)"
      ;;
    000)
      print_error "无法连接到服务 $BASE_URL"
      print_info "请确保 parse-service 正在运行"
      exit 1
      ;;
    *)
      print_info "健康检查返回 HTTP $HTTP_CODE,继续测试..."
      ;;
  esac
}
# Register the throw-away test user through REGISTER_URL.
# Globals written: RESPONSE, HTTP_CODE, BODY, and USER_ID (only when jq is available).
# Side effect: saves the user ID to ${SCRIPT_DIR}/.last_user_id.
# Exits with status 1 when the HTTP status is neither 200 nor 201, or when jq is
# available but no user ID can be found in the response.
register_test_user() {
  print_header "注册测试用户"

  print_info "用户名: $TEST_USERNAME"
  print_info "邮箱: $TEST_EMAIL"
  print_info "注册URL: $REGISTER_URL"

  local payload
  payload="{\"username\":\"${TEST_USERNAME}\",\"email\":\"${TEST_EMAIL}\",\"password\":\"${TEST_PASSWORD}\",\"confirmPassword\":\"${TEST_PASSWORD}\"}"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(
    curl -s -w "\n%{http_code}" \
      -X POST "$REGISTER_URL" \
      -H "Content-Type: application/json" \
      -d "$payload" \
      --connect-timeout 10
  )
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"

  # Pretty-print with jq when possible, otherwise show the raw body.
  if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$BODY" 2>/dev/null; then
    echo "$BODY"
  fi

  case "$HTTP_CODE" in
    200 | 201) ;;
    *)
      print_error "用户注册失败 (HTTP $HTTP_CODE)"
      print_info "响应: $BODY"
      exit 1
      ;;
  esac

  print_success "用户注册成功!"

  # Without jq the user ID is not extracted.
  [ "$JQ_AVAILABLE" = true ] || return 0

  USER_ID=$(jq -r '.data.user.id // .data.userId // .userId // empty' <<<"$BODY" 2>/dev/null)
  if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
    # Try the other field names the response may use.
    USER_ID=$(jq -r '.data.id // .id // empty' <<<"$BODY" 2>/dev/null)
  fi

  if [ -z "$USER_ID" ] || [ "$USER_ID" = "null" ]; then
    print_error "无法从响应中获取用户ID"
    echo "响应内容: $BODY"
    exit 1
  fi

  print_info "用户ID: $USER_ID"
  echo "$USER_ID" > "${SCRIPT_DIR}/.last_user_id"
}
# Upload TEST_FILE to UPLOAD_URL as a multipart form, with USER_ID as the userId field.
# Globals written: RESPONSE, HTTP_CODE, BODY, and DOCUMENT_ID (only when jq is available).
# Side effect: saves the document ID to ${SCRIPT_DIR}/.last_document_id.
# Returns 0 on HTTP 200 (with jq: only if a document ID was found), 1 otherwise.
test_upload() {
  print_step "文件上传"

  print_info "上传URL: $UPLOAD_URL"
  print_info "用户ID: $USER_ID"
  print_info "文件: $TEST_FILE"

  echo -e "\n发送请求..."

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(
    curl -s -w "\n%{http_code}" \
      -X POST "$UPLOAD_URL" \
      -H "Content-Type: multipart/form-data" \
      -F "file=@${TEST_FILE}" \
      -F "userId=${USER_ID}" \
      --connect-timeout 10 \
      --max-time 300
  )
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "\n${YELLOW}响应状态码:${NC} $HTTP_CODE"
  echo -e "${YELLOW}响应内容:${NC}"

  # Pretty-print with jq when possible, otherwise show the raw body.
  if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$BODY" 2>/dev/null; then
    echo "$BODY"
  fi

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "文件上传失败 (HTTP $HTTP_CODE)"
    return 1
  fi
  print_success "文件上传成功!"

  # Without jq the document ID is not extracted; the upload still counts as a success.
  [ "$JQ_AVAILABLE" = true ] || return 0

  # Keep the document ID for the follow-up steps.
  DOCUMENT_ID=$(jq -r '.data.documentId // .documentId // empty' <<<"$BODY" 2>/dev/null)
  if [ -z "$DOCUMENT_ID" ] || [ "$DOCUMENT_ID" = "null" ]; then
    print_error "无法从响应中获取文档ID"
    return 1
  fi

  print_info "文档ID: $DOCUMENT_ID"
  echo "$DOCUMENT_ID" > "${SCRIPT_DIR}/.last_document_id"
  return 0
}
# Query the parse status of one document once and print the result.
# Arguments: $1 - document ID
# Returns:   0 when the status endpoint answers HTTP 200, 1 otherwise
#            (previously always 0, unlike the other test_* functions).
test_parse_status() {
  local DOC_ID=$1
  local response http_code body status progress current_step

  print_info "文档ID: $DOC_ID"
  print_info "状态URL: ${STATUS_URL}/${DOC_ID}"

  # curl appends the HTTP status as an extra last line after the body.
  response=$(
    curl -s -w "\n%{http_code}" \
      -X GET "${STATUS_URL}/${DOC_ID}" \
      --connect-timeout 10
  )
  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  echo -e "\n${YELLOW}响应状态码:${NC} $http_code"
  echo -e "${YELLOW}响应内容:${NC}"

  if [ "$JQ_AVAILABLE" = true ]; then
    jq . <<<"$body" 2>/dev/null || echo "$body"

    # Summarise the key status fields.
    if [ "$http_code" = "200" ]; then
      status=$(jq -r '.data.status // empty' <<<"$body" 2>/dev/null)
      progress=$(jq -r '.data.progress // 0' <<<"$body" 2>/dev/null)
      current_step=$(jq -r '.data.currentStep // empty' <<<"$body" 2>/dev/null)
      print_info "状态: $status, 进度: ${progress}%, 当前步骤: $current_step"
    fi
  else
    echo "$body"
  fi

  if [ "$http_code" != "200" ]; then
    print_error "状态查询失败 (HTTP $http_code)"
    return 1
  fi

  print_success "状态查询成功!"
  return 0
}
# Poll the parse status until the document is completed or failed.
# Arguments: $1 - document ID
#            $2 - maximum number of attempts (default 60)
#            $3 - seconds to wait between attempts (default 3)
# Returns:   0 when the status becomes completed; 1 on failed or on timeout.
# With jq the status is read from .data.status / .status. Without jq a regex
# picks the first "status":"..." pair in the raw response (previously the
# status was never parsed without jq, so polling could only time out).
poll_parse_status() {
  local DOC_ID=$1
  local MAX_ATTEMPTS=${2:-60}
  local INTERVAL=${3:-3}
  local attempt response status progress
  # Best-effort fallback; assumes the first "status" key is the parse status.
  local status_re='"status"[[:space:]]*:[[:space:]]*"([A-Za-z_]+)"'

  print_step "轮询解析状态 (最多${MAX_ATTEMPTS}次, 间隔${INTERVAL}秒)"

  for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
    response=$(curl -s "${STATUS_URL}/${DOC_ID}" --connect-timeout 10)
    status=""

    if [ "$JQ_AVAILABLE" = true ]; then
      # The status values are: pending/processing/completed/failed.
      status=$(jq -r '.data.status // .status // empty' <<<"$response" 2>/dev/null)
      progress=$(jq -r '.data.progress // .progress // 0' <<<"$response" 2>/dev/null)
      echo -ne "\r第 $attempt 次查询... 状态: $status, 进度: ${progress}% "
    else
      echo "$response"
      if [[ "$response" =~ $status_re ]]; then
        status=${BASH_REMATCH[1]}
      fi
    fi

    case "$status" in
      completed | COMPLETED)
        echo ""
        print_success "解析完成!"
        return 0
        ;;
      failed | FAILED)
        echo ""
        print_error "解析失败!"
        # Without jq the raw response was already printed above.
        if [ "$JQ_AVAILABLE" = true ]; then
          jq . <<<"$response"
        fi
        return 1
        ;;
    esac

    # No point in waiting once the last attempt has been made.
    if ((attempt < MAX_ATTEMPTS)); then
      sleep "$INTERVAL"
    fi
  done

  echo ""
  print_error "轮询超时,解析未完成"
  return 1
}
# Fetch the text-storage record of a document and load the parsed text from
# the file path it reports (the path is read from the local file system).
# Arguments: $1 - document ID
# Globals:   DOCUMENT_TEXT is cleared first and set to the file content on success.
# Returns:   0 only when the text was actually loaded; 1 otherwise. This
#            includes the case where jq is missing and the file path cannot be
#            parsed (previously that case returned 0 with an empty text, and
#            the caller went on to index an empty document).
get_document_text() {
  local DOC_ID=$1
  local response http_code body file_path

  DOCUMENT_TEXT=""

  print_step "获取文档解析文本"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: ${TEXT_STORAGE_URL}/${DOC_ID}"

  # curl appends the HTTP status as an extra last line after the body.
  response=$(
    curl -s -w "\n%{http_code}" \
      -X GET "${TEXT_STORAGE_URL}/${DOC_ID}" \
      --connect-timeout 10
  )
  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  echo -e "${YELLOW}响应状态码:${NC} $http_code"

  if [ "$http_code" != "200" ]; then
    print_error "获取文本存储失败 (HTTP $http_code)"
    echo "$body"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_error "未安装 jq,无法从响应中解析文件路径"
    echo "$body"
    return 1
  fi

  file_path=$(jq -r '.data.filePath // empty' <<<"$body" 2>/dev/null)
  if [ -z "$file_path" ] || [ "$file_path" = "null" ]; then
    print_error "响应中无文件路径"
    jq . <<<"$body" 2>/dev/null
    return 1
  fi

  print_success "获取文本存储记录成功!"
  print_info "文件路径: $file_path"

  if [ ! -f "$file_path" ]; then
    print_error "文件不存在: $file_path"
    return 1
  fi

  if ! DOCUMENT_TEXT=$(cat "$file_path" 2>/dev/null); then
    print_error "读取文件失败: $file_path"
    return 1
  fi
  print_success "读取文本成功 (长度: ${#DOCUMENT_TEXT} 字符)"

  # Preview the first 200 characters.
  echo -e "${YELLOW}文本预览:${NC}"
  echo "${DOCUMENT_TEXT:0:200}..."
  return 0
}
# Vector extraction (RAG index): POST the document text to RAG_INDEX_URL.
# Arguments: $1 - document ID
#            $2 - document text
# Returns:   0 on HTTP 200, 1 otherwise.
# The text can be arbitrarily large, so it is never passed to an external
# command as an argument (a single oversized argument makes exec fail with
# "Argument list too long"); it is streamed to jq and curl over stdin.
test_vector_extraction() {
  local DOC_ID=$1
  local TEXT=$2
  local request_body escaped_text escaped_id response http_code body chunk_count

  print_step "向量提取 (RAG 索引)"

  print_info "文档ID: $DOC_ID"
  print_info "文本长度: ${#TEXT} 字符"
  print_info "请求URL: $RAG_INDEX_URL"

  # Build the request JSON.
  if [ "$JQ_AVAILABLE" = true ]; then
    # -R -s hands the whole of stdin to the filter as one string.
    request_body=$(
      printf '%s' "$TEXT" |
        jq -Rs --arg docId "$DOC_ID" '{documentId: $docId, text: .}'
    )
  else
    # Minimal escaping without jq: flatten newline/CR/tab to spaces, drop the
    # remaining control characters (they are invalid inside a JSON string),
    # then escape backslashes and double quotes.
    escaped_text=$(
      printf '%s' "$TEXT" |
        tr '\n\r\t' '   ' |
        tr -d '\000-\010\013\014\016-\037' |
        sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
    )
    escaped_id=${DOC_ID//\\/\\\\}
    escaped_id=${escaped_id//\"/\\\"}
    request_body="{\"documentId\":\"${escaped_id}\",\"text\":\"${escaped_text}\"}"
  fi

  # --data-binary @- reads the body from stdin unmodified; curl appends the
  # HTTP status as an extra last line after the response body.
  response=$(
    printf '%s' "$request_body" |
      curl -s -w "\n%{http_code}" \
        -X POST "$RAG_INDEX_URL" \
        -H "Content-Type: application/json" \
        --data-binary @- \
        --connect-timeout 30 \
        --max-time 300
  )
  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  echo -e "${YELLOW}响应状态码:${NC} $http_code"
  echo -e "${YELLOW}响应内容:${NC}"

  if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$body" 2>/dev/null; then
    echo "$body"
  fi

  if [ "$http_code" != "200" ]; then
    print_error "向量提取失败 (HTTP $http_code)"
    return 1
  fi

  chunk_count=""
  if [ "$JQ_AVAILABLE" = true ]; then
    chunk_count=$(jq -r '.data.chunkCount // empty' <<<"$body" 2>/dev/null)
  fi

  if [ -n "$chunk_count" ] && [ "$chunk_count" != "null" ]; then
    print_success "向量提取成功! 生成 $chunk_count 个分块"
  else
    print_success "向量提取成功!"
  fi
  return 0
}
# NER extraction: POST to ${NER_DOCUMENT_URL}/<document id> and report the result.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq ENTITY_COUNT / RELATION_COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_ner_extraction() {
  local DOC_ID=$1
  local endpoint="${NER_DOCUMENT_URL}/${DOC_ID}"

  print_step "NER 提取 (命名实体识别)"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(
    curl -s -w "\n%{http_code}" \
      -X POST "$endpoint" \
      -H "Content-Type: application/json" \
      --connect-timeout 30 \
      --max-time 300
  )
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"
  echo -e "${YELLOW}响应内容:${NC}"

  # Pretty-print with jq when possible, otherwise show the raw body.
  if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$BODY" 2>/dev/null; then
    echo "$BODY"
  fi

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "NER 提取失败 (HTTP $HTTP_CODE)"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "NER 提取成功!"
    return 0
  fi

  ENTITY_COUNT=$(jq -r '.data.entityCount // empty' <<<"$BODY" 2>/dev/null)
  RELATION_COUNT=$(jq -r '.data.relationCount // empty' <<<"$BODY" 2>/dev/null)
  if [ -z "$ENTITY_COUNT" ] || [ "$ENTITY_COUNT" = "null" ]; then
    print_success "NER 提取成功!"
  else
    print_success "NER 提取成功! 实体: $ENTITY_COUNT, 关系: $RELATION_COUNT"
  fi
  return 0
}
# Structured parsing: GET ${STRUCTURED_URL}/<document id> and summarise the
# element, image and table counts the response reports.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq TOTAL / IMAGE_COUNT / TABLE_COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_structured_extraction() {
  local DOC_ID=$1
  local endpoint="${STRUCTURED_URL}/${DOC_ID}"

  print_step "结构化解析 (提取段落、图片、表格)"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(
    curl -s -w "\n%{http_code}" \
      -X GET "$endpoint" \
      --connect-timeout 30 \
      --max-time 300
  )
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "结构化解析失败 (HTTP $HTTP_CODE)"
    if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$BODY" 2>/dev/null; then
      echo "$BODY"
    fi
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "结构化解析成功!"
    echo "$BODY"
    return 0
  fi

  TOTAL=$(jq -r '.data.totalElements // 0' <<<"$BODY" 2>/dev/null)
  IMAGE_COUNT=$(jq -r '.data.imageCount // 0' <<<"$BODY" 2>/dev/null)
  TABLE_COUNT=$(jq -r '.data.tableCount // 0' <<<"$BODY" 2>/dev/null)
  print_success "结构化解析成功!"
  print_info "总元素: $TOTAL, 图片: $IMAGE_COUNT, 表格: $TABLE_COUNT"

  # List the images.
  if [ "$IMAGE_COUNT" -gt 0 ]; then
    echo -e "\n${YELLOW}图片列表:${NC}"
    jq -r '.data.elements[] | select(.type == "image") | " - \(.imageUrl) (\(.imageWidth)x\(.imageHeight))"' <<<"$BODY" 2>/dev/null
  fi

  # Summarise the tables.
  if [ "$TABLE_COUNT" -gt 0 ]; then
    echo -e "\n${YELLOW}表格列表:${NC}"
    jq -r '.data.elements[] | select(.type == "table") | " - 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' <<<"$BODY" 2>/dev/null
  fi
  return 0
}
# Fetch the image elements of a document from ${ELEMENTS_URL}/<id>/images.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_images() {
  local DOC_ID=$1
  local endpoint="${ELEMENTS_URL}/${DOC_ID}/images"

  print_step "获取文档图片"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "$endpoint" --connect-timeout 10)
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "获取图片失败 (HTTP $HTTP_CODE)"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "获取图片成功!"
    echo "$BODY"
    return 0
  fi

  COUNT=$(jq -r '.data | length' <<<"$BODY" 2>/dev/null)
  print_success "获取图片成功! 共 $COUNT 张"

  if [ "$COUNT" -gt 0 ]; then
    echo -e "${YELLOW}图片详情:${NC}"
    jq -r '.data[] | " [\(.elementIndex)] \(.imageUrl) - \(.imageFormat) (\(.imageWidth)x\(.imageHeight))"' <<<"$BODY" 2>/dev/null
  fi
  return 0
}
# Fetch the table elements of a document from ${ELEMENTS_URL}/<id>/tables.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_tables() {
  local DOC_ID=$1
  local endpoint="${ELEMENTS_URL}/${DOC_ID}/tables"

  print_step "获取文档表格"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "$endpoint" --connect-timeout 10)
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "获取表格失败 (HTTP $HTTP_CODE)"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "获取表格成功!"
    echo "$BODY"
    return 0
  fi

  COUNT=$(jq -r '.data | length' <<<"$BODY" 2>/dev/null)
  print_success "获取表格成功! 共 $COUNT 个"

  if [ "$COUNT" -gt 0 ]; then
    echo -e "${YELLOW}表格详情:${NC}"
    jq -r '.data[] | " [\(.elementIndex)] 表格\(.tableIndex): \(.tableRowCount)行 x \(.tableColCount)列"' <<<"$BODY" 2>/dev/null
  fi
  return 0
}
# ============================================
# Datasource test functions
# ============================================
# Fetch the GraphNode list of a document from ${GRAPH_URL}/documents/<id>/nodes.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq COUNT / FIRST_NODE_ID.
# Side effect: with jq and at least one node, the first node ID is saved to
#              ${SCRIPT_DIR}/.last_node_id.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_graph_nodes() {
  local DOC_ID=$1
  local endpoint="${GRAPH_URL}/documents/${DOC_ID}/nodes"

  print_step "获取文档 GraphNode 列表"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "$endpoint" --connect-timeout 10)
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "获取 GraphNode 失败 (HTTP $HTTP_CODE)"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "获取 GraphNode 成功!"
    echo "$BODY"
    return 0
  fi

  COUNT=$(jq -r '.data | length' <<<"$BODY" 2>/dev/null)
  print_success "获取 GraphNode 成功! 共 $COUNT 个"

  if [ "$COUNT" -gt 0 ]; then
    echo -e "${YELLOW}节点列表 (前10个):${NC}"
    jq -r '.data[:10][] | " [\(.id)] \(.nodeType): \(.name)"' <<<"$BODY" 2>/dev/null

    # Save the first node ID for the follow-up tests.
    FIRST_NODE_ID=$(jq -r '.data[0].id // empty' <<<"$BODY" 2>/dev/null)
    if [ -n "$FIRST_NODE_ID" ] && [ "$FIRST_NODE_ID" != "null" ]; then
      echo "$FIRST_NODE_ID" > "${SCRIPT_DIR}/.last_node_id"
      print_info "已保存第一个节点ID: $FIRST_NODE_ID"
    fi
  fi
  return 0
}
# Fetch the datasources of a document from ${DATASOURCE_URL}/document/<id>.
# Arguments: $1 - document ID
# Globals written: RESPONSE, HTTP_CODE, BODY, and with jq COUNT.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_datasources() {
  local DOC_ID=$1
  local endpoint="${DATASOURCE_URL}/document/${DOC_ID}"

  print_step "获取文档数据源列表"

  print_info "文档ID: $DOC_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "$endpoint" --connect-timeout 10)
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "获取数据源失败 (HTTP $HTTP_CODE)"
    return 1
  fi

  if [ "$JQ_AVAILABLE" != true ]; then
    print_success "获取数据源成功!"
    echo "$BODY"
    return 0
  fi

  COUNT=$(jq -r '.data | length' <<<"$BODY" 2>/dev/null)
  print_success "获取数据源成功! 共 $COUNT 个"

  if [ "$COUNT" -gt 0 ]; then
    echo -e "${YELLOW}数据源列表:${NC}"
    jq -r '.data[] | " [\(.id)] \(.name) (\(.type)) - 值类型: \(.valueType), 聚合: \(.aggregateType)"' <<<"$BODY" 2>/dev/null
  fi
  return 0
}
# Escape a value for embedding inside a JSON string literal: backslash, double
# quote, newline, carriage return and tab.
_create_ds_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Create a datasource for a document.
# Arguments: $1 - document ID
#            $2 - datasource name
#            $3 - datasource type
#            $4 - value type (default: text)
#            $5 - aggregate type (default: first)
# The X-User-Id header comes from ${SCRIPT_DIR}/.last_user_id, falling back to
# "default-user" when that file does not exist.
# Side effect: with jq, the new datasource ID is saved to
#              ${SCRIPT_DIR}/.last_datasource_id.
# Returns: 0 on HTTP 200, 1 otherwise.
# All values are JSON-escaped before being placed in the request body, so a
# name containing quotes or backslashes no longer produces invalid JSON.
test_create_datasource() {
  local DOC_ID=$1
  local NAME=$2
  local TYPE=$3
  local VALUE_TYPE=${4:-text}
  local AGGREGATE_TYPE=${5:-first}
  local actual_user_id="default-user"
  local request_body response http_code body ds_id

  print_step "创建数据源"

  print_info "文档ID: $DOC_ID"
  print_info "名称: $NAME"
  print_info "类型: $TYPE"
  print_info "值类型: $VALUE_TYPE"
  print_info "聚合方式: $AGGREGATE_TYPE"

  # Resolve the user ID.
  if [ -f "${SCRIPT_DIR}/.last_user_id" ]; then
    actual_user_id=$(cat "${SCRIPT_DIR}/.last_user_id")
  fi

  request_body=$(
    printf '{"documentId":"%s","name":"%s","type":"%s","valueType":"%s","aggregateType":"%s"}' \
      "$(_create_ds_json_escape "$DOC_ID")" \
      "$(_create_ds_json_escape "$NAME")" \
      "$(_create_ds_json_escape "$TYPE")" \
      "$(_create_ds_json_escape "$VALUE_TYPE")" \
      "$(_create_ds_json_escape "$AGGREGATE_TYPE")"
  )

  # curl appends the HTTP status as an extra last line after the body.
  response=$(
    curl -s -w "\n%{http_code}" \
      -X POST "$DATASOURCE_URL" \
      -H "Content-Type: application/json" \
      -H "X-User-Id: $actual_user_id" \
      -d "$request_body" \
      --connect-timeout 10
  )
  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  echo -e "${YELLOW}响应状态码:${NC} $http_code"

  if [ "$http_code" != "200" ]; then
    print_error "创建数据源失败 (HTTP $http_code)"
    if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$body" 2>/dev/null; then
      echo "$body"
    fi
    return 1
  fi

  if [ "$JQ_AVAILABLE" = true ]; then
    ds_id=$(jq -r '.data.id // empty' <<<"$body" 2>/dev/null)
    print_success "创建数据源成功!"
    print_info "数据源ID: $ds_id"
    echo "$ds_id" > "${SCRIPT_DIR}/.last_datasource_id"
    jq '.data' <<<"$body" 2>/dev/null
  else
    print_success "创建数据源成功!"
    echo "$body"
  fi
  return 0
}
# Escape a value for embedding inside a JSON string literal: backslash, double
# quote, newline, carriage return and tab.
_bind_refs_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Bind nodes to a datasource via PUT ${DATASOURCE_URL}/<datasource id>/refs.
# Arguments: $1 - datasource ID
#            $2 - node type (graph_node or document_element)
#            $3 - comma-separated node IDs
#            $4 - mode: replace/append/remove (default: append)
# Returns: 0 on HTTP 200, 1 otherwise.
# IDs are trimmed of surrounding whitespace, empty entries are skipped, and all
# values are JSON-escaped before being placed in the request body.
test_bind_nodes_to_datasource() {
  local DS_ID=$1
  local NODE_TYPE=$2
  local NODE_IDS=$3
  local MODE=${4:-append}
  local -a id_list=()
  local raw_id node_id refs="" escaped_type request_body response http_code body

  print_step "绑定节点到数据源"

  print_info "数据源ID: $DS_ID"
  print_info "节点类型: $NODE_TYPE"
  print_info "节点IDs: $NODE_IDS"
  print_info "模式: $MODE"

  # Build the refs array.
  escaped_type=$(_bind_refs_json_escape "$NODE_TYPE")
  IFS=',' read -ra id_list <<<"$NODE_IDS"
  for raw_id in "${id_list[@]}"; do
    # Trim leading and trailing whitespace ("id1, id2" is a natural way to type the list).
    node_id="${raw_id#"${raw_id%%[![:space:]]*}"}"
    node_id="${node_id%"${node_id##*[![:space:]]}"}"
    [ -n "$node_id" ] || continue
    refs+="${refs:+,}{\"type\":\"${escaped_type}\",\"id\":\"$(_bind_refs_json_escape "$node_id")\"}"
  done

  request_body="{\"mode\":\"$(_bind_refs_json_escape "$MODE")\",\"refs\":[${refs}]}"

  # curl appends the HTTP status as an extra last line after the body.
  response=$(
    curl -s -w "\n%{http_code}" \
      -X PUT "${DATASOURCE_URL}/${DS_ID}/refs" \
      -H "Content-Type: application/json" \
      -d "$request_body" \
      --connect-timeout 10
  )
  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  echo -e "${YELLOW}响应状态码:${NC} $http_code"

  if [ "$http_code" != "200" ]; then
    print_error "绑定节点失败 (HTTP $http_code)"
    if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$body" 2>/dev/null; then
      echo "$body"
    fi
    return 1
  fi

  print_success "绑定节点成功!"
  if [ "$JQ_AVAILABLE" = true ]; then
    jq '.data' <<<"$body" 2>/dev/null
  else
    echo "$body"
  fi
  return 0
}
# Fetch the value of a datasource from ${DATASOURCE_URL}/<datasource id>/value.
# Arguments: $1 - datasource ID
# Globals written: RESPONSE, HTTP_CODE, BODY.
# Returns: 0 on HTTP 200, 1 otherwise.
test_get_datasource_value() {
  local DS_ID=$1
  local endpoint="${DATASOURCE_URL}/${DS_ID}/value"

  print_step "获取数据源值"

  print_info "数据源ID: $DS_ID"
  print_info "请求URL: $endpoint"

  # curl appends the HTTP status as an extra last line after the body.
  RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "$endpoint" --connect-timeout 10)
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

  if [ "$HTTP_CODE" != "200" ]; then
    print_error "获取数据源值失败 (HTTP $HTTP_CODE)"
    if [ "$JQ_AVAILABLE" != true ] || ! jq . <<<"$BODY" 2>/dev/null; then
      echo "$BODY"
    fi
    return 1
  fi

  print_success "获取数据源值成功!"
  if [ "$JQ_AVAILABLE" = true ]; then
    jq '.data' <<<"$BODY" 2>/dev/null
  else
    echo "$BODY"
  fi
  return 0
}
# Full datasource flow for one document: list graph nodes, list datasources,
# create a test datasource, bind the first node to it, then read its value.
# Arguments: $1 - document ID
# Returns:   1 when the datasource cannot be created, 0 otherwise (a failed
#            bind or value read is reported by the called function only).
# IDs are exchanged with the other test functions through files in SCRIPT_DIR.
# Only IDs produced by THIS run are used: the node-ID file is removed before
# the nodes are fetched, and the datasource-ID file is trusted only when jq is
# available (the create step rewrites it only then). Previously IDs left over
# from an earlier run could be bound silently.
test_datasource_flow() {
  local DOC_ID=$1
  local node_id_file="${SCRIPT_DIR}/.last_node_id"
  local ds_id_file="${SCRIPT_DIR}/.last_datasource_id"
  local ds_id="" node_id=""

  print_header "数据源完整测试流程"

  # 1. Fetch the GraphNode list (drop any node ID saved by an earlier run first).
  rm -f -- "$node_id_file"
  test_get_graph_nodes "$DOC_ID"

  # 2. Fetch the existing datasources.
  test_get_datasources "$DOC_ID"

  # 3. Create a test datasource.
  print_step "创建测试数据源"
  if ! test_create_datasource "$DOC_ID" "测试数据源_$(date +%s)" "entity" "text" "concat"; then
    return 1
  fi

  if [ "$JQ_AVAILABLE" = true ]; then
    ds_id=$(cat "$ds_id_file" 2>/dev/null)
  fi
  if [ -z "$ds_id" ]; then
    print_info "无法获取新建数据源ID,跳过绑定测试"
    return 0
  fi

  # 4. Bind a node to the datasource when this run found one.
  if [ ! -f "$node_id_file" ]; then
    print_info "没有可用的节点ID,跳过绑定测试"
    return 0
  fi
  node_id=$(cat "$node_id_file")
  test_bind_nodes_to_datasource "$ds_id" "graph_node" "$node_id" "append"

  # 5. Read the datasource value.
  test_get_datasource_value "$ds_id"
  return 0
}
# Print the usage text. $0 is expanded inside the here-document, so the
# examples show the name the script was invoked with.
show_help() {
  cat <<EOF
使用方法: $0 [选项] [host] [port]

端到端测试流程: 上传文件 -> 等待解析 -> 自动处理 -> 数据源操作

选项:
 -h, --help 显示帮助信息
 -e, --e2e 执行完整端到端测试 (默认)
 -u, --upload-only 仅执行上传测试
 -s, --status 仅查询上次上传的文档状态
 -v, --vector 仅执行向量提取(使用上次的文档)
 -n, --ner 仅执行NER提取(使用上次的文档)
 -x, --structured 仅执行结构化解析(提取图片和表格)
 -i, --images 仅获取文档图片列表
 -t, --tables 仅获取文档表格列表
 -g, --nodes 获取文档的 GraphNode 列表
 -d, --datasource 获取文档的数据源列表
 --ds-create 创建数据源 (需要 --name 和 --type)
 --ds-bind 绑定节点到数据源
 --ds-value 获取数据源的值
 --ds-flow 执行数据源完整测试流程

数据源相关参数:
 --name NAME 数据源名称
 --type TYPE 数据源类型 (entity/paragraph/image/table)
 --value-type TYPE 值类型 (text/image/table/mixed)
 --aggregate TYPE 聚合方式 (first/last/concat/sum/avg/list)
 --ds-id ID 数据源ID
 --node-type TYPE 节点类型 (graph_node/document_element)
 --node-ids IDS 节点ID列表 (逗号分隔)

示例:
 $0 # 完整端到端测试
 $0 192.168.1.100 5232 # 指定服务器地址
 $0 -u # 仅上传文件
 $0 -s # 查询上次上传的状态
 $0 -g # 获取文档的 GraphNode 列表
 $0 -d # 获取文档的数据源列表
 $0 --ds-flow # 执行数据源完整测试流程
 $0 --ds-create --name '报告编号' --type entity
 $0 --ds-bind --ds-id xxx --node-type graph_node --node-ids 'id1,id2'
 $0 --ds-value --ds-id xxx
EOF
}
# Load the document ID saved by the last upload into the global DOCUMENT_ID.
# Prints an error ($1 overrides the default message) and exits with status 1
# when ${SCRIPT_DIR}/.last_document_id does not exist.
_main_load_last_document_id() {
  local id_file="${SCRIPT_DIR}/.last_document_id"
  if [ ! -f "$id_file" ]; then
    print_error "${1:-未找到上次上传的文档ID}"
    exit 1
  fi
  DOCUMENT_ID=$(cat "$id_file")
}

# Main entry point: parse the command line, rebuild the endpoint URLs for the
# selected host/port, run the selected mode, and print a summary.
# Arguments: mode flags, datasource parameters, and up to two positional
#            arguments (host, then port); see show_help.
# Exits with status 1 on usage errors and on failures that make the remaining
# steps pointless.
main() {
  local MODE="e2e" # default: full end-to-end test

  # Datasource parameters.
  local DS_NAME=""
  local DS_TYPE=""
  local DS_VALUE_TYPE="text"
  local DS_AGGREGATE="first"
  local DS_ID=""
  local NODE_TYPE="graph_node"
  local NODE_IDS=""
  local host_set=""

  # Parse the arguments.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_help
        exit 0
        ;;
      # -p/--poll is kept for compatibility with older invocations; same as e2e.
      -e | --e2e | -p | --poll) MODE="e2e"; shift ;;
      -u | --upload-only) MODE="upload"; shift ;;
      -s | --status) MODE="status"; shift ;;
      -v | --vector) MODE="vector"; shift ;;
      -n | --ner) MODE="ner"; shift ;;
      -x | --structured) MODE="structured"; shift ;;
      -i | --images) MODE="images"; shift ;;
      -t | --tables) MODE="tables"; shift ;;
      -g | --nodes) MODE="nodes"; shift ;;
      -d | --datasource) MODE="datasource"; shift ;;
      --ds-create) MODE="ds-create"; shift ;;
      --ds-bind) MODE="ds-bind"; shift ;;
      --ds-value) MODE="ds-value"; shift ;;
      --ds-flow) MODE="ds-flow"; shift ;;
      --name | --type | --value-type | --aggregate | --ds-id | --node-type | --node-ids)
        # "shift 2" fails without shifting when the value is missing, which
        # used to leave this loop spinning forever on the same argument.
        if [[ $# -lt 2 ]]; then
          print_error "选项 $1 缺少参数值"
          exit 1
        fi
        case "$1" in
          --name) DS_NAME=$2 ;;
          --type) DS_TYPE=$2 ;;
          --value-type) DS_VALUE_TYPE=$2 ;;
          --aggregate) DS_AGGREGATE=$2 ;;
          --ds-id) DS_ID=$2 ;;
          --node-type) NODE_TYPE=$2 ;;
          --node-ids) NODE_IDS=$2 ;;
        esac
        shift 2
        ;;
      -*)
        # Unknown options are still ignored, but no longer silently.
        print_info "忽略未知选项: $1"
        shift
        ;;
      *)
        # First positional argument is the host, any later one the port.
        if [[ -z "$host_set" ]]; then
          HOST=$1
          host_set=true
        else
          PORT=$1
        fi
        shift
        ;;
    esac
  done

  # Rebuild the URLs for the selected host/port.
  BASE_URL="http://${HOST}:${PORT}"
  UPLOAD_URL="${BASE_URL}/api/v1/files/upload"
  STATUS_URL="${BASE_URL}/parse/status"
  REGISTER_URL="${BASE_URL}/auth/register"
  TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
  RAG_INDEX_URL="${BASE_URL}/api/rag/index"
  NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
  STRUCTURED_URL="${BASE_URL}/parse/structured"
  ELEMENTS_URL="${BASE_URL}/parse/elements"
  DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
  GRAPH_URL="${BASE_URL}/api/graph"

  print_header "文件上传端到端测试"
  echo "目标服务: $BASE_URL"
  echo "测试模式: $MODE"
  echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"

  check_dependencies

  # Run the selected mode.
  case "$MODE" in
    status)
      _main_load_last_document_id
      print_header "查询解析状态"
      test_parse_status "$DOCUMENT_ID"
      ;;

    vector)
      _main_load_last_document_id
      print_header "向量提取测试"
      if get_document_text "$DOCUMENT_ID"; then
        test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
      fi
      ;;

    ner)
      _main_load_last_document_id
      print_header "NER 提取测试"
      test_ner_extraction "$DOCUMENT_ID"
      ;;

    structured)
      _main_load_last_document_id
      print_header "结构化解析测试"
      test_structured_extraction "$DOCUMENT_ID"
      ;;

    images)
      _main_load_last_document_id
      print_header "获取文档图片"
      test_get_images "$DOCUMENT_ID"
      ;;

    tables)
      _main_load_last_document_id
      print_header "获取文档表格"
      test_get_tables "$DOCUMENT_ID"
      ;;

    nodes)
      _main_load_last_document_id
      print_header "获取文档 GraphNode"
      test_get_graph_nodes "$DOCUMENT_ID"
      ;;

    datasource)
      _main_load_last_document_id
      print_header "获取文档数据源"
      test_get_datasources "$DOCUMENT_ID"
      ;;

    ds-create)
      if [ -z "$DS_NAME" ]; then
        print_error "请指定数据源名称 (--name)"
        exit 1
      fi
      if [ -z "$DS_TYPE" ]; then
        print_error "请指定数据源类型 (--type)"
        exit 1
      fi
      _main_load_last_document_id
      print_header "创建数据源"
      test_create_datasource "$DOCUMENT_ID" "$DS_NAME" "$DS_TYPE" "$DS_VALUE_TYPE" "$DS_AGGREGATE"
      ;;

    ds-bind)
      # Fall back to the IDs saved by earlier runs when none are given.
      if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
        DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
      fi
      if [ -z "$DS_ID" ]; then
        print_error "请指定数据源ID (--ds-id)"
        exit 1
      fi
      if [ -z "$NODE_IDS" ] && [ -f "${SCRIPT_DIR}/.last_node_id" ]; then
        NODE_IDS=$(cat "${SCRIPT_DIR}/.last_node_id")
      fi
      if [ -z "$NODE_IDS" ]; then
        print_error "请指定节点ID (--node-ids)"
        exit 1
      fi
      print_header "绑定节点到数据源"
      test_bind_nodes_to_datasource "$DS_ID" "$NODE_TYPE" "$NODE_IDS" "append"
      ;;

    ds-value)
      if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
        DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
      fi
      if [ -z "$DS_ID" ]; then
        print_error "请指定数据源ID (--ds-id)"
        exit 1
      fi
      print_header "获取数据源值"
      test_get_datasource_value "$DS_ID"
      ;;

    ds-flow)
      _main_load_last_document_id "未找到上次上传的文档ID,请先上传文档"
      test_datasource_flow "$DOCUMENT_ID"
      ;;

    upload)
      check_test_file
      check_service
      register_test_user
      test_upload
      ;;

    e2e)
      check_test_file
      check_service
      register_test_user

      print_header "步骤 1/6: 文件上传"
      # Stop here on a failed upload; continuing would run the remaining
      # steps against a document ID saved by some earlier run.
      if ! test_upload; then
        print_error "文件上传失败,终止测试"
        exit 1
      fi

      # test_upload cannot extract the ID without jq; fall back to the saved one.
      if [ -z "$DOCUMENT_ID" ] && [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
        DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
        print_info "使用已保存的文档ID: $DOCUMENT_ID"
      fi

      if [ -z "$DOCUMENT_ID" ]; then
        print_error "无法获取文档ID,终止测试"
        exit 1
      fi

      print_header "步骤 2/6: 等待解析完成"
      if ! poll_parse_status "$DOCUMENT_ID" 60 3; then
        print_error "解析未完成,终止测试"
        exit 1
      fi

      print_header "步骤 3/6: 向量提取"
      if get_document_text "$DOCUMENT_ID"; then
        test_vector_extraction "$DOCUMENT_ID" "$DOCUMENT_TEXT"
      else
        print_info "跳过向量提取(无法获取文本)"
      fi

      print_header "步骤 4/6: NER 提取"
      test_ner_extraction "$DOCUMENT_ID"

      print_header "步骤 5/6: 结构化解析"
      test_structured_extraction "$DOCUMENT_ID"

      # Give the background processing a moment to finish.
      print_info "等待后台自动处理完成 (5秒)..."
      sleep 5

      print_header "步骤 6/6: 查看处理结果"
      test_get_graph_nodes "$DOCUMENT_ID"
      test_get_images "$DOCUMENT_ID"
      test_get_tables "$DOCUMENT_ID"
      ;;
  esac

  print_header "测试完成"
  if [ -n "$DOCUMENT_ID" ]; then
    echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
  fi
  if [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
    echo -e "${GREEN}最后数据源ID: $(cat "${SCRIPT_DIR}/.last_datasource_id")${NC}"
  fi
  echo ""
  echo "可使用以下命令进行后续操作:"
  echo ""
  echo " === 文档处理 ==="
  echo " $0 -s # 查询解析状态"
  echo " $0 -v # 重新向量提取"
  echo " $0 -n # 重新NER提取"
  echo " $0 -x # 结构化解析"
  echo " $0 -i # 获取图片列表"
  echo " $0 -t # 获取表格列表"
  echo ""
  echo " === 数据源操作 ==="
  echo " $0 -g # 获取 GraphNode 列表"
  echo " $0 -d # 获取数据源列表"
  echo " $0 --ds-flow # 执行数据源完整测试"
  echo " $0 --ds-create --name '名称' --type entity"
  echo " $0 --ds-bind --node-ids 'id1,id2'"
  echo " $0 --ds-value"
}
- # 运行主函数
- main "$@"
|