|
|
@@ -3,7 +3,7 @@
|
|
|
# ============================================
|
|
|
# 文件上传端到端测试脚本
|
|
|
# ============================================
|
|
|
-# 测试流程: 上传 -> 解析等待 -> 向量提取 -> NER提取
|
|
|
+# 测试流程: 上传 -> 解析等待 -> 自动处理(向量/NER/结构化)-> 数据源操作
|
|
|
# 使用方法: ./test_upload_api.sh [host] [port]
|
|
|
# 示例: ./test_upload_api.sh localhost 5232
|
|
|
# ============================================
|
|
|
@@ -12,7 +12,7 @@
|
|
|
HOST=${1:-localhost}
|
|
|
PORT=${2:-5232}
|
|
|
BASE_URL="http://${HOST}:${PORT}"
|
|
|
-UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
|
|
|
+UPLOAD_URL="${BASE_URL}/api/v1/files/upload"
|
|
|
STATUS_URL="${BASE_URL}/parse/status"
|
|
|
REGISTER_URL="${BASE_URL}/auth/register"
|
|
|
TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
|
|
|
@@ -20,6 +20,8 @@ RAG_INDEX_URL="${BASE_URL}/api/rag/index"
|
|
|
NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
|
|
|
STRUCTURED_URL="${BASE_URL}/parse/structured"
|
|
|
ELEMENTS_URL="${BASE_URL}/parse/elements"
|
|
|
+DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
|
|
|
+GRAPH_URL="${BASE_URL}/api/graph"
|
|
|
|
|
|
# 测试文件路径(相对于脚本所在目录)
|
|
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
@@ -587,11 +589,299 @@ test_get_tables() {
|
|
|
fi
|
|
|
}
|
|
|
|
|
|
+# ============================================
|
|
|
+# 数据源相关测试函数
|
|
|
+# ============================================
|
|
|
+
|
|
|
# Fetch the GraphNode list of a document and cache the first node ID.
#
# Globals:
#   GRAPH_URL, SCRIPT_DIR, JQ_AVAILABLE, YELLOW, NC (read)
#   RESPONSE, HTTP_CODE, BODY, COUNT, FIRST_NODE_ID (written)
# Arguments:
#   $1 - document ID
# Outputs:
#   Progress and result messages on stdout. When jq is available and at
#   least one node is returned, the first node ID is written to
#   ${SCRIPT_DIR}/.last_node_id.
# Returns:
#   0 if the endpoint answered HTTP 200, 1 otherwise.
test_get_graph_nodes() {
    local DOC_ID=$1

    print_step "获取文档 GraphNode 列表"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${GRAPH_URL}/documents/${DOC_ID}/nodes"

    # -w appends the HTTP status code on its own last line so it can be
    # split from the response body below.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${GRAPH_URL}/documents/${DOC_ID}/nodes" \
        --connect-timeout 10)

    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取 GraphNode 失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" = true ]; then
        COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
        # jq's errors are discarded above, so COUNT may be empty (or not a
        # number) when the body is not the expected JSON. Normalise it to 0;
        # otherwise the numeric test below prints
        # "integer expression expected".
        case "$COUNT" in
            '' | *[!0-9]*) COUNT=0 ;;
        esac
        print_success "获取 GraphNode 成功! 共 $COUNT 个"

        if [ "$COUNT" -gt 0 ]; then
            echo -e "${YELLOW}节点列表 (前10个):${NC}"
            echo "$BODY" | jq -r '.data[:10][] | "  [\(.id)] \(.nodeType): \(.name)"' 2>/dev/null

            # Persist the first node ID so later invocations of the script
            # can pick it up from the file.
            FIRST_NODE_ID=$(echo "$BODY" | jq -r '.data[0].id // empty' 2>/dev/null)
            if [ -n "$FIRST_NODE_ID" ] && [ "$FIRST_NODE_ID" != "null" ]; then
                echo "$FIRST_NODE_ID" > "${SCRIPT_DIR}/.last_node_id"
                print_info "已保存第一个节点ID: $FIRST_NODE_ID"
            fi
        fi
    else
        # Without jq the raw body is shown and no node ID is cached.
        print_success "获取 GraphNode 成功!"
        echo "$BODY"
    fi
    return 0
}
|
|
|
+
|
|
|
# List the data sources attached to a document.
#
# Globals:
#   DATASOURCE_URL, JQ_AVAILABLE, YELLOW, NC (read)
#   RESPONSE, HTTP_CODE, BODY, COUNT (written)
# Arguments:
#   $1 - document ID
# Outputs:
#   Progress and result messages on stdout.
# Returns:
#   0 if the endpoint answered HTTP 200, 1 otherwise.
test_get_datasources() {
    local DOC_ID=$1

    print_step "获取文档数据源列表"

    print_info "文档ID: $DOC_ID"
    print_info "请求URL: ${DATASOURCE_URL}/document/${DOC_ID}"

    # -w appends the HTTP status code on its own last line so it can be
    # split from the response body below.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X GET "${DATASOURCE_URL}/document/${DOC_ID}" \
        --connect-timeout 10)

    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取数据源失败 (HTTP $HTTP_CODE)"
        return 1
    fi

    if [ "$JQ_AVAILABLE" = true ]; then
        COUNT=$(echo "$BODY" | jq -r '.data | length' 2>/dev/null)
        # jq's errors are discarded above, so COUNT may be empty (or not a
        # number) when the body is not the expected JSON. Normalise it to 0;
        # otherwise the numeric test below prints
        # "integer expression expected".
        case "$COUNT" in
            '' | *[!0-9]*) COUNT=0 ;;
        esac
        print_success "获取数据源成功! 共 $COUNT 个"

        if [ "$COUNT" -gt 0 ]; then
            echo -e "${YELLOW}数据源列表:${NC}"
            echo "$BODY" | jq -r '.data[] | "  [\(.id)] \(.name) (\(.type)) - 值类型: \(.valueType), 聚合: \(.aggregateType)"' 2>/dev/null
        fi
    else
        # Without jq the raw body is shown as-is.
        print_success "获取数据源成功!"
        echo "$BODY"
    fi
    return 0
}
|
|
|
+
|
|
|
# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab.
_ds_json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Create a data source for a document via POST ${DATASOURCE_URL}.
#
# Globals:
#   DATASOURCE_URL, SCRIPT_DIR, JQ_AVAILABLE, YELLOW, NC (read)
#   REQUEST_BODY, RESPONSE, HTTP_CODE, BODY, DS_ID (written)
# Arguments:
#   $1 - document ID
#   $2 - data source name
#   $3 - data source type
#   $4 - value type (default: text)
#   $5 - aggregate type (default: first)
# Outputs:
#   Progress and result messages on stdout. With jq available, the ID taken
#   from the response is written to ${SCRIPT_DIR}/.last_datasource_id.
# Returns:
#   0 if the endpoint answered HTTP 200, 1 otherwise.
test_create_datasource() {
    local DOC_ID=$1
    local NAME=$2
    local TYPE=$3
    local VALUE_TYPE=${4:-text}
    local AGGREGATE_TYPE=${5:-first}

    print_step "创建数据源"

    print_info "文档ID: $DOC_ID"
    print_info "名称: $NAME"
    print_info "类型: $TYPE"
    print_info "值类型: $VALUE_TYPE"
    print_info "聚合方式: $AGGREGATE_TYPE"

    # Use the user ID saved in .last_user_id when that file exists,
    # otherwise fall back to a fixed default.
    local ACTUAL_USER_ID="default-user"
    if [ -f "${SCRIPT_DIR}/.last_user_id" ]; then
        ACTUAL_USER_ID=$(cat "${SCRIPT_DIR}/.last_user_id")
    fi

    # The name comes straight from the command line; escape every value so
    # quotes or backslashes cannot produce an invalid JSON document.
    local doc_json name_json type_json value_json aggregate_json
    doc_json=$(_ds_json_escape "$DOC_ID")
    name_json=$(_ds_json_escape "$NAME")
    type_json=$(_ds_json_escape "$TYPE")
    value_json=$(_ds_json_escape "$VALUE_TYPE")
    aggregate_json=$(_ds_json_escape "$AGGREGATE_TYPE")

    REQUEST_BODY=$(cat <<EOF
{
    "documentId": "$doc_json",
    "name": "$name_json",
    "type": "$type_json",
    "valueType": "$value_json",
    "aggregateType": "$aggregate_json"
}
EOF
)

    # -w appends the HTTP status code on its own last line so it can be
    # split from the response body below.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$DATASOURCE_URL" \
        -H "Content-Type: application/json" \
        -H "X-User-Id: $ACTUAL_USER_ID" \
        -d "$REQUEST_BODY" \
        --connect-timeout 10)

    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" = "200" ]; then
        if [ "$JQ_AVAILABLE" = true ]; then
            DS_ID=$(echo "$BODY" | jq -r '.data.id // empty' 2>/dev/null)
            print_success "创建数据源成功!"
            print_info "数据源ID: $DS_ID"
            # Saved so follow-up commands can default to this data source.
            echo "$DS_ID" > "${SCRIPT_DIR}/.last_datasource_id"
            echo "$BODY" | jq '.data' 2>/dev/null
        else
            print_success "创建数据源成功!"
            echo "$BODY"
        fi
        return 0
    else
        print_error "创建数据源失败 (HTTP $HTTP_CODE)"
        if [ "$JQ_AVAILABLE" = true ]; then
            # Pretty-print when possible; fall back to the raw body.
            echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
        else
            echo "$BODY"
        fi
        return 1
    fi
}
|
|
|
+
|
|
|
# Bind a list of nodes to a data source via PUT ${DATASOURCE_URL}/<id>/refs.
#
# Globals:
#   DATASOURCE_URL, JQ_AVAILABLE, YELLOW, NC (read)
#   REQUEST_BODY, RESPONSE, HTTP_CODE, BODY (written)
# Arguments:
#   $1 - data source ID
#   $2 - node type (graph_node or document_element)
#   $3 - comma-separated node IDs; surrounding whitespace is trimmed and
#        blank entries are ignored
#   $4 - mode: replace/append/remove (default: append)
# Outputs:
#   Progress and result messages on stdout.
# Returns:
#   0 if the endpoint answered HTTP 200, 1 otherwise.
test_bind_nodes_to_datasource() {
    local DS_ID=$1
    local NODE_TYPE=$2
    local NODE_IDS=$3
    local MODE=${4:-append}

    print_step "绑定节点到数据源"

    print_info "数据源ID: $DS_ID"
    print_info "节点类型: $NODE_TYPE"
    print_info "节点IDs: $NODE_IDS"
    print_info "模式: $MODE"

    # Build the JSON "refs" array, one object per node ID.
    local REFS_ARRAY="["
    local FIRST=true
    local IDS=()
    local id
    IFS=',' read -ra IDS <<< "$NODE_IDS"
    for id in "${IDS[@]}"; do
        # Trim leading/trailing whitespace so "id1, id2" works, and skip
        # blank entries produced by stray commas.
        id="${id#"${id%%[![:space:]]*}"}"
        id="${id%"${id##*[![:space:]]}"}"
        if [ -z "$id" ]; then
            continue
        fi
        if [ "$FIRST" = true ]; then
            FIRST=false
        else
            REFS_ARRAY+=","
        fi
        REFS_ARRAY+="{\"type\":\"$NODE_TYPE\",\"id\":\"$id\"}"
    done
    REFS_ARRAY+="]"

    REQUEST_BODY=$(cat <<EOF
{
    "mode": "$MODE",
    "refs": $REFS_ARRAY
}
EOF
)

    # -w appends the HTTP status code on its own last line so it can be
    # split from the response body below.
    RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X PUT "${DATASOURCE_URL}/${DS_ID}/refs" \
        -H "Content-Type: application/json" \
        -d "$REQUEST_BODY" \
        --connect-timeout 10)

    HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
    BODY=$(echo "$RESPONSE" | sed '$d')

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    if [ "$HTTP_CODE" = "200" ]; then
        print_success "绑定节点成功!"
        if [ "$JQ_AVAILABLE" = true ]; then
            echo "$BODY" | jq '.data' 2>/dev/null
        else
            echo "$BODY"
        fi
        return 0
    else
        print_error "绑定节点失败 (HTTP $HTTP_CODE)"
        if [ "$JQ_AVAILABLE" = true ]; then
            # Pretty-print when possible; fall back to the raw body.
            echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
        else
            echo "$BODY"
        fi
        return 1
    fi
}
|
|
|
+
|
|
|
# Query the current value of a data source and print the response.
#
# Globals:
#   DATASOURCE_URL, JQ_AVAILABLE, YELLOW, NC (read)
#   RESPONSE, HTTP_CODE, BODY (written)
# Arguments:
#   $1 - data source ID
# Outputs:
#   Progress and result messages on stdout.
# Returns:
#   0 if the endpoint answered HTTP 200, 1 otherwise.
test_get_datasource_value() {
    local DS_ID=$1
    local value_url="${DATASOURCE_URL}/${DS_ID}/value"

    print_step "获取数据源值"

    print_info "数据源ID: $DS_ID"
    print_info "请求URL: ${value_url}"

    # The status code is appended by -w as the final line of the output.
    RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${value_url}" --connect-timeout 10)

    HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
    BODY=$(sed '$d' <<< "$RESPONSE")

    echo -e "${YELLOW}响应状态码:${NC} $HTTP_CODE"

    # Failure path first: report, dump the body (pretty-printed if jq can
    # parse it, raw otherwise) and bail out.
    if [ "$HTTP_CODE" != "200" ]; then
        print_error "获取数据源值失败 (HTTP $HTTP_CODE)"
        if [ "$JQ_AVAILABLE" != true ] || ! echo "$BODY" | jq . 2>/dev/null; then
            echo "$BODY"
        fi
        return 1
    fi

    print_success "获取数据源值成功!"
    case "$JQ_AVAILABLE" in
        true) echo "$BODY" | jq '.data' 2>/dev/null ;;
        *) echo "$BODY" ;;
    esac
    return 0
}
|
|
|
+
|
|
|
# Run the whole data source scenario for one document: list graph nodes,
# list existing data sources, create a test data source, bind the first
# node to it and read the resulting value.
#
# Globals:
#   SCRIPT_DIR (read)
# Arguments:
#   $1 - document ID
# Outputs:
#   Whatever the individual test steps print. Removes
#   ${SCRIPT_DIR}/.last_node_id before listing nodes.
# Returns:
#   Status of the last executed step; failures of the listing steps and of
#   the create step are not propagated.
test_datasource_flow() {
    local DOC_ID=$1
    local DS_ID NODE_ID
    local node_id_file="${SCRIPT_DIR}/.last_node_id"

    print_header "数据源完整测试流程"

    # Forget any node ID cached by an earlier run. Otherwise, when this
    # document yields no nodes (or the listing fails), a node belonging to
    # a different document would be bound below.
    rm -f -- "$node_id_file"

    # 1. List the document's graph nodes (caches the first node ID).
    test_get_graph_nodes "$DOC_ID"

    # 2. List the data sources that already exist.
    test_get_datasources "$DOC_ID"

    # 3. Create a test data source with a timestamped name.
    print_step "创建测试数据源"
    if test_create_datasource "$DOC_ID" "测试数据源_$(date +%s)" "entity" "text" "concat"; then
        DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id" 2>/dev/null)

        if [ -z "$DS_ID" ]; then
            # Without an ID the bind/value URLs would be malformed.
            print_info "未获取到数据源ID,跳过绑定测试"
        elif [ -f "$node_id_file" ]; then
            # 4. Bind the cached node to the new data source.
            NODE_ID=$(cat "$node_id_file")
            test_bind_nodes_to_datasource "$DS_ID" "graph_node" "$NODE_ID" "append"

            # 5. Read the data source value back.
            test_get_datasource_value "$DS_ID"
        else
            print_info "没有可用的节点ID,跳过绑定测试"
        fi
    fi
}
|
|
|
+
|
|
|
# 显示使用帮助
|
|
|
show_help() {
|
|
|
echo "使用方法: $0 [选项] [host] [port]"
|
|
|
echo ""
|
|
|
- echo "端到端测试流程: 上传文件 -> 等待解析 -> 向量提取 -> NER提取 -> 结构化解析"
|
|
|
+ echo "端到端测试流程: 上传文件 -> 等待解析 -> 自动处理 -> 数据源操作"
|
|
|
echo ""
|
|
|
echo "选项:"
|
|
|
echo " -h, --help 显示帮助信息"
|
|
|
@@ -603,23 +893,48 @@ show_help() {
|
|
|
echo " -x, --structured 仅执行结构化解析(提取图片和表格)"
|
|
|
echo " -i, --images 仅获取文档图片列表"
|
|
|
echo " -t, --tables 仅获取文档表格列表"
|
|
|
+ echo " -g, --nodes 获取文档的 GraphNode 列表"
|
|
|
+ echo " -d, --datasource 获取文档的数据源列表"
|
|
|
+ echo " --ds-create 创建数据源 (需要 --name 和 --type)"
|
|
|
+ echo " --ds-bind 绑定节点到数据源"
|
|
|
+ echo " --ds-value 获取数据源的值"
|
|
|
+ echo " --ds-flow 执行数据源完整测试流程"
|
|
|
+ echo ""
|
|
|
+ echo "数据源相关参数:"
|
|
|
+ echo " --name NAME 数据源名称"
|
|
|
+ echo " --type TYPE 数据源类型 (entity/paragraph/image/table)"
|
|
|
+ echo " --value-type TYPE 值类型 (text/image/table/mixed)"
|
|
|
+ echo " --aggregate TYPE 聚合方式 (first/last/concat/sum/avg/list)"
|
|
|
+ echo " --ds-id ID 数据源ID"
|
|
|
+ echo " --node-type TYPE 节点类型 (graph_node/document_element)"
|
|
|
+ echo " --node-ids IDS 节点ID列表 (逗号分隔)"
|
|
|
echo ""
|
|
|
echo "示例:"
|
|
|
- echo " $0 # 使用默认配置执行完整端到端测试"
|
|
|
+ echo " $0 # 完整端到端测试"
|
|
|
echo " $0 192.168.1.100 5232 # 指定服务器地址"
|
|
|
echo " $0 -u # 仅上传文件"
|
|
|
echo " $0 -s # 查询上次上传的状态"
|
|
|
- echo " $0 -v # 对上次文档执行向量提取"
|
|
|
- echo " $0 -n # 对上次文档执行NER提取"
|
|
|
- echo " $0 -x # 对上次文档执行结构化解析"
|
|
|
- echo " $0 -i # 获取上次文档的图片列表"
|
|
|
- echo " $0 -t # 获取上次文档的表格列表"
|
|
|
+ echo " $0 -g # 获取文档的 GraphNode 列表"
|
|
|
+ echo " $0 -d # 获取文档的数据源列表"
|
|
|
+ echo " $0 --ds-flow # 执行数据源完整测试流程"
|
|
|
+ echo " $0 --ds-create --name '报告编号' --type entity"
|
|
|
+ echo " $0 --ds-bind --ds-id xxx --node-type graph_node --node-ids 'id1,id2'"
|
|
|
+ echo " $0 --ds-value --ds-id xxx"
|
|
|
}
|
|
|
|
|
|
# 主函数
|
|
|
main() {
|
|
|
local MODE="e2e" # 默认执行完整端到端测试
|
|
|
|
|
|
+ # 数据源相关参数
|
|
|
+ local DS_NAME=""
|
|
|
+ local DS_TYPE=""
|
|
|
+ local DS_VALUE_TYPE="text"
|
|
|
+ local DS_AGGREGATE="first"
|
|
|
+ local DS_ID=""
|
|
|
+ local NODE_TYPE="graph_node"
|
|
|
+ local NODE_IDS=""
|
|
|
+
|
|
|
# 解析参数
|
|
|
while [[ $# -gt 0 ]]; do
|
|
|
case $1 in
|
|
|
@@ -659,6 +974,58 @@ main() {
|
|
|
MODE="tables"
|
|
|
shift
|
|
|
;;
|
|
|
+ -g|--nodes)
|
|
|
+ MODE="nodes"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ -d|--datasource)
|
|
|
+ MODE="datasource"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ --ds-create)
|
|
|
+ MODE="ds-create"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ --ds-bind)
|
|
|
+ MODE="ds-bind"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ --ds-value)
|
|
|
+ MODE="ds-value"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ --ds-flow)
|
|
|
+ MODE="ds-flow"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
+ --name)
|
|
|
+ DS_NAME="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --type)
|
|
|
+ DS_TYPE="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --value-type)
|
|
|
+ DS_VALUE_TYPE="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --aggregate)
|
|
|
+ DS_AGGREGATE="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --ds-id)
|
|
|
+ DS_ID="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --node-type)
|
|
|
+ NODE_TYPE="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
+ --node-ids)
|
|
|
+ NODE_IDS="$2"
|
|
|
+ shift 2
|
|
|
+ ;;
|
|
|
-p|--poll)
|
|
|
# 兼容旧参数,等同于e2e
|
|
|
MODE="e2e"
|
|
|
@@ -680,7 +1047,7 @@ main() {
|
|
|
|
|
|
# 更新URL
|
|
|
BASE_URL="http://${HOST}:${PORT}"
|
|
|
- UPLOAD_URL="${BASE_URL}/api/v1/parse/upload"
|
|
|
+ UPLOAD_URL="${BASE_URL}/api/v1/files/upload"
|
|
|
STATUS_URL="${BASE_URL}/parse/status"
|
|
|
REGISTER_URL="${BASE_URL}/auth/register"
|
|
|
TEXT_STORAGE_URL="${BASE_URL}/api/v1/graph/text-storage"
|
|
|
@@ -688,6 +1055,8 @@ main() {
|
|
|
NER_DOCUMENT_URL="${BASE_URL}/api/ner/document"
|
|
|
STRUCTURED_URL="${BASE_URL}/parse/structured"
|
|
|
ELEMENTS_URL="${BASE_URL}/parse/elements"
|
|
|
+ DATASOURCE_URL="${BASE_URL}/api/v1/datasource"
|
|
|
+ GRAPH_URL="${BASE_URL}/api/graph"
|
|
|
|
|
|
print_header "文件上传端到端测试"
|
|
|
echo "目标服务: $BASE_URL"
|
|
|
@@ -772,6 +1141,94 @@ main() {
|
|
|
fi
|
|
|
;;
|
|
|
|
|
|
+ nodes)
|
|
|
+ # 获取 GraphNode 列表
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "获取文档 GraphNode"
|
|
|
+ test_get_graph_nodes "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ datasource)
|
|
|
+ # 获取数据源列表
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "获取文档数据源"
|
|
|
+ test_get_datasources "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ ds-create)
|
|
|
+ # 创建数据源
|
|
|
+ if [ -z "$DS_NAME" ]; then
|
|
|
+ print_error "请指定数据源名称 (--name)"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ if [ -z "$DS_TYPE" ]; then
|
|
|
+ print_error "请指定数据源类型 (--type)"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ print_header "创建数据源"
|
|
|
+ test_create_datasource "$DOCUMENT_ID" "$DS_NAME" "$DS_TYPE" "$DS_VALUE_TYPE" "$DS_AGGREGATE"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
+ ds-bind)
|
|
|
+ # 绑定节点到数据源
|
|
|
+ if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
|
|
|
+ DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
|
|
|
+ fi
|
|
|
+ if [ -z "$DS_ID" ]; then
|
|
|
+ print_error "请指定数据源ID (--ds-id)"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ if [ -z "$NODE_IDS" ] && [ -f "${SCRIPT_DIR}/.last_node_id" ]; then
|
|
|
+ NODE_IDS=$(cat "${SCRIPT_DIR}/.last_node_id")
|
|
|
+ fi
|
|
|
+ if [ -z "$NODE_IDS" ]; then
|
|
|
+ print_error "请指定节点ID (--node-ids)"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ print_header "绑定节点到数据源"
|
|
|
+ test_bind_nodes_to_datasource "$DS_ID" "$NODE_TYPE" "$NODE_IDS" "append"
|
|
|
+ ;;
|
|
|
+
|
|
|
+ ds-value)
|
|
|
+ # 获取数据源值
|
|
|
+ if [ -z "$DS_ID" ] && [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
|
|
|
+ DS_ID=$(cat "${SCRIPT_DIR}/.last_datasource_id")
|
|
|
+ fi
|
|
|
+ if [ -z "$DS_ID" ]; then
|
|
|
+ print_error "请指定数据源ID (--ds-id)"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ print_header "获取数据源值"
|
|
|
+ test_get_datasource_value "$DS_ID"
|
|
|
+ ;;
|
|
|
+
|
|
|
+ ds-flow)
|
|
|
+ # 数据源完整测试流程
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_document_id" ]; then
|
|
|
+ DOCUMENT_ID=$(cat "${SCRIPT_DIR}/.last_document_id")
|
|
|
+ test_datasource_flow "$DOCUMENT_ID"
|
|
|
+ else
|
|
|
+ print_error "未找到上次上传的文档ID,请先上传文档"
|
|
|
+ exit 1
|
|
|
+ fi
|
|
|
+ ;;
|
|
|
+
|
|
|
upload)
|
|
|
# 仅上传
|
|
|
check_test_file
|
|
|
@@ -816,18 +1273,44 @@ main() {
|
|
|
|
|
|
print_header "步骤 5/5: 结构化解析"
|
|
|
test_structured_extraction "$DOCUMENT_ID"
|
|
|
+
|
|
|
+ # 等待自动处理完成
|
|
|
+ print_info "等待后台自动处理完成 (5秒)..."
|
|
|
+ sleep 5
|
|
|
+
|
|
|
+ # 获取 GraphNode 列表
|
|
|
+ print_header "步骤 6/6: 查看处理结果"
|
|
|
+ test_get_graph_nodes "$DOCUMENT_ID"
|
|
|
+ test_get_images "$DOCUMENT_ID"
|
|
|
+ test_get_tables "$DOCUMENT_ID"
|
|
|
;;
|
|
|
esac
|
|
|
|
|
|
print_header "测试完成"
|
|
|
- echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
|
|
|
+ if [ -n "$DOCUMENT_ID" ]; then
|
|
|
+ echo -e "${GREEN}文档ID: $DOCUMENT_ID${NC}"
|
|
|
+ fi
|
|
|
+ if [ -f "${SCRIPT_DIR}/.last_datasource_id" ]; then
|
|
|
+ echo -e "${GREEN}最后数据源ID: $(cat ${SCRIPT_DIR}/.last_datasource_id)${NC}"
|
|
|
+ fi
|
|
|
+ echo ""
|
|
|
echo "可使用以下命令进行后续操作:"
|
|
|
- echo " $0 -s # 查询状态"
|
|
|
+ echo ""
|
|
|
+ echo " === 文档处理 ==="
|
|
|
+ echo " $0 -s # 查询解析状态"
|
|
|
echo " $0 -v # 重新向量提取"
|
|
|
echo " $0 -n # 重新NER提取"
|
|
|
- echo " $0 -x # 结构化解析(提取图片表格)"
|
|
|
+ echo " $0 -x # 结构化解析"
|
|
|
echo " $0 -i # 获取图片列表"
|
|
|
echo " $0 -t # 获取表格列表"
|
|
|
+ echo ""
|
|
|
+ echo " === 数据源操作 ==="
|
|
|
+ echo " $0 -g # 获取 GraphNode 列表"
|
|
|
+ echo " $0 -d # 获取数据源列表"
|
|
|
+ echo " $0 --ds-flow # 执行数据源完整测试"
|
|
|
+ echo " $0 --ds-create --name '名称' --type entity"
|
|
|
+ echo " $0 --ds-bind --node-ids 'id1,id2'"
|
|
|
+ echo " $0 --ds-value"
|
|
|
}
|
|
|
|
|
|
# 运行主函数
|