// backend/lingyue-common/src/main/java/com/lingyue/common/core/Constants.java
/**
 * Shared string constants.
 * Only the NER-related additions are listed here; the pre-existing constants are elided.
 */
public final class Constants {
// ... existing constants
// NER-related node types
public static final String NODE_NER_ENTITY = "NER_ENTITY";
public static final String NODE_NER_RELATION = "NER_RELATION";
// NER-related edge types
public static final String EDGE_HAS_NER_ENTITY = "HAS_NER_ENTITY";
public static final String EDGE_ENTITY_RELATION = "ENTITY_RELATION";
public static final String EDGE_ENTITY_TO_VALUE = "ENTITY_TO_VALUE";
// NER extraction methods
public static final String NER_METHOD_RULE = "rule";
public static final String NER_METHOD_LLM = "llm";
public static final String NER_METHOD_MANUAL = "manual";
// NER processing states
public static final String NER_PENDING = "pending";
public static final String NER_PROCESSING = "processing";
public static final String NER_COMPLETED = "completed";
public static final String NER_FAILED = "failed";
}
// backend/lingyue-ai/src/main/java/com/lingyue/ai/dto/NerEntityDTO.java
package com.lingyue.ai.dto;
import lombok.Data;
import java.math.BigDecimal;
/**
 * Transfer object describing one NER entity: its type/value, its position in the
 * source text, where it came from, and whether it has been mapped to an element.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class NerEntityDTO {
private Long id;
private String entityType; // ORG, DATE, PERSON, SCORE, etc.
private String entityName; // entity name
private String entityValue; // entity value
private BigDecimal confidence; // confidence score
// Position information
private Integer charStart;
private Integer charEnd;
private Integer line;
private String context; // surrounding context
// Source information
private Long attachmentId;
private String attachmentName;
private String extractMethod; // rule/llm/manual
private String extractTime;
// Mapping information
private String mappedElementKey; // key of the element this entity is mapped to
private Boolean isMapped;
}
// backend/lingyue-ai/src/main/java/com/lingyue/ai/service/NerEntityService.java
package com.lingyue.ai.service;
import com.lingyue.ai.dto.NerEntityDTO;
import com.lingyue.ai.dto.NerExtractRequest;
import com.lingyue.ai.dto.NerExtractResponse;
import com.lingyue.common.core.Constants;
import com.lingyue.graph.service.NodeService;
import com.lingyue.graph.service.EdgeService;
import com.lingyue.graph.service.PropertyService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
/**
 * Runs NER extraction for attachments and stores the resulting entities as graph
 * nodes (with properties) linked to the attachment by a HAS_NER_ENTITY edge.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class NerEntityService {

    private final NerService nerService;
    private final NodeService nodeService;
    private final EdgeService edgeService;
    private final PropertyService propertyService;

    /**
     * Runs NER extraction on the given text and saves every returned entity to the graph.
     *
     * @param attachmentId id of the attachment the text belongs to; also sent to the
     *                     NER service as the document id
     * @param text         text to extract entities from
     * @return the saved entities, in the order the NER service returned them; empty when
     *         the service returns no response or no entity list
     */
    @Transactional
    public List<NerEntityDTO> extractAndSaveEntities(Long attachmentId, String text) {
        log.info("开始NER提取: attachmentId={}", attachmentId);

        // 1. Ask the NER service for the entities contained in the text
        NerExtractRequest request = new NerExtractRequest();
        request.setText(text);
        request.setDocumentId(String.valueOf(attachmentId));
        NerExtractResponse response = nerService.extract(request);

        // 2. Persist each entity to the graph
        List<NerEntityDTO> savedEntities = new ArrayList<>();
        if (response == null || response.getEntities() == null) {
            // A missing payload means "nothing extracted"; do not fail the transaction with an NPE
            log.warn("NER服务未返回实体: attachmentId={}", attachmentId);
            return savedEntities;
        }
        for (NerExtractResponse.EntityItem item : response.getEntities()) {
            if (item == null) {
                continue;
            }
            savedEntities.add(saveEntity(attachmentId, item));
        }

        log.info("NER提取完成: attachmentId={}, entityCount={}",
                attachmentId, savedEntities.size());
        return savedEntities;
    }

    /**
     * Saves a single entity: creates the node, writes its properties, links it to the
     * attachment and returns a DTO mirroring what was stored.
     */
    private NerEntityDTO saveEntity(Long attachmentId, NerExtractResponse.EntityItem item) {
        // The key mixes the text with its span so that the same text found at two
        // positions within the same millisecond still gets distinct keys. Hex formatting
        // avoids the negative value Math.abs(Integer.MIN_VALUE) would produce, and string
        // concatenation tolerates a null text.
        String spanSignature = item.getText() + "@" + item.getStartPos() + "-" + item.getEndPos();
        String entityKey = "entity_" + System.currentTimeMillis() + "_"
                + Integer.toHexString(spanSignature.hashCode());

        Long entityId = nodeService.createNode(
                Constants.NODE_NER_ENTITY,
                entityKey,
                item.getText(),
                null // createdBy
        );

        // Entity properties
        propertyService.setNodeProperty(entityId, "entity_type", item.getType());
        propertyService.setNodeProperty(entityId, "entity_value", item.getText());
        propertyService.setNodeProperty(entityId, "confidence",
                String.valueOf(item.getConfidence()));
        propertyService.setNodeProperty(entityId, "char_start",
                String.valueOf(item.getStartPos()));
        propertyService.setNodeProperty(entityId, "char_end",
                String.valueOf(item.getEndPos()));
        propertyService.setNodeProperty(entityId, "extract_method",
                Constants.NER_METHOD_RULE);

        // attachment -> entity edge
        edgeService.createEdge(
                Constants.EDGE_HAS_NER_ENTITY,
                attachmentId,
                entityId,
                0 // sortOrder
        );

        // Build the DTO returned to the caller
        NerEntityDTO dto = new NerEntityDTO();
        dto.setId(entityId);
        dto.setEntityType(item.getType());
        dto.setEntityName(item.getText());
        dto.setEntityValue(item.getText());
        dto.setConfidence(item.getConfidence());
        dto.setCharStart(item.getStartPos());
        dto.setCharEnd(item.getEndPos());
        dto.setAttachmentId(attachmentId);
        dto.setExtractMethod(Constants.NER_METHOD_RULE);
        dto.setIsMapped(false);
        return dto;
    }

    /**
     * Returns the NER entities of an attachment.
     *
     * @param attachmentId id of the attachment
     * @return currently always an empty list; the query is not implemented yet
     */
    public List<NerEntityDTO> getEntitiesByAttachment(Long attachmentId) {
        // TODO: implement the query
        return new ArrayList<>();
    }

    /**
     * Marks an entity as mapped to an element by writing the mapping properties on the
     * entity node.
     *
     * @param entityId   id of the entity node
     * @param elementKey key of the element the entity is mapped to
     */
    @Transactional
    public void mapEntityToElement(Long entityId, String elementKey) {
        log.info("映射实体到要素: entityId={}, elementKey={}", entityId, elementKey);
        // Intended steps; only step 3 is implemented so far:
        // 1. look up the element node
        // 2. create the ENTITY_TO_VALUE edge
        // 3. update the entity's mapping properties
        propertyService.setNodeProperty(entityId, "mapped_element_key", elementKey);
        propertyService.setNodeProperty(entityId, "is_mapped", "true");
    }
}
// backend/lingyue-ai/src/main/java/com/lingyue/ai/controller/NerController.java
package com.lingyue.ai.controller;
import com.lingyue.ai.dto.NerEntityDTO;
import com.lingyue.ai.service.NerEntityService;
import com.lingyue.common.core.Result;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints for NER extraction, entity lookup and entity-to-element mapping.
 * Every handler delegates directly to {@link NerEntityService}.
 */
@Slf4j
@RestController
@RequestMapping("/api/v1/ner")
@RequiredArgsConstructor
public class NerController {

    private final NerEntityService nerEntityService;

    /**
     * Runs NER extraction on the request body text for the given attachment.
     *
     * @param attachmentId attachment id taken from the path
     * @param text         request body, passed to the service as the text to analyse
     * @return the entities returned by the service, wrapped in a {@code Result}
     */
    @PostMapping("/attachments/{attachmentId}/extract")
    public Result<List<NerEntityDTO>> extractEntities(
            @PathVariable Long attachmentId,
            @RequestBody String text) {
        return Result.ok(nerEntityService.extractAndSaveEntities(attachmentId, text));
    }

    /**
     * Lists the NER entities of an attachment.
     *
     * @param attachmentId attachment id taken from the path
     * @return the entities returned by the service, wrapped in a {@code Result}
     */
    @GetMapping("/attachments/{attachmentId}/entities")
    public Result<List<NerEntityDTO>> getEntities(@PathVariable Long attachmentId) {
        return Result.ok(nerEntityService.getEntitiesByAttachment(attachmentId));
    }

    /**
     * Maps an entity to an element.
     *
     * @param entityId   entity id taken from the path
     * @param elementKey key of the target element, taken from the request parameter
     * @return an empty success {@code Result}
     */
    @PostMapping("/entities/{entityId}/map")
    public Result<?> mapEntity(
            @PathVariable Long entityId,
            @RequestParam String elementKey) {
        nerEntityService.mapEntityToElement(entityId, elementKey);
        return Result.ok();
    }
}
# python-services/ner-service/app/services/ner_service.py
# 在 _extract_by_rules 方法中添加智报专用规则
async def _extract_by_rules(self, text: str, entity_types: Optional[List[str]] = None):
"""Rule-based NER extraction (Zhibao-enhanced version)."""
# Maps an entity type to the list of regex patterns tried for it; each pattern
# has one capture group holding the entity text.
rules = {
# ... existing rules
# === Zhibao-specific rules ===
"SCORE": [
# Review score, e.g. "93.33分"
r'(\d+\.?\d*分)',
r'得分[::]\s*(\d+\.?\d*)',
],
"LEVEL": [
# Level, e.g. 一级 / 二级
r'(一级|二级|三级)',
r'级别[::]\s*(一级|二级|三级)',
],
"CERTIFICATE_CODE": [
# Certificate number, e.g. ZGDIDBOY-083
r'(ZGDIDBOY-\d+)',
# NOTE(review): this pattern is identical to the first PROJECT_CODE pattern,
# so the same text can match both types
r'([A-Z]+-\d+-\d+)',
r'证书编号[::]\s*([A-Z0-9\-]+)',
],
"REVIEW_CODE": [
# Review code, e.g. 5.1.1.1 (the pattern only matches codes starting with "5.")
r'(5\.\d+(?:\.\d+)*)',
],
"COMPANY_ALIAS": [
# Company short name (needs surrounding context to be meaningful)
r'简称[::「『]([^」』::]{2,10})[」』]',
r'以下简称[「『""]([^」』""]{2,10})[」』""]',
],
"PROJECT_CODE": [
# Project number, e.g. BZ-0092-2024
r'([A-Z]+-\d+-\d+)',
r'项目编号[::]\s*([A-Z0-9\-]+)',
],
"REVIEW_ITEM": [
# Review items: a fixed list of known item names
r'(目标职责|制度化管理|教育培训|现场管理|安全风险管控|应急管理|事故管理|持续改进)',
],
}
# ... the rest of the extraction logic is unchanged
# python-services/ner-service/app/services/table_extractor.py
from typing import List, Dict
import re
class TableExtractor:
    """Extracts tabular data from plain text."""

    # Two or more consecutive lines that each contain a pipe-delimited row.
    # Blanks before the first pipe of a continuation line and after the last
    # pipe of any line are tolerated.
    _TABLE_BLOCK = re.compile(r'(\|[^\n]+\|[ \t]*(?:\n[ \t]*\|[^\n]+\|[ \t]*)+)')
    # A Markdown delimiter cell such as ---, :--- or :---:
    _DELIMITER_CELL = re.compile(r':?-{3,}:?')

    def extract_tables(self, text: str) -> List[Dict]:
        """
        Extract tables from text.

        Args:
            text: the text to scan; None or empty yields an empty list.

        Returns:
            A list of dicts of the form:
            [
                {
                    "table_type": "review_project",  # table type
                    "headers": ["项目名称", "简称", "类型"],
                    "rows": [
                        ["大邑地勘项目", "大邑项目", "在建项目"],
                        ...
                    ]
                }
            ]
        """
        if not text:
            return []
        tables = []
        # Method 1: delimiter-based detection (simple tables)
        tables.extend(self._extract_simple_tables(text))
        # Method 2: keyword-based detection (specific, known tables)
        tables.extend(self._extract_known_tables(text))
        return tables

    @staticmethod
    def _split_row(row: str) -> List[str]:
        """Split one pipe-delimited row into stripped cells, keeping empty cells."""
        inner = row.strip()
        if inner.startswith('|'):
            inner = inner[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        return [cell.strip() for cell in inner.split('|')]

    def _is_delimiter_row(self, cells: List[str]) -> bool:
        """Return True when every cell is a Markdown delimiter such as ---."""
        return bool(cells) and all(self._DELIMITER_CELL.fullmatch(c) for c in cells)

    def _extract_simple_tables(self, text: str) -> List[Dict]:
        """
        Extract simple pipe-delimited tables.

        Only `|`-delimited rows are recognised; tab-separated tables are not handled.
        Empty cells are preserved so columns stay aligned with the headers, and a
        Markdown delimiter row directly below the header is not reported as data.
        """
        tables = []
        # Normalise line endings so CRLF / CR text matches the \n-based pattern
        normalized = text.replace('\r\n', '\n').replace('\r', '\n')
        for match in self._TABLE_BLOCK.finditer(normalized):
            lines = match.group(1).strip().split('\n')
            headers = self._split_row(lines[0])
            body = lines[1:]
            if body and self._is_delimiter_row(self._split_row(body[0])):
                body = body[1:]
            # Rows whose cells are all empty carry no data and are dropped
            data_rows = [cells for cells in map(self._split_row, body) if any(cells)]
            if any(headers) and data_rows:
                tables.append({
                    "table_type": "unknown",
                    "headers": headers,
                    "rows": data_rows
                })
        return tables

    def _extract_known_tables(self, text: str) -> List[Dict]:
        """Extract tables of known types, selected by keywords found in the text."""
        tables = []
        # Example: review-project table
        if "复审项目" in text or "评审项目" in text:
            table = self._extract_review_project_table(text)
            if table:
                tables.append(table)
        # Example: reviewer table
        if "评审组" in text or "评审人员" in text:
            table = self._extract_reviewer_table(text)
            if table:
                tables.append(table)
        return tables

    def _extract_review_project_table(self, text: str) -> Dict:
        """Extract the review-project table; not implemented, currently returns None."""
        # TODO: implement
        return None

    def _extract_reviewer_table(self, text: str) -> Dict:
        """Extract the reviewer table; not implemented, currently returns None."""
        # TODO: implement
        return None


# Module-level shared instance
table_extractor = TableExtractor()
<!-- frontend/vue-demo/src/views/NerAnalysis.vue -->
<template>
  <div class="ner-analysis-container">
    <!-- Summary statistic cards -->
    <el-row :gutter="20" class="stats-row">
      <el-col :span="6">
        <el-card>
          <el-statistic title="实体总数" :value="statistics.totalEntities">
            <template #suffix>个</template>
          </el-statistic>
        </el-card>
      </el-col>
      <el-col :span="6">
        <el-card>
          <el-statistic title="已映射" :value="statistics.mappedEntities">
            <template #suffix>个</template>
          </el-statistic>
        </el-card>
      </el-col>
      <el-col :span="6">
        <el-card>
          <el-statistic title="关系数" :value="statistics.totalRelations">
            <template #suffix>个</template>
          </el-statistic>
        </el-card>
      </el-col>
      <el-col :span="6">
        <el-card>
          <el-statistic
            title="平均置信度"
            :value="statistics.avgConfidence"
            :precision="2">
            <template #suffix>%</template>
          </el-statistic>
        </el-card>
      </el-col>
    </el-row>
    <!-- Main content -->
    <el-card class="main-content">
      <el-tabs v-model="activeTab">
        <!-- Entity list -->
        <el-tab-pane label="实体列表" name="entities">
          <!-- elements is required by the list's "map to element" dialog -->
          <entity-list-view
            :entities="entities"
            :elements="elements"
            @map="handleMapEntity"
            @delete="handleDeleteEntity" />
        </el-tab-pane>
        <!-- Text annotation -->
        <el-tab-pane label="文本标注" name="annotation">
          <text-annotation-view
            :text="sourceText"
            :entities="entities"
            @entity-click="handleEntityClick" />
        </el-tab-pane>
        <!-- Element mapping -->
        <el-tab-pane label="要素映射" name="mapping">
          <entity-mapping-view
            :entities="entities"
            :elements="elements"
            @map="handleMapEntity" />
        </el-tab-pane>
        <!-- Relation graph -->
        <el-tab-pane label="关系图谱" name="graph">
          <relation-graph-view
            :entities="entities"
            :relations="relations" />
        </el-tab-pane>
      </el-tabs>
    </el-card>
  </div>
</template>
<script setup>
import { ref, computed, onMounted } from 'vue'
import { useRoute } from 'vue-router'
import { getNerEntities, mapEntityToElement } from '@/api/ner'
import EntityListView from './components/EntityListView.vue'
import TextAnnotationView from './components/TextAnnotationView.vue'
import EntityMappingView from './components/EntityMappingView.vue'
import RelationGraphView from './components/RelationGraphView.vue'

const route = useRoute()
const attachmentId = ref(route.params.attachmentId)
const activeTab = ref('entities')
const entities = ref([])
const relations = ref([])
const sourceText = ref('')
const elements = ref([])

// Coerce a confidence value to a finite number. Missing or non-numeric values count
// as 0 so one bad entity cannot turn the average into NaN, and numeric strings are
// added arithmetically instead of being concatenated.
const toConfidence = (value) => {
  const n = Number(value)
  return Number.isFinite(n) ? n : 0
}

// Derived statistics shown in the summary cards
const statistics = computed(() => {
  const list = entities.value
  const total = list.length
  const confidenceSum = list.reduce((sum, e) => sum + toConfidence(e.confidence), 0)
  return {
    totalEntities: total,
    mappedEntities: list.filter(e => e.isMapped).length,
    totalRelations: relations.value.length,
    // Average confidence as a percentage; 0 when there are no entities
    avgConfidence: total > 0 ? confidenceSum / total * 100 : 0
  }
})

// Load the entities of the current attachment. Rejects when the request fails so
// callers can decide how to react.
const loadData = async () => {
  const res = await getNerEntities(attachmentId.value)
  // Keep entities an array even when the response carries no data
  entities.value = Array.isArray(res && res.data) ? res.data : []
}

// Map an entity to an element, then refresh the list
const handleMapEntity = async (entityId, elementKey) => {
  await mapEntityToElement(entityId, elementKey)
  await loadData()
}

// Delete an entity
const handleDeleteEntity = async (entityId) => {
  // TODO: implement deletion
}

// Entity clicked in the annotation view
const handleEntityClick = (entity) => {
  console.log('点击实体:', entity)
}

onMounted(() => {
  // Report a failed initial load instead of leaving an unhandled promise rejection
  loadData().catch((err) => {
    console.error('加载NER实体失败:', err)
  })
})
</script>
<style scoped>
/* Page wrapper: padding around the whole view */
.ner-analysis-container {
padding: 20px;
}
/* Gap between the statistic cards row and the main card */
.stats-row {
margin-bottom: 20px;
}
/* Minimum height of the main tabbed card */
.main-content {
min-height: 600px;
}
</style>
<!-- frontend/vue-demo/src/views/components/EntityListView.vue -->
<template>
  <div class="entity-list">
    <!-- Filter toolbar -->
    <el-row class="toolbar">
      <el-col :span="12">
        <el-input
          v-model="searchText"
          placeholder="搜索实体..."
          clearable>
          <template #prefix>
            <el-icon><Search /></el-icon>
          </template>
        </el-input>
      </el-col>
      <el-col :span="12" class="filter-group">
        <el-select v-model="filterType" placeholder="实体类型" clearable>
          <el-option label="全部" value="" />
          <el-option label="机构" value="ORG" />
          <el-option label="日期" value="DATE" />
          <el-option label="人名" value="PERSON" />
          <el-option label="得分" value="SCORE" />
          <el-option label="级别" value="LEVEL" />
        </el-select>
        <el-select v-model="filterMapped" placeholder="映射状态" clearable>
          <el-option label="全部" value="" />
          <el-option label="已映射" value="true" />
          <el-option label="未映射" value="false" />
        </el-select>
      </el-col>
    </el-row>
    <!-- Entity table -->
    <el-table
      :data="filteredEntities"
      stripe
      border
      height="500">
      <el-table-column prop="entityType" label="类型" width="100">
        <template #default="{ row }">
          <el-tag :type="getTypeColor(row.entityType)">
            {{ row.entityType }}
          </el-tag>
        </template>
      </el-table-column>
      <el-table-column prop="entityName" label="实体名称" width="200" />
      <el-table-column prop="confidence" label="置信度" width="100">
        <template #default="{ row }">
          <!-- Rounded and clamped to 0-100: avoids float artifacts such as
               56.99999999999999 and NaN when confidence is missing -->
          <el-progress
            :percentage="Math.min(100, Math.max(0, Math.round((Number(row.confidence) || 0) * 100)))"
            :color="getConfidenceColor(row.confidence)" />
        </template>
      </el-table-column>
      <el-table-column prop="context" label="上下文" show-overflow-tooltip />
      <el-table-column prop="isMapped" label="映射状态" width="120">
        <template #default="{ row }">
          <el-tag v-if="row.isMapped" type="success">已映射</el-tag>
          <el-tag v-else type="info">未映射</el-tag>
        </template>
      </el-table-column>
      <el-table-column label="操作" width="200" fixed="right">
        <template #default="{ row }">
          <el-button
            size="small"
            @click="handleMap(row)">
            映射
          </el-button>
          <el-button
            size="small"
            type="danger"
            @click="handleDelete(row)">
            删除
          </el-button>
        </template>
      </el-table-column>
    </el-table>
    <!-- Mapping dialog -->
    <el-dialog v-model="mapDialogVisible" title="映射到要素">
      <el-select v-model="selectedElementKey" placeholder="选择要素">
        <el-option
          v-for="elem in elements"
          :key="elem.key"
          :label="elem.label"
          :value="elem.key" />
      </el-select>
      <template #footer>
        <el-button @click="mapDialogVisible = false">取消</el-button>
        <el-button type="primary" @click="confirmMap">确定</el-button>
      </template>
    </el-dialog>
  </div>
</template>
<script setup>
import { ref, computed } from 'vue'
import { Search } from '@element-plus/icons-vue'

// Both props default to an empty array so the component renders safely when a
// parent omits one of them
const props = defineProps({
  entities: { type: Array, default: () => [] },
  elements: { type: Array, default: () => [] }
})
const emit = defineEmits(['map', 'delete'])

const searchText = ref('')
const filterType = ref('')
const filterMapped = ref('')
const mapDialogVisible = ref(false)
const currentEntity = ref(null)
const selectedElementKey = ref('')

// Entities after applying the search text, type filter and mapping-state filter
const filteredEntities = computed(() => {
  let result = props.entities || []
  if (searchText.value) {
    const keyword = searchText.value
    // entityName / context may be null in the payload; treat them as empty text
    result = result.filter(e =>
      String(e.entityName || '').includes(keyword) ||
      String(e.context || '').includes(keyword)
    )
  }
  if (filterType.value) {
    result = result.filter(e => e.entityType === filterType.value)
  }
  if (filterMapped.value) {
    const isMapped = filterMapped.value === 'true'
    // Boolean() so a null/undefined isMapped counts as "not mapped"
    result = result.filter(e => Boolean(e.isMapped) === isMapped)
  }
  return result
})

// Tag colour for an entity type
const getTypeColor = (type) => {
  const colors = {
    'ORG': 'primary',
    'DATE': 'success',
    'PERSON': 'warning',
    'SCORE': 'danger',
    'LEVEL': 'info'
  }
  return colors[type] || ''
}

// Progress-bar colour for a confidence value
const getConfidenceColor = (confidence) => {
  if (confidence >= 0.8) return '#67C23A'
  if (confidence >= 0.6) return '#E6A23C'
  return '#F56C6C'
}

// Open the mapping dialog for an entity
const handleMap = (entity) => {
  currentEntity.value = entity
  // Start from this entity's own mapping rather than the previous dialog's selection
  selectedElementKey.value = entity.mappedElementKey || ''
  mapDialogVisible.value = true
}

// Confirm the mapping chosen in the dialog
const confirmMap = () => {
  if (currentEntity.value && selectedElementKey.value) {
    emit('map', currentEntity.value.id, selectedElementKey.value)
    mapDialogVisible.value = false
  }
}

// Request deletion of an entity
const handleDelete = (entity) => {
  emit('delete', entity.id)
}
</script>
<style scoped>
/* Gap between the filter toolbar and the table */
.toolbar {
margin-bottom: 20px;
}
/* Right-aligned row of filter selects with a gap between them */
.filter-group {
display: flex;
gap: 10px;
justify-content: flex-end;
}
</style>
-- database/migrations/003_add_ner_support.sql
-- Adds the node types and edge types needed for NER support
-- NOTE: adjust to the existing schema before actually running this
-- 1. If a node_types table exists, add the new types
--    (on a duplicate key only type_name is refreshed; description is left as is)
INSERT INTO node_types (type_code, type_name, description) VALUES
('NER_ENTITY', 'NER实体', 'NER提取的命名实体'),
('NER_RELATION', 'NER关系', '实体间的关系')
ON DUPLICATE KEY UPDATE type_name = VALUES(type_name);
-- 2. If an edge_types table exists, add the new types
--    (on a duplicate key only type_name is refreshed; description is left as is)
INSERT INTO edge_types (type_code, type_name, description) VALUES
('HAS_NER_ENTITY', '包含NER实体', '附件包含的NER实体'),
('ENTITY_RELATION', '实体关系', '实体之间的语义关系'),
('ENTITY_TO_VALUE', '实体到值', '实体映射到要素值')
ON DUPLICATE KEY UPDATE type_name = VALUES(type_name);
-- 3. Add NER-related indexes (if needed)
-- NOTE(review): the statements above use MySQL-style ON DUPLICATE KEY UPDATE, while
-- the index below is a partial index (CREATE INDEX ... WHERE), which MySQL does not
-- support; confirm the target database before enabling it.
-- CREATE INDEX idx_ner_entity_type ON graph_properties(node_id, property_key)
-- WHERE property_key = 'entity_type';
以上代码提供了NER阶段的核心实现框架,可以根据实际需求进行调整和扩展。