|
|
@@ -44,6 +44,7 @@ public class TextChunkService {
|
|
|
log.warn("文本为空,跳过分块: documentId={}", documentId);
|
|
|
return Collections.emptyList();
|
|
|
}
|
|
|
+ log.info("开始文本分块: documentId={}, textLength={}", documentId, text.length());
|
|
|
|
|
|
// 先删除已有的分块
|
|
|
textChunkRepository.deleteByDocumentId(documentId);
|
|
|
@@ -51,6 +52,7 @@ public class TextChunkService {
|
|
|
List<TextChunk> chunks = new ArrayList<>();
|
|
|
int start = 0;
|
|
|
int chunkIndex = 0;
|
|
|
+ int lastReportedChunk = 0;
|
|
|
|
|
|
while (start < text.length()) {
|
|
|
int end = Math.min(start + chunkSize, text.length());
|
|
|
@@ -98,6 +100,13 @@ public class TextChunkService {
|
|
|
if (start >= text.length() || start <= 0) {
|
|
|
break;
|
|
|
}
|
|
|
+
|
|
|
+ // 进度日志:每 20 个分块输出一次
|
|
|
+ if (chunks.size() - lastReportedChunk >= 20) {
|
|
|
+ log.info("文本分块进度: documentId={}, chunks={}, pos={}/{}",
|
|
|
+ documentId, chunks.size(), end, text.length());
|
|
|
+ lastReportedChunk = chunks.size();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
log.info("文档 {} 分块完成,共 {} 块", documentId, chunks.size());
|