|
|
@@ -316,9 +316,33 @@ def parse_electromagnetic_detection_record(markdown_content: str) -> Electromagn
|
|
|
|
|
|
# 使用集合跟踪已添加的测点编号,避免重复添加(处理跨页重复的情况)
|
|
|
seen_codes = set()
|
|
|
-
|
|
|
+
|
|
|
def get_address_continuation(row: List[str]) -> str:
    """Return the place-name text carried by an address continuation row.

    Intended for rows whose first column is empty, where the text belongs
    to the address of the previous record. Only columns 1 through 3 are
    examined; the first non-empty cell that contains at least one CJK
    ideograph (U+4E00-U+9FA5) is returned with surrounding whitespace
    stripped.

    Args:
        row: Cells of one table row. ``None`` or blank cells are skipped.

    Returns:
        The stripped text of the first matching cell, or ``""`` when none
        of the examined cells looks like a place name.
    """
    # Slicing never raises on short rows, so no explicit length check is needed.
    for raw_cell in row[1:4]:
        cell = (raw_cell or "").strip()
        # A cell holding a CJK ideograph can never consist solely of digits,
        # dots, dashes, colons, and whitespace, so the former separate
        # "not purely numeric/time" regex test was dead code and is dropped.
        if cell and re.search(r"[\u4e00-\u9fa5]", cell):
            return cell
    return ""
|
|
|
+
|
|
|
for table in tables:
|
|
|
for row in table:
|
|
|
+ # 首列为空且第二列(或其后)有地名类内容:视为上一条的监测地点续行,合并到上一条
|
|
|
+ first_cell = (row[0] or "").strip() if len(row) > 0 else ""
|
|
|
+ if not first_cell and record.electricMagnetic:
|
|
|
+ continuation = get_address_continuation(row)
|
|
|
+ if continuation:
|
|
|
+ last_em = record.electricMagnetic[-1]
|
|
|
+ last_em.address = (last_em.address or "").strip()
|
|
|
+ if last_em.address:
|
|
|
+ last_em.address = last_em.address + " " + continuation
|
|
|
+ else:
|
|
|
+ last_em.address = continuation
|
|
|
+ logger.debug(f"[电磁检测] 监测地点续行合并: ... + '{continuation}' -> {last_em.address}")
|
|
|
+ continue
|
|
|
if is_valid_data_row(row):
|
|
|
code = row[0].strip() if row[0] else ""
|
|
|
|