diff --git a/config.yaml b/config.yaml index 41ea256..bcade6a 100644 --- a/config.yaml +++ b/config.yaml @@ -200,6 +200,25 @@ directories: - "检查时间" bottom_key_mapping: "检查时间": "collectionTime" + - path: "./察右前旗巴音塔拉中心卫生院" + recognition_type: "BYTLZX" + key_mapping: + "姓名": "name" + "性别": "gender" + "年龄": "age" + "ID": "examId" + "HR": "hr" + "P": "P" + "PR": "pr" + "QRS": "qrs" + "QT/QTC": "qt/qtc" + "P/QRS/T": "pAxle/qrsAxle/tAxle" + "RV5/SV1": "rv5/sv1" + "RV5+SV1": "rv5Sv1" + bottom_key_words: + - "检查时间" + bottom_key_mapping: + "检查时间": "collectionTime" # OCR程序与语言包路径配置 # 新增Tesseract相关配置 @@ -208,7 +227,7 @@ directories: # language 必须,指定语言包 tesseract: - bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe" + bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe" data_path: "./tessdata" # data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata" language: "chi_sim+eng" diff --git a/src/main/java/com/ocr/FolderMonitor.java b/src/main/java/com/ocr/FolderMonitor.java index 5098f13..4256e15 100644 --- a/src/main/java/com/ocr/FolderMonitor.java +++ b/src/main/java/com/ocr/FolderMonitor.java @@ -34,6 +34,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class FolderMonitor { private static final Logger logger = LoggerFactory.getLogger(FolderMonitor.class); @@ -54,11 +56,11 @@ public class FolderMonitor { tessConfig = (Map) configManager.getConfig().get("tesseract"); } String tessdataPathStr = tessConfig != null && tessConfig.containsKey("data_path") - ? tessConfig.get("data_path").toString() - : java.nio.file.Paths.get("tessdata").toAbsolutePath().toString(); + ? tessConfig.get("data_path").toString() + : java.nio.file.Paths.get("tessdata").toAbsolutePath().toString(); String language = tessConfig != null && tessConfig.containsKey("language") - ? tessConfig.get("language").toString() - : "chi_sim+eng"; + ? tessConfig.get("language").toString() + : "chi_sim+eng"; this.tesseract.setDatapath(tessdataPathStr); this.tesseract.setLanguage(language); this.httpClient = HttpClients.createDefault(); @@ -67,17 +69,17 @@ public class FolderMonitor { public Map processImage(Path imagePath) { try { logger.info("开始处理图片: {}", imagePath); - + // 检查文件是否已处理 if (isFileProcessed(imagePath)) { logger.info("文件已处理过,跳过: {}", imagePath); return new HashMap<>(); } - + // 获取图片的完整路径 String imageFullPath = imagePath.toAbsolutePath().toString(); logger.info("图片完整路径: {}", imageFullPath); - + // 根据recognitionType分发识别逻辑 String recognitionType = directoryConfig.getRecognitionType(); Map extractedData; @@ -95,6 +97,9 @@ public class FolderMonitor { case "LQXSD": extractedData = processImageWithLQXSD(imageFullPath); break; + case "BYTLZX": + extractedData = processImageWithBYTLZX(imageFullPath); + break; case "rotate90": extractedData = processImageWithRotate90(imageFullPath); break; @@ -107,59 +112,61 @@ public class FolderMonitor { logger.error("识别逻辑处理异常", e); return new HashMap<>(); } - + // 检查是否缺少必需的关键字 List requiredKeys = configManager.getRequiredKeys(); if (requiredKeys != null && !requiredKeys.isEmpty()) { List missingKeys = new ArrayList<>(); for (String key : requiredKeys) { - if (!extractedData.containsKey(key) || extractedData.get(key) == null || extractedData.get(key).trim().isEmpty()) { + if (!extractedData.containsKey(key) || extractedData.get(key) == null + || extractedData.get(key).trim().isEmpty()) { missingKeys.add(key); } } - + if (!missingKeys.isEmpty()) { logger.warn("图片缺少必需的关键字: {}, 文件路径: {}", missingKeys, imageFullPath); - + // 记录缺少关键字的文件路径 String missingKeyFilesPath = configManager.getMissingKeyFilesPath(); - Files.write(Paths.get(missingKeyFilesPath), - (imageFullPath + "\n").getBytes(StandardCharsets.UTF_8), - java.nio.file.StandardOpenOption.APPEND); - + Files.write(Paths.get(missingKeyFilesPath), + (imageFullPath + "\n").getBytes(StandardCharsets.UTF_8), + java.nio.file.StandardOpenOption.APPEND); + // 记录缺少关键字的识别结果 Map missingKeyResult = new HashMap<>(); missingKeyResult.put("file_path", imageFullPath); - missingKeyResult.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + missingKeyResult.put("process_time", + LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); missingKeyResult.put("missing_keys", missingKeys); missingKeyResult.put("extracted_data", extractedData); - + String missingKeyResultsPath = configManager.getMissingKeyResultsPath(); ObjectMapper mapper = new ObjectMapper(); String jsonResult = mapper.writeValueAsString(missingKeyResult) + "\n"; - Files.write(Paths.get(missingKeyResultsPath), - jsonResult.getBytes(StandardCharsets.UTF_8), - java.nio.file.StandardOpenOption.APPEND); + Files.write(Paths.get(missingKeyResultsPath), + jsonResult.getBytes(StandardCharsets.UTF_8), + java.nio.file.StandardOpenOption.APPEND); } } - + // 创建结果对象 Map resultObject = new HashMap<>(); resultObject.put("file_path", imageFullPath); resultObject.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); resultObject.put("directory", directoryConfig.getPath()); resultObject.put("extracted_data", extractedData); - + logger.info("保存识别结果: {}", resultObject); - + // 保存到当前结果文件 saveToCurrentResults(resultObject); - + // 标记文件为已处理 markFileAsProcessed(imagePath); - + return extractedData; - + } catch (Exception e) { logger.error("处理图片失败: " + imagePath, e); return new HashMap<>(); @@ -171,26 +178,27 @@ public class FolderMonitor { // 执行OCR识别 String result = tesseract.doOCR(new File(imageFullPath)); logger.info("OCR识别结果: {}", result); - + // 处理OCR结果,使用配置文件中的key进行匹配 Map extractedData = new HashMap<>(); String[] lines = result.split("\\r?\\n"); logger.info("OCR结果分割为 {} 行", lines.length); - + Map keyMapping = directoryConfig.getKeyMapping(); logger.info("当前目录的key映射: {}", keyMapping); - + for (String line : lines) { line = line.trim().replaceAll("\\s+", " "); - if (line.isEmpty()) continue; - + if (line.isEmpty()) + continue; + logger.info("处理行: {}", line); - + // 在一行中查找所有key for (Map.Entry entry : keyMapping.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - + if (line.contains(key)) { // 提取key后面的值 String[] parts = line.split(key); @@ -202,7 +210,7 @@ public class FolderMonitor { } } } - + return extractedData; } @@ -211,109 +219,111 @@ public class FolderMonitor { BufferedImage image = ImageIO.read(new File(imageFullPath)); int imageWidth = image.getWidth(); int imageHeight = image.getHeight(); - + // 获取识别区域 DirectoryConfig.RecognitionArea area = directoryConfig.getRecognitionArea(); int startX = (int) (imageWidth * area.getStartX() / 100.0); int startY = (int) (imageHeight * area.getStartY() / 100.0); int width = (int) (imageWidth * area.getWidth() / 100.0); int height = (int) (imageHeight * area.getHeight() / 100.0); - + // 截取识别区域 BufferedImage recognitionArea = image.getSubimage(startX, startY, width, height); - + Map allExtractedData = new HashMap<>(); List splitBlocks = directoryConfig.getSplitBlocks(); - + // 记录分块数量 logger.info("处理图片分块,共 {} 个分块", splitBlocks.size()); - + int currentX = 0; for (int i = 0; i < splitBlocks.size(); i++) { DirectoryConfig.SplitBlock block = splitBlocks.get(i); // 计算当前块的宽度 int blockWidth = (int) (width * block.getWidthPercent() / 100.0); - + // 确保不超出边界 if (currentX + blockWidth > width) { blockWidth = width - currentX; - logger.warn("分块 {} 宽度超出边界,调整为 {} 像素", i+1, blockWidth); + logger.warn("分块 {} 宽度超出边界,调整为 {} 像素", i + 1, blockWidth); } - + if (blockWidth <= 0) { - logger.warn("分块 {} 宽度为0或负值,跳过处理", i+1); + logger.warn("分块 {} 宽度为0或负值,跳过处理", i + 1); continue; } - + // 截取当前块的图片 BufferedImage blockImage = recognitionArea.getSubimage(currentX, 0, blockWidth, height); - + // 保存临时图片 File tempFile = File.createTempFile("block_" + i + "_", ".png"); ImageIO.write(blockImage, "PNG", tempFile); - + // 执行OCR识别 String blockResult = tesseract.doOCR(tempFile); - logger.info("分块 {} OCR识别结果: {}", i+1, blockResult); - + logger.info("分块 {} OCR识别结果: {}", i + 1, blockResult); + // 删除临时文件 tempFile.delete(); - + // 处理当前块的识别结果(使用全新的匹配逻辑) processBlockResult(blockResult, block.getKeyMapping(), allExtractedData); - + currentX += blockWidth; } - + return allExtractedData; } - + /** * 处理一个区块的OCR识别结果 + * * @param blockResult OCR识别结果文本 - * @param keyMapping 关键词映射 - * @param resultMap 结果Map + * @param keyMapping 关键词映射 + * @param resultMap 结果Map */ private void processBlockResult(String blockResult, Map keyMapping, Map resultMap) { if (blockResult == null || blockResult.trim().isEmpty()) { return; } - + // 按行分割OCR结果 String[] lines = blockResult.split("\\r?\\n"); - + // 按关键字长度排序,优先匹配较长的关键字(如"P/QRS/T"优先于"QRS") List> sortedKeys = new ArrayList<>(keyMapping.entrySet()); sortedKeys.sort((e1, e2) -> e2.getKey().length() - e1.getKey().length()); - + // 为每一行寻找匹配的关键字 for (String line : lines) { line = line.trim(); - if (line.isEmpty()) continue; - + if (line.isEmpty()) + continue; + logger.info("处理行: {}", line); - + for (Map.Entry entry : sortedKeys) { String key = entry.getKey(); String fieldName = entry.getValue(); - + if (line.contains(key)) { // 找到关键字,提取后面的值 int keyIndex = line.indexOf(key); int valueStartIndex = keyIndex + key.length(); - + if (valueStartIndex < line.length()) { String value = line.substring(valueStartIndex).trim(); - + // 处理值前面的冒号、空格等 if (value.startsWith(":") || value.startsWith(":")) { value = value.substring(1).trim(); } - + // 存储结果 resultMap.put(fieldName, value); logger.info("找到匹配: {} = {}", fieldName, value); - + // 找到关键字后就不再处理这一行 break; } @@ -330,9 +340,9 @@ public class FolderMonitor { private void markFileAsProcessed(Path imagePath) throws IOException { String processedFilesPath = configManager.getProcessedFilesPath(); - Files.write(Paths.get(processedFilesPath), - (imagePath.toString() + "\n").getBytes(StandardCharsets.UTF_8), - java.nio.file.StandardOpenOption.APPEND); + Files.write(Paths.get(processedFilesPath), + (imagePath.toString() + "\n").getBytes(StandardCharsets.UTF_8), + java.nio.file.StandardOpenOption.APPEND); } private void saveToCurrentResults(Map resultObject) throws IOException { @@ -340,18 +350,18 @@ public class FolderMonitor { // 创建包含单个结果的数组 List> currentResults = new ArrayList<>(); currentResults.add(resultObject); - + // 保存当前结果(覆盖写入) String jsonStr = objectMapper.writerWithDefaultPrettyPrinter() .writeValueAsString(currentResults); - Files.write(Paths.get(currentResultsPath), - jsonStr.getBytes(StandardCharsets.UTF_8)); - + Files.write(Paths.get(currentResultsPath), + jsonStr.getBytes(StandardCharsets.UTF_8)); + logger.info("保存当前识别结果到: {}", currentResultsPath); - + // 合并到历史结果 mergeCurrentResultsToAll(resultObject); - + // 上传当前结果到后端 try { uploadResultsToBackend(currentResultsPath); @@ -363,7 +373,7 @@ public class FolderMonitor { private void mergeCurrentResultsToAll(Map resultObject) throws IOException { String allResultsPath = configManager.getAllResultsPath(); List> allResults; - + // 读取现有结果 if (Files.exists(Paths.get(allResultsPath))) { String content = new String(Files.readAllBytes(Paths.get(allResultsPath))); @@ -371,21 +381,22 @@ public class FolderMonitor { } else { allResults = new ArrayList<>(); } - + // 添加新结果 allResults.add(resultObject); - + // 保存更新后的结果 String jsonStr = objectMapper.writerWithDefaultPrettyPrinter() .writeValueAsString(allResults); - Files.write(Paths.get(allResultsPath), - jsonStr.getBytes(StandardCharsets.UTF_8)); - + Files.write(Paths.get(allResultsPath), + jsonStr.getBytes(StandardCharsets.UTF_8)); + logger.info("合并结果到历史记录: {}", allResultsPath); } /** * 将OCR结果JSON文件上传到后端服务器 + * * @param jsonFilePath JSON文件路径 * @throws IOException IO异常 */ @@ -408,13 +419,16 @@ public class FolderMonitor { org.apache.http.client.methods.HttpPost httpPost = new org.apache.http.client.methods.HttpPost(uploadUrl); httpPost.setHeader("Content-Type", "application/json"); httpPost.setHeader("Accept", "application/json"); - httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"); - httpPost.setEntity(new org.apache.http.entity.StringEntity(jsonContent, java.nio.charset.StandardCharsets.UTF_8)); + httpPost.setHeader("User-Agent", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"); + httpPost.setEntity( + new org.apache.http.entity.StringEntity(jsonContent, java.nio.charset.StandardCharsets.UTF_8)); // 发送请求 try (org.apache.http.client.methods.CloseableHttpResponse response = httpClient.execute(httpPost)) { int statusCode = response.getStatusLine().getStatusCode(); org.apache.http.HttpEntity responseEntity = response.getEntity(); - String responseBody = responseEntity != null ? org.apache.http.util.EntityUtils.toString(responseEntity) : null; + String responseBody = responseEntity != null ? org.apache.http.util.EntityUtils.toString(responseEntity) + : null; if (statusCode >= 200 && statusCode < 300) { logger.info("成功上传结果到后端,状态码: {}, 响应: {}", statusCode, responseBody); } else { @@ -430,8 +444,9 @@ public class FolderMonitor { /** * 通用底部识别方法,便于灵活调用 + * * @param imageFullPath 图片完整路径 - * @param bottomConfig 底部识别配置 + * @param bottomConfig 底部识别配置 * @return 识别结果 */ public Map recognizeBottomArea(String imageFullPath, Map bottomConfig) { @@ -440,106 +455,113 @@ public class FolderMonitor { /** * 处理图片底部区域,识别特定关键字 + * * @param imageFullPath 图片完整路径 - * @param bottomConfig 底部识别配置 + * @param bottomConfig 底部识别配置 * @return 识别结果 */ @SuppressWarnings("unchecked") private Map processImageBottom(String imageFullPath, Map bottomConfig) { Map extractedData = new HashMap<>(); - + try { logger.info("开始处理图片底部区域: {}", imageFullPath); - + // 获取配置参数 int heightPercent = ((Number) bottomConfig.getOrDefault("height_percent", 20)).intValue(); int widthPercent = ((Number) bottomConfig.getOrDefault("width_percent", 100)).intValue(); List keyWords = (List) bottomConfig.getOrDefault("key_words", Collections.emptyList()); - Map keyMapping = (Map) bottomConfig.getOrDefault("key_mapping", Collections.emptyMap()); - + Map keyMapping = (Map) bottomConfig.getOrDefault("key_mapping", + Collections.emptyMap()); + if (keyWords.isEmpty()) { logger.warn("未配置需要识别的关键字,跳过底部识别"); return extractedData; } - + logger.info("底部区域配置 - 高度: {}%, 宽度: {}%, 关键字: {}", heightPercent, widthPercent, keyWords); if (!keyMapping.isEmpty()) { logger.info("使用关键字映射: {}", keyMapping); } - + // 加载图片 BufferedImage image = ImageIO.read(new File(imageFullPath)); int imageWidth = image.getWidth(); int imageHeight = image.getHeight(); - + logger.info("图片尺寸 - 宽度: {}px, 高度: {}px", imageWidth, imageHeight); - + // 计算底部区域 int bottomHeight = (int) (imageHeight * heightPercent / 100.0); int bottomWidth = (int) (imageWidth * widthPercent / 100.0); int startX = (imageWidth - bottomWidth) / 2; // 居中 int startY = imageHeight - bottomHeight; // 底部 - + // 确保不超出边界 - if (startX < 0) startX = 0; - if (startY < 0) startY = 0; - if (bottomWidth > imageWidth) bottomWidth = imageWidth; - if (bottomHeight > imageHeight) bottomHeight = imageHeight; - - logger.info("截取底部区域 - 起始X: {}px, 起始Y: {}px, 宽度: {}px, 高度: {}px", - startX, startY, bottomWidth, bottomHeight); - + if (startX < 0) + startX = 0; + if (startY < 0) + startY = 0; + if (bottomWidth > imageWidth) + bottomWidth = imageWidth; + if (bottomHeight > imageHeight) + bottomHeight = imageHeight; + + logger.info("截取底部区域 - 起始X: {}px, 起始Y: {}px, 宽度: {}px, 高度: {}px", + startX, startY, bottomWidth, bottomHeight); + // 截取底部区域 BufferedImage bottomArea = image.getSubimage(startX, startY, bottomWidth, bottomHeight); - + // 保存临时图片 File tempFile = File.createTempFile("bottom_", ".png"); ImageIO.write(bottomArea, "PNG", tempFile); logger.info("保存底部区域临时图片: {}", tempFile.getAbsolutePath()); - + // 执行OCR识别 String ocrResult = tesseract.doOCR(tempFile); - + // 删除临时文件 tempFile.delete(); - + logger.info("底部区域OCR识别结果: {}", ocrResult); - + // 组合所有行为一个文本块,处理可能跨行的关键字 String combinedText = ocrResult.replaceAll("\\r?\\n", " "); - + // 按行分割OCR结果处理 String[] lines = ocrResult.split("\\r?\\n"); logger.info("底部区域识别到 {} 行文本", lines.length); - + // 先尝试在单行中查找完整的关键字 boolean foundAnyMatch = false; for (String line : lines) { line = line.trim(); - if (line.isEmpty()) continue; - + if (line.isEmpty()) + continue; + logger.info("处理底部行: {}", line); - + // 尝试查找每个关键字 for (String keyWord : keyWords) { // 处理关键字中的空格,创建无空格版本 String keyWordNoSpaces = keyWord.replaceAll("\\s+", ""); - + // 检查原始关键字或无空格版本是否匹配 if (line.contains(keyWord) || line.contains(keyWordNoSpaces)) { // 确定实际匹配的关键字 String matchedKeyWord = line.contains(keyWord) ? keyWord : keyWordNoSpaces; int keyIndex = line.indexOf(matchedKeyWord); int valueStartIndex = keyIndex + matchedKeyWord.length(); - + if (valueStartIndex < line.length()) { String value = line.substring(valueStartIndex).trim(); - + // 处理值前面的冒号、空格等 if (value.startsWith(":") || value.startsWith(":")) { value = value.substring(1).trim(); } - + // 处理日期时间,只保留年月日部分 if (keyWord.contains("时间") || keyWord.contains("日期")) { // 尝试提取年月日 @@ -550,10 +572,10 @@ public class FolderMonitor { value = matcher.group(1); // 只保留年月日 } } - + // 获取映射的字段名(如果有) String fieldName = keyMapping.getOrDefault(keyWord, keyWord); - + // 存储结果 extractedData.put(fieldName, value); logger.info("底部找到匹配: {} => {} = {}", keyWord, fieldName, value); @@ -564,30 +586,30 @@ public class FolderMonitor { } } } - + // 如果单行匹配没有找到任何结果,尝试在组合文本中查找 if (!foundAnyMatch) { logger.info("单行匹配未找到关键字,尝试在组合文本中查找"); for (String keyWord : keyWords) { // 处理关键字中的空格,创建无空格版本 String keyWordNoSpaces = keyWord.replaceAll("\\s+", ""); - + // 检查是否在组合文本中包含关键字(原始或无空格版本) if (combinedText.contains(keyWord) || combinedText.contains(keyWordNoSpaces)) { // 确定实际匹配的关键字 String matchedKeyWord = combinedText.contains(keyWord) ? keyWord : keyWordNoSpaces; int keyIndex = combinedText.indexOf(matchedKeyWord); int valueStartIndex = keyIndex + matchedKeyWord.length(); - + if (valueStartIndex < combinedText.length()) { // 提取冒号后的内容,直到下一个明显的分隔符(如句号或下一个关键字) String remainingText = combinedText.substring(valueStartIndex).trim(); - + // 处理值前面的冒号、空格等 if (remainingText.startsWith(":") || remainingText.startsWith(":")) { remainingText = remainingText.substring(1).trim(); } - + // 查找下一个分隔符位置 int endPos = remainingText.indexOf("_"); if (endPos == -1) { @@ -602,18 +624,17 @@ public class FolderMonitor { if (endPos == -1) { endPos = remainingText.indexOf("."); } - + // 如果没有找到分隔符,尝试限制值的长度 String value; if (endPos > 0) { value = remainingText.substring(0, endPos).trim(); } else { // 提取前30个字符或者全部(如果少于30个字符) - value = remainingText.length() > 30 ? - remainingText.substring(0, 30).trim() : - remainingText.trim(); + value = remainingText.length() > 30 ? remainingText.substring(0, 30).trim() + : remainingText.trim(); } - + // 处理日期时间,只保留年月日部分 if (keyWord.contains("时间") || keyWord.contains("日期")) { // 尝试提取年月日 @@ -624,10 +645,10 @@ public class FolderMonitor { value = matcher.group(1); // 只保留年月日 } } - + // 获取映射的字段名(如果有) String fieldName = keyMapping.getOrDefault(keyWord, keyWord); - + // 存储结果 extractedData.put(fieldName, value); logger.info("底部组合文本找到匹配: {} => {} = {}", keyWord, fieldName, value); @@ -636,10 +657,10 @@ public class FolderMonitor { } } } - + if (!foundAnyMatch) { logger.warn("底部区域未找到任何匹配的关键字"); - + // 特殊处理:尝试查找包含时间格式的行 for (String line : lines) { if (line.matches(".*\\d{4}[-/]\\d{1,2}[-/]\\d{1,2}.*")) { @@ -659,11 +680,11 @@ public class FolderMonitor { } } } - + } catch (Exception e) { logger.error("处理底部区域失败: " + imageFullPath, e); } - + return extractedData; } @@ -675,143 +696,165 @@ public class FolderMonitor { return processImageNormal(imageFullPath); } // 新版CG识别逻辑:按区域裁剪后分别OCR提取字段 -/* - private Map processImageWithCG(String imageFullPath) { - Map extractedData = new HashMap<>(); - try { - BufferedImage image = ImageIO.read(new File(imageFullPath)); - int width = image.getWidth(); - int height = image.getHeight(); - - // 1. 标题区和患者信息区分开裁剪 - int titleHeight = (int) (height * 0.01); // 标题区5% - int infoHeight = (int) (height * 0.10); // 患者信息区10% - // 跳过标题区,只识别患者信息区 - BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight); - String infoOcr = tesseract.doOCR(infoArea); - String[] infoLines = infoOcr.split("\\r?\\n"); - String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)"; - String agePattern = "(\\d+)\\s*[岁%]"; - String idPattern = "[I1l][D][::]?\\s*([A-Za-z0-9]+)"; - for (String line : infoLines) { - line = line.replaceAll("\\s+", " ").trim(); - if (extractedData.get("name") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line); - if (m.find()) { - extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格 - extractedData.put("gender", m.group(2)); - java.util.regex.Matcher ageM = java.util.regex.Pattern.compile(agePattern).matcher(line); - if (ageM.find()) { - extractedData.put("age", ageM.group(1)); - } - } - } - if (extractedData.get("id") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line); - if (m.find()) { - extractedData.put("id", m.group(1)); - } - } - } - - // 2. 左侧参数区(提取HR、P、PR、QRS、QT/QTC、P/QRS/T、RV5/SV1) - int paramWidth = (int) (width * 0.32); // 左侧32% - int paramStartY = titleHeight; - int paramHeight = (int) (height * 0.355); // 参数区高度约38% - BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight); - String paramOcr = tesseract.doOCR(paramArea); - String[] paramLines = paramOcr.split("\\r?\\n"); - String hrPattern = "HR\\s*[::.·]?\\s*([\\dOo./]+[bB][pP][mM])"; - String pPattern = "P\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; - String prPattern = "PR\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; - String qrsPattern = "QRS\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; - String qtPattern = "QT/QT[cC]?\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; - String pqrstPattern = "P/QRS/T\\s*[::.·]?\\s*([\\dOo./]+)"; - String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[::.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?"; - for (String line : paramLines) { - line = line.replaceAll("\\s+", " ").trim(); - if (extractedData.get("HR") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("HR", hrValue); - } - } - if (extractedData.get("P") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("P", pValue); - } - } - if (extractedData.get("PR") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("PR", prValue); - } - } - if (extractedData.get("QRS") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("QRS", qrsValue); - } - } - if (extractedData.get("QT/QTC") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); - extractedData.put("QT/QTC", qtValue); - } - } - if (extractedData.get("P/QRS/T") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); - if (m.find()) { - String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", ""); - extractedData.put("P/QRS/T", pqrstValue); - } - } - if (extractedData.get("RV5/SV1") == null) { - if (line.toLowerCase().contains("sv1")) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); - if (m.find()) { - String rv5 = m.group(1).replaceAll("[Oo]", "0"); - String sv1 = m.group(2).replaceAll("[Oo]", "0"); - String rv5sv1Value = rv5 + "/" + sv1; - extractedData.put("RV5/SV1", rv5sv1Value); - - } - } - } - } - - // 3. 底部区域(提取检查时间) - int bottomHeight = (int) (height * 0.05); // 底部12% - int bottomStartY = height - bottomHeight; - BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight); - String bottomOcr = tesseract.doOCR(bottomArea); - String[] bottomLines = bottomOcr.split("\\r?\\n"); - String checkTimePattern = "检查[::]?\\s*([\\d-]+ [\\d:]+)"; - for (String line : bottomLines) { - String lineNoSpace = line.replaceAll("\\s+", ""); - if (extractedData.get("collectionTime") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace); - if (m.find()) { - String dateTime = m.group(1); - if (!dateTime.contains(" ")) { - dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10); - } - extractedData.put("collectionTime", dateTime); - } - } - } - } catch (Exception e) { - logger.error("processImageWithCG 区域识别异常", e); - } - return extractedData; - } -*/ + /* + * private Map processImageWithCG(String imageFullPath) { + * Map extractedData = new HashMap<>(); + * try { + * BufferedImage image = ImageIO.read(new File(imageFullPath)); + * int width = image.getWidth(); + * int height = image.getHeight(); + * + * // 1. 标题区和患者信息区分开裁剪 + * int titleHeight = (int) (height * 0.01); // 标题区5% + * int infoHeight = (int) (height * 0.10); // 患者信息区10% + * // 跳过标题区,只识别患者信息区 + * BufferedImage infoArea = image.getSubimage(0, titleHeight, width, + * infoHeight); + * String infoOcr = tesseract.doOCR(infoArea); + * String[] infoLines = infoOcr.split("\\r?\\n"); + * String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)"; + * String agePattern = "(\\d+)\\s*[岁%]"; + * String idPattern = "[I1l][D][::]?\\s*([A-Za-z0-9]+)"; + * for (String line : infoLines) { + * line = line.replaceAll("\\s+", " ").trim(); + * if (extractedData.get("name") == null) { + * java.util.regex.Matcher m = + * java.util.regex.Pattern.compile(namePattern).matcher(line); + * if (m.find()) { + * extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格 + * extractedData.put("gender", m.group(2)); + * java.util.regex.Matcher ageM = + * java.util.regex.Pattern.compile(agePattern).matcher(line); + * if (ageM.find()) { + * extractedData.put("age", ageM.group(1)); + * } + * } + * } + * if (extractedData.get("id") == null) { + * java.util.regex.Matcher m = + * java.util.regex.Pattern.compile(idPattern).matcher(line); + * if (m.find()) { + * extractedData.put("id", m.group(1)); + * } + * } + * } + * + * // 2. 左侧参数区(提取HR、P、PR、QRS、QT/QTC、P/QRS/T、RV5/SV1) + * int paramWidth = (int) (width * 0.32); // 左侧32% + * int paramStartY = titleHeight; + * int paramHeight = (int) (height * 0.355); // 参数区高度约38% + * BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, + * paramHeight); + * String paramOcr = tesseract.doOCR(paramArea); + * String[] paramLines = paramOcr.split("\\r?\\n"); + * String hrPattern = "HR\\s*[::.·]?\\s*([\\dOo./]+[bB][pP][mM])"; + * String pPattern = "P\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + * String prPattern = "PR\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + * String qrsPattern = "QRS\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + * String qtPattern = "QT/QT[cC]?\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + * String pqrstPattern = "P/QRS/T\\s*[::.·]?\\s*([\\dOo./]+)"; + * String rv5sv1Pattern = + * "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[::.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?"; + * for (String line : paramLines) { + * line = line.replaceAll("\\s+", " ").trim(); + * if (extractedData.get("HR") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", + * ""); + * extractedData.put("HR", hrValue); + * } + * } + * if (extractedData.get("P") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + * extractedData.put("P", pValue); + * } + * } + * if (extractedData.get("PR") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", + * ""); + * extractedData.put("PR", prValue); + * } + * } + * if (extractedData.get("QRS") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", + * ""); + * extractedData.put("QRS", qrsValue); + * } + * } + * if (extractedData.get("QT/QTC") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", + * ""); + * extractedData.put("QT/QTC", qtValue); + * } + * } + * if (extractedData.get("P/QRS/T") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, + * java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + * if (m.find()) { + * String pqrstValue = m.group(1).replaceAll("[Oo]", + * "0").replaceAll("[^\\d/degDEG.]", ""); + * extractedData.put("P/QRS/T", pqrstValue); + * } + * } + * if (extractedData.get("RV5/SV1") == null) { + * if (line.toLowerCase().contains("sv1")) { + * java.util.regex.Matcher m = + * java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]"). + * matcher(line); + * if (m.find()) { + * String rv5 = m.group(1).replaceAll("[Oo]", "0"); + * String sv1 = m.group(2).replaceAll("[Oo]", "0"); + * String rv5sv1Value = rv5 + "/" + sv1; + * extractedData.put("RV5/SV1", rv5sv1Value); + * + * } + * } + * } + * } + * + * // 3. 底部区域(提取检查时间) + * int bottomHeight = (int) (height * 0.05); // 底部12% + * int bottomStartY = height - bottomHeight; + * BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, + * bottomHeight); + * String bottomOcr = tesseract.doOCR(bottomArea); + * String[] bottomLines = bottomOcr.split("\\r?\\n"); + * String checkTimePattern = "检查[::]?\\s*([\\d-]+ [\\d:]+)"; + * for (String line : bottomLines) { + * String lineNoSpace = line.replaceAll("\\s+", ""); + * if (extractedData.get("collectionTime") == null) { + * java.util.regex.Matcher m = java.util.regex.Pattern.compile( + * "检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace) + * ; + * if (m.find()) { + * String dateTime = m.group(1); + * if (!dateTime.contains(" ")) { + * dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10); + * } + * extractedData.put("collectionTime", dateTime); + * } + * } + * } + * } catch (Exception e) { + * logger.error("processImageWithCG 区域识别异常", e); + * } + * return extractedData; + * } + */ // 新版CG识别逻辑:按区域裁剪后分别OCR提取字段 private Map processImageWithCG(String imageFullPath) { @@ -823,7 +866,7 @@ public class FolderMonitor { // 1. 标题区和患者信息区分开裁剪 int titleHeight = (int) (height * 0.01); // 标题区5% - int infoHeight = (int) (height * 0.10); // 患者信息区10% + int infoHeight = (int) (height * 0.10); // 患者信息区10% // 跳过标题区,只识别患者信息区 BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight); String infoOcr = tesseract.doOCR(infoArea); @@ -870,42 +913,48 @@ public class FolderMonitor { for (String line : paramLines) { line = line.replaceAll("\\s+", " ").trim(); if (extractedData.get("hr") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); extractedData.put("hr", hrValue); } } if (extractedData.get("p") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); extractedData.put("p", pValue); } } if (extractedData.get("pr") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); extractedData.put("pr", prValue); } } if (extractedData.get("qrs") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); extractedData.put("qrs", qrsValue); } } if (extractedData.get("qt/qtc") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); extractedData.put("qt/qtc", qtValue); } } if (extractedData.get("pAxle/qrsAxle/tAxle") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", ""); extractedData.put("pAxle/qrsAxle/tAxle", pqrstValue); @@ -913,13 +962,14 @@ public class FolderMonitor { } if (extractedData.get("rv5/sv1") == null) { if (line.toLowerCase().contains("sv1")) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); if (m.find()) { String rv5 = m.group(1).replaceAll("[Oo]", "0"); String sv1 = m.group(2).replaceAll("[Oo]", "0"); String rv5sv1Value = rv5 + "/" + sv1; extractedData.put("rv5/sv1", rv5sv1Value); - + // 计算rv5Sv1的值(rv5和sv1的和) try { double rv5Value = Double.parseDouble(rv5); @@ -944,7 +994,8 @@ public class FolderMonitor { for (String line : bottomLines) { String lineNoSpace = line.replaceAll("\\s+", ""); if (extractedData.get("collectionTime") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace); + java.util.regex.Matcher m = java.util.regex.Pattern + .compile("检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace); if (m.find()) { String dateTime = m.group(1); if (!dateTime.contains(" ")) { @@ -991,8 +1042,8 @@ public class FolderMonitor { extractedData.put("name", name); } else { // 如果正则匹配失败,尝试直接提取 - if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") || - line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) { + if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") || + line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) { // 统一处理各种可能的姓名标识 String processedLine = line.replaceAll("[姓姊]\\s*名\\s*[::]?", "姓名:"); String[] parts = processedLine.split("姓名:"); @@ -1011,7 +1062,7 @@ public class FolderMonitor { nextFieldIndex = afterName.indexOf("性 别"); } } - + if (nextFieldIndex > 0) { String name = afterName.substring(0, nextFieldIndex) .replaceAll("[::]", "") @@ -1050,7 +1101,7 @@ public class FolderMonitor { String datePattern = "日\\s*期\\s*[::]?\\s*(\\d{4}[-/]\\d{1,2}[-/]\\d{1,2})"; // 预处理行,处理字符间可能有空格的情况 String processedLine = line.replaceAll("\\s+", ""); - + // 先尝试原始行 java.util.regex.Matcher m = java.util.regex.Pattern.compile(datePattern).matcher(line); if (m.find()) { @@ -1070,7 +1121,7 @@ public class FolderMonitor { // 2. 参数区(底部1/3,适配新模板) int paramStartY = (int) (height * 0.81); int paramHeight = height - paramStartY; - BufferedImage paramArea = image.getSubimage(0, paramStartY, width-900, paramHeight); + BufferedImage paramArea = image.getSubimage(0, paramStartY, width - 900, paramHeight); String paramOcr = tesseract.doOCR(paramArea); String[] paramLines = paramOcr.split("\\r?\\n"); String hrPattern = "(?:心率|心亨)[::]?\\s*([\\dOo./]+)"; @@ -1084,7 +1135,7 @@ public class FolderMonitor { String tAxlePattern = "T轴[::]?\\s*([\\dOo.]+)"; String rv5sv1Pattern = "RV5/SV1.*?([\\dOo.]+/[\\dOo.]+|[\\dOo.]+).*?[mM][vVyY]?"; String rv5plusSv1Pattern = "RV5\\+SV1[::]?\\s*([\\dOo./]+)"; - + // 用于存储QT和QTc的值 String qtValue = null; String qtcValue = null; @@ -1093,10 +1144,10 @@ public class FolderMonitor { String pAxleValue = null; String qrsAxleValue = null; String tAxleValue = null; - + // 标记是否已识别为P轴 boolean isPAxleDetected = false; - + for (String line : paramLines) { line = line.replaceAll("\\s+", "").trim(); // HR @@ -1107,7 +1158,7 @@ public class FolderMonitor { extractedData.put("hr", hrValue); } } - + // 处理所有P值 - 先检查是否含有度数符号判断是P轴还是P时限 if (line.contains("P") && line.contains("°")) { // 如果行中同时包含P和度数符号,尝试提取P轴值 @@ -1124,7 +1175,7 @@ public class FolderMonitor { extractedData.put("P", pValue); } } - + // P - 仅当未识别为P轴时才尝试识别为P时限,但明确包含"P时限"的行除外 if ((extractedData.get("P") == null && !isPAxleDetected) || line.contains("P时限")) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line); @@ -1133,16 +1184,17 @@ public class FolderMonitor { extractedData.put("P", pValue); } } - + // P轴 - 检查传统格式,仅当P轴未识别时 if (pAxleValue == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[::]?\\s*([\\dOo.+-]+)").matcher(line); + java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[::]?\\s*([\\dOo.+-]+)") + .matcher(line); if (m.find()) { pAxleValue = m.group(1).replaceAll("[Oo]", "0"); isPAxleDetected = true; // 标记已识别为P轴 } } - + // PR if (extractedData.get("pr") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern).matcher(line); @@ -1200,7 +1252,7 @@ public class FolderMonitor { } } } - + // 处理QT/QTc的值 if (qtValue != null || qtcValue != null) { if (qtValue != null && qtcValue != null) { @@ -1233,51 +1285,223 @@ public class FolderMonitor { extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString()); } - } catch (Exception e) { logger.error("processImageWithLQXSD 识别异常", e); } return extractedData; } + // 新版察右前旗巴音塔拉中心卫生院识别逻辑:按区域裁剪后分别OCR提取字段 + private Map processImageWithBYTLZX(String imageFullPath) { + Map extractedData = new HashMap<>(); + try { + BufferedImage image = ImageIO.read(new File(imageFullPath)); + int width = image.getWidth(); + int height = image.getHeight(); + + int infoHeight = (int) (height * 0.0427); // 顶部2.5%,你可以根据实际图片微调 + int infoWidth = (int) (width * 0.25); // 宽度25%,只截取左侧1/4 + BufferedImage infoArea = image.getSubimage(0, 0, infoWidth, infoHeight); + String infoOcr = tesseract.doOCR(infoArea); + String[] infoLines = infoOcr.split("\\r?\\n"); + String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)"; + String agePattern = "(\\d+)\\s*[岁%]"; + String idPattern = "[I1l][D][::]?\\s*([A-Za-z0-9]+)"; + for (String line : infoLines) { + line = line.replaceAll("\\s+", " ").trim(); + // 提取ID + Pattern pattern = Pattern.compile("[I1l][D][::]?\\s*([A-Za-z0-9]*)\\s*(男|女)"); + Matcher matcher = pattern.matcher(line); + if (matcher.find()) { + String idValue = matcher.group(1).trim(); + String genderValue = matcher.group(2).trim(); + + if (idValue.isEmpty()) { + idValue = UUID.randomUUID().toString().replace("-", ""); + } + extractedData.put("examId", idValue); + extractedData.put("gender", genderValue); + break; // 找到就不用再循环了 + } + } + + // 2. 左侧参数区(提取HR、P、PR、QRS、QT/QTC、P/QRS/T、RV5/SV1) + int paramWidth = (int) (width * 0.32); // 左侧32% + int paramStartY = 0; + int paramHeight = (int) (height * 0.21); // 参数区高度约38% + BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight); + // 2. 临时保存区域图片,调试用 + // ImageIO.write(paramArea, "png", new File("test_info.png")); + String paramOcr = tesseract.doOCR(paramArea); + String[] paramLines = paramOcr.split("\\r?\\n"); + String hrPattern = "HR\\s*[::.·]?\\s*([\\dOo.]+)\\s*[bB][pP][mM]"; + String pPattern = "P\\s*[::.·]?\\s*([\\dOo.]+)\\s*[mM][sS]"; + String prPattern = "PR\\s*[::.·]?\\s*([\\dOo.]+)\\s*[mM][sS]"; + String qrsPattern = "QRS\\s*[::.·]?\\s*([\\dOo.]+)\\s*[mM][sS]"; + String qtPattern = "(?:QT|T)\\s*[::.·]?\\s*(\\d+)\\s*[\"“]?[mM][sS]"; + String qtcPattern = "[@Q]Tc\\s*[::.·]?\\s*(\\d+)\\s*[^\\d\\s]?[mM][sS]"; + String pqrstPattern = "P/QRS/T\\s*[::.·]?\\s*([\\dOo./]+)"; + String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[::.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?"; + // 临时变量 + String qtValue = null; + String qtcValue = null; + + for (String line : paramLines) { + line = line.replaceAll("\\s+", " ").trim(); + if (extractedData.get("hr") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("hr", hrValue); + } + } + if (extractedData.get("p") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("p", pValue); + } + } + if (extractedData.get("pr") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("pr", prValue); + } + } + if (extractedData.get("qrs") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("qrs", qrsValue); + } + } + // 识别QT + java.util.regex.Matcher mQt = java.util.regex.Pattern + .compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (mQt.find()) { + qtValue = mQt.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); + } + + // 识别QTc + java.util.regex.Matcher mQtc = java.util.regex.Pattern + .compile(qtcPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (mQtc.find()) { + qtcValue = mQtc.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); + } + + // 只保留 qt/qtc + if (qtValue != null && qtcValue != null) { + extractedData.put("qt/qtc", qtValue + "/" + qtcValue); + } else if (qtValue != null) { + extractedData.put("qt/qtc", qtValue); + } else if (qtcValue != null) { + extractedData.put("qt/qtc", qtcValue); + } + + if (extractedData.get("pAxle/qrsAxle/tAxle") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", ""); + extractedData.put("pAxle/qrsAxle/tAxle", pqrstValue); + } + } + if (extractedData.get("rv5/sv1") == null) { + if (line.toLowerCase().contains("sv1")) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); + if (m.find()) { + String rv5 = m.group(1).replaceAll("[Oo]", "0"); + String sv1 = m.group(2).replaceAll("[Oo]", "0"); + String rv5sv1Value = rv5 + "/" + sv1; + extractedData.put("rv5/sv1", rv5sv1Value); + + // 计算rv5Sv1的值(rv5和sv1的和) + try { + double rv5Value = Double.parseDouble(rv5); + double sv1Value = Double.parseDouble(sv1); + double sum = rv5Value + sv1Value; + extractedData.put("rv5Sv1", String.format("%.3f", sum)); + } catch (NumberFormatException e) { + logger.error("计算rv5Sv1时发生数字格式错误: rv5={}, sv1={}", rv5, sv1, e); + } + } + } + } + } + + // 3. 底部区域(提取检查时间) + int bottomHeight = (int) (height * 0.05); // 底部12% + int bottomStartY = height - bottomHeight; + BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight); + String bottomOcr = tesseract.doOCR(bottomArea); + String[] bottomLines = bottomOcr.split("\\r?\\n"); + String checkTimePattern = "检查[::]?\\s*([\\d-]+ [\\d:]+)"; + for (String line : bottomLines) { + String lineNoSpace = line.replaceAll("\\s+", ""); + if (extractedData.get("collectionTime") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern + .compile("检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace); + if (m.find()) { + String dateTime = m.group(1); + if (!dateTime.contains(" ")) { + dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10); + } + extractedData.put("collectionTime", dateTime); + } + } + } + } catch (Exception e) { + logger.error("processImageWithCG 区域识别异常", e); + } + return extractedData; + } + /** * 处理图片,将图片逆时针旋转90度 + * * @param imageFullPath 图片完整路径 * @return 识别结果 */ private Map processImageWithRotate90(String imageFullPath) throws Exception { logger.info("使用旋转90度识别逻辑处理: {}", imageFullPath); - + // 读取原始图像 BufferedImage originalImage = ImageIO.read(new File(imageFullPath)); - + // 创建一个旋转后的图像(宽高交换) int width = originalImage.getWidth(); int height = originalImage.getHeight(); BufferedImage rotatedImage = new BufferedImage(height, width, originalImage.getType()); - + // 执行逆时针旋转90度操作 Graphics2D g2d = rotatedImage.createGraphics(); g2d.translate(height, 0); g2d.rotate(Math.PI / 2); g2d.drawImage(originalImage, 0, 0, null); g2d.dispose(); - + // 将旋转后的图片保存回原始位置 // ImageIO.write(rotatedImage, "PNG", new File(imageFullPath)); // logger.info("已将图片逆时针旋转90度并保存: {}", imageFullPath); - + // 在旋转后的图片上执行OCR识别 String result = tesseract.doOCR(new File(imageFullPath)); logger.info("旋转90度后OCR识别结果: {}", result); - + // 处理OCR结果 return processOcrResult(result, directoryConfig.getKeyMapping()); } - + /** * 处理OCR识别结果 - * @param ocrResult OCR结果文本 + * + * @param ocrResult OCR结果文本 * @param keyMapping 关键词映射 * @return 提取的数据 */ @@ -1285,52 +1509,52 @@ public class FolderMonitor { Map extractedData = new HashMap<>(); String[] lines = ocrResult.split("\\r?\\n"); logger.info("OCR结果分割为 {} 行", lines.length); - + // 处理第一行 - 整行处理获取时间等信息 if (lines.length > 0) { String firstLine = lines[0].trim().replaceAll("\\s+", " "); if (!firstLine.isEmpty()) { logger.info("处理第一行(整行): {}", firstLine); - + // 创建时间相关映射 Map timeMapping = new HashMap<>(); for (Map.Entry entry : keyMapping.entrySet()) { - if (entry.getValue().contains("time") || entry.getValue().contains("Time") || - entry.getValue().contains("日期") || entry.getValue().contains("collectionTime")) { + if (entry.getValue().contains("time") || entry.getValue().contains("Time") || + entry.getValue().contains("日期") || entry.getValue().contains("collectionTime")) { timeMapping.put(entry.getKey(), entry.getValue()); } } - + processLine(firstLine, timeMapping, extractedData); } } - + // 处理第二行 - 主要提取ID if (lines.length > 1) { String secondLine = lines[1].trim().replaceAll("\\s+", " "); if (!secondLine.isEmpty()) { logger.info("处理第二行(主要提取ID): {}", secondLine); - + // 针对ID创建映射 Map idMapping = new HashMap<>(); for (Map.Entry entry : keyMapping.entrySet()) { - if (entry.getValue().contains("id") || entry.getValue().contains("Id") || - entry.getValue().equals("ID") || entry.getValue().equals("examId")) { + if (entry.getValue().contains("id") || entry.getValue().contains("Id") || + entry.getValue().equals("ID") || entry.getValue().equals("examId")) { idMapping.put(entry.getKey(), entry.getValue()); } } - + // 处理ID提取 processLine(secondLine, idMapping, extractedData); } } - + // 处理第三行 - 同时提取姓名、性别和年龄 if (lines.length > 2) { String thirdLine = lines[2].trim().replaceAll("\\s+", " "); if (!thirdLine.isEmpty()) { logger.info("处理第三行(提取姓名、性别和年龄): {}", thirdLine); - + // 提取姓名 String nameField = null; for (Map.Entry entry : keyMapping.entrySet()) { @@ -1339,28 +1563,30 @@ public class FolderMonitor { break; } } - + if (nameField != null && thirdLine.contains("姓") && thirdLine.contains("名")) { // 通过"姓名"标记提取姓名,修改正则表达式以匹配包含引号的情况 String namePattern = "姓\\s*名\\s*[::]?\\s*[\"\\s]*([^\\d年龄]{2,8})"; java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(namePattern); java.util.regex.Matcher matcher = pattern.matcher(thirdLine); - + if (matcher.find()) { String name = matcher.group(1).replaceAll("\\s+", ""); // 去除名字中的引号字符 - name = name.replaceAll("\"", "") // 双引号 - .replaceAll("\u201C", "") // 左双引号 - .replaceAll("\u201D", ""); // 右双引号 + name = name.replaceAll("\"", "") // 双引号 + .replaceAll("\u201C", "") // 左双引号 + .replaceAll("\u201D", ""); // 右双引号 extractedData.put(nameField, name); logger.info("从第三行提取姓名: {}", name); } else { // 尝试使用另一种提取方法 int nameStart = thirdLine.indexOf("姓名"); - if (nameStart < 0) nameStart = thirdLine.indexOf("姓 名"); + if (nameStart < 0) + nameStart = thirdLine.indexOf("姓 名"); int ageStart = thirdLine.indexOf("年龄"); - if (ageStart < 0) ageStart = thirdLine.indexOf("年 龄"); - + if (ageStart < 0) + ageStart = thirdLine.indexOf("年 龄"); + if (nameStart >= 0 && ageStart > nameStart) { String nameSection = thirdLine.substring(nameStart + 2, ageStart).trim(); // 清理冒号、引号等 @@ -1370,7 +1596,7 @@ public class FolderMonitor { nameSection = nameSection.replaceAll("\u201D", "").trim(); // 右双引号 // 移除空格 nameSection = nameSection.replaceAll("\\s+", ""); - + if (!nameSection.isEmpty()) { extractedData.put(nameField, nameSection); logger.info("通过分割提取姓名: {}", nameSection); @@ -1378,34 +1604,35 @@ public class FolderMonitor { } } } - + // 针对性别和年龄创建特定的映射 Map genderAgeMapping = new HashMap<>(); for (Map.Entry entry : keyMapping.entrySet()) { if (entry.getValue().contains("gender") || entry.getValue().contains("性别") || - entry.getValue().contains("age") || entry.getValue().contains("年龄")) { + entry.getValue().contains("age") || entry.getValue().contains("年龄")) { genderAgeMapping.put(entry.getKey(), entry.getValue()); } } - + // 提取性别和年龄 processLine(thirdLine, genderAgeMapping, extractedData); - + // 如果姓名未提取成功,尝试手动提取 if (!extractedData.keySet().stream().anyMatch(k -> k.contains("name") || k.contains("姓名"))) { // 针对特定的行结构:"姓 名 : " 秦 浣 彷 年 龄 : 27" - if (thirdLine.contains("姓") && thirdLine.contains("名") && thirdLine.contains("年") && thirdLine.contains("龄")) { + if (thirdLine.contains("姓") && thirdLine.contains("名") && thirdLine.contains("年") + && thirdLine.contains("龄")) { // 获取"姓名"和"年龄"之间的内容 int nameStart = thirdLine.indexOf("名"); int ageStart = thirdLine.indexOf("年龄"); - + if (nameStart >= 0 && ageStart > nameStart) { String nameSection = thirdLine.substring(nameStart + 1, ageStart).trim(); // 清理冒号、引号等 nameSection = nameSection.replaceAll("[::\"]", "").trim(); // 移除空格 nameSection = nameSection.replaceAll("\\s+", ""); - + if (!nameSection.isEmpty() && nameField != null) { extractedData.put(nameField, nameSection); logger.info("通过位置提取姓名: {}", nameSection); @@ -1415,26 +1642,28 @@ public class FolderMonitor { } } } - + // 处理剩余行 - 使用完整的关键字映射 for (int i = 3; i < lines.length; i++) { String line = lines[i].trim().replaceAll("\\s+", " "); - if (line.isEmpty()) continue; - - logger.info("处理行 {}: {}", i+1, line); + if (line.isEmpty()) + continue; + + logger.info("处理行 {}: {}", i + 1, line); processLine(line, keyMapping, extractedData); } - + // 确保所有提取到的数据都被存入结果中 logger.info("提取结果: {}", extractedData); - + return extractedData; } - + /** * 处理单行文本,提取关键字和对应的值 - * @param line 要处理的文本行 - * @param keyMapping 关键字映射 + * + * @param line 要处理的文本行 + * @param keyMapping 关键字映射 * @param extractedData 用于存储提取的数据的Map */ private void processLine(String line, Map keyMapping, Map extractedData) { @@ -1442,18 +1671,18 @@ public class FolderMonitor { for (Map.Entry entry : keyMapping.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - + if (line.contains(key)) { // 提取key后面的值 String[] parts = line.split(key); if (parts.length > 1) { String extractedValue = parts[1].trim(); - + // 清除值前面的冒号和空格 if (extractedValue.startsWith(":") || extractedValue.startsWith(":")) { extractedValue = extractedValue.substring(1).trim(); } - + // 根据字段类型进行特殊处理 if (value.contains("id") || value.contains("Id") || value.equals("ID") || value.equals("examId")) { // 对ID进行处理,只保留数字 @@ -1492,10 +1721,11 @@ public class FolderMonitor { } } } - } else if (value.contains("collectionTime") || value.contains("exam_time") || value.contains("time") || value.contains("日期")) { + } else if (value.contains("collectionTime") || value.contains("exam_time") || value.contains("time") + || value.contains("日期")) { // 处理日期时间格式 extractedValue = extractedValue.replaceAll("\\s+", " "); - + // 使用-分割字符串 String[] dateParts = extractedValue.split("-"); if (dateParts.length >= 3) { @@ -1507,7 +1737,7 @@ public class FolderMonitor { year = currentYear; logger.info("年份大于当前年份,使用当前年份: {}", year); } - + // 处理月份 int month = Integer.parseInt(dateParts[1]); if (month > 12) { @@ -1518,13 +1748,14 @@ public class FolderMonitor { logger.info("月份大于12,取后两位: {}", month); } } - + // 处理日期(取第一部分) String dayPart = dateParts[2].split("\\s+")[0]; // 只取日期部分,去掉时间 int day = Integer.parseInt(dayPart); - + // 日期有效性检查 - if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 && day <= 31) { + if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 + && day <= 31) { extractedValue = String.format("%04d-%02d-%02d", year, month, day); logger.info("格式化后的日期: {}", extractedValue); } else { @@ -1539,22 +1770,22 @@ public class FolderMonitor { } else if (value.contains("name") || value.contains("姓名")) { // 处理姓名,去除多余的空格,并验证是否为有效姓名 extractedValue = extractedValue.replaceAll("\\s+", ""); - + // 检查姓名是否包含"医院"、"卫生院"等机构名称,如果包含则可能是误提取 - if (extractedValue.contains("医院") || extractedValue.contains("卫生院") || - extractedValue.contains("诊所") || extractedValue.contains("中心")) { + if (extractedValue.contains("医院") || extractedValue.contains("卫生院") || + extractedValue.contains("诊所") || extractedValue.contains("中心")) { // 这可能是医院名称而不是患者姓名,不进行存储 logger.warn("疑似将机构名称误识别为姓名: {}", extractedValue); continue; // 跳过这个字段,不添加到结果中 } - + // 检查姓名长度,正常中文姓名长度为2-4个字符 if (extractedValue.length() > 10) { logger.warn("疑似姓名长度异常: {}", extractedValue); continue; // 跳过这个字段,不添加到结果中 } } - + extractedData.put(value, extractedValue); logger.info("找到匹配: {} = {}", value, extractedValue); } @@ -1564,27 +1795,29 @@ public class FolderMonitor { /** * 处理已提取的时间格式 + * * @param extractedData 已提取的数据 */ private void processTimeFields(Map extractedData) { // 处理已提取的时间格式 logger.info("开始处理时间字段,当前提取数据: {}", extractedData); - + for (Map.Entry entry : extractedData.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - - if (key.contains("time") || key.contains("Time") || key.contains("collection") || key.contains("collectionTime") || - key.contains("时间") || key.contains("日期")) { + + if (key.contains("time") || key.contains("Time") || key.contains("collection") + || key.contains("collectionTime") || + key.contains("时间") || key.contains("日期")) { logger.info("处理时间字段: {} = {}", key, value); // 处理日期时间格式 value = value.replaceAll("\\s+", " "); - + // 识别数字格式,尝试提取年月日和时分秒 String dateTimePattern = "(\\d{4})[-/]?(\\d{1,2})[-/]?(\\d{1,2})\\s*(\\d{1,2})[:\\s]?(\\d{1,2})[:\\s]?(\\d{1,2})"; java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(dateTimePattern); java.util.regex.Matcher matcher = pattern.matcher(value); - + if (matcher.find()) { try { int year = Integer.parseInt(matcher.group(1)); @@ -1593,13 +1826,13 @@ public class FolderMonitor { int hour = Integer.parseInt(matcher.group(4)); int minute = Integer.parseInt(matcher.group(5)); int second = Integer.parseInt(matcher.group(6)); - - logger.info("原始日期时间值: 年={}, 月={}, 日={}, 时={}, 分={}, 秒={}", - year, month, day, hour, minute, second); - + + logger.info("原始日期时间值: 年={}, 月={}, 日={}, 时={}, 分={}, 秒={}", + year, month, day, hour, minute, second); + // 获取当前年份 int currentYear = java.time.LocalDate.now().getYear(); - + // 修正明显错误的日期值 // 如果年份大于当前年份,使用当前年份 if (year > currentYear) { @@ -1607,7 +1840,7 @@ public class FolderMonitor { year = currentYear; logger.info("修正日期中的年份为当前年份: {}", year); } - + // 如果月份大于12,取后两位 if (month > 12) { logger.info("月份 {} 无效,开始修正", month); @@ -1630,30 +1863,30 @@ public class FolderMonitor { } logger.info("修正后的月份: {}", month); } - + if (day > 31) { logger.info("日期 {} 无效,开始修正", day); int newDay = day % 100; // 对于识别错误,尝试取后两位 logger.info("尝试取模100修正日期: {} -> {}", day, newDay); day = newDay; - + if (day > 31) { newDay = day % 10; logger.info("日期仍然无效,取模10: {} -> {}", day, newDay); day = newDay; } - + logger.info("修正后的日期: {}", day); } - + // 日期有效性检查 if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 && day <= 31) { String formattedDate = String.format("%04d-%02d-%02d", year, month, day); logger.info("字段 {} 的日期已修正: {} -> {}", key, value, formattedDate); extractedData.put(key, formattedDate); } else { - logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}", - key, year, month, day); + logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}", + key, year, month, day); } } catch (NumberFormatException e) { logger.warn("日期时间解析错误: {}, 异常: {}", value, e.getMessage()); @@ -1664,18 +1897,18 @@ public class FolderMonitor { String datePattern = "(\\d{4})[-/]?(\\d{1,2})[-/]?(\\d{1,2})"; pattern = java.util.regex.Pattern.compile(datePattern); matcher = pattern.matcher(value); - + if (matcher.find()) { try { int year = Integer.parseInt(matcher.group(1)); int month = Integer.parseInt(matcher.group(2)); int day = Integer.parseInt(matcher.group(3)); - + logger.info("仅日期部分匹配: 年={}, 月={}, 日={}", year, month, day); - + // 获取当前年份 int currentYear = java.time.LocalDate.now().getYear(); - + // 修正明显错误的日期值 if (month > 12) { logger.info("月份 {} 无效,尝试修正", month); @@ -1688,22 +1921,22 @@ public class FolderMonitor { int newDay = day % 100; // 对于识别错误,尝试取后两位 logger.info("取模100修正日期: {} -> {}", day, newDay); day = newDay; - + if (day > 31) { newDay = day % 10; logger.info("日期仍然无效,取模10: {} -> {}", day, newDay); day = newDay; } } - + // 日期有效性检查 if (year > 1900 && year < 2100 && month >= 1 && month <= 12 && day >= 1 && day <= 31) { String formattedDate = String.format("%04d-%02d-%02d", year, month, day); logger.info("字段 {} 的日期已修正: {} -> {}", key, value, formattedDate); extractedData.put(key, formattedDate); } else { - logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}", - key, year, month, day); + logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}", + key, year, month, day); } } catch (NumberFormatException e) { logger.warn("日期解析错误: {}, 异常: {}", value, e.getMessage()); @@ -1714,7 +1947,7 @@ public class FolderMonitor { } } } - + logger.info("时间字段处理完成,处理后数据: {}", extractedData); }