建陵卫生院

2025-05-15 09:55:57 +08:00 · 2025-05-15 09:55:57 +08:00 · dbac0cc964
commit dbac0cc964
parent 937e90c621
4 changed files with 585 additions and 42 deletions
--- a/config.yaml
+++ b/config.yaml
@ -5,7 +5,7 @@ output:
  processed_files: "./ocr_results/processed_files.txt"  # 已处理文件列表

 # 后端接口配置
-# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
+#upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
 upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData

 # 底部识别配置
@ -141,6 +141,20 @@ directories:
        key_mapping:
          "医师": "doctor"
          "结论": "conclusion"
+          
+  - path: "./建陵卫生院"  # 需要旋转90度的图片目录
+    recognition_type: "rotate90"  # 新增旋转类型
+    recognition_area:  # 添加旋转后的识别区域
+      start_x: 0
+      start_y: 0
+      width: 100  # 宽度百分比
+      height: 25  # 高度百分比
+    key_mapping:
+      "ID": "examId"
+      "姓 名": "name"
+      "年 龄": "age"
+      "性 别": "gender"
+      "时 间": "collectionTime"

 # OCR程序与语言包路径配置

--- a/src/main/java/com/ocr/ConfigManager.java
+++ b/src/main/java/com/ocr/ConfigManager.java
@ -71,7 +71,6 @@ public class ConfigManager {
            // 检查是否是分块模式
            if (dir.containsKey("recognition_area")) {
                Map<String, Integer> recognitionArea = (Map<String, Integer>) dir.get("recognition_area");
-                List<Map<String, Object>> splitBlocks = (List<Map<String, Object>>) dir.get("split_blocks");
                
                // 创建识别区域对象
                DirectoryConfig.RecognitionArea area = new DirectoryConfig.RecognitionArea(
@ -81,48 +80,88 @@ public class ConfigManager {
                    recognitionArea.get("height")
                );
                
-                // 创建切割块列表
-                List<DirectoryConfig.SplitBlock> blocks = new ArrayList<>();
-                int totalWidth = 0;
-                
-                // 计算总宽度，确保所有块的宽度总和为100%
-                for (Map<String, Object> block : splitBlocks) {
-                    totalWidth += (Integer) block.get("width_percent");
-                }
-                
-                if (totalWidth != 100) {
-                    logger.warn("目录 {} 的分块宽度总和为 {}%，不等于100%，将进行自动调整", path, totalWidth);
-                }
-                
-                // 创建分块配置
-                totalWidth = 0;
-                for (int i = 0; i < splitBlocks.size(); i++) {
-                    Map<String, Object> block = splitBlocks.get(i);
-                    int widthPercent = (Integer) block.get("width_percent");
-                    Map<String, String> keyMapping = (Map<String, String>) block.get("key_mapping");
-                    
-                    // 如果是最后一个块且总宽度不足100%，调整宽度
-                    if (i == splitBlocks.size() - 1 && totalWidth + widthPercent != 100) {
-                        widthPercent = 100 - totalWidth;
-                        logger.info("调整最后一个分块的宽度为 {}%，确保总宽度为100%", widthPercent);
+                // 根据识别类型处理
+                if ("rotate90".equals(recognitionType)) {
+                    // 旋转类型不需要分块
+                    Map<String, String> keyMapping = (Map<String, String>) dir.get("key_mapping");
+                    DirectoryConfig dirConfig;
+                    if (bottomKeyWords != null && bottomKeyMapping != null) {
+                        dirConfig = new DirectoryConfig(path, keyMapping, bottomKeyWords, bottomKeyMapping);
+                    } else if (bottomKeyWords != null) {
+                        dirConfig = new DirectoryConfig(path, keyMapping, bottomKeyWords);
+                    } else {
+                        dirConfig = new DirectoryConfig(path, keyMapping);
+                    }
+                    dirConfig.setRecognitionType(recognitionType);
+                    // 设置识别区域
+                    dirConfig.setRecognitionArea(area);
+                    configs.add(dirConfig);
+                    logger.info("加载旋转识别目录配置: {}", path);
+                } else {
+                    // 处理分块模式
+                    List<Map<String, Object>> splitBlocks = (List<Map<String, Object>>) dir.get("split_blocks");
+                    if (splitBlocks == null) {
+                        logger.warn("目录 {} 配置了recognition_area但没有split_blocks，将使用整个区域处理", path);
+                        // 如果没有分块配置，仍然使用普通模式
+                        Map<String, String> keyMapping = (Map<String, String>) dir.get("key_mapping");
+                        DirectoryConfig dirConfig;
+                        if (bottomKeyWords != null && bottomKeyMapping != null) {
+                            dirConfig = new DirectoryConfig(path, keyMapping, bottomKeyWords, bottomKeyMapping);
+                        } else if (bottomKeyWords != null) {
+                            dirConfig = new DirectoryConfig(path, keyMapping, bottomKeyWords);
+                        } else {
+                            dirConfig = new DirectoryConfig(path, keyMapping);
+                        }
+                        dirConfig.setRecognitionType(recognitionType);
+                        dirConfig.setRecognitionArea(area);
+                        configs.add(dirConfig);
+                        logger.info("加载区域识别目录配置: {}", path);
+                        continue;
                    }
                    
-                    blocks.add(new DirectoryConfig.SplitBlock(widthPercent, keyMapping));
-                    totalWidth += widthPercent;
+                    // 创建切割块列表
+                    List<DirectoryConfig.SplitBlock> blocks = new ArrayList<>();
+                    int totalWidth = 0;
+                    
+                    // 计算总宽度，确保所有块的宽度总和为100%
+                    for (Map<String, Object> block : splitBlocks) {
+                        totalWidth += (Integer) block.get("width_percent");
+                    }
+                    
+                    if (totalWidth != 100) {
+                        logger.warn("目录 {} 的分块宽度总和为 {}%，不等于100%，将进行自动调整", path, totalWidth);
+                    }
+                    
+                    // 创建分块配置
+                    totalWidth = 0;
+                    for (int i = 0; i < splitBlocks.size(); i++) {
+                        Map<String, Object> block = splitBlocks.get(i);
+                        int widthPercent = (Integer) block.get("width_percent");
+                        Map<String, String> keyMapping = (Map<String, String>) block.get("key_mapping");
+                        
+                        // 如果是最后一个块且总宽度不足100%，调整宽度
+                        if (i == splitBlocks.size() - 1 && totalWidth + widthPercent != 100) {
+                            widthPercent = 100 - totalWidth;
+                            logger.info("调整最后一个分块的宽度为 {}%，确保总宽度为100%", widthPercent);
+                        }
+                        
+                        blocks.add(new DirectoryConfig.SplitBlock(widthPercent, keyMapping));
+                        totalWidth += widthPercent;
+                    }
+                    
+                    // 创建目录配置（带有或不带有底部关键字和映射）
+                    DirectoryConfig dirConfig;
+                    if (bottomKeyWords != null && bottomKeyMapping != null) {
+                        dirConfig = new DirectoryConfig(path, area, blocks, bottomKeyWords, bottomKeyMapping);
+                    } else if (bottomKeyWords != null) {
+                        dirConfig = new DirectoryConfig(path, area, blocks, bottomKeyWords);
+                    } else {
+                        dirConfig = new DirectoryConfig(path, area, blocks);
+                    }
+                    dirConfig.setRecognitionType(recognitionType);
+                    configs.add(dirConfig);
+                    logger.info("加载分块目录配置: {}, 分块数: {}", path, blocks.size());
                }
-                
-                // 创建目录配置（带有或不带有底部关键字和映射）
-                DirectoryConfig dirConfig;
-                if (bottomKeyWords != null && bottomKeyMapping != null) {
-                    dirConfig = new DirectoryConfig(path, area, blocks, bottomKeyWords, bottomKeyMapping);
-                } else if (bottomKeyWords != null) {
-                    dirConfig = new DirectoryConfig(path, area, blocks, bottomKeyWords);
-                } else {
-                    dirConfig = new DirectoryConfig(path, area, blocks);
-                }
-                dirConfig.setRecognitionType(recognitionType);
-                configs.add(dirConfig);
-                logger.info("加载分块目录配置: {}, 分块数: {}", path, blocks.size());
            } else {
                Map<String, String> keyMapping = (Map<String, String>) dir.get("key_mapping");
                
--- a/src/main/java/com/ocr/DirectoryConfig.java
+++ b/src/main/java/com/ocr/DirectoryConfig.java
@ -105,6 +105,10 @@ public class DirectoryConfig {
        return recognitionArea;
    }

+    /**
+     * Replaces the recognition area stored on this directory configuration.
+     *
+     * @param recognitionArea the area to store; it is assigned as-is, with no validation or copy made here
+     */
+    public void setRecognitionArea(RecognitionArea recognitionArea) {
+        this.recognitionArea = recognitionArea;
+    }
+
+    /**
+     * Returns the split-block list held by this directory configuration.
+     *
+     * @return the {@code splitBlocks} field itself (no copy is made)
+     */
     public List<SplitBlock> getSplitBlocks() {
         return splitBlocks;
     }
--- a/src/main/java/com/ocr/FolderMonitor.java
+++ b/src/main/java/com/ocr/FolderMonitor.java
@ -17,6 +17,8 @@ import org.apache.http.util.EntityUtils;

 import javax.imageio.ImageIO;
 import java.awt.image.BufferedImage;
+import java.awt.geom.AffineTransform;
+import java.awt.Graphics2D;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
@ -87,6 +89,9 @@ public class FolderMonitor {
                    case "templateA":
                        extractedData = processImageWithTemplateA(imageFullPath);
                        break;
+                    case "rotate90":
+                        extractedData = processImageWithRotate90(imageFullPath);
+                        break;
                    case "normal":
                    default:
                        extractedData = processImageNormal(imageFullPath);
@ -107,6 +112,9 @@ public class FolderMonitor {
            
            logger.info("添加文件信息 - orgName: {}, ecgDataFilePath: {}", parentFolderName, fileName);
            
+            // 处理已提取的时间格式
+            processTimeFields(extractedData);
+            
            // 检查是否需要进行底部识别
            Map<String, Object> config = configManager.getConfig();
            if (config.containsKey("bottom_recognition")) {
@ -697,6 +705,484 @@ public class FolderMonitor {
        return processImageNormal(imageFullPath);
    }

+    /**
+     * Rotates the image by a quarter turn in memory and runs OCR on the rotated copy.
+     *
+     * <p>The transform used here ({@code translate(height, 0)} followed by
+     * {@code rotate(Math.PI / 2)}) maps source pixel {@code (x, y)} to {@code (height - y, x)},
+     * which is a 90-degree <em>clockwise</em> turn on screen. The file on disk is never modified.
+     *
+     * <p>NOTE(review): the recognition area configured for this directory is not applied here;
+     * the whole rotated image is passed to the OCR engine. Confirm whether cropping is wanted.
+     *
+     * @param imageFullPath full path of the image file to read
+     * @return the fields extracted from the OCR text via {@code processOcrResult}
+     * @throws IOException if the file cannot be decoded as an image
+     * @throws Exception   if the OCR engine or downstream parsing fails
+     */
+    private Map<String, String> processImageWithRotate90(String imageFullPath) throws Exception {
+        logger.info("使用旋转90度识别逻辑处理: {}", imageFullPath);
+
+        // ImageIO.read returns null (rather than throwing) when no reader understands the file.
+        BufferedImage originalImage = ImageIO.read(new File(imageFullPath));
+        if (originalImage == null) {
+            throw new IOException("无法读取图片（不支持的格式或文件损坏）: " + imageFullPath);
+        }
+
+        int width = originalImage.getWidth();
+        int height = originalImage.getHeight();
+
+        // TYPE_CUSTOM (0) is not accepted by the BufferedImage constructor, so fall back to a
+        // standard type that keeps transparency when the source has an alpha channel.
+        int imageType = originalImage.getType();
+        if (imageType == BufferedImage.TYPE_CUSTOM) {
+            imageType = originalImage.getColorModel().hasAlpha()
+                    ? BufferedImage.TYPE_INT_ARGB
+                    : BufferedImage.TYPE_INT_RGB;
+        }
+
+        // Width and height are swapped because the canvas holds the rotated picture.
+        BufferedImage rotatedImage = new BufferedImage(height, width, imageType);
+        Graphics2D g2d = rotatedImage.createGraphics();
+        try {
+            g2d.translate(height, 0);
+            g2d.rotate(Math.PI / 2);
+            g2d.drawImage(originalImage, 0, 0, null);
+        } finally {
+            g2d.dispose();
+        }
+
+        // Run OCR on the rotated copy. The previous code passed the original file to the engine,
+        // so the rotation above had no effect on the recognition result.
+        // NOTE(review): assumes `tesseract` is a Tess4J engine exposing doOCR(BufferedImage).
+        String result = tesseract.doOCR(rotatedImage);
+        logger.info("旋转90度后OCR识别结果: {}", result);
+
+        return processOcrResult(result, directoryConfig.getKeyMapping());
+    }
+    
+    /**
+     * Extracts the configured fields from raw OCR text using a fixed, position-based layout.
+     *
+     * <p>The text is split into lines and each line is trimmed with runs of whitespace collapsed
+     * to one space. Lines are then handled by position:
+     * <ul>
+     *   <li>line 1 - only mappings whose target field looks like a time/date field;</li>
+     *   <li>line 2 - only mappings whose target field looks like an ID field;</li>
+     *   <li>line 3 - patient name (dedicated extraction), then gender and age mappings;</li>
+     *   <li>remaining lines - the full mapping.</li>
+     * </ul>
+     * Field values are normalized by {@code processLine}.
+     *
+     * @param ocrResult  OCR output text; {@code null} yields an empty result
+     * @param keyMapping label text (as printed on the image) to target field name;
+     *                   {@code null} yields an empty result
+     * @return a new map of target field name to extracted value
+     */
+    private Map<String, String> processOcrResult(String ocrResult, Map<String, String> keyMapping) {
+        Map<String, String> extractedData = new HashMap<>();
+        if (ocrResult == null || keyMapping == null) {
+            logger.warn("OCR结果或关键词映射为空，跳过解析");
+            return extractedData;
+        }
+
+        String[] lines = ocrResult.split("\\r?\\n");
+        logger.info("OCR结果分割为 {} 行", lines.length);
+
+        // Line 1: time/date fields only.
+        String firstLine = normalizedOcrLine(lines, 0);
+        if (!firstLine.isEmpty()) {
+            logger.info("处理第一行(整行): {}", firstLine);
+            Map<String, String> timeMapping = selectMappingsByField(keyMapping,
+                    field -> field.contains("time") || field.contains("Time") || field.contains("日期"));
+            processLine(firstLine, timeMapping, extractedData);
+        }
+
+        // Line 2: ID fields only.
+        String secondLine = normalizedOcrLine(lines, 1);
+        if (!secondLine.isEmpty()) {
+            logger.info("处理第二行(主要提取ID): {}", secondLine);
+            Map<String, String> idMapping = selectMappingsByField(keyMapping,
+                    field -> field.contains("id") || field.contains("Id") || field.equals("ID"));
+            processLine(secondLine, idMapping, extractedData);
+        }
+
+        // Line 3: name, gender and age share one line.
+        String thirdLine = normalizedOcrLine(lines, 2);
+        if (!thirdLine.isEmpty()) {
+            logger.info("处理第三行(提取姓名、性别和年龄): {}", thirdLine);
+
+            // Target field that receives the patient name (first mapping that looks like a name).
+            String nameField = null;
+            for (String field : keyMapping.values()) {
+                if (field != null && (field.contains("name") || field.contains("姓名"))) {
+                    nameField = field;
+                    break;
+                }
+            }
+
+            // Whitespace has been collapsed, so the age label is either "年龄" or "年 龄".
+            int ageStart = thirdLine.indexOf("年龄");
+            if (ageStart < 0) {
+                ageStart = thirdLine.indexOf("年 龄");
+            }
+
+            if (nameField != null && thirdLine.contains("姓") && thirdLine.contains("名")) {
+                java.util.regex.Matcher labelled = java.util.regex.Pattern
+                        .compile("姓\\s*名\\s*[:：]?\\s*[\"\\s]*([^\\d年龄]{2,8})")
+                        .matcher(thirdLine);
+                if (labelled.find()) {
+                    String name = cleanOcrName(labelled.group(1));
+                    // An all-blank capture is not a name; leave the field for the fallback below.
+                    if (!name.isEmpty()) {
+                        extractedData.put(nameField, name);
+                        logger.info("从第三行提取姓名: {}", name);
+                    }
+                } else {
+                    // Fallback: take the text between the name label and the age label.
+                    int nameStart = thirdLine.indexOf("姓名");
+                    int labelLength = 2;
+                    if (nameStart < 0) {
+                        nameStart = thirdLine.indexOf("姓 名");
+                        labelLength = 3; // skip the embedded space too, otherwise "名" leaks into the name
+                    }
+                    if (nameStart >= 0 && ageStart > nameStart) {
+                        String name = cleanOcrName(thirdLine.substring(nameStart + labelLength, ageStart));
+                        if (!name.isEmpty()) {
+                            extractedData.put(nameField, name);
+                            logger.info("通过分割提取姓名: {}", name);
+                        }
+                    }
+                }
+            }
+
+            Map<String, String> genderAgeMapping = selectMappingsByField(keyMapping,
+                    field -> field.contains("gender") || field.contains("性别")
+                            || field.contains("age") || field.contains("年龄"));
+            processLine(thirdLine, genderAgeMapping, extractedData);
+
+            // Last resort when no name-like field has been stored yet: everything between the
+            // first "名" and the age label, e.g. `姓 名 : " 秦 浣 彷 年 龄 : 27`.
+            boolean nameMissing = extractedData.keySet().stream()
+                    .noneMatch(k -> k.contains("name") || k.contains("姓名"));
+            if (nameMissing && nameField != null
+                    && thirdLine.contains("姓") && thirdLine.contains("名")
+                    && thirdLine.contains("年") && thirdLine.contains("龄")) {
+                int nameStart = thirdLine.indexOf("名");
+                if (nameStart >= 0 && ageStart > nameStart) {
+                    String name = cleanOcrName(thirdLine.substring(nameStart + 1, ageStart));
+                    if (!name.isEmpty()) {
+                        extractedData.put(nameField, name);
+                        logger.info("通过位置提取姓名: {}", name);
+                    }
+                }
+            }
+        }
+
+        // Remaining lines: try every configured label.
+        for (int i = 3; i < lines.length; i++) {
+            String line = normalizedOcrLine(lines, i);
+            if (line.isEmpty()) {
+                continue;
+            }
+            logger.info("处理行 {}: {}", i + 1, line);
+            processLine(line, keyMapping, extractedData);
+        }
+
+        logger.info("提取结果: {}", extractedData);
+        return extractedData;
+    }
+
+    /** Returns line {@code index} trimmed with whitespace runs collapsed, or "" when there is no such line. */
+    private String normalizedOcrLine(String[] lines, int index) {
+        if (index >= lines.length) {
+            return "";
+        }
+        return lines[index].trim().replaceAll("\\s+", " ");
+    }
+
+    /** Returns the sub-mapping whose target field name satisfies {@code fieldMatches}. */
+    private Map<String, String> selectMappingsByField(Map<String, String> keyMapping,
+                                                      java.util.function.Predicate<String> fieldMatches) {
+        Map<String, String> selected = new HashMap<>();
+        for (Map.Entry<String, String> entry : keyMapping.entrySet()) {
+            String field = entry.getValue();
+            if (field != null && fieldMatches.test(field)) {
+                selected.put(entry.getKey(), field);
+            }
+        }
+        return selected;
+    }
+
+    /** Removes colons, straight/curly double quotes and all whitespace from an OCR name fragment. */
+    private String cleanOcrName(String raw) {
+        return raw.replaceAll("[:：\"\u201C\u201D\\s]", "");
+    }
+    
+    /**
+     * Scans one OCR text line for every configured label and stores the value that follows it.
+     *
+     * <p>For each mapping whose label occurs in the line, the text after the first occurrence
+     * (up to the next occurrence of the same label, if any) is taken, a leading colon is
+     * dropped, and the value is normalized according to the target field name:
+     * <ul>
+     *   <li>ID-like fields keep digits only;</li>
+     *   <li>gender-like fields are reduced to "男" or "女" when one of them is present;</li>
+     *   <li>age-like fields are reduced to a plausible number when possible;</li>
+     *   <li>time/date-like fields are reformatted to {@code yyyy-MM-dd} when parseable;</li>
+     *   <li>name-like fields lose all whitespace and are skipped entirely when they look like
+     *       an institution name or are longer than 10 characters.</li>
+     * </ul>
+     *
+     * @param line          the text line to inspect
+     * @param keyMapping    label text to target field name
+     * @param extractedData receives target field name to normalized value; existing entries
+     *                      with the same field name are overwritten
+     */
+    private void processLine(String line, Map<String, String> keyMapping, Map<String, String> extractedData) {
+        for (Map.Entry<String, String> entry : keyMapping.entrySet()) {
+            String key = entry.getKey();
+            String value = entry.getValue();
+
+            if (!line.contains(key)) {
+                continue;
+            }
+
+            // The label is literal text from the configuration, not a regular expression.
+            // Quoting it keeps labels such as "心率(bpm)" or "P-R?" from being misinterpreted
+            // (silently matching nothing, or throwing PatternSyntaxException).
+            String[] parts = line.split(java.util.regex.Pattern.quote(key));
+            if (parts.length <= 1) {
+                continue; // label sits at the end of the line: nothing follows it
+            }
+
+            String extractedValue = parts[1].trim();
+            if (extractedValue.startsWith(":") || extractedValue.startsWith("：")) {
+                extractedValue = extractedValue.substring(1).trim();
+            }
+
+            if (value.contains("id") || value.contains("Id") || value.equals("ID") || value.equals("examId")) {
+                extractedValue = extractedValue.replaceAll("[^0-9]", "");
+            } else if (value.contains("gender") || value.contains("性别")) {
+                if (extractedValue.contains("男")) {
+                    extractedValue = "男";
+                } else if (extractedValue.contains("女")) {
+                    extractedValue = "女";
+                }
+            } else if (value.contains("age") || value.contains("年龄")) {
+                extractedValue = normalizeOcrAge(extractedValue);
+            } else if (value.contains("collectionTime") || value.contains("exam_time")
+                    || value.contains("time") || value.contains("日期")) {
+                extractedValue = normalizeOcrDate(extractedValue);
+            } else if (value.contains("name") || value.contains("姓名")) {
+                extractedValue = extractedValue.replaceAll("\\s+", "");
+
+                // Text naming an institution is most likely the report header, not a patient.
+                if (extractedValue.contains("医院") || extractedValue.contains("卫生院")
+                        || extractedValue.contains("诊所") || extractedValue.contains("中心")) {
+                    logger.warn("疑似将机构名称误识别为姓名: {}", extractedValue);
+                    continue;
+                }
+                if (extractedValue.length() > 10) {
+                    logger.warn("疑似姓名长度异常: {}", extractedValue);
+                    continue;
+                }
+            }
+
+            extractedData.put(value, extractedValue);
+            logger.info("找到匹配: {} = {}", value, extractedValue);
+        }
+    }
+
+    /**
+     * Reduces an OCR age fragment to a number: the first digit run when it lies in 1..119,
+     * or its first two digits when it is above 120 (a neighbouring number was probably glued
+     * on). Returns the input unchanged when no usable number is found.
+     */
+    private String normalizeOcrAge(String raw) {
+        java.util.regex.Matcher digits = java.util.regex.Pattern.compile("\\d+").matcher(raw);
+        if (!digits.find()) {
+            return raw;
+        }
+        String ageStr = digits.group();
+        try {
+            int age = Integer.parseInt(ageStr);
+            if (age > 0 && age < 120) {
+                return String.valueOf(age);
+            }
+            if (age > 120 && ageStr.length() >= 3) {
+                String truncated = ageStr.substring(0, 2);
+                logger.info("年龄值过大，截取前两位: {} -> {}", ageStr, truncated);
+                return truncated;
+            }
+        } catch (NumberFormatException e) {
+            // Digit run too long for an int.
+            logger.warn("年龄解析错误: {}", ageStr);
+        }
+        return raw;
+    }
+
+    /**
+     * Reformats a dash-separated OCR date ("yyyy-MM-dd[ time]") to {@code yyyy-MM-dd}.
+     * A year later than the current one is replaced by the current year, and a month above 12
+     * is reduced to its last two digits. Returns the whitespace-collapsed input when the
+     * value cannot be parsed or fails the range check.
+     */
+    private String normalizeOcrDate(String raw) {
+        String collapsed = raw.replaceAll("\\s+", " ");
+        String[] dateParts = collapsed.split("-");
+        if (dateParts.length < 3) {
+            logger.warn("日期格式不正确: {}", collapsed);
+            return collapsed;
+        }
+        try {
+            // Parts are trimmed so that spaces around the dashes do not break parsing.
+            int year = Integer.parseInt(dateParts[0].trim());
+            int currentYear = java.time.LocalDate.now().getYear();
+            if (year > currentYear) {
+                year = currentYear;
+                logger.info("年份大于当前年份，使用当前年份: {}", year);
+            }
+
+            int month = Integer.parseInt(dateParts[1].trim());
+            if (month > 12) {
+                month = month % 100; // keep the last two digits, e.g. 904 -> 4
+                logger.info("月份大于12，取后两位: {}", month);
+            }
+
+            // The third part may still carry the time of day; keep the day number only.
+            String dayPart = dateParts[2].trim().split("\\s+")[0];
+            int day = Integer.parseInt(dayPart);
+
+            if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 && day <= 31) {
+                String formatted = String.format("%04d-%02d-%02d", year, month, day);
+                logger.info("格式化后的日期: {}", formatted);
+                return formatted;
+            }
+            logger.warn("日期无效: 年={}, 月={}, 日={}", year, month, day);
+        } catch (NumberFormatException e) {
+            logger.warn("日期解析错误: {}", collapsed);
+        }
+        return collapsed;
+    }
+
+    /**
+     * Rewrites every time-like entry of {@code extractedData} to {@code yyyy-MM-dd}.
+     *
+     * <p>An entry is time-like when its key contains "time", "Time", "collection", "时间" or
+     * "日期". Its value is first matched as a full date-time and, failing that, as a date only.
+     * Obvious OCR damage is then repaired with the same rules in both cases: a year later than
+     * the current one becomes the current year, a month above 12 and a day above 31 are reduced
+     * to their last digit. Values that still fail the range check, or that contain no date at
+     * all, are left untouched. Any time-of-day part is dropped from the stored value.
+     *
+     * @param extractedData field name to value, modified in place; {@code null} is ignored
+     */
+    private void processTimeFields(Map<String, String> extractedData) {
+        if (extractedData == null) {
+            return;
+        }
+        logger.info("开始处理时间字段，当前提取数据: {}", extractedData);
+
+        for (Map.Entry<String, String> entry : extractedData.entrySet()) {
+            String key = entry.getKey();
+            String value = entry.getValue();
+            if (key == null || value == null) {
+                continue;
+            }
+            boolean timeLike = key.contains("time") || key.contains("Time") || key.contains("collection")
+                    || key.contains("时间") || key.contains("日期");
+            if (!timeLike) {
+                continue;
+            }
+
+            logger.info("处理时间字段: {} = {}", key, value);
+            String collapsed = value.replaceAll("\\s+", " ");
+            String formattedDate = repairOcrDate(key, collapsed);
+            if (formattedDate != null) {
+                logger.info("字段 {} 的日期已修正: {} -> {}", key, collapsed, formattedDate);
+                // Writing through the entry avoids calling put() on the map being iterated.
+                entry.setValue(formattedDate);
+            }
+        }
+
+        logger.info("时间字段处理完成，处理后数据: {}", extractedData);
+    }
+
+    /** Date followed by hour, minute and second; separators are optional. */
+    private static final java.util.regex.Pattern OCR_DATE_TIME_PATTERN = java.util.regex.Pattern.compile(
+            "(\\d{4})[-/]?(\\d{1,2})[-/]?(\\d{1,2})\\s*(\\d{1,2})[:\\s]?(\\d{1,2})[:\\s]?(\\d{1,2})");
+
+    /** Date only; separators are optional. */
+    private static final java.util.regex.Pattern OCR_DATE_PATTERN = java.util.regex.Pattern.compile(
+            "(\\d{4})[-/]?(\\d{1,2})[-/]?(\\d{1,2})");
+
+    /**
+     * Parses and repairs one OCR date value.
+     *
+     * @param key   field name, used for log messages only
+     * @param value whitespace-collapsed field value
+     * @return the date as {@code yyyy-MM-dd}, or {@code null} when the value holds no date or
+     *         the repaired date is still out of range
+     */
+    private String repairOcrDate(String key, String value) {
+        java.util.regex.Matcher matcher = OCR_DATE_TIME_PATTERN.matcher(value);
+        boolean hasTime = matcher.find();
+        if (!hasTime) {
+            logger.info("字段 {} 不符合完整日期时间格式，尝试仅匹配日期部分", key);
+            matcher = OCR_DATE_PATTERN.matcher(value);
+            if (!matcher.find()) {
+                logger.warn("字段 {} 完全不匹配日期格式，保留原值: {}", key, value);
+                return null;
+            }
+        }
+
+        // Groups 1-3 are year/month/day in both patterns; each is a short ASCII digit run,
+        // so parseInt cannot fail here.
+        int year = Integer.parseInt(matcher.group(1));
+        int month = Integer.parseInt(matcher.group(2));
+        int day = Integer.parseInt(matcher.group(3));
+        if (hasTime) {
+            logger.info("原始日期时间值: 年={}, 月={}, 日={}, 时={}, 分={}, 秒={}",
+                      year, month, day,
+                      Integer.valueOf(matcher.group(4)), Integer.valueOf(matcher.group(5)),
+                      Integer.valueOf(matcher.group(6)));
+        } else {
+            logger.info("仅日期部分匹配: 年={}, 月={}, 日={}", year, month, day);
+        }
+
+        int currentYear = java.time.LocalDate.now().getYear();
+        if (year > currentYear) {
+            logger.info("年份 {} 大于当前年份 {}，将使用当前年份", year, currentYear);
+            year = currentYear;
+        }
+
+        // Month and day are captured with at most two digits, so keeping the last digit is the
+        // only repair that can bring an out-of-range value back into range.
+        if (month > 12) {
+            int newMonth = month % 10;
+            logger.info("取模10修正月份: {} -> {}", month, newMonth);
+            month = newMonth;
+        }
+        if (day > 31) {
+            int newDay = day % 10;
+            logger.info("日期仍然无效，取模10: {} -> {}", day, newDay);
+            day = newDay;
+        }
+
+        if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 && day <= 31) {
+            return String.format("%04d-%02d-%02d", year, month, day);
+        }
+        logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}",
+                  key, year, month, day);
+        return null;
+    }
+
    /**
     * 关闭资源
     */
@ -709,4 +1195,4 @@ public class FolderMonitor {
            logger.error("关闭HttpClient失败", e);
        }
    }
-} 
+}