修改配置

2025-05-27 15:37:04 +08:00 · 2025-05-27 15:37:04 +08:00 · 43fc5b7971
commit 43fc5b7971
parent 685aa5a13a
4 changed files with 452 additions and 159 deletions
--- a/config.yaml
+++ b/config.yaml
@ -3,10 +3,20 @@ output:
  all_results: "./ocr_results/all_results.json"  # 所有识别结果
  current_results: "./ocr_results/current_results.json"  # 当前批次识别结果
  processed_files: "./ocr_results/processed_files.txt"  # 已处理文件列表
+  missing_key_files: "./ocr_results/missing_key_files.txt"  # 缺少关键字的文件路径
+  missing_key_results: "./ocr_results/missing_key_results.json"  # 缺少关键字的识别结果
+
+# 需要检查的关键字配置
+required_keys:
+  - "name"  # 姓名
+  - "examId"  # 检查编号
+  - "age"  # 年龄
+  - "gender"  # 性别
+  - "hr"  # 心率

 # 后端接口配置
-upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
-#upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
+# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
+upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData

 # 底部识别配置
 bottom_recognition:
@ -40,49 +50,43 @@ bottom_recognition:

 # 图片目录配置
 directories:
-  - path: "./test_images"
-    recognition_type: "normal"
-    key_mapping:
-      "编 号": "number"
-      "姓 名": "name"
-      "性 别": "gender"
-      "年 龄": "age"
-      "科 室": "department"
-      "床 号": "bed_number"
-      "HR": "HR"
-      "PR": "PR"
-      "QRS": "QRS"
-      "QT/QTC": "QT/QTC"
-      "P/QRS/T": "P/QRS/T"
-      "RV5/SV1": "RV5/SV1"
-      "RV5+SV1": "RV5+SV1"
-    bottom_key_words:  # 特定目录的底部关键字配置
-      - "检查日期"
-      - "日期"
-    bottom_key_mapping:  # 特定目录的底部关键字映射
-      "检查日期": "checkDate"
-      "日期": "date"

-  - path: "./ocr_images"
-    recognition_type: "normal"
-    key_mapping:
-      "ID": "id"
-      "申请科室": "department"
-      "病床号": "bed_number" 
-      "HR": "HR"
-      "P": "P"
-      "QRS": "QRS"
-      "QT/QTc": "QT/QTc"
-      "P/QRS/T": "P/QRS/T"
-      "RV5/SV1": "RV5/SV1"
+  - path: "../ecgimage/北屯中心卫生院"
+    recognition_type: "split"
+    recognition_area:
+      start_x: 0
+      start_y: 0
+      width: 60
+      height: 20
+    split_blocks:
+      - width_percent: 40
+        key_mapping:
+          "编 号": "examId"
+          "姓 名": "name"
+          "性 别": "gender"
+          "年 龄": "age"
+          "科 室": "department"
+          "床 号": "bed_number"
+      - width_percent: 35
+        key_mapping:
+          "HR": "hr"
+          "PR": "pr"
+          "QRS": "qrs"
+          "QT/QTC": "qt/qtc"
+          "P/QRS/T": "pAxle/qrsAxle/tAxle"
+          "RV5/SV1": "rv5/sv1"
+          "RV5+SV1": "rv5Sv1"
+      - width_percent: 35
+        key_mapping:
+          "备注": "notes"
+
    bottom_key_words:
      - "检查日期"
-      - "时间"
    bottom_key_mapping:
      "检查日期": "examDate"
      "时间": "examTime"

-  - path: "./礼泉县裴寨卫生院"  # 分块识别目录 - 3个分块
+  - path: "../ecgimage/礼泉县裴寨卫生院"  # 分块识别目录 - 3个分块
    recognition_type: "split"
    recognition_area:  # 识别区域配置
      start_x: 0  # 起始X坐标（百分比）
@ -117,44 +121,35 @@ directories:
    bottom_key_mapping:  # 特定目录的底部关键字映射
      "检查时间": "collectionTime"

-  - path: "./special_images"  # 分块识别目录 - 4个分块
-    recognition_type: "templateA"
+  - path: "../ecgimage/药王洞卫生院"  # 分块识别目录 - 3个分块
+    recognition_type: "split"
    recognition_area:
      start_x: 0
      start_y: 0
-      width: 100
-      height: 100
+      width: 50
+      height: 20
    split_blocks:
-      - width_percent: 25
+      - width_percent: 37
        key_mapping:
-          "患者": "patient"
-          "ID": "id"
-      - width_percent: 25
+          "编 号": "examId"
+          "姓 名": "name"
+          "性 别": "gender"
+          "年 龄": "age"
+          "科 室": "department"
+          "床 号": "bed_number"
+      - width_percent: 33
        key_mapping:
-          "检查项目": "exam_item"
-          "检查日期": "exam_date"
-      - width_percent: 25
+          "HR": "hr"
+          "PR": "pr"
+          "QRS": "qrs"
+          "QT/QTc": "qt/qtc"
+          "P/QRS/T": "pAxle/qrsAxle/tAxle"
+          "RV5/SV1": "rv5/sv1"
+          "RV5+SV1": "rv5Sv1"
+      - width_percent: 35
        key_mapping:
          "结果1": "result1"
-          "结果2": "result2"
-      - width_percent: 25
-        key_mapping:
-          "医师": "doctor"
-          "结论": "conclusion"
-  - path: "./建陵卫生院"  # 需要旋转90度的图片目录
-    recognition_type: "rotate90"  # 新增旋转类型
-    recognition_area:  # 添加旋转后的识别区域
-      start_x: 0
-      start_y: 0
-      width: 100  # 宽度百分比
-      height: 25  # 高度百分比
-    key_mapping:
-      "ID": "examId"
-      "姓 名": "name"
-      "年 龄": "age"
-      "性 别": "gender"
-      "时 间": "collectionTime"
-  - path: "./礼泉县城关卫生院"
+  - path: "../ecgimage/礼泉县城关卫生院"
    recognition_type: "CG"
    key_mapping:
      "姓名": "name"
@ -173,8 +168,20 @@ directories:
      - "检查时间"
    bottom_key_mapping:
      "检查时间": "collectionTime"
-
-  - path: "./史德卫生院"
+  - path: "../ecgimage/建陵卫生院"  # 需要旋转90度的图片目录
+    recognition_type: "rotate90"  # 新增旋转类型
+    recognition_area: # 添加旋转后的识别区域
+      start_x: 0
+      start_y: 0
+      width: 100  # 宽度百分比
+      height: 25  # 高度百分比
+    key_mapping:
+      "ID": "examId"
+      "姓 名": "name"
+      "年 龄": "age"
+      "性 别": "gender"
+      "时 间": "collectionTime"
+  - path: "./images/史德卫生院"
    recognition_type: "LQXSD"
    key_mapping:
      "姓名": "name"
@ -193,7 +200,6 @@ directories:
      - "检查时间"
    bottom_key_mapping:
      "检查时间": "collectionTime"
-
 # OCR程序与语言包路径配置

 # 新增Tesseract相关配置
@ -202,7 +208,7 @@ directories:
 # language 必须，指定语言包

 tesseract:
-  bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
+  bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe"
  data_path: "./tessdata"
  # data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
  language: "chi_sim+eng"
--- a/src/main/java/com/ocr/ConfigManager.java
+++ b/src/main/java/com/ocr/ConfigManager.java
@ -14,35 +14,75 @@ import java.util.*;
 public class ConfigManager {
    private static final Logger logger = LoggerFactory.getLogger(ConfigManager.class);
    private final List<DirectoryConfig> directoryConfigs;
-    private final String allResultsPath;
-    private final String currentResultsPath;
-    private final String processedFilesPath;
+    private String allResultsPath;
+    private String currentResultsPath;
+    private String processedFilesPath;
+    private String missingKeyFilesPath;
+    private String missingKeyResultsPath;
+    private List<String> requiredKeys;
    private final ObjectMapper objectMapper;
-    private final Map<String, Object> config;
+    private Map<String, Object> config;
+    private final String configPath;

+    /**
+     * Creates a manager backed by the YAML file at {@code configPath}.
+     *
+     * @param configPath path of the YAML configuration file to read
+     * @throws RuntimeException if loadConfig fails to read or interpret the file
+     */
     public ConfigManager(String configPath) {
+        this.configPath = configPath;
         this.objectMapper = new ObjectMapper();
-        this.config = loadConfig(configPath);
+        // Must run before loadDirectoryConfigs: it assigns the config field read on the next line.
+        loadConfig();
         this.directoryConfigs = loadDirectoryConfigs(config);
-        
-        @SuppressWarnings("unchecked")
-        Map<String, String> output = (Map<String, String>) config.get("output");
-        this.allResultsPath = output.get("all_results");
-        this.currentResultsPath = output.get("current_results");
-        this.processedFilesPath = output.get("processed_files");
-        
-        initializeOutputFiles();
     }

-    @SuppressWarnings("unchecked")
-    private Map<String, Object> loadConfig(String configPath) {
-        try (InputStream input = new FileInputStream(configPath)) {
+    /**
+     * Reads the YAML file at {@code configPath} and populates the config map, the five
+     * output paths and the required-key list, then prepares the output locations.
+     *
+     * @return the parsed configuration map (also stored in the {@code config} field)
+     * @throws RuntimeException if the file cannot be read, is empty, lacks the
+     *         {@code output} section, or the output locations cannot be prepared;
+     *         the original failure is attached as the cause
+     */
+    // The YAML structure is only known at runtime, so the casts below cannot be checked.
+    @SuppressWarnings("unchecked")
+    private Map<String, Object> loadConfig() {
+        try {
+            // Parse the YAML document; the stream is closed as soon as parsing finishes.
             Yaml yaml = new Yaml();
-            return yaml.load(input);
+            try (InputStream input = new FileInputStream(configPath)) {
+                config = yaml.load(input);
+            }
+            // load() can hand back null (e.g. for an empty document); report that
+            // explicitly instead of failing later with a bare NullPointerException.
+            if (config == null) {
+                throw new IllegalStateException("配置文件为空: " + configPath);
+            }
+
+            // Output file locations.
+            Object outputSection = config.get("output");
+            if (!(outputSection instanceof Map)) {
+                throw new IllegalStateException("配置文件缺少 output 配置段: " + configPath);
+            }
+            Map<String, String> output = (Map<String, String>) outputSection;
+            this.allResultsPath = output.get("all_results");
+            this.currentResultsPath = output.get("current_results");
+            this.processedFilesPath = output.get("processed_files");
+            this.missingKeyFilesPath = output.get("missing_key_files");
+            this.missingKeyResultsPath = output.get("missing_key_results");
+
+            // Keys every recognition result must contain. The section is optional, so an
+            // absent list becomes an empty one and callers can iterate without a null check.
+            List<String> configuredKeys = (List<String>) config.get("required_keys");
+            this.requiredKeys = configuredKeys != null ? configuredKeys : new ArrayList<String>();
+
+            // Make sure the output directories and bookkeeping files exist.
+            createOutputDirectories();
+
         } catch (Exception e) {
             logger.error("加载配置文件失败", e);
             throw new RuntimeException("加载配置文件失败", e);
         }
+        return config;
+    }
+
+    /**
+     * Creates the parent directory of every configured output file and pre-creates the
+     * processed-files list and the two missing-key files when they do not exist yet.
+     *
+     * @throws IllegalStateException if one of the five output paths is not configured
+     * @throws RuntimeException if a directory or file cannot be created
+     */
+    private void createOutputDirectories() {
+        // Keyed by the entry name under "output" so a missing entry can be named in the error.
+        Map<String, String> outputPaths = new LinkedHashMap<>();
+        outputPaths.put("all_results", allResultsPath);
+        outputPaths.put("current_results", currentResultsPath);
+        outputPaths.put("processed_files", processedFilesPath);
+        outputPaths.put("missing_key_files", missingKeyFilesPath);
+        outputPaths.put("missing_key_results", missingKeyResultsPath);
+
+        try {
+            for (Map.Entry<String, String> entry : outputPaths.entrySet()) {
+                String configured = entry.getValue();
+                if (configured == null || configured.trim().isEmpty()) {
+                    throw new IllegalStateException("配置缺少输出路径: output." + entry.getKey());
+                }
+                // getParent() is null for a bare file name such as "results.json"; there is
+                // no directory to create in that case.
+                java.nio.file.Path parent = Paths.get(configured).getParent();
+                if (parent != null) {
+                    Files.createDirectories(parent);
+                }
+            }
+
+            // Only these three files are pre-created as empty files; the two result files
+            // (all_results / current_results) are left untouched here.
+            for (String bookkeepingFile : Arrays.asList(
+                    processedFilesPath, missingKeyFilesPath, missingKeyResultsPath)) {
+                java.nio.file.Path file = Paths.get(bookkeepingFile);
+                if (!Files.exists(file)) {
+                    Files.createFile(file);
+                }
+            }
+        } catch (IOException e) {
+            logger.error("创建输出目录失败", e);
+            throw new RuntimeException("创建输出目录失败", e);
+        }
     }

    @SuppressWarnings("unchecked")
@ -232,6 +272,18 @@ public class ConfigManager {
        return processedFilesPath;
    }

+    /** Returns the path of the file that lists images whose OCR result lacked a required key. */
+    public String getMissingKeyFilesPath() {
+        return missingKeyFilesPath;
+    }
+
+    /** Returns the path of the file that stores the OCR results of images lacking a required key. */
+    public String getMissingKeyResultsPath() {
+        return missingKeyResultsPath;
+    }
+
+    /**
+     * Returns the keys that every recognition result is expected to contain.
+     *
+     * @return the configured keys, or an empty list when none were configured; never
+     *         {@code null}, so callers can iterate the result directly
+     */
+    public List<String> getRequiredKeys() {
+        return requiredKeys != null ? requiredKeys : Collections.<String>emptyList();
+    }
+
    public ObjectMapper getObjectMapper() {
        return objectMapper;
    }
--- a/src/main/java/com/ocr/FolderMonitor.java
+++ b/src/main/java/com/ocr/FolderMonitor.java
@ -64,14 +64,14 @@ public class FolderMonitor {
        this.httpClient = HttpClients.createDefault();
    }

-    public void processImage(Path imagePath) {
+    public Map<String, String> processImage(Path imagePath) {
        try {
            logger.info("开始处理图片: {}", imagePath);
            
            // 检查文件是否已处理
            if (isFileProcessed(imagePath)) {
                logger.info("文件已处理过，跳过: {}", imagePath);
-                return;
+                return new HashMap<>();
            }
            
            // 获取图片的完整路径
@ -105,80 +105,41 @@ public class FolderMonitor {
                }
            } catch (Exception e) {
                logger.error("识别逻辑处理异常", e);
-                return;
+                return new HashMap<>();
            }
            
-            // 添加orgName（父文件夹名称）和ecgDataFilePath（图片名称）
-            File imageFile = imagePath.toFile();
-            String fileName = imageFile.getName();
-            String parentFolderName = imageFile.getParentFile().getName();
-            
-            extractedData.put("orgName", parentFolderName);
-            extractedData.put("ecgDataFilePath", fileName);
-            
-            logger.info("添加文件信息 - orgName: {}, ecgDataFilePath: {}", parentFolderName, fileName);
-            
-            // 处理已提取的时间格式
-            processTimeFields(extractedData);
-            
-            // 检查是否需要进行底部识别
-            Map<String, Object> config = configManager.getConfig();
-            if (config.containsKey("bottom_recognition")) {
-                @SuppressWarnings("unchecked")
-                Map<String, Object> bottomConfig = (Map<String, Object>) config.get("bottom_recognition");
-                boolean enableBottomRecognition = (boolean) bottomConfig.getOrDefault("enable", false);
+            // 检查是否缺少必需的关键字
+            List<String> requiredKeys = configManager.getRequiredKeys();
+            if (requiredKeys != null && !requiredKeys.isEmpty()) {
+                List<String> missingKeys = new ArrayList<>();
+                for (String key : requiredKeys) {
+                    if (!extractedData.containsKey(key) || extractedData.get(key) == null || extractedData.get(key).trim().isEmpty()) {
+                        missingKeys.add(key);
+                    }
+                }
                
-                if (enableBottomRecognition) {
-                    // 查找目录特定的底部关键字配置
-                    List<String> keyWords;
-                    Map<String, String> keyMapping = null;
+                if (!missingKeys.isEmpty()) {
+                    logger.warn("图片缺少必需的关键字: {}, 文件路径: {}", missingKeys, imageFullPath);
                    
-                    // 获取全局关键字映射
-                    @SuppressWarnings("unchecked")
-                    Map<String, String> globalKeyMapping = bottomConfig.containsKey("key_mapping") ? 
-                        (Map<String, String>) bottomConfig.get("key_mapping") : Collections.emptyMap();
+                    // 记录缺少关键字的文件路径
+                    String missingKeyFilesPath = configManager.getMissingKeyFilesPath();
+                    Files.write(Paths.get(missingKeyFilesPath), 
+                              (imageFullPath + "\n").getBytes(StandardCharsets.UTF_8),
+                              java.nio.file.StandardOpenOption.APPEND);
                    
-                    // 获取当前目录的特定配置
-                    if (directoryConfig.getBottomKeyWords() != null && !directoryConfig.getBottomKeyWords().isEmpty()) {
-                        keyWords = directoryConfig.getBottomKeyWords();
-                        logger.info("使用目录特定的底部关键字: {}", keyWords);
-                        
-                        // 获取目录特定的关键字映射
-                        keyMapping = directoryConfig.getBottomKeyMapping();
-                        if (keyMapping != null && !keyMapping.isEmpty()) {
-                            logger.info("使用目录特定的底部关键字映射: {}", keyMapping);
-                        } else if (!globalKeyMapping.isEmpty()) {
-                            // 如果目录没有特定映射但有全局映射，使用全局的
-                            keyMapping = globalKeyMapping;
-                            logger.info("使用全局底部关键字映射: {}", keyMapping);
-                        }
-                    } else {
-                        // 使用全局配置
-                        @SuppressWarnings("unchecked")
-                        List<String> globalKeyWords = (List<String>) bottomConfig.getOrDefault("key_words", Collections.emptyList());
-                        keyWords = globalKeyWords;
-                        logger.info("使用全局底部关键字: {}", keyWords);
-                        
-                        // 使用全局关键字映射
-                        keyMapping = globalKeyMapping;
-                        if (!keyMapping.isEmpty()) {
-                            logger.info("使用全局底部关键字映射: {}", keyMapping);
-                        }
-                    }
+                    // 记录缺少关键字的识别结果
+                    Map<String, Object> missingKeyResult = new HashMap<>();
+                    missingKeyResult.put("file_path", imageFullPath);
+                    missingKeyResult.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
+                    missingKeyResult.put("missing_keys", missingKeys);
+                    missingKeyResult.put("extracted_data", extractedData);
                    
-                    // 创建带有目录特定关键字和映射的配置
-                    Map<String, Object> dirBottomConfig = new HashMap<>(bottomConfig);
-                    dirBottomConfig.put("key_words", keyWords);
-                    if (keyMapping != null && !keyMapping.isEmpty()) {
-                        dirBottomConfig.put("key_mapping", keyMapping);
-                    }
-                    
-                    // 调用通用底部识别方法
-                    Map<String, String> bottomData = recognizeBottomArea(imageFullPath, dirBottomConfig);
-                    if (!bottomData.isEmpty()) {
-                        logger.info("添加底部识别结果: {}", bottomData);
-                        extractedData.putAll(bottomData);
-                    }
+                    String missingKeyResultsPath = configManager.getMissingKeyResultsPath();
+                    ObjectMapper mapper = new ObjectMapper();
+                    String jsonResult = mapper.writeValueAsString(missingKeyResult) + "\n";
+                    Files.write(Paths.get(missingKeyResultsPath), 
+                              jsonResult.getBytes(StandardCharsets.UTF_8),
+                              java.nio.file.StandardOpenOption.APPEND);
                }
            }
            
@ -197,8 +158,11 @@ public class FolderMonitor {
            // 标记文件为已处理
            markFileAsProcessed(imagePath);
            
+            return extractedData;
+            
        } catch (Exception e) {
            logger.error("处理图片失败: " + imagePath, e);
+            return new HashMap<>();
        }
    }

@ -710,6 +674,144 @@ public class FolderMonitor {
        // 这里只是示例，实际可根据模板A的需求实现
        return processImageNormal(imageFullPath);
    }
+    // 已整体注释停用的CG识别逻辑（按区域裁剪后分别OCR提取字段）；当前生效的实现是下方未注释的同名方法 processImageWithCG
+/*
+    private Map<String, String> processImageWithCG(String imageFullPath) {
+        Map<String, String> extractedData = new HashMap<>();
+        try {
+            BufferedImage image = ImageIO.read(new File(imageFullPath));
+            int width = image.getWidth();
+            int height = image.getHeight();
+
+            // 1. 标题区和患者信息区分开裁剪
+            int titleHeight = (int) (height * 0.01); // 标题区1%
+            int infoHeight = (int) (height * 0.10);  // 患者信息区10%
+            // 跳过标题区，只识别患者信息区
+            BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight);
+            String infoOcr = tesseract.doOCR(infoArea);
+            String[] infoLines = infoOcr.split("\\r?\\n");
+            String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)";
+            String agePattern = "(\\d+)\\s*[岁%]";
+            String idPattern = "[I1l][D][:：]?\\s*([A-Za-z0-9]+)";
+            for (String line : infoLines) {
+                line = line.replaceAll("\\s+", " ").trim();
+                if (extractedData.get("name") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line);
+                    if (m.find()) {
+                        extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格
+                        extractedData.put("gender", m.group(2));
+                        java.util.regex.Matcher ageM = java.util.regex.Pattern.compile(agePattern).matcher(line);
+                        if (ageM.find()) {
+                            extractedData.put("age", ageM.group(1));
+                        }
+                    }
+                }
+                if (extractedData.get("id") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line);
+                    if (m.find()) {
+                        extractedData.put("id", m.group(1));
+                    }
+                }
+            }
+
+            // 2. 左侧参数区（提取HR、P、PR、QRS、QT/QTC、P/QRS/T、RV5/SV1）
+            int paramWidth = (int) (width * 0.32); // 左侧32%
+            int paramStartY = titleHeight;
+            int paramHeight = (int) (height * 0.355); // 参数区高度约35.5%
+            BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight);
+            String paramOcr = tesseract.doOCR(paramArea);
+            String[] paramLines = paramOcr.split("\\r?\\n");
+            String hrPattern = "HR\\s*[:：.·]?\\s*([\\dOo./]+[bB][pP][mM])";
+            String pPattern = "P\\s*[:：.·]?\\s*([\\dOo./]+[mM][sS])";
+            String prPattern = "PR\\s*[:：.·]?\\s*([\\dOo./]+[mM][sS])";
+            String qrsPattern = "QRS\\s*[:：.·]?\\s*([\\dOo./]+[mM][sS])";
+            String qtPattern = "QT/QT[cC]?\\s*[:：.·]?\\s*([\\dOo./]+[mM][sS])";
+            String pqrstPattern = "P/QRS/T\\s*[:：.·]?\\s*([\\dOo./]+)";
+            String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[:：.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?";
+            for (String line : paramLines) {
+                line = line.replaceAll("\\s+", " ").trim();
+                if (extractedData.get("HR") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
+                        extractedData.put("HR", hrValue);
+                    }
+                }
+                if (extractedData.get("P") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
+                        extractedData.put("P", pValue);
+                    }
+                }
+                if (extractedData.get("PR") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
+                        extractedData.put("PR", prValue);
+                    }
+                }
+                if (extractedData.get("QRS") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
+                        extractedData.put("QRS", qrsValue);
+                    }
+                }
+                if (extractedData.get("QT/QTC") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
+                        extractedData.put("QT/QTC", qtValue);
+                    }
+                }
+                if (extractedData.get("P/QRS/T") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
+                    if (m.find()) {
+                        String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", "");
+                        extractedData.put("P/QRS/T", pqrstValue);
+                    }
+                }
+                if (extractedData.get("RV5/SV1") == null) {
+                    if (line.toLowerCase().contains("sv1")) {
+                        java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
+                        if (m.find()) {
+                            String rv5 = m.group(1).replaceAll("[Oo]", "0");
+                            String sv1 = m.group(2).replaceAll("[Oo]", "0");
+                            String rv5sv1Value = rv5 + "/" + sv1;
+                            extractedData.put("RV5/SV1", rv5sv1Value);
+
+                        }
+                    }
+                }
+            }
+
+            // 3. 底部区域（提取检查时间）
+            int bottomHeight = (int) (height * 0.05); // 底部5%
+            int bottomStartY = height - bottomHeight;
+            BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
+            String bottomOcr = tesseract.doOCR(bottomArea);
+            String[] bottomLines = bottomOcr.split("\\r?\\n");
+            String checkTimePattern = "检查[:：]?\\s*([\\d-]+ [\\d:]+)";
+            for (String line : bottomLines) {
+                String lineNoSpace = line.replaceAll("\\s+", "");
+                if (extractedData.get("collectionTime") == null) {
+                    java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[:：]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
+                    if (m.find()) {
+                        String dateTime = m.group(1);
+                        if (!dateTime.contains(" ")) {
+                            dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
+                        }
+                        extractedData.put("collectionTime", dateTime);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            logger.error("processImageWithCG 区域识别异常", e);
+        }
+        return extractedData;
+    }
+*/

    // 新版CG识别逻辑：按区域裁剪后分别OCR提取字段
    private Map<String, String> processImageWithCG(String imageFullPath) {
--- a/src/main/java/com/ocr/ImageOcrMonitor.java
+++ b/src/main/java/com/ocr/ImageOcrMonitor.java
@ -15,15 +15,19 @@ import java.io.*;
 import java.nio.file.*;
 import java.util.*;
 import java.util.concurrent.*;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;

 public class ImageOcrMonitor {
    private static final Logger logger = LoggerFactory.getLogger(ImageOcrMonitor.class);
    private static final long CHECK_INTERVAL = 120000; // 2分钟
+    private static final long RETRY_INTERVAL = 600000; // 10分钟
    private final ConfigManager configManager;
    private final Map<String, FolderMonitor> folderMonitors;
    private final Map<String, WatchService> watchServices;
    private Tesseract tesseract;
    private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
+    private int currentRetryIndex = 0; // 当前重试的文件索引

    public ImageOcrMonitor(String configPath) {
        this.configManager = new ConfigManager(configPath);
@ -31,6 +35,7 @@ public class ImageOcrMonitor {
        this.watchServices = new HashMap<>();
        initializeMonitors();
        initTesseract();
+        startRetryTask();
    }

    private void initializeMonitors() {
@ -128,6 +133,134 @@ public class ImageOcrMonitor {
        }
    }

+    /**
+     * Schedules the periodic retry of images whose recognition lacked required keys.
+     * The first run happens RETRY_INTERVAL milliseconds after this call and the job
+     * is repeated at the same fixed rate on the shared scheduler.
+     */
+    private void startRetryTask() {
+        // Exceptions are caught inside the job so that a failing run is only logged.
+        Runnable retryJob = () -> {
+            try {
+                logger.info("开始扫描识别失败的文件...");
+                retryFailedRecognition();
+            } catch (Exception e) {
+                logger.error("重试识别失败", e);
+            }
+        };
+        scheduler.scheduleAtFixedRate(retryJob, RETRY_INTERVAL, RETRY_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    /**
+     * Retries recognition for one entry of the missing-key file list per invocation.
+     *
+     * <p>The entry at {@code currentRetryIndex} is re-processed through its FolderMonitor.
+     * When no required key is missing afterwards, the path and its stored result are
+     * removed; otherwise the stored result is replaced with the fresh one and the index
+     * advances to the next entry. Missing files, files without a monitor, and any
+     * exception also advance the index.
+     *
+     * <p>Both bookkeeping files are re-read after the OCR call and then rewritten, so lines
+     * appended while the OCR call was running are kept rather than overwritten by a stale
+     * snapshot. The results file is read as a sequence of JSON values, which accepts both
+     * the one-object-per-line records appended by FolderMonitor.processImage and a
+     * single JSON array; it is written back as one object per line.
+     */
+    private void retryFailedRecognition() {
+        try {
+            Path missingKeyFilesPath = Paths.get(configManager.getMissingKeyFilesPath());
+            if (!Files.exists(missingKeyFilesPath)) {
+                logger.info("没有需要重试的文件");
+                return;
+            }
+
+            List<String> failedFiles = readFailedFiles(missingKeyFilesPath);
+            if (failedFiles.isEmpty()) {
+                logger.info("没有需要重试的文件");
+                currentRetryIndex = 0; // start from the beginning next time
+                return;
+            }
+
+            // Wrap around once the end of the list has been passed.
+            if (currentRetryIndex >= failedFiles.size()) {
+                currentRetryIndex = 0;
+            }
+
+            // Captured up front so the progress log reflects the entry actually handled.
+            final int position = currentRetryIndex;
+            final int total = failedFiles.size();
+            final String currentFile = failedFiles.get(position);
+            logger.info("开始处理第 {} 个失败文件: {}", position + 1, currentFile);
+
+            Path path = Paths.get(currentFile);
+            if (!Files.exists(path)) {
+                logger.warn("文件不存在，跳过: {}", currentFile);
+                currentRetryIndex++;
+                return;
+            }
+
+            FolderMonitor monitor = findMonitorForFile(path);
+            if (monitor == null) {
+                logger.warn("找不到对应的FolderMonitor，跳过: {}", currentFile);
+                currentRetryIndex++;
+                return;
+            }
+
+            // NOTE(review): processImage returns an empty map for files it considers already
+            // processed. If failed files are marked as processed, every retry ends up in the
+            // "still failing" branch below - confirm against FolderMonitor.
+            Map<String, String> extractedData = monitor.processImage(path);
+            List<String> missingKeys = findMissingKeys(extractedData);
+            boolean recognized = missingKeys.isEmpty();
+
+            // Re-read after the OCR call: processImage may have appended to both files.
+            List<String> latestFailedFiles = readFailedFiles(missingKeyFilesPath);
+            Path missingKeyResultsPath = Paths.get(configManager.getMissingKeyResultsPath());
+            List<Map<String, Object>> results = readMissingKeyResults(missingKeyResultsPath);
+            // Drop every earlier record of this file; at most one fresh record is added back.
+            results.removeIf(record -> currentFile.equals(record.get("file_path")));
+
+            if (recognized) {
+                logger.info("文件识别成功，移除失败记录: {}", currentFile);
+                latestFailedFiles.remove(currentFile);
+            } else {
+                logger.info("文件仍然识别失败，更新结果: {}", currentFile);
+                Map<String, Object> record = new HashMap<>();
+                record.put("file_path", currentFile);
+                record.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
+                record.put("missing_keys", missingKeys);
+                record.put("extracted_data", extractedData);
+                results.add(record);
+            }
+
+            Files.write(missingKeyFilesPath, latestFailedFiles);
+            writeMissingKeyResults(missingKeyResultsPath, results);
+
+            // On success the index stays put: the following entry has moved into this slot.
+            if (!recognized) {
+                currentRetryIndex++;
+            }
+
+            logger.info("本次重试完成，当前处理进度: {}/{}", position + 1, total);
+        } catch (Exception e) {
+            logger.error("重试识别过程发生错误", e);
+            currentRetryIndex++; // do not get stuck on an entry that keeps failing
+        }
+    }
+
+    /** Reads the failed-file list, dropping blank lines and repeated paths while keeping order. */
+    private List<String> readFailedFiles(Path listFile) throws IOException {
+        Set<String> uniquePaths = new LinkedHashSet<>();
+        for (String line : Files.readAllLines(listFile)) {
+            if (!line.trim().isEmpty()) {
+                uniquePaths.add(line);
+            }
+        }
+        return new ArrayList<>(uniquePaths);
+    }
+
+    /** Returns the required keys that are absent or blank in {@code extractedData}. */
+    private List<String> findMissingKeys(Map<String, String> extractedData) {
+        List<String> missingKeys = new ArrayList<>();
+        List<String> requiredKeys = configManager.getRequiredKeys();
+        if (requiredKeys == null) {
+            return missingKeys;
+        }
+        for (String requiredKey : requiredKeys) {
+            String value = extractedData == null ? null : extractedData.get(requiredKey);
+            if (value == null || value.trim().isEmpty()) {
+                missingKeys.add(requiredKey);
+            }
+        }
+        return missingKeys;
+    }
+
+    /** Reads all result records from {@code resultsFile}, whether stored as JSON lines or as an array. */
+    private List<Map<String, Object>> readMissingKeyResults(Path resultsFile) throws IOException {
+        List<Map<String, Object>> records = new ArrayList<>();
+        if (!Files.exists(resultsFile)) {
+            return records;
+        }
+        // The records are written as UTF-8, so decode them the same way.
+        String content = new String(Files.readAllBytes(resultsFile), java.nio.charset.StandardCharsets.UTF_8);
+        if (content.trim().isEmpty()) {
+            return records;
+        }
+        // readValues iterates over consecutive root-level JSON values in the content.
+        try (com.fasterxml.jackson.databind.MappingIterator<Object> values =
+                     configManager.getObjectMapper().readerFor(Object.class).readValues(content)) {
+            while (values.hasNext()) {
+                collectResultRecords(values.next(), records);
+            }
+        }
+        return records;
+    }
+
+    /** Adds {@code node} to {@code sink} if it is a JSON object, or each object it contains if it is an array. */
+    @SuppressWarnings("unchecked")
+    private void collectResultRecords(Object node, List<Map<String, Object>> sink) {
+        if (node instanceof Map) {
+            sink.add((Map<String, Object>) node);
+        } else if (node instanceof List) {
+            for (Object child : (List<?>) node) {
+                collectResultRecords(child, sink);
+            }
+        }
+    }
+
+    /** Rewrites {@code resultsFile} with one JSON object per line, the format processImage appends in. */
+    private void writeMissingKeyResults(Path resultsFile, List<Map<String, Object>> records) throws IOException {
+        StringBuilder lines = new StringBuilder();
+        for (Map<String, Object> record : records) {
+            lines.append(configManager.getObjectMapper().writeValueAsString(record)).append("\n");
+        }
+        Files.write(resultsFile, lines.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
+    }
+
+    /**
+     * Finds the monitor responsible for {@code filePath}.
+     *
+     * <p>Paths are compared element by element after being made absolute and normalized,
+     * so a sibling directory that merely shares a name prefix (e.g. {@code img} vs
+     * {@code img2}) does not match. When several monitored directories contain the file,
+     * the deepest one is chosen.
+     *
+     * @param filePath path of the image file
+     * @return the matching monitor, or {@code null} when no monitored directory contains the file
+     */
+    private FolderMonitor findMonitorForFile(Path filePath) {
+        Path target = filePath.toAbsolutePath().normalize();
+        FolderMonitor bestMatch = null;
+        int bestDepth = -1;
+        for (Map.Entry<String, FolderMonitor> entry : folderMonitors.entrySet()) {
+            // assumes the map keys are directory paths - TODO confirm against initializeMonitors
+            Path monitoredDir = Paths.get(entry.getKey()).toAbsolutePath().normalize();
+            if (target.startsWith(monitoredDir) && monitoredDir.getNameCount() > bestDepth) {
+                bestMatch = entry.getValue();
+                bestDepth = monitoredDir.getNameCount();
+            }
+        }
+        return bestMatch;
+    }
+
    public static void main(String[] args) {
        try {
            logger.info("OCR监控程序启动...");