修改配置

This commit is contained in:
yy2205 2025-05-27 15:37:04 +08:00
parent 685aa5a13a
commit 43fc5b7971
4 changed files with 452 additions and 159 deletions

View File

@ -3,10 +3,20 @@ output:
all_results: "./ocr_results/all_results.json" # 所有识别结果
current_results: "./ocr_results/current_results.json" # 当前批次识别结果
processed_files: "./ocr_results/processed_files.txt" # 已处理文件列表
missing_key_files: "./ocr_results/missing_key_files.txt" # 缺少关键字的文件路径
missing_key_results: "./ocr_results/missing_key_results.json" # 缺少关键字的识别结果
# 需要检查的关键字配置
required_keys:
- "name" # 姓名
- "examId" # 检查编号
- "age" # 年龄
- "gender" # 性别
- "hr" # 心率
# 后端接口配置
upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
#upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
# 底部识别配置
bottom_recognition:
@ -40,49 +50,43 @@ bottom_recognition:
# 图片目录配置
directories:
- path: "./test_images"
recognition_type: "normal"
key_mapping:
"编 号": "number"
"姓 名": "name"
"性 别": "gender"
"年 龄": "age"
"科 室": "department"
"床 号": "bed_number"
"HR": "HR"
"PR": "PR"
"QRS": "QRS"
"QT/QTC": "QT/QTC"
"P/QRS/T": "P/QRS/T"
"RV5/SV1": "RV5/SV1"
"RV5+SV1": "RV5+SV1"
bottom_key_words: # 特定目录的底部关键字配置
- "检查日期"
- "日期"
bottom_key_mapping: # 特定目录的底部关键字映射
"检查日期": "checkDate"
"日期": "date"
- path: "./ocr_images"
recognition_type: "normal"
key_mapping:
"ID": "id"
"申请科室": "department"
"病床号": "bed_number"
"HR": "HR"
"P": "P"
"QRS": "QRS"
"QT/QTc": "QT/QTc"
"P/QRS/T": "P/QRS/T"
"RV5/SV1": "RV5/SV1"
- path: "../ecgimage/北屯中心卫生院"
recognition_type: "split"
recognition_area:
start_x: 0
start_y: 0
width: 60
height: 20
split_blocks:
- width_percent: 40
key_mapping:
"编 号": "examId"
"姓 名": "name"
"性 别": "gender"
"年 龄": "age"
"科 室": "department"
"床 号": "bed_number"
- width_percent: 35
key_mapping:
"HR": "hr"
"PR": "pr"
"QRS": "qrs"
"QT/QTC": "qt/qtc"
"P/QRS/T": "pAxle/qrsAxle/tAxle"
"RV5/SV1": "rv5/sv1"
"RV5+SV1": "rv5Sv1"
- width_percent: 35
key_mapping:
"备注": "notes"
bottom_key_words:
- "检查日期"
- "时间"
bottom_key_mapping:
"检查日期": "examDate"
"时间": "examTime"
- path: "./礼泉县裴寨卫生院" # 分块识别目录 - 3个分块
- path: "../ecgimage/礼泉县裴寨卫生院" # 分块识别目录 - 3个分块
recognition_type: "split"
recognition_area: # 识别区域配置
start_x: 0 # 起始X坐标百分比
@ -117,44 +121,35 @@ directories:
bottom_key_mapping: # 特定目录的底部关键字映射
"检查时间": "collectionTime"
- path: "./special_images" # 分块识别目录 - 4个分块
recognition_type: "templateA"
- path: "../ecgimage/药王洞卫生院" # 分块识别目录 - 4个分块
recognition_type: "split"
recognition_area:
start_x: 0
start_y: 0
width: 100
height: 100
width: 50
height: 20
split_blocks:
- width_percent: 25
- width_percent: 37
key_mapping:
"患者": "patient"
"ID": "id"
- width_percent: 25
"编 号": "examId"
"姓 名": "name"
"性 别": "gender"
"年 龄": "age"
"科 室": "department"
"床 号": "bed_number"
- width_percent: 33
key_mapping:
"检查项目": "exam_item"
"检查日期": "exam_date"
- width_percent: 25
"HR": "hr"
"PR": "pr"
"QRS": "qrs"
"QT/QTc": "qt/qtc"
"P/QRS/T": "pAxle/qrsAxle/tAxle"
"RV5/SV1": "rv5/sv1"
"RV5+SV1": "rv5Sv1"
- width_percent: 35
key_mapping:
"结果1": "result1"
"结果2": "result2"
- width_percent: 25
key_mapping:
"医师": "doctor"
"结论": "conclusion"
- path: "./建陵卫生院" # 需要旋转90度的图片目录
recognition_type: "rotate90" # 新增旋转类型
recognition_area: # 添加旋转后的识别区域
start_x: 0
start_y: 0
width: 100 # 宽度百分比
height: 25 # 高度百分比
key_mapping:
"ID": "examId"
"姓 名": "name"
"年 龄": "age"
"性 别": "gender"
"时 间": "collectionTime"
- path: "./礼泉县城关卫生院"
- path: "../ecgimage/礼泉县城关卫生院"
recognition_type: "CG"
key_mapping:
"姓名": "name"
@ -173,8 +168,20 @@ directories:
- "检查时间"
bottom_key_mapping:
"检查时间": "collectionTime"
- path: "./史德卫生院"
- path: "../ecgimage/建陵卫生院" # 需要旋转90度的图片目录
recognition_type: "rotate90" # 新增旋转类型
recognition_area: # 添加旋转后的识别区域
start_x: 0
start_y: 0
width: 100 # 宽度百分比
height: 25 # 高度百分比
key_mapping:
"ID": "examId"
"姓 名": "name"
"年 龄": "age"
"性 别": "gender"
"时 间": "collectionTime"
- path: "./images/史德卫生院"
recognition_type: "LQXSD"
key_mapping:
"姓名": "name"
@ -193,7 +200,6 @@ directories:
- "检查时间"
bottom_key_mapping:
"检查时间": "collectionTime"
# OCR程序与语言包路径配置
# 新增Tesseract相关配置
@ -202,7 +208,7 @@ directories:
# language 必须,指定语言包
tesseract:
bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe"
data_path: "./tessdata"
# data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
language: "chi_sim+eng"

View File

@ -14,35 +14,75 @@ import java.util.*;
public class ConfigManager {
private static final Logger logger = LoggerFactory.getLogger(ConfigManager.class);
private final List<DirectoryConfig> directoryConfigs;
private final String allResultsPath;
private final String currentResultsPath;
private final String processedFilesPath;
private String allResultsPath;
private String currentResultsPath;
private String processedFilesPath;
private String missingKeyFilesPath;
private String missingKeyResultsPath;
private List<String> requiredKeys;
private final ObjectMapper objectMapper;
private final Map<String, Object> config;
private Map<String, Object> config;
private final String configPath;
public ConfigManager(String configPath) {
this.configPath = configPath;
this.objectMapper = new ObjectMapper();
this.config = loadConfig(configPath);
loadConfig();
this.directoryConfigs = loadDirectoryConfigs(config);
@SuppressWarnings("unchecked")
Map<String, String> output = (Map<String, String>) config.get("output");
this.allResultsPath = output.get("all_results");
this.currentResultsPath = output.get("current_results");
this.processedFilesPath = output.get("processed_files");
initializeOutputFiles();
}
@SuppressWarnings("unchecked")
private Map<String, Object> loadConfig(String configPath) {
try (InputStream input = new FileInputStream(configPath)) {
private Map<String, Object> loadConfig() {
try {
// 读取配置文件
Yaml yaml = new Yaml();
return yaml.load(input);
try (InputStream input = new FileInputStream(configPath)) {
config = yaml.load(input);
}
// 设置输出文件路径
Map<String, String> output = (Map<String, String>) config.get("output");
this.allResultsPath = output.get("all_results");
this.currentResultsPath = output.get("current_results");
this.processedFilesPath = output.get("processed_files");
this.missingKeyFilesPath = output.get("missing_key_files");
this.missingKeyResultsPath = output.get("missing_key_results");
// 设置必需关键字
this.requiredKeys = (List<String>) config.get("required_keys");
// 确保输出目录存在
createOutputDirectories();
} catch (Exception e) {
logger.error("加载配置文件失败", e);
throw new RuntimeException("加载配置文件失败", e);
}
return config;
}
/**
 * Prepares the output locations read from the {@code output} section of the configuration.
 *
 * <p>Creates the parent directory of every configured output path, then creates empty
 * files for the processed-file list, the missing-key file list and the missing-key
 * results file when they do not exist yet. The two result JSON files
 * ({@code all_results}, {@code current_results}) only get their directory created here.
 *
 * @throws IllegalStateException if one of the output paths is missing from the configuration
 * @throws RuntimeException if a directory or file cannot be created (wraps the {@link IOException})
 */
private void createOutputDirectories() {
    try {
        // Make sure the directory of every output file exists.
        ensureParentDirectory("all_results", allResultsPath);
        ensureParentDirectory("current_results", currentResultsPath);
        ensureParentDirectory("processed_files", processedFilesPath);
        ensureParentDirectory("missing_key_files", missingKeyFilesPath);
        ensureParentDirectory("missing_key_results", missingKeyResultsPath);

        // These files are appended to later, so they must exist up front.
        ensureFileExists(processedFilesPath);
        ensureFileExists(missingKeyFilesPath);
        ensureFileExists(missingKeyResultsPath);
    } catch (IOException e) {
        logger.error("创建输出目录失败", e);
        throw new RuntimeException("创建输出目录失败", e);
    }
}

/**
 * Creates the parent directory of {@code configuredPath} if it has one.
 *
 * @param key            name of the entry under {@code output}, used only for the error message
 * @param configuredPath path value read from the configuration, may be {@code null}
 */
private static void ensureParentDirectory(String key, String configuredPath) throws IOException {
    if (configuredPath == null || configuredPath.trim().isEmpty()) {
        // Fail with a message that names the missing key instead of an anonymous NPE.
        throw new IllegalStateException("Missing output path in configuration: output." + key);
    }
    // A bare file name such as "results.json" has no parent until it is made absolute;
    // a file-system root has none at all, in which case there is nothing to create.
    java.nio.file.Path parent = Paths.get(configuredPath).toAbsolutePath().getParent();
    if (parent != null) {
        Files.createDirectories(parent);
    }
}

/** Creates an empty file at {@code configuredPath} unless something already exists there. */
private static void ensureFileExists(String configuredPath) throws IOException {
    java.nio.file.Path file = Paths.get(configuredPath);
    if (!Files.exists(file)) {
        Files.createFile(file);
    }
}
@SuppressWarnings("unchecked")
@ -232,6 +272,18 @@ public class ConfigManager {
return processedFilesPath;
}
/**
 * Returns the path read from {@code output.missing_key_files} in the configuration,
 * i.e. the text file that lists images for which required keys were not recognised.
 *
 * @return the configured path as read from the configuration
 */
public String getMissingKeyFilesPath() {
return missingKeyFilesPath;
}
/**
 * Returns the path read from {@code output.missing_key_results} in the configuration,
 * i.e. the file that stores the recognition results of images with missing required keys.
 *
 * @return the configured path as read from the configuration
 */
public String getMissingKeyResultsPath() {
return missingKeyResultsPath;
}
/**
 * Returns the keys listed under {@code required_keys} in the configuration.
 *
 * <p>The section is optional; when it is absent an empty list is returned instead of
 * {@code null}, so callers can iterate the result without a null check.
 *
 * @return the required keys, never {@code null}
 */
public List<String> getRequiredKeys() {
    if (requiredKeys == null) {
        return Collections.<String>emptyList();
    }
    return requiredKeys;
}
/**
 * Returns the {@code ObjectMapper} instance created in the constructor of this manager.
 *
 * @return the mapper held by this manager
 */
public ObjectMapper getObjectMapper() {
return objectMapper;
}

View File

@ -64,14 +64,14 @@ public class FolderMonitor {
this.httpClient = HttpClients.createDefault();
}
public void processImage(Path imagePath) {
public Map<String, String> processImage(Path imagePath) {
try {
logger.info("开始处理图片: {}", imagePath);
// 检查文件是否已处理
if (isFileProcessed(imagePath)) {
logger.info("文件已处理过,跳过: {}", imagePath);
return;
return new HashMap<>();
}
// 获取图片的完整路径
@ -105,80 +105,41 @@ public class FolderMonitor {
}
} catch (Exception e) {
logger.error("识别逻辑处理异常", e);
return;
return new HashMap<>();
}
// 添加orgName父文件夹名称和ecgDataFilePath图片名称
File imageFile = imagePath.toFile();
String fileName = imageFile.getName();
String parentFolderName = imageFile.getParentFile().getName();
extractedData.put("orgName", parentFolderName);
extractedData.put("ecgDataFilePath", fileName);
logger.info("添加文件信息 - orgName: {}, ecgDataFilePath: {}", parentFolderName, fileName);
// 处理已提取的时间格式
processTimeFields(extractedData);
// 检查是否需要进行底部识别
Map<String, Object> config = configManager.getConfig();
if (config.containsKey("bottom_recognition")) {
@SuppressWarnings("unchecked")
Map<String, Object> bottomConfig = (Map<String, Object>) config.get("bottom_recognition");
boolean enableBottomRecognition = (boolean) bottomConfig.getOrDefault("enable", false);
// 检查是否缺少必需的关键字
List<String> requiredKeys = configManager.getRequiredKeys();
if (requiredKeys != null && !requiredKeys.isEmpty()) {
List<String> missingKeys = new ArrayList<>();
for (String key : requiredKeys) {
if (!extractedData.containsKey(key) || extractedData.get(key) == null || extractedData.get(key).trim().isEmpty()) {
missingKeys.add(key);
}
}
if (enableBottomRecognition) {
// 查找目录特定的底部关键字配置
List<String> keyWords;
Map<String, String> keyMapping = null;
if (!missingKeys.isEmpty()) {
logger.warn("图片缺少必需的关键字: {}, 文件路径: {}", missingKeys, imageFullPath);
// 获取全局关键字映射
@SuppressWarnings("unchecked")
Map<String, String> globalKeyMapping = bottomConfig.containsKey("key_mapping") ?
(Map<String, String>) bottomConfig.get("key_mapping") : Collections.emptyMap();
// 记录缺少关键字的文件路径
String missingKeyFilesPath = configManager.getMissingKeyFilesPath();
Files.write(Paths.get(missingKeyFilesPath),
(imageFullPath + "\n").getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
// 获取当前目录的特定配置
if (directoryConfig.getBottomKeyWords() != null && !directoryConfig.getBottomKeyWords().isEmpty()) {
keyWords = directoryConfig.getBottomKeyWords();
logger.info("使用目录特定的底部关键字: {}", keyWords);
// 获取目录特定的关键字映射
keyMapping = directoryConfig.getBottomKeyMapping();
if (keyMapping != null && !keyMapping.isEmpty()) {
logger.info("使用目录特定的底部关键字映射: {}", keyMapping);
} else if (!globalKeyMapping.isEmpty()) {
// 如果目录没有特定映射但有全局映射使用全局的
keyMapping = globalKeyMapping;
logger.info("使用全局底部关键字映射: {}", keyMapping);
}
} else {
// 使用全局配置
@SuppressWarnings("unchecked")
List<String> globalKeyWords = (List<String>) bottomConfig.getOrDefault("key_words", Collections.emptyList());
keyWords = globalKeyWords;
logger.info("使用全局底部关键字: {}", keyWords);
// 使用全局关键字映射
keyMapping = globalKeyMapping;
if (!keyMapping.isEmpty()) {
logger.info("使用全局底部关键字映射: {}", keyMapping);
}
}
// 记录缺少关键字的识别结果
Map<String, Object> missingKeyResult = new HashMap<>();
missingKeyResult.put("file_path", imageFullPath);
missingKeyResult.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
missingKeyResult.put("missing_keys", missingKeys);
missingKeyResult.put("extracted_data", extractedData);
// 创建带有目录特定关键字和映射的配置
Map<String, Object> dirBottomConfig = new HashMap<>(bottomConfig);
dirBottomConfig.put("key_words", keyWords);
if (keyMapping != null && !keyMapping.isEmpty()) {
dirBottomConfig.put("key_mapping", keyMapping);
}
// 调用通用底部识别方法
Map<String, String> bottomData = recognizeBottomArea(imageFullPath, dirBottomConfig);
if (!bottomData.isEmpty()) {
logger.info("添加底部识别结果: {}", bottomData);
extractedData.putAll(bottomData);
}
String missingKeyResultsPath = configManager.getMissingKeyResultsPath();
ObjectMapper mapper = new ObjectMapper();
String jsonResult = mapper.writeValueAsString(missingKeyResult) + "\n";
Files.write(Paths.get(missingKeyResultsPath),
jsonResult.getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
}
}
@ -197,8 +158,11 @@ public class FolderMonitor {
// 标记文件为已处理
markFileAsProcessed(imagePath);
return extractedData;
} catch (Exception e) {
logger.error("处理图片失败: " + imagePath, e);
return new HashMap<>();
}
}
@ -710,6 +674,144 @@ public class FolderMonitor {
// 这里只是示例实际可根据模板A的需求实现
return processImageNormal(imageFullPath);
}
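// Disabled variant of the CG recognition logic (crop by region, then OCR each region); it is kept commented out for reference only. The active processImageWithCG is defined right after this block comment.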
/*
private Map<String, String> processImageWithCG(String imageFullPath) {
Map<String, String> extractedData = new HashMap<>();
try {
BufferedImage image = ImageIO.read(new File(imageFullPath));
int width = image.getWidth();
int height = image.getHeight();
// 1. 标题区和患者信息区分开裁剪
int titleHeight = (int) (height * 0.01); // title area: top 1% of the image height
int infoHeight = (int) (height * 0.10); // 患者信息区10%
// 跳过标题区只识别患者信息区
BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
String[] infoLines = infoOcr.split("\\r?\\n");
String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)";
String agePattern = "(\\d+)\\s*[岁%]";
String idPattern = "[I1l][D][:]?\\s*([A-Za-z0-9]+)";
for (String line : infoLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("name") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line);
if (m.find()) {
extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格
extractedData.put("gender", m.group(2));
java.util.regex.Matcher ageM = java.util.regex.Pattern.compile(agePattern).matcher(line);
if (ageM.find()) {
extractedData.put("age", ageM.group(1));
}
}
}
if (extractedData.get("id") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line);
if (m.find()) {
extractedData.put("id", m.group(1));
}
}
}
// 2. 左侧参数区提取HRPPRQRSQT/QTCP/QRS/TRV5/SV1
int paramWidth = (int) (width * 0.32); // 左侧32%
int paramStartY = titleHeight;
int paramHeight = (int) (height * 0.355); // parameter area height: 35.5% of the image height
BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight);
String paramOcr = tesseract.doOCR(paramArea);
String[] paramLines = paramOcr.split("\\r?\\n");
String hrPattern = "HR\\s*[:.·]?\\s*([\\dOo./]+[bB][pP][mM])";
String pPattern = "P\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String prPattern = "PR\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String qrsPattern = "QRS\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String qtPattern = "QT/QT[cC]?\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String pqrstPattern = "P/QRS/T\\s*[:.·]?\\s*([\\dOo./]+)";
String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[:.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?";
for (String line : paramLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("HR") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("HR", hrValue);
}
}
if (extractedData.get("P") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("P", pValue);
}
}
if (extractedData.get("PR") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("PR", prValue);
}
}
if (extractedData.get("QRS") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("QRS", qrsValue);
}
}
if (extractedData.get("QT/QTC") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
extractedData.put("QT/QTC", qtValue);
}
}
if (extractedData.get("P/QRS/T") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", "");
extractedData.put("P/QRS/T", pqrstValue);
}
}
if (extractedData.get("RV5/SV1") == null) {
if (line.toLowerCase().contains("sv1")) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
if (m.find()) {
String rv5 = m.group(1).replaceAll("[Oo]", "0");
String sv1 = m.group(2).replaceAll("[Oo]", "0");
String rv5sv1Value = rv5 + "/" + sv1;
extractedData.put("RV5/SV1", rv5sv1Value);
}
}
}
}
// 3. 底部区域提取检查时间
int bottomHeight = (int) (height * 0.05); // bottom area: lowest 5% of the image height
int bottomStartY = height - bottomHeight;
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
String bottomOcr = tesseract.doOCR(bottomArea);
String[] bottomLines = bottomOcr.split("\\r?\\n");
String checkTimePattern = "检查[:]?\\s*([\\d-]+ [\\d:]+)";
for (String line : bottomLines) {
String lineNoSpace = line.replaceAll("\\s+", "");
if (extractedData.get("collectionTime") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
if (m.find()) {
String dateTime = m.group(1);
if (!dateTime.contains(" ")) {
dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
}
extractedData.put("collectionTime", dateTime);
}
}
}
} catch (Exception e) {
logger.error("processImageWithCG 区域识别异常", e);
}
return extractedData;
}
*/
// 新版CG识别逻辑按区域裁剪后分别OCR提取字段
private Map<String, String> processImageWithCG(String imageFullPath) {

View File

@ -15,15 +15,19 @@ import java.io.*;
import java.nio.file.*;
import java.util.*;
import java.util.concurrent.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
public class ImageOcrMonitor {
private static final Logger logger = LoggerFactory.getLogger(ImageOcrMonitor.class);
private static final long CHECK_INTERVAL = 120000; // 2分钟
private static final long RETRY_INTERVAL = 600000; // 10分钟
private final ConfigManager configManager;
private final Map<String, FolderMonitor> folderMonitors;
private final Map<String, WatchService> watchServices;
private Tesseract tesseract;
private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
private int currentRetryIndex = 0; // 当前重试的文件索引
public ImageOcrMonitor(String configPath) {
this.configManager = new ConfigManager(configPath);
@ -31,6 +35,7 @@ public class ImageOcrMonitor {
this.watchServices = new HashMap<>();
initializeMonitors();
initTesseract();
startRetryTask();
}
private void initializeMonitors() {
@ -128,6 +133,134 @@ public class ImageOcrMonitor {
}
}
/**
 * Schedules the periodic retry of files whose recognition missed required keys.
 *
 * <p>The job first runs after {@code RETRY_INTERVAL} milliseconds and is then repeated at
 * the same fixed rate on {@code scheduler}. Any exception thrown by a run is logged
 * inside the job, so it never propagates to the executor.
 */
private void startRetryTask() {
    Runnable retryJob = () -> {
        try {
            logger.info("开始扫描识别失败的文件...");
            retryFailedRecognition();
        } catch (Exception e) {
            logger.error("重试识别失败", e);
        }
    };
    scheduler.scheduleAtFixedRate(retryJob, RETRY_INTERVAL, RETRY_INTERVAL, TimeUnit.MILLISECONDS);
}
/**
 * Retries recognition for one previously failed file per invocation.
 *
 * <p>Candidates are the paths listed in the missing-key file list. The entry at
 * {@code currentRetryIndex} is processed again by its {@link FolderMonitor}:
 * <ul>
 *   <li>if no required key is missing any more, the path is removed from the list;</li>
 *   <li>otherwise its record in the missing-key results file is updated (or added) and the
 *       index advances to the next entry.</li>
 * </ul>
 * Entries whose file no longer exists, or for which no monitor is found, are skipped but
 * stay in the list. Any exception is logged and the index advances, so one bad entry
 * cannot block the others.
 *
 * <p>NOTE(review): {@code FolderMonitor.processImage} returns an empty map for files it
 * considers already processed. If failed files are marked as processed, every retry will
 * report all keys as missing. Confirm against {@code isFileProcessed}.
 */
private void retryFailedRecognition() {
    try {
        Path missingKeyFilesPath = Paths.get(configManager.getMissingKeyFilesPath());
        if (!Files.exists(missingKeyFilesPath)) {
            logger.info("没有需要重试的文件");
            return;
        }

        List<String> failedFiles = readFailedFileList(missingKeyFilesPath);
        if (failedFiles.isEmpty()) {
            logger.info("没有需要重试的文件");
            currentRetryIndex = 0; // nothing left, start from the beginning next time
            return;
        }

        // Wrap around once the end of the list has been reached.
        if (currentRetryIndex >= failedFiles.size()) {
            currentRetryIndex = 0;
        }

        String currentFile = failedFiles.get(currentRetryIndex);
        logger.info("开始处理第 {} 个失败文件: {}", currentRetryIndex + 1, currentFile);

        Path path = Paths.get(currentFile);
        if (!Files.exists(path)) {
            logger.warn("文件不存在,跳过: {}", currentFile);
            currentRetryIndex++;
            return;
        }

        FolderMonitor monitor = findMonitorForFile(path);
        if (monitor == null) {
            logger.warn("找不到对应的FolderMonitor跳过: {}", currentFile);
            currentRetryIndex++;
            return;
        }

        Map<String, String> extractedData = monitor.processImage(path);
        List<String> missingKeys = findMissingKeys(extractedData);

        int totalEntries;
        if (missingKeys.isEmpty()) {
            logger.info("文件识别成功,移除失败记录: {}", currentFile);
            // Re-read the list so that entries appended while the image was being processed
            // are not lost, then drop every occurrence of the recovered file.
            List<String> latest = readFailedFileList(missingKeyFilesPath);
            latest.removeIf(currentFile::equals);
            Files.write(missingKeyFilesPath, latest);
            totalEntries = latest.size();
            // The index is not advanced: the list shrank, so it already points at the next entry.
        } else {
            logger.info("文件仍然识别失败,更新结果: {}", currentFile);
            // The results file is only parsed here, so a damaged results file cannot
            // prevent the re-recognition itself.
            Path missingKeyResultsPath = Paths.get(configManager.getMissingKeyResultsPath());
            List<Map<String, Object>> existingResults = readMissingKeyResults(missingKeyResultsPath);
            String processTime = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME);

            Map<String, Object> record = null;
            for (Map<String, Object> candidate : existingResults) {
                if (currentFile.equals(candidate.get("file_path"))) {
                    record = candidate;
                    break;
                }
            }
            if (record == null) {
                record = new HashMap<>();
                record.put("file_path", currentFile);
                existingResults.add(record);
            }
            record.put("process_time", processTime);
            record.put("missing_keys", missingKeys);
            record.put("extracted_data", extractedData);

            writeMissingKeyResults(missingKeyResultsPath, existingResults);
            currentRetryIndex++;
            totalEntries = failedFiles.size();
        }

        // currentRetryIndex now equals the number of entries handled in this pass.
        logger.info("本次重试完成,当前处理进度: {}/{}", currentRetryIndex, totalEntries);
    } catch (Exception e) {
        logger.error("重试识别过程发生错误", e);
        currentRetryIndex++; // move on so a single bad entry is not retried forever
    }
}

/**
 * Reads the failed-file list, dropping blank lines and duplicate paths while keeping
 * the original order.
 */
private List<String> readFailedFileList(Path listPath) throws IOException {
    Set<String> unique = new LinkedHashSet<>();
    for (String line : Files.readAllLines(listPath)) {
        if (!line.trim().isEmpty()) {
            unique.add(line);
        }
    }
    return new ArrayList<>(unique);
}

/** Returns the required keys that are absent or blank in {@code extractedData}. */
private List<String> findMissingKeys(Map<String, String> extractedData) {
    List<String> missingKeys = new ArrayList<>();
    List<String> requiredKeys = configManager.getRequiredKeys();
    if (requiredKeys == null) {
        return missingKeys; // no required keys configured
    }
    for (String requiredKey : requiredKeys) {
        String value = extractedData == null ? null : extractedData.get(requiredKey);
        if (value == null || value.trim().isEmpty()) {
            missingKeys.add(requiredKey);
        }
    }
    return missingKeys;
}

/**
 * Loads the missing-key results file.
 *
 * <p>{@code FolderMonitor.processImage} appends one JSON object per line to this file, so
 * the content is parsed line by line. A file containing a single JSON array is accepted
 * as well.
 */
@SuppressWarnings("unchecked")
private List<Map<String, Object>> readMissingKeyResults(Path resultsPath) throws IOException {
    List<Map<String, Object>> results = new ArrayList<>();
    if (!Files.exists(resultsPath)) {
        return results;
    }
    String content = new String(Files.readAllBytes(resultsPath),
            java.nio.charset.StandardCharsets.UTF_8).trim();
    if (content.isEmpty()) {
        return results;
    }
    if (content.startsWith("[")) {
        results.addAll(configManager.getObjectMapper().readValue(content, List.class));
        return results;
    }
    for (String line : content.split("\\r?\\n")) {
        if (!line.trim().isEmpty()) {
            results.add(configManager.getObjectMapper().readValue(line, Map.class));
        }
    }
    return results;
}

/**
 * Writes the results as UTF-8 with one JSON object per line, the same format that
 * {@code FolderMonitor.processImage} appends, so both writers can share the file.
 */
private void writeMissingKeyResults(Path resultsPath, List<Map<String, Object>> results) throws IOException {
    StringBuilder lines = new StringBuilder();
    for (Map<String, Object> result : results) {
        lines.append(configManager.getObjectMapper().writeValueAsString(result)).append('\n');
    }
    Files.write(resultsPath, lines.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
/**
 * Finds the monitor responsible for {@code filePath}.
 *
 * <p>The keys of {@code folderMonitors} are presumably the monitored directory paths
 * (TODO confirm against {@code initializeMonitors}). Both the file and each key are made
 * absolute and normalized, then compared component by component. A directory therefore
 * does not match a sibling that merely shares its name prefix ({@code /data/a} vs
 * {@code /data/ab}), and relative keys or {@code ..} segments are resolved first.
 *
 * @param filePath path of the image file
 * @return the monitor whose directory contains the file, or {@code null} if none does
 */
private FolderMonitor findMonitorForFile(Path filePath) {
    Path normalizedFile = filePath.toAbsolutePath().normalize();
    for (Map.Entry<String, FolderMonitor> entry : folderMonitors.entrySet()) {
        String key = entry.getKey();
        if (key == null) {
            continue;
        }
        Path monitoredDir;
        try {
            monitoredDir = Paths.get(key).toAbsolutePath().normalize();
        } catch (InvalidPathException e) {
            // A key that is not a valid path cannot contain the file.
            continue;
        }
        if (normalizedFile.startsWith(monitoredDir)) {
            return entry.getValue();
        }
    }
    return null;
}
public static void main(String[] args) {
try {
logger.info("OCR监控程序启动...");