新版察右前旗巴音塔拉中心卫生院识别逻辑:按区域裁剪后分别OCR提取字段

This commit is contained in:
Flow 2025-05-28 14:13:48 +08:00
parent 043a27eeb1
commit 4ff687bf81

View File

@ -1300,7 +1300,7 @@ public class FolderMonitor {
// Crop the top-left header region of the image and OCR it line by line.
int height = image.getHeight();
int infoHeight = (int) (height * 0.0427); // top 4.27% of the image height (the earlier note said 2.5%); tune against real images
int infoWidth = (int) (width * 0.25); // 25% of the width: only the left quarter is cropped
int infoWidth = (int) (width * 0.25); // 25% of the width: only the left quarter is cropped
// The crop starts at the image origin (0, 0).
BufferedImage infoArea = image.getSubimage(0, 0, infoWidth, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
// Split the OCR text on LF or CRLF line breaks.
String[] infoLines = infoOcr.split("\\r?\\n");
@ -1331,7 +1331,7 @@ public class FolderMonitor {
int paramHeight = (int) (height * 0.21); // parameter region is 21% of the image height (the earlier note said about 38%)
// The crop starts at the left edge (x = 0); paramStartY and paramWidth are computed earlier, outside this excerpt.
BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight);
// 2. Temporarily save the cropped region to disk (debugging aid, currently disabled)
// ImageIO.write(paramArea, "png", new File("test_info.png"));
// ImageIO.write(paramArea, "png", new File("test_info.png"));
String paramOcr = tesseract.doOCR(paramArea);
// Split the OCR text on LF or CRLF line breaks.
String[] paramLines = paramOcr.split("\\r?\\n");
// Heart-rate pattern: "HR", an optional separator character, a value made of digits, 'O'/'o' and '.',
// then "bpm" in any letter case. Group 1 is the raw value.
// NOTE(review): 'O'/'o' are presumably accepted because OCR can misread the digit 0 -- confirm where the value is normalised.
String hrPattern = "HR\\s*[:.·]?\\s*([\\dOo.]+)\\s*[bB][pP][mM]";
@ -1436,25 +1436,50 @@ public class FolderMonitor {
}
// 3. Extract the examination time from the bottom region of the image
int bottomHeight = (int) (height * 0.05); // bottom 5% of the image height (the earlier note said 12%)
int bottomStartY = height - bottomHeight;
// Full-width strip that ends at the bottom edge of the image.
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
String bottomOcr = tesseract.doOCR(bottomArea);
String[] bottomLines = bottomOcr.split("\\r?\\n");
// NOTE(review): checkTimePattern is not referenced in the lines visible here; the loop below compiles its own pattern.
String checkTimePattern = "检查[:]?\\s*([\\d-]+ [\\d:]+)";
for (String line : bottomLines) {
// Remove all whitespace so that spacing introduced by OCR cannot break the match.
String lineNoSpace = line.replaceAll("\\s+", "");
// Only look for a time while "collectionTime" has not been set yet.
if (extractedData.get("collectionTime") == null) {
// Matches the label, an optional colon, then yyyy-MM-dd immediately followed by HH:mm:ss (group 1).
java.util.regex.Matcher m = java.util.regex.Pattern
.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
if (m.find()) {
String dateTime = m.group(1);
// Whitespace was stripped above, so re-insert the space after the 10-character date part.
if (!dateTime.contains(" ")) {
dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
}
extractedData.put("collectionTime", dateTime);
}
}
// Crop only a small window near the bottom-right corner of the image
double widthPercent = 0.11; // window width: 11% of the image width (the earlier note said 12%)
double heightPercent = 0.018; // window height: 1.8% of the image height
int rightWidth = (int) (width * widthPercent);
int bottomHeight = (int) (height * heightPercent);
// Shift the window left by a further 13.5% of the width (the earlier note said 5%),
// so its right edge sits 13.5% of the width away from the right border.
int extraLeftCrop = (int) (width * 0.135);
int startX = width - rightWidth - extraLeftCrop;
// Shift the window up by a further 0.5% of the height, so its bottom edge sits that far above the bottom border.
int extraBottomCrop = (int) (height * 0.005);
int startY = height - bottomHeight - extraBottomCrop;
// Clamp the origin so the crop never starts at a negative coordinate
if (startX < 0) startX = 0;
if (startY < 0) startY = 0;
BufferedImage rightBottomArea = image.getSubimage(startX, startY, rightWidth, bottomHeight);
String rightBottomOcr = tesseract.doOCR(rightBottomArea);
logger.info("原始OCR结果: {}", rightBottomOcr);
// Replace every non-digit character with a space, then split on whitespace to get the digit runs.
// When the OCR text has no digits at all, the result is a single empty string.
String[] numbers = rightBottomOcr.replaceAll("[^0-9]", " ").trim().split("\\s+");
logger.info("提取的数字: {}", Arrays.toString(numbers));
if (numbers.length >= 6) {
// The first six digit runs are taken, in order, as year, month, day, hour, minute, second
String year = numbers[0];
String month = numbers[1];
String day = numbers[2];
String hour = numbers[3];
String minute = numbers[4];
String second = numbers[5];
// Build "yyyy-MM-dd HH:mm:ss": the year is inserted exactly as read, the other fields are
// parsed as integers and zero-padded to two digits.
// NOTE(review): the parsed fields are not range-checked here (e.g. a month of 13 is accepted).
String dateTime = String.format("%s-%02d-%02d %02d:%02d:%02d",
year,
Integer.parseInt(month),
Integer.parseInt(day),
Integer.parseInt(hour),
Integer.parseInt(minute),
Integer.parseInt(second));
logger.info("格式化后的时间: {}", dateTime);
extractedData.put("collectionTime", dateTime);
} else {
// Fewer than six digit runs: log a warning and leave "collectionTime" untouched.
logger.warn("提取的数字不足以组成完整时间: {}", Arrays.toString(numbers));
}
} catch (Exception e) {
logger.error("processImageWithCG 区域识别异常", e);