From dade8afb998aa0eb19f02882814c561f717f2724 Mon Sep 17 00:00:00 2001 From: lxd <1004405501@qq.com> Date: Tue, 13 May 2025 17:17:29 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=A4=84=E7=90=86?= =?UTF-8?q?=E7=A4=BC=E6=B3=89=E5=8E=BF=E5=9F=8E=E5=85=B3=E5=8D=AB=E7=94=9F?= =?UTF-8?q?=E9=99=A2ocr=E8=AF=86=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/java/com/ocr/FolderMonitor.java | 140 +++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/src/main/java/com/ocr/FolderMonitor.java b/src/main/java/com/ocr/FolderMonitor.java index 4f946b0..7c422a5 100644 --- a/src/main/java/com/ocr/FolderMonitor.java +++ b/src/main/java/com/ocr/FolderMonitor.java @@ -87,6 +87,9 @@ public class FolderMonitor { case "templateA": extractedData = processImageWithTemplateA(imageFullPath); break; + case "CG": + extractedData = processImageWithCG(imageFullPath); + break; case "normal": default: extractedData = processImageNormal(imageFullPath); @@ -697,6 +700,143 @@ public class FolderMonitor { return processImageNormal(imageFullPath); } + // 新版CG识别逻辑:按区域裁剪后分别OCR提取字段 + private Map processImageWithCG(String imageFullPath) { + Map extractedData = new HashMap<>(); + try { + BufferedImage image = ImageIO.read(new File(imageFullPath)); + int width = image.getWidth(); + int height = image.getHeight(); + + // 1. 标题区和患者信息区分开裁剪 + int titleHeight = (int) (height * 0.01); // 标题区5% + int infoHeight = (int) (height * 0.10); // 患者信息区10% + // 跳过标题区,只识别患者信息区 + BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight); + String infoOcr = tesseract.doOCR(infoArea); + String[] infoLines = infoOcr.split("\\r?\\n"); + String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)"; + String agePattern = "(\\d+)\\s*[岁%]"; + String idPattern = "[I1l][D][::]?\\s*([A-Za-z0-9]+)"; + for (String line : infoLines) { + line = line.replaceAll("\\s+", " ").trim(); + if (extractedData.get("name") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line); + if (m.find()) { + extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格 + extractedData.put("gender", m.group(2)); + java.util.regex.Matcher ageM = java.util.regex.Pattern.compile(agePattern).matcher(line); + if (ageM.find()) { + extractedData.put("age", ageM.group(1)); + } + } + } + if (extractedData.get("id") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line); + if (m.find()) { + extractedData.put("id", m.group(1)); + } + } + } + + // 2. 左侧参数区(提取HR、P、PR、QRS、QT/QTC、P/QRS/T、RV5/SV1) + int paramWidth = (int) (width * 0.32); // 左侧32% + int paramStartY = titleHeight; + int paramHeight = (int) (height * 0.355); // 参数区高度约38% + BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight); + String paramOcr = tesseract.doOCR(paramArea); + String[] paramLines = paramOcr.split("\\r?\\n"); + String hrPattern = "HR\\s*[::.·]?\\s*([\\dOo./]+[bB][pP][mM])"; + String pPattern = "P\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String prPattern = "PR\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String qrsPattern = "QRS\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String qtPattern = "QT/QT[cC]?\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String pqrstPattern = "P/QRS/T\\s*[::.·]?\\s*([\\dOo./]+)"; + String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[::.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?"; + for (String line : paramLines) { + line = line.replaceAll("\\s+", " ").trim(); + if (extractedData.get("HR") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("HR", hrValue); + } + } + if (extractedData.get("P") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("P", pValue); + } + } + if (extractedData.get("PR") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("PR", prValue); + } + } + if (extractedData.get("QRS") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); + extractedData.put("QRS", qrsValue); + } + } + if (extractedData.get("QT/QTC") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); + extractedData.put("QT/QTC", qtValue); + } + } + if (extractedData.get("P/QRS/T") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); + if (m.find()) { + String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", ""); + extractedData.put("P/QRS/T", pqrstValue); + } + } + if (extractedData.get("RV5/SV1") == null) { + if (line.toLowerCase().contains("sv1")) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); + if (m.find()) { + String rv5 = m.group(1).replaceAll("[Oo]", "0"); + String sv1 = m.group(2).replaceAll("[Oo]", "0"); + String rv5sv1Value = rv5 + "/" + sv1; + extractedData.put("RV5/SV1", rv5sv1Value); + + } + } + } + } + + // 3. 底部区域(提取检查时间) + int bottomHeight = (int) (height * 0.05); // 底部12% + int bottomStartY = height - bottomHeight; + BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight); + String bottomOcr = tesseract.doOCR(bottomArea); + String[] bottomLines = bottomOcr.split("\\r?\\n"); + String checkTimePattern = "检查[::]?\\s*([\\d-]+ [\\d:]+)"; + for (String line : bottomLines) { + String lineNoSpace = line.replaceAll("\\s+", ""); + if (extractedData.get("collectionTime") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[::]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace); + if (m.find()) { + String dateTime = m.group(1); + if (!dateTime.contains(" ")) { + dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10); + } + extractedData.put("collectionTime", dateTime); + } + } + } + } catch (Exception e) { + logger.error("processImageWithCG 区域识别异常", e); + } + return extractedData; + } + /** * 关闭资源 */ From 9a29647a42a86f768df1dc688999379ed4f42df5 Mon Sep 17 00:00:00 2001 From: lxd <1004405501@qq.com> Date: Tue, 13 May 2025 17:19:22 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=A4=84=E7=90=86?= =?UTF-8?q?=E7=A4=BC=E6=B3=89=E5=8E=BF=E5=9F=8E=E5=85=B3=E5=8D=AB=E7=94=9F?= =?UTF-8?q?=E9=99=A2ocr=E8=AF=86=E5=88=AB=20=E6=96=B0=E5=A2=9E=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/config.yaml b/config.yaml index acfde09..f75dfeb 100644 --- a/config.yaml +++ b/config.yaml @@ -142,6 +142,26 @@ directories: "医师": "doctor" "结论": "conclusion" + - path: "./礼泉县城关卫生院" + recognition_type: "CG" + key_mapping: + "姓名": "name" + "性别": "gender" + "年龄": "age" + "ID": "examId" + "HR": "hr" + "P": "P" + "PR": "pr" + "QRS": "qrs" + "QT/QTC": "qt/qtc" + "P/QRS/T": "pAxle/qrsAxle/tAxle" + "RV5/SV1": "rv5/sv1" + "RV5+SV1": "rv5Sv1" + bottom_key_words: + - "检查时间" + bottom_key_mapping: + "检查时间": "collectionTime" + # OCR程序与语言包路径配置 # 新增Tesseract相关配置 From e636f0d273f791f041ca4df6b107e11d607414fb Mon Sep 17 00:00:00 2001 From: lxd <1004405501@qq.com> Date: Wed, 14 May 2025 15:50:46 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=A4=84=E7=90=86=E7=A4=BC=E6=B3=89?= =?UTF-8?q?=E5=8E=BF=E5=9F=8E=E5=85=B3=E5=8D=AB=E7=94=9F=E9=99=A2ocr?= =?UTF-8?q?=E8=AF=86=E5=88=AB=20=E5=AD=97=E6=AE=B5=E4=B8=8D=E5=AF=B9?= =?UTF-8?q?=E5=BA=94=E9=97=AE=E9=A2=98=20=E6=96=B0=E5=A2=9ELQXSD=20?= =?UTF-8?q?=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/java/com/ocr/FolderMonitor.java | 291 +++++++++++++++++++++-- 1 file changed, 276 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/ocr/FolderMonitor.java b/src/main/java/com/ocr/FolderMonitor.java index 7c422a5..dba3c06 100644 --- a/src/main/java/com/ocr/FolderMonitor.java +++ b/src/main/java/com/ocr/FolderMonitor.java @@ -90,6 +90,9 @@ public class FolderMonitor { case "CG": extractedData = processImageWithCG(imageFullPath); break; + case "LQXSD": + extractedData = processImageWithLQXSD(imageFullPath); + break; case "normal": default: extractedData = processImageNormal(imageFullPath); @@ -750,62 +753,72 @@ public class FolderMonitor { String pPattern = "P\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; String prPattern = "PR\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; String qrsPattern = "QRS\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; - String qtPattern = "QT/QT[cC]?\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String qtPattern = "(?:QT|QI)/QT[cC]?\\s*[::.·]?\\s*([\\dOo./]+[mM][sS])"; + String qtcPattern = "[@Q]Tc\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]"; String pqrstPattern = "P/QRS/T\\s*[::.·]?\\s*([\\dOo./]+)"; String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[::.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?"; for (String line : paramLines) { line = line.replaceAll("\\s+", " ").trim(); - if (extractedData.get("HR") == null) { + if (extractedData.get("hr") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("HR", hrValue); + extractedData.put("hr", hrValue); } } - if (extractedData.get("P") == null) { + if (extractedData.get("p") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("P", pValue); + extractedData.put("p", pValue); } } - if (extractedData.get("PR") == null) { + if (extractedData.get("pr") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("PR", prValue); + extractedData.put("pr", prValue); } } - if (extractedData.get("QRS") == null) { + if (extractedData.get("qrs") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", ""); - extractedData.put("QRS", qrsValue); + extractedData.put("qrs", qrsValue); } } - if (extractedData.get("QT/QTC") == null) { + if (extractedData.get("qt/qtc") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", ""); - extractedData.put("QT/QTC", qtValue); + extractedData.put("qt/qtc", qtValue); } } - if (extractedData.get("P/QRS/T") == null) { + if (extractedData.get("pAxle/qrsAxle/tAxle") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line); if (m.find()) { String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", ""); - extractedData.put("P/QRS/T", pqrstValue); + extractedData.put("pAxle/qrsAxle/tAxle", pqrstValue); } } - if (extractedData.get("RV5/SV1") == null) { + if (extractedData.get("rv5/sv1") == null) { if (line.toLowerCase().contains("sv1")) { java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line); if (m.find()) { String rv5 = m.group(1).replaceAll("[Oo]", "0"); String sv1 = m.group(2).replaceAll("[Oo]", "0"); String rv5sv1Value = rv5 + "/" + sv1; - extractedData.put("RV5/SV1", rv5sv1Value); + extractedData.put("rv5/sv1", rv5sv1Value); + // 计算rv5Sv1的值(rv5和sv1的和) + try { + double rv5Value = Double.parseDouble(rv5); + double sv1Value = Double.parseDouble(sv1); + double sum = rv5Value + sv1Value; + extractedData.put("rv5Sv1", String.format("%.3f", sum)); + } catch (NumberFormatException e) { + logger.error("计算rv5Sv1时发生数字格式错误: rv5={}, sv1={}", rv5, sv1, e); + } } } } @@ -837,6 +850,254 @@ public class FolderMonitor { return extractedData; } + // 新增礼泉县夹德中心卫生院模板识别方法 + private Map processImageWithLQXSD(String imageFullPath) { + Map extractedData = new HashMap<>(); + try { + BufferedImage image = ImageIO.read(new File(imageFullPath)); + int width = image.getWidth(); + int height = image.getHeight(); + + // 1. 顶部患者信息区(约前12%) + int infoHeight = (int) (height * 0.12); + BufferedImage infoArea = image.getSubimage(0, 0, width, infoHeight); + String infoOcr = tesseract.doOCR(infoArea); + String[] infoLines = infoOcr.split("\\r?\\n"); + String namePattern = "姓\\s*名\\s*[::]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)"; + String genderPattern = "性\\s*别\\s*[::]?\\s*([男女])"; + String agePattern = "年\\s*龄\\s*[::]?\\s*(\\d+)\\s*[岁%]"; + String idPattern = "序\\s*号\\s*[:;;]?\\s*([A-Za-z0-9]+)"; + for (String line : infoLines) { + line = line.trim(); + // 姓名 + if (extractedData.get("name") == null) { + // 先尝试使用正则表达式匹配 + java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line); + if (m.find()) { + String name = m.group(1).replaceAll("\\s+", ""); + if (name.contains("门诊号")) { + name = name.split("门诊号")[0]; + } + extractedData.put("name", name); + } else { + // 如果正则匹配失败,尝试直接提取 + if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名")) { + // 统一处理各种可能的姓名标识 + String processedLine = line.replaceAll("姓\\s*名\\s*[::]?", "姓名:"); + String[] parts = processedLine.split("姓名:"); + if (parts.length > 1) { + String afterName = parts[1].trim(); + // 找到下一个字段的位置 + int nextFieldIndex = -1; + if (afterName.contains("门诊号") || afterName.contains("门 诊 号")) { + nextFieldIndex = afterName.indexOf("门诊号"); + if (nextFieldIndex == -1) { + nextFieldIndex = afterName.indexOf("门 诊 号"); + } + } else if (afterName.contains("性别") || afterName.contains("性 别")) { + nextFieldIndex = afterName.indexOf("性别"); + if (nextFieldIndex == -1) { + nextFieldIndex = afterName.indexOf("性 别"); + } + } + + if (nextFieldIndex > 0) { + String name = afterName.substring(0, nextFieldIndex) + .replaceAll("[::]", "") + .replaceAll("\\s+", "") + .trim(); + extractedData.put("name", name); + } + } + } + } + } + // 性别 + if (extractedData.get("gender") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(genderPattern).matcher(line); + if (m.find()) { + extractedData.put("gender", m.group(1)); + } + } + // 年龄 + if (extractedData.get("age") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(agePattern).matcher(line); + if (m.find()) { + extractedData.put("age", m.group(1)); + } + } + // ID + if (extractedData.get("examId") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line); + if (m.find()) { + extractedData.put("examId", m.group(1)); + } + } + } + + // 2. 参数区(底部1/3,适配新模板) + int paramStartY = (int) (height * 0.81); + int paramHeight = height - paramStartY; + BufferedImage paramArea = image.getSubimage(0, paramStartY, width, paramHeight); + String paramOcr = tesseract.doOCR(paramArea); + String[] paramLines = paramOcr.split("\\r?\\n"); + String hrPattern = "心率[::]?\\s*([\\dOo./]+)"; + String pPattern = "P时限[::]?\\s*([\\dOo./]+)"; + String prPattern = "PR间期[::]?\\s*([\\dOo./]+)"; + String qrsPattern = "QRS时限[::]?\\s*([\\dOo./]+)"; + String qtPattern = "[@Q]T\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]"; + String qtcPattern = "[@Q]Tc\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]"; + String pAxlePattern = "P轴[::]?\\s*([\\dOo.]+)"; + String qrsAxlePattern = "QRS轴[::]?\\s*([\\dOo.]+)"; + String tAxlePattern = "T轴[::]?\\s*([\\dOo.]+)"; + String rv5sv1Pattern = "RV5/SV1[::]?\\s*([\\dOo./]+)"; + String rv5plusSv1Pattern = "RV5\\+SV1[::]?\\s*([\\dOo./]+)"; + + // 用于存储QT和QTc的值 + String qtValue = null; + String qtcValue = null; + + // 用于存储三个电轴的值 + String pAxleValue = null; + String qrsAxleValue = null; + String tAxleValue = null; + + for (String line : paramLines) { + line = line.replaceAll("\\s+", "").trim(); + // HR + if (extractedData.get("hr") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern).matcher(line); + if (m.find()) { + String hrValue = m.group(1).replaceAll("[Oo]", "0"); + extractedData.put("hr", hrValue); + } + } + // P + if (extractedData.get("P") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line); + if (m.find()) { + String pValue = m.group(1).replaceAll("[Oo]", "0"); + extractedData.put("P", pValue); + } + } + // PR + if (extractedData.get("pr") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern).matcher(line); + if (m.find()) { + extractedData.put("pr", m.group(1).replaceAll("[Oo]", "0")); + } + } + // QRS + if (extractedData.get("qrs") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern).matcher(line); + if (m.find()) { + extractedData.put("qrs", m.group(1).replaceAll("[Oo]", "0")); + } + } + // QT + if (qtValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern).matcher(line); + if (m.find()) { + qtValue = m.group(1).replaceAll("[Oo]", "0"); + } + } + // QTc + if (qtcValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtcPattern).matcher(line); + if (m.find()) { + qtcValue = m.group(1).replaceAll("[Oo]", "0"); + } + } + // P轴 + if (pAxleValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line); + if (m.find()) { + pAxleValue = m.group(1).replaceAll("[Oo]", "0"); + } + } + // QRS轴 + if (qrsAxleValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsAxlePattern).matcher(line); + if (m.find()) { + qrsAxleValue = m.group(1).replaceAll("[Oo]", "0"); + } + } + // T轴 + if (tAxleValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(tAxlePattern).matcher(line); + if (m.find()) { + tAxleValue = m.group(1).replaceAll("[Oo]", "0"); + } + } + // RV5/SV1 + if (extractedData.get("rv5/sv1") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(rv5sv1Pattern).matcher(line); + if (m.find()) { + extractedData.put("rv5/sv1", m.group(1).replaceAll("[Oo]", "0")); + } + } + // RV5+SV1 + if (extractedData.get("rv5Sv1") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile(rv5plusSv1Pattern).matcher(line); + if (m.find()) { + extractedData.put("rv5Sv1", m.group(1).replaceAll("[Oo]", "0")); + } + } + } + + // 处理QT/QTc的值 + if (qtValue != null || qtcValue != null) { + if (qtValue != null && qtcValue != null) { + extractedData.put("qt/qtc", qtValue + "/" + qtcValue); + } else if (qtValue != null) { + extractedData.put("qt/qtc", qtValue); + } else if (qtcValue != null) { + extractedData.put("qt/qtc", qtcValue); + } + } + + // 处理三个电轴的值 + if (pAxleValue != null || qrsAxleValue != null || tAxleValue != null) { + StringBuilder axleValue = new StringBuilder(); + if (pAxleValue != null) { + axleValue.append(pAxleValue); + } + if (qrsAxleValue != null) { + if (axleValue.length() > 0) { + axleValue.append("/"); + } + axleValue.append(qrsAxleValue); + } + if (tAxleValue != null) { + if (axleValue.length() > 0) { + axleValue.append("/"); + } + axleValue.append(tAxleValue); + } + extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString()); + } + + // 3. 底部区域(提取检查时间,底部5%) + int bottomHeight = (int) (height * 0.05); + int bottomStartY = height - bottomHeight; + BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight); + String bottomOcr = tesseract.doOCR(bottomArea); + String[] bottomLines = bottomOcr.split("\\r?\\n"); + for (String line : bottomLines) { + String lineNoSpace = line.replaceAll("\\s+", ""); + if (extractedData.get("collectionTime") == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile("日期[::]?(\\d{4}-\\d{2}-\\d{2})").matcher(lineNoSpace); + if (m.find()) { + extractedData.put("collectionTime", m.group(1)); + } + } + } + } catch (Exception e) { + logger.error("processImageWithLQXSD 识别异常", e); + } + return extractedData; + } + /** * 关闭资源 */ From a9828cccb178595142d99dbf6e4e6c224cadeecd Mon Sep 17 00:00:00 2001 From: lxd <1004405501@qq.com> Date: Wed, 14 May 2025 17:42:43 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=20=E6=96=B0=E5=A2=9ELQXSD=20=E6=96=B9?= =?UTF-8?q?=E6=B3=95=20=E5=92=8C=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 26 +++++- src/main/java/com/ocr/FolderMonitor.java | 108 +++++++++++++++-------- 2 files changed, 93 insertions(+), 41 deletions(-) diff --git a/config.yaml b/config.yaml index f75dfeb..0f6618b 100644 --- a/config.yaml +++ b/config.yaml @@ -5,8 +5,8 @@ output: processed_files: "./ocr_results/processed_files.txt" # 已处理文件列表 # 后端接口配置 -# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData -upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData +upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData +#upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData # 底部识别配置 bottom_recognition: @@ -162,6 +162,26 @@ directories: bottom_key_mapping: "检查时间": "collectionTime" + - path: "./史德卫生院" + recognition_type: "LQXSD" + key_mapping: + "姓名": "name" + "性别": "gender" + "年龄": "age" + "ID": "examId" + "HR": "hr" + "P": "P" + "PR": "pr" + "QRS": "qrs" + "QT/QTC": "qt/qtc" + "P/QRS/T": "pAxle/qrsAxle/tAxle" + "RV5/SV1": "rv5/sv1" + "RV5+SV1": "rv5Sv1" + bottom_key_words: + - "检查时间" + bottom_key_mapping: + "检查时间": "collectionTime" + # OCR程序与语言包路径配置 # 新增Tesseract相关配置 @@ -170,7 +190,7 @@ directories: # language 必须,指定语言包 tesseract: - bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe" + bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe" data_path: "./tessdata" # data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata" language: "chi_sim+eng" diff --git a/src/main/java/com/ocr/FolderMonitor.java b/src/main/java/com/ocr/FolderMonitor.java index dba3c06..ebb754c 100644 --- a/src/main/java/com/ocr/FolderMonitor.java +++ b/src/main/java/com/ocr/FolderMonitor.java @@ -863,7 +863,7 @@ public class FolderMonitor { BufferedImage infoArea = image.getSubimage(0, 0, width, infoHeight); String infoOcr = tesseract.doOCR(infoArea); String[] infoLines = infoOcr.split("\\r?\\n"); - String namePattern = "姓\\s*名\\s*[::]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)"; + String namePattern = "[姓姊]\\s*名\\s*[::]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)"; String genderPattern = "性\\s*别\\s*[::]?\\s*([男女])"; String agePattern = "年\\s*龄\\s*[::]?\\s*(\\d+)\\s*[岁%]"; String idPattern = "序\\s*号\\s*[:;;]?\\s*([A-Za-z0-9]+)"; @@ -881,9 +881,10 @@ public class FolderMonitor { extractedData.put("name", name); } else { // 如果正则匹配失败,尝试直接提取 - if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名")) { + if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") || + line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) { // 统一处理各种可能的姓名标识 - String processedLine = line.replaceAll("姓\\s*名\\s*[::]?", "姓名:"); + String processedLine = line.replaceAll("[姓姊]\\s*名\\s*[::]?", "姓名:"); String[] parts = processedLine.split("姓名:"); if (parts.length > 1) { String afterName = parts[1].trim(); @@ -933,6 +934,27 @@ public class FolderMonitor { extractedData.put("examId", m.group(1)); } } + // 识别日期 + if (extractedData.get("collectionTime") == null) { + // 处理原有格式和带空格的格式 + String datePattern = "日\\s*期\\s*[::]?\\s*(\\d{4}[-/]\\d{1,2}[-/]\\d{1,2})"; + // 预处理行,处理字符间可能有空格的情况 + String processedLine = line.replaceAll("\\s+", ""); + + // 先尝试原始行 + java.util.regex.Matcher m = java.util.regex.Pattern.compile(datePattern).matcher(line); + if (m.find()) { + String dateValue = m.group(1).replaceAll("/", "-"); + extractedData.put("collectionTime", dateValue); + } else { + // 如果原始行匹配失败,尝试处理后的行 + m = java.util.regex.Pattern.compile(datePattern).matcher(processedLine); + if (m.find()) { + String dateValue = m.group(1).replaceAll("/", "-"); + extractedData.put("collectionTime", dateValue); + } + } + } } // 2. 参数区(底部1/3,适配新模板) @@ -942,27 +964,30 @@ public class FolderMonitor { String paramOcr = tesseract.doOCR(paramArea); String[] paramLines = paramOcr.split("\\r?\\n"); String hrPattern = "心率[::]?\\s*([\\dOo./]+)"; - String pPattern = "P时限[::]?\\s*([\\dOo./]+)"; - String prPattern = "PR间期[::]?\\s*([\\dOo./]+)"; + String pPattern = "P\\s*(?:时限)?[::]?\\s*([\\dOo./+-]+)(?:\\s*[mM][sS])?"; + String pAxlePattern = "P\\s*(?:轴)?[::]?\\s*([\\dOo./+-]+)\\s*°"; + String prPattern = "PR间期[::;]?\\s*([\\dOo./]+)"; String qrsPattern = "QRS时限[::]?\\s*([\\dOo./]+)"; - String qtPattern = "[@Q]T\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]"; - String qtcPattern = "[@Q]Tc\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]"; - String pAxlePattern = "P轴[::]?\\s*([\\dOo.]+)"; + String qtPattern = "(?:[@Q]?T|QT)\\s*间\\s*期\\s*[::.·]?\\s*(\\d+)\\s*[mM][sS]"; + String qtcPattern = "(?:[@Q]Tc|afe)\\s*间\\s*期\\s*[::.·]?\\s*(\\d+)\\s*[mM][sS]"; String qrsAxlePattern = "QRS轴[::]?\\s*([\\dOo.]+)"; String tAxlePattern = "T轴[::]?\\s*([\\dOo.]+)"; - String rv5sv1Pattern = "RV5/SV1[::]?\\s*([\\dOo./]+)"; + String rv5sv1Pattern = "RV5/SV1.*?([\\dOo.]+/[\\dOo.]+|[\\dOo.]+).*?[mM][vVyY]?"; String rv5plusSv1Pattern = "RV5\\+SV1[::]?\\s*([\\dOo./]+)"; // 用于存储QT和QTc的值 String qtValue = null; String qtcValue = null; - // 用于存储三个电轴的值 - String pAxleValue = null; - String qrsAxleValue = null; - String tAxleValue = null; + // 用于存储三个电轴的值 + String pAxleValue = null; + String qrsAxleValue = null; + String tAxleValue = null; - for (String line : paramLines) { + // 标记是否已识别为P轴 + boolean isPAxleDetected = false; + + for (String line : paramLines) { line = line.replaceAll("\\s+", "").trim(); // HR if (extractedData.get("hr") == null) { @@ -972,14 +997,42 @@ public class FolderMonitor { extractedData.put("hr", hrValue); } } - // P - if (extractedData.get("P") == null) { + + // 处理所有P值 - 先检查是否含有度数符号判断是P轴还是P时限 + if (line.contains("P") && line.contains("°")) { + // 如果行中同时包含P和度数符号,尝试提取P轴值 + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line); + if (m.find() && pAxleValue == null) { + pAxleValue = m.group(1).replaceAll("[Oo]", "0"); + isPAxleDetected = true; // 标记已识别为P轴 + } + } else if ((line.contains("P") && !line.contains("PR") && !isPAxleDetected) || line.contains("P时限")) { + // 不含度数符号且不是PR,或明确包含"P时限"字样,尝试提取P时限 + java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line); + if (m.find() && extractedData.get("P") == null) { + String pValue = m.group(1).replaceAll("[Oo]", "0"); + extractedData.put("P", pValue); + } + } + + // P - 仅当未识别为P轴时才尝试识别为P时限,但明确包含"P时限"的行除外 + if ((extractedData.get("P") == null && !isPAxleDetected) || line.contains("P时限")) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line); if (m.find()) { String pValue = m.group(1).replaceAll("[Oo]", "0"); extractedData.put("P", pValue); } } + + // P轴 - 检查传统格式,仅当P轴未识别时 + if (pAxleValue == null) { + java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[::]?\\s*([\\dOo.+-]+)").matcher(line); + if (m.find()) { + pAxleValue = m.group(1).replaceAll("[Oo]", "0"); + isPAxleDetected = true; // 标记已识别为P轴 + } + } + // PR if (extractedData.get("pr") == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern).matcher(line); @@ -1008,13 +1061,6 @@ public class FolderMonitor { qtcValue = m.group(1).replaceAll("[Oo]", "0"); } } - // P轴 - if (pAxleValue == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line); - if (m.find()) { - pAxleValue = m.group(1).replaceAll("[Oo]", "0"); - } - } // QRS轴 if (qrsAxleValue == null) { java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsAxlePattern).matcher(line); @@ -1077,21 +1123,7 @@ public class FolderMonitor { extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString()); } - // 3. 底部区域(提取检查时间,底部5%) - int bottomHeight = (int) (height * 0.05); - int bottomStartY = height - bottomHeight; - BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight); - String bottomOcr = tesseract.doOCR(bottomArea); - String[] bottomLines = bottomOcr.split("\\r?\\n"); - for (String line : bottomLines) { - String lineNoSpace = line.replaceAll("\\s+", ""); - if (extractedData.get("collectionTime") == null) { - java.util.regex.Matcher m = java.util.regex.Pattern.compile("日期[::]?(\\d{4}-\\d{2}-\\d{2})").matcher(lineNoSpace); - if (m.find()) { - extractedData.put("collectionTime", m.group(1)); - } - } - } + } catch (Exception e) { logger.error("processImageWithLQXSD 识别异常", e); }