新增 LQXSD 识别方法和配置文件

This commit is contained in:
lxd 2025-05-14 17:42:43 +08:00
parent e636f0d273
commit a9828cccb1
2 changed files with 93 additions and 41 deletions

View File

@ -5,8 +5,8 @@ output:
processed_files: "./ocr_results/processed_files.txt" # 已处理文件列表
# 后端接口配置
# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
#upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
# 底部识别配置
bottom_recognition:
@ -162,6 +162,26 @@ directories:
bottom_key_mapping:
"检查时间": "collectionTime"
- path: "./史德卫生院"
recognition_type: "LQXSD"
key_mapping:
"姓名": "name"
"性别": "gender"
"年龄": "age"
"ID": "examId"
"HR": "hr"
"P": "P"
"PR": "pr"
"QRS": "qrs"
"QT/QTC": "qt/qtc"
"P/QRS/T": "pAxle/qrsAxle/tAxle"
"RV5/SV1": "rv5/sv1"
"RV5+SV1": "rv5Sv1"
bottom_key_words:
- "检查时间"
bottom_key_mapping:
"检查时间": "collectionTime"
# OCR程序与语言包路径配置
# 新增Tesseract相关配置
@ -170,7 +190,7 @@ directories:
# language 必须,指定语言包
tesseract:
bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe"
bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
data_path: "./tessdata"
# data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
language: "chi_sim+eng"

View File

@ -863,7 +863,7 @@ public class FolderMonitor {
BufferedImage infoArea = image.getSubimage(0, 0, width, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
String[] infoLines = infoOcr.split("\\r?\\n");
String namePattern = "\\s*名\\s*[:]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)";
String namePattern = "[姊]\\s*名\\s*[:]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)";
String genderPattern = "\\s*别\\s*[:]?\\s*([男女])";
String agePattern = "\\s*龄\\s*[:]?\\s*(\\d+)\\s*[岁%]";
String idPattern = "\\s*号\\s*[:;]?\\s*([A-Za-z0-9]+)";
@ -881,9 +881,10 @@ public class FolderMonitor {
extractedData.put("name", name);
} else {
// 如果正则匹配失败尝试直接提取
if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名")) {
if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") ||
line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) {
// 统一处理各种可能的姓名标识
String processedLine = line.replaceAll("\\s*名\\s*[:]?", "姓名:");
String processedLine = line.replaceAll("[姊]\\s*名\\s*[:]?", "姓名:");
String[] parts = processedLine.split("姓名:");
if (parts.length > 1) {
String afterName = parts[1].trim();
@ -933,6 +934,27 @@ public class FolderMonitor {
extractedData.put("examId", m.group(1));
}
}
// 识别日期
if (extractedData.get("collectionTime") == null) {
// 处理原有格式和带空格的格式
String datePattern = "\\s*期\\s*[:]?\\s*(\\d{4}[-/]\\d{1,2}[-/]\\d{1,2})";
// 预处理行处理字符间可能有空格的情况
String processedLine = line.replaceAll("\\s+", "");
// 先尝试原始行
java.util.regex.Matcher m = java.util.regex.Pattern.compile(datePattern).matcher(line);
if (m.find()) {
String dateValue = m.group(1).replaceAll("/", "-");
extractedData.put("collectionTime", dateValue);
} else {
// 如果原始行匹配失败尝试处理后的行
m = java.util.regex.Pattern.compile(datePattern).matcher(processedLine);
if (m.find()) {
String dateValue = m.group(1).replaceAll("/", "-");
extractedData.put("collectionTime", dateValue);
}
}
}
}
// 2. 参数区底部1/3适配新模板
@ -942,27 +964,30 @@ public class FolderMonitor {
String paramOcr = tesseract.doOCR(paramArea);
String[] paramLines = paramOcr.split("\\r?\\n");
String hrPattern = "心率[:]?\\s*([\\dOo./]+)";
String pPattern = "P时限[:]?\\s*([\\dOo./]+)";
String prPattern = "PR间期[:]?\\s*([\\dOo./]+)";
String pPattern = "P\\s*(?:时限)?[:]?\\s*([\\dOo./+-]+)(?:\\s*[mM][sS])?";
String pAxlePattern = "P\\s*(?:轴)?[:]?\\s*([\\dOo./+-]+)\\s*°";
String prPattern = "PR间期[:;]?\\s*([\\dOo./]+)";
String qrsPattern = "QRS时限[:]?\\s*([\\dOo./]+)";
String qtPattern = "[@Q]T\\s*间\\s*期\\s*[:]?\\s*(\\d+)\\s*[mM][sS]";
String qtcPattern = "[@Q]Tc\\s*间\\s*期\\s*[:]?\\s*(\\d+)\\s*[mM][sS]";
String pAxlePattern = "P轴[:]?\\s*([\\dOo.]+)";
String qtPattern = "(?:[@Q]?T|QT)\\s*间\\s*期\\s*[:.·]?\\s*(\\d+)\\s*[mM][sS]";
String qtcPattern = "(?:[@Q]Tc|afe)\\s*间\\s*期\\s*[:.·]?\\s*(\\d+)\\s*[mM][sS]";
String qrsAxlePattern = "QRS轴[:]?\\s*([\\dOo.]+)";
String tAxlePattern = "T轴[:]?\\s*([\\dOo.]+)";
String rv5sv1Pattern = "RV5/SV1[:]?\\s*([\\dOo./]+)";
String rv5sv1Pattern = "RV5/SV1.*?([\\dOo.]+/[\\dOo.]+|[\\dOo.]+).*?[mM][vVyY]?";
String rv5plusSv1Pattern = "RV5\\+SV1[:]?\\s*([\\dOo./]+)";
// 用于存储QT和QTc的值
String qtValue = null;
String qtcValue = null;
// 用于存储三个电轴的值
String pAxleValue = null;
String qrsAxleValue = null;
String tAxleValue = null;
// 用于存储三个电轴的值
String pAxleValue = null;
String qrsAxleValue = null;
String tAxleValue = null;
for (String line : paramLines) {
// 标记是否已识别为P轴
boolean isPAxleDetected = false;
for (String line : paramLines) {
line = line.replaceAll("\\s+", "").trim();
// HR
if (extractedData.get("hr") == null) {
@ -972,14 +997,42 @@ public class FolderMonitor {
extractedData.put("hr", hrValue);
}
}
// P
if (extractedData.get("P") == null) {
// 处理所有P值 - 先检查是否含有度数符号判断是P轴还是P时限
if (line.contains("P") && line.contains("°")) {
// 如果行中同时包含P和度数符号尝试提取P轴值
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line);
if (m.find() && pAxleValue == null) {
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
isPAxleDetected = true; // 标记已识别为P轴
}
} else if ((line.contains("P") && !line.contains("PR") && !isPAxleDetected) || line.contains("P时限")) {
// 不含度数符号且不是PR或明确包含"P时限"字样尝试提取P时限
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line);
if (m.find() && extractedData.get("P") == null) {
String pValue = m.group(1).replaceAll("[Oo]", "0");
extractedData.put("P", pValue);
}
}
// P - 仅当未识别为P轴时才尝试识别为P时限但明确包含"P时限"的行除外
if ((extractedData.get("P") == null && !isPAxleDetected) || line.contains("P时限")) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line);
if (m.find()) {
String pValue = m.group(1).replaceAll("[Oo]", "0");
extractedData.put("P", pValue);
}
}
// P轴 - 检查传统格式仅当P轴未识别时
if (pAxleValue == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[:]?\\s*([\\dOo.+-]+)").matcher(line);
if (m.find()) {
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
isPAxleDetected = true; // 标记已识别为P轴
}
}
// PR
if (extractedData.get("pr") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern).matcher(line);
@ -1008,13 +1061,6 @@ public class FolderMonitor {
qtcValue = m.group(1).replaceAll("[Oo]", "0");
}
}
// P轴
if (pAxleValue == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line);
if (m.find()) {
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
}
}
// QRS轴
if (qrsAxleValue == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsAxlePattern).matcher(line);
@ -1077,21 +1123,7 @@ public class FolderMonitor {
extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString());
}
// 3. 底部区域提取检查时间底部5%
int bottomHeight = (int) (height * 0.05);
int bottomStartY = height - bottomHeight;
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
String bottomOcr = tesseract.doOCR(bottomArea);
String[] bottomLines = bottomOcr.split("\\r?\\n");
for (String line : bottomLines) {
String lineNoSpace = line.replaceAll("\\s+", "");
if (extractedData.get("collectionTime") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("日期[:]?(\\d{4}-\\d{2}-\\d{2})").matcher(lineNoSpace);
if (m.find()) {
extractedData.put("collectionTime", m.group(1));
}
}
}
} catch (Exception e) {
logger.error("processImageWithLQXSD 识别异常", e);
}