新增 LQXSD 方法和配置文件
This commit is contained in:
parent
e636f0d273
commit
a9828cccb1
26
config.yaml
26
config.yaml
@ -5,8 +5,8 @@ output:
|
||||
processed_files: "./ocr_results/processed_files.txt" # 已处理文件列表
|
||||
|
||||
# 后端接口配置
|
||||
# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
|
||||
upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
|
||||
upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
|
||||
#upload_url: http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
|
||||
|
||||
# 底部识别配置
|
||||
bottom_recognition:
|
||||
@ -162,6 +162,26 @@ directories:
|
||||
bottom_key_mapping:
|
||||
"检查时间": "collectionTime"
|
||||
|
||||
- path: "./史德卫生院"
|
||||
recognition_type: "LQXSD"
|
||||
key_mapping:
|
||||
"姓名": "name"
|
||||
"性别": "gender"
|
||||
"年龄": "age"
|
||||
"ID": "examId"
|
||||
"HR": "hr"
|
||||
"P": "P"
|
||||
"PR": "pr"
|
||||
"QRS": "qrs"
|
||||
"QT/QTC": "qt/qtc"
|
||||
"P/QRS/T": "pAxle/qrsAxle/tAxle"
|
||||
"RV5/SV1": "rv5/sv1"
|
||||
"RV5+SV1": "rv5Sv1"
|
||||
bottom_key_words:
|
||||
- "检查时间"
|
||||
bottom_key_mapping:
|
||||
"检查时间": "collectionTime"
|
||||
|
||||
# OCR程序与语言包路径配置
|
||||
|
||||
# 新增Tesseract相关配置
|
||||
@ -170,7 +190,7 @@ directories:
|
||||
# language 必须,指定语言包
|
||||
|
||||
tesseract:
|
||||
bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe"
|
||||
bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
|
||||
data_path: "./tessdata"
|
||||
# data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
|
||||
language: "chi_sim+eng"
|
||||
|
@ -863,7 +863,7 @@ public class FolderMonitor {
|
||||
BufferedImage infoArea = image.getSubimage(0, 0, width, infoHeight);
|
||||
String infoOcr = tesseract.doOCR(infoArea);
|
||||
String[] infoLines = infoOcr.split("\\r?\\n");
|
||||
String namePattern = "姓\\s*名\\s*[::]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)";
|
||||
String namePattern = "[姓姊]\\s*名\\s*[::]?\\s*([\\u4e00-\\u9fa5]+)(?=\\s*[门]?诊\\s*号|\\s*性\\s*别|$)";
|
||||
String genderPattern = "性\\s*别\\s*[::]?\\s*([男女])";
|
||||
String agePattern = "年\\s*龄\\s*[::]?\\s*(\\d+)\\s*[岁%]";
|
||||
String idPattern = "序\\s*号\\s*[:;;]?\\s*([A-Za-z0-9]+)";
|
||||
@ -881,9 +881,10 @@ public class FolderMonitor {
|
||||
extractedData.put("name", name);
|
||||
} else {
|
||||
// 如果正则匹配失败,尝试直接提取
|
||||
if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名")) {
|
||||
if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") ||
|
||||
line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) {
|
||||
// 统一处理各种可能的姓名标识
|
||||
String processedLine = line.replaceAll("姓\\s*名\\s*[::]?", "姓名:");
|
||||
String processedLine = line.replaceAll("[姓姊]\\s*名\\s*[::]?", "姓名:");
|
||||
String[] parts = processedLine.split("姓名:");
|
||||
if (parts.length > 1) {
|
||||
String afterName = parts[1].trim();
|
||||
@ -933,6 +934,27 @@ public class FolderMonitor {
|
||||
extractedData.put("examId", m.group(1));
|
||||
}
|
||||
}
|
||||
// 识别日期
|
||||
if (extractedData.get("collectionTime") == null) {
|
||||
// 处理原有格式和带空格的格式
|
||||
String datePattern = "日\\s*期\\s*[::]?\\s*(\\d{4}[-/]\\d{1,2}[-/]\\d{1,2})";
|
||||
// 预处理行,处理字符间可能有空格的情况
|
||||
String processedLine = line.replaceAll("\\s+", "");
|
||||
|
||||
// 先尝试原始行
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(datePattern).matcher(line);
|
||||
if (m.find()) {
|
||||
String dateValue = m.group(1).replaceAll("/", "-");
|
||||
extractedData.put("collectionTime", dateValue);
|
||||
} else {
|
||||
// 如果原始行匹配失败,尝试处理后的行
|
||||
m = java.util.regex.Pattern.compile(datePattern).matcher(processedLine);
|
||||
if (m.find()) {
|
||||
String dateValue = m.group(1).replaceAll("/", "-");
|
||||
extractedData.put("collectionTime", dateValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 参数区(底部1/3,适配新模板)
|
||||
@ -942,15 +964,15 @@ public class FolderMonitor {
|
||||
String paramOcr = tesseract.doOCR(paramArea);
|
||||
String[] paramLines = paramOcr.split("\\r?\\n");
|
||||
String hrPattern = "心率[::]?\\s*([\\dOo./]+)";
|
||||
String pPattern = "P时限[::]?\\s*([\\dOo./]+)";
|
||||
String prPattern = "PR间期[::]?\\s*([\\dOo./]+)";
|
||||
String pPattern = "P\\s*(?:时限)?[::]?\\s*([\\dOo./+-]+)(?:\\s*[mM][sS])?";
|
||||
String pAxlePattern = "P\\s*(?:轴)?[::]?\\s*([\\dOo./+-]+)\\s*°";
|
||||
String prPattern = "PR间期[::;]?\\s*([\\dOo./]+)";
|
||||
String qrsPattern = "QRS时限[::]?\\s*([\\dOo./]+)";
|
||||
String qtPattern = "[@Q]T\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]";
|
||||
String qtcPattern = "[@Q]Tc\\s*间\\s*期\\s*[::]?\\s*(\\d+)\\s*[mM][sS]";
|
||||
String pAxlePattern = "P轴[::]?\\s*([\\dOo.]+)";
|
||||
String qtPattern = "(?:[@Q]?T|QT)\\s*间\\s*期\\s*[::.·]?\\s*(\\d+)\\s*[mM][sS]";
|
||||
String qtcPattern = "(?:[@Q]Tc|afe)\\s*间\\s*期\\s*[::.·]?\\s*(\\d+)\\s*[mM][sS]";
|
||||
String qrsAxlePattern = "QRS轴[::]?\\s*([\\dOo.]+)";
|
||||
String tAxlePattern = "T轴[::]?\\s*([\\dOo.]+)";
|
||||
String rv5sv1Pattern = "RV5/SV1[::]?\\s*([\\dOo./]+)";
|
||||
String rv5sv1Pattern = "RV5/SV1.*?([\\dOo.]+/[\\dOo.]+|[\\dOo.]+).*?[mM][vVyY]?";
|
||||
String rv5plusSv1Pattern = "RV5\\+SV1[::]?\\s*([\\dOo./]+)";
|
||||
|
||||
// 用于存储QT和QTc的值
|
||||
@ -962,6 +984,9 @@ public class FolderMonitor {
|
||||
String qrsAxleValue = null;
|
||||
String tAxleValue = null;
|
||||
|
||||
// 标记是否已识别为P轴
|
||||
boolean isPAxleDetected = false;
|
||||
|
||||
for (String line : paramLines) {
|
||||
line = line.replaceAll("\\s+", "").trim();
|
||||
// HR
|
||||
@ -972,14 +997,42 @@ public class FolderMonitor {
|
||||
extractedData.put("hr", hrValue);
|
||||
}
|
||||
}
|
||||
// P
|
||||
if (extractedData.get("P") == null) {
|
||||
|
||||
// 处理所有P值 - 先检查是否含有度数符号判断是P轴还是P时限
|
||||
if (line.contains("P") && line.contains("°")) {
|
||||
// 如果行中同时包含P和度数符号,尝试提取P轴值
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line);
|
||||
if (m.find() && pAxleValue == null) {
|
||||
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
isPAxleDetected = true; // 标记已识别为P轴
|
||||
}
|
||||
} else if ((line.contains("P") && !line.contains("PR") && !isPAxleDetected) || line.contains("P时限")) {
|
||||
// 不含度数符号且不是PR,或明确包含"P时限"字样,尝试提取P时限
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line);
|
||||
if (m.find() && extractedData.get("P") == null) {
|
||||
String pValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
extractedData.put("P", pValue);
|
||||
}
|
||||
}
|
||||
|
||||
// P - 仅当未识别为P轴时才尝试识别为P时限,但明确包含"P时限"的行除外
|
||||
if ((extractedData.get("P") == null && !isPAxleDetected) || line.contains("P时限")) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern).matcher(line);
|
||||
if (m.find()) {
|
||||
String pValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
extractedData.put("P", pValue);
|
||||
}
|
||||
}
|
||||
|
||||
// P轴 - 检查传统格式,仅当P轴未识别时
|
||||
if (pAxleValue == null) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[::]?\\s*([\\dOo.+-]+)").matcher(line);
|
||||
if (m.find()) {
|
||||
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
isPAxleDetected = true; // 标记已识别为P轴
|
||||
}
|
||||
}
|
||||
|
||||
// PR
|
||||
if (extractedData.get("pr") == null) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern).matcher(line);
|
||||
@ -1008,13 +1061,6 @@ public class FolderMonitor {
|
||||
qtcValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
}
|
||||
}
|
||||
// P轴
|
||||
if (pAxleValue == null) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pAxlePattern).matcher(line);
|
||||
if (m.find()) {
|
||||
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
|
||||
}
|
||||
}
|
||||
// QRS轴
|
||||
if (qrsAxleValue == null) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsAxlePattern).matcher(line);
|
||||
@ -1077,21 +1123,7 @@ public class FolderMonitor {
|
||||
extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString());
|
||||
}
|
||||
|
||||
// 3. 底部区域(提取检查时间,底部5%)
|
||||
int bottomHeight = (int) (height * 0.05);
|
||||
int bottomStartY = height - bottomHeight;
|
||||
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
|
||||
String bottomOcr = tesseract.doOCR(bottomArea);
|
||||
String[] bottomLines = bottomOcr.split("\\r?\\n");
|
||||
for (String line : bottomLines) {
|
||||
String lineNoSpace = line.replaceAll("\\s+", "");
|
||||
if (extractedData.get("collectionTime") == null) {
|
||||
java.util.regex.Matcher m = java.util.regex.Pattern.compile("日期[::]?(\\d{4}-\\d{2}-\\d{2})").matcher(lineNoSpace);
|
||||
if (m.find()) {
|
||||
extractedData.put("collectionTime", m.group(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("processImageWithLQXSD 识别异常", e);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user