新增察右前旗巴音塔拉中心卫生院

This commit is contained in:
lxd 2025-05-28 10:15:19 +08:00
parent 43fc5b7971
commit 043a27eeb1
2 changed files with 635 additions and 383 deletions

View File

@ -200,6 +200,25 @@ directories:
- "检查时间"
bottom_key_mapping:
"检查时间": "collectionTime"
- path: "./察右前旗巴音塔拉中心卫生院"
recognition_type: "BYTLZX"
key_mapping:
"姓名": "name"
"性别": "gender"
"年龄": "age"
"ID": "examId"
"HR": "hr"
"P": "P"
"PR": "pr"
"QRS": "qrs"
"QT/QTC": "qt/qtc"
"P/QRS/T": "pAxle/qrsAxle/tAxle"
"RV5/SV1": "rv5/sv1"
"RV5+SV1": "rv5Sv1"
bottom_key_words:
- "检查时间"
bottom_key_mapping:
"检查时间": "collectionTime"
# OCR程序与语言包路径配置
# 新增Tesseract相关配置
@ -208,7 +227,7 @@ directories:
# language 必须,指定语言包
tesseract:
bin_path: "C:/Program Files/Tesseract-OCR/tesseract.exe"
bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
data_path: "./tessdata"
# data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
language: "chi_sim+eng"

View File

@ -34,6 +34,8 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FolderMonitor {
private static final Logger logger = LoggerFactory.getLogger(FolderMonitor.class);
@ -54,11 +56,11 @@ public class FolderMonitor {
tessConfig = (Map<String, Object>) configManager.getConfig().get("tesseract");
}
String tessdataPathStr = tessConfig != null && tessConfig.containsKey("data_path")
? tessConfig.get("data_path").toString()
: java.nio.file.Paths.get("tessdata").toAbsolutePath().toString();
? tessConfig.get("data_path").toString()
: java.nio.file.Paths.get("tessdata").toAbsolutePath().toString();
String language = tessConfig != null && tessConfig.containsKey("language")
? tessConfig.get("language").toString()
: "chi_sim+eng";
? tessConfig.get("language").toString()
: "chi_sim+eng";
this.tesseract.setDatapath(tessdataPathStr);
this.tesseract.setLanguage(language);
this.httpClient = HttpClients.createDefault();
@ -95,6 +97,9 @@ public class FolderMonitor {
case "LQXSD":
extractedData = processImageWithLQXSD(imageFullPath);
break;
case "BYTLZX":
extractedData = processImageWithBYTLZX(imageFullPath);
break;
case "rotate90":
extractedData = processImageWithRotate90(imageFullPath);
break;
@ -113,7 +118,8 @@ public class FolderMonitor {
if (requiredKeys != null && !requiredKeys.isEmpty()) {
List<String> missingKeys = new ArrayList<>();
for (String key : requiredKeys) {
if (!extractedData.containsKey(key) || extractedData.get(key) == null || extractedData.get(key).trim().isEmpty()) {
if (!extractedData.containsKey(key) || extractedData.get(key) == null
|| extractedData.get(key).trim().isEmpty()) {
missingKeys.add(key);
}
}
@ -124,13 +130,14 @@ public class FolderMonitor {
// 记录缺少关键字的文件路径
String missingKeyFilesPath = configManager.getMissingKeyFilesPath();
Files.write(Paths.get(missingKeyFilesPath),
(imageFullPath + "\n").getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
(imageFullPath + "\n").getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
// 记录缺少关键字的识别结果
Map<String, Object> missingKeyResult = new HashMap<>();
missingKeyResult.put("file_path", imageFullPath);
missingKeyResult.put("process_time", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
missingKeyResult.put("process_time",
LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
missingKeyResult.put("missing_keys", missingKeys);
missingKeyResult.put("extracted_data", extractedData);
@ -138,8 +145,8 @@ public class FolderMonitor {
ObjectMapper mapper = new ObjectMapper();
String jsonResult = mapper.writeValueAsString(missingKeyResult) + "\n";
Files.write(Paths.get(missingKeyResultsPath),
jsonResult.getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
jsonResult.getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
}
}
@ -182,7 +189,8 @@ public class FolderMonitor {
for (String line : lines) {
line = line.trim().replaceAll("\\s+", " ");
if (line.isEmpty()) continue;
if (line.isEmpty())
continue;
logger.info("处理行: {}", line);
@ -237,11 +245,11 @@ public class FolderMonitor {
// 确保不超出边界
if (currentX + blockWidth > width) {
blockWidth = width - currentX;
logger.warn("分块 {} 宽度超出边界,调整为 {} 像素", i+1, blockWidth);
logger.warn("分块 {} 宽度超出边界,调整为 {} 像素", i + 1, blockWidth);
}
if (blockWidth <= 0) {
logger.warn("分块 {} 宽度为0或负值跳过处理", i+1);
logger.warn("分块 {} 宽度为0或负值跳过处理", i + 1);
continue;
}
@ -254,7 +262,7 @@ public class FolderMonitor {
// 执行OCR识别
String blockResult = tesseract.doOCR(tempFile);
logger.info("分块 {} OCR识别结果: {}", i+1, blockResult);
logger.info("分块 {} OCR识别结果: {}", i + 1, blockResult);
// 删除临时文件
tempFile.delete();
@ -270,9 +278,10 @@ public class FolderMonitor {
/**
* 处理一个区块的OCR识别结果
*
* @param blockResult OCR识别结果文本
* @param keyMapping 关键词映射
* @param resultMap 结果Map
* @param keyMapping 关键词映射
* @param resultMap 结果Map
*/
private void processBlockResult(String blockResult, Map<String, String> keyMapping, Map<String, String> resultMap) {
if (blockResult == null || blockResult.trim().isEmpty()) {
@ -289,7 +298,8 @@ public class FolderMonitor {
// 为每一行寻找匹配的关键字
for (String line : lines) {
line = line.trim();
if (line.isEmpty()) continue;
if (line.isEmpty())
continue;
logger.info("处理行: {}", line);
@ -331,8 +341,8 @@ public class FolderMonitor {
private void markFileAsProcessed(Path imagePath) throws IOException {
String processedFilesPath = configManager.getProcessedFilesPath();
Files.write(Paths.get(processedFilesPath),
(imagePath.toString() + "\n").getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
(imagePath.toString() + "\n").getBytes(StandardCharsets.UTF_8),
java.nio.file.StandardOpenOption.APPEND);
}
private void saveToCurrentResults(Map<String, Object> resultObject) throws IOException {
@ -345,7 +355,7 @@ public class FolderMonitor {
String jsonStr = objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(currentResults);
Files.write(Paths.get(currentResultsPath),
jsonStr.getBytes(StandardCharsets.UTF_8));
jsonStr.getBytes(StandardCharsets.UTF_8));
logger.info("保存当前识别结果到: {}", currentResultsPath);
@ -379,13 +389,14 @@ public class FolderMonitor {
String jsonStr = objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(allResults);
Files.write(Paths.get(allResultsPath),
jsonStr.getBytes(StandardCharsets.UTF_8));
jsonStr.getBytes(StandardCharsets.UTF_8));
logger.info("合并结果到历史记录: {}", allResultsPath);
}
/**
* 将OCR结果JSON文件上传到后端服务器
*
* @param jsonFilePath JSON文件路径
* @throws IOException IO异常
*/
@ -408,13 +419,16 @@ public class FolderMonitor {
org.apache.http.client.methods.HttpPost httpPost = new org.apache.http.client.methods.HttpPost(uploadUrl);
httpPost.setHeader("Content-Type", "application/json");
httpPost.setHeader("Accept", "application/json");
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36");
httpPost.setEntity(new org.apache.http.entity.StringEntity(jsonContent, java.nio.charset.StandardCharsets.UTF_8));
httpPost.setHeader("User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36");
httpPost.setEntity(
new org.apache.http.entity.StringEntity(jsonContent, java.nio.charset.StandardCharsets.UTF_8));
// 发送请求
try (org.apache.http.client.methods.CloseableHttpResponse response = httpClient.execute(httpPost)) {
int statusCode = response.getStatusLine().getStatusCode();
org.apache.http.HttpEntity responseEntity = response.getEntity();
String responseBody = responseEntity != null ? org.apache.http.util.EntityUtils.toString(responseEntity) : null;
String responseBody = responseEntity != null ? org.apache.http.util.EntityUtils.toString(responseEntity)
: null;
if (statusCode >= 200 && statusCode < 300) {
logger.info("成功上传结果到后端,状态码: {}, 响应: {}", statusCode, responseBody);
} else {
@ -430,8 +444,9 @@ public class FolderMonitor {
/**
* 通用底部识别方法便于灵活调用
*
* @param imageFullPath 图片完整路径
* @param bottomConfig 底部识别配置
* @param bottomConfig 底部识别配置
* @return 识别结果
*/
public Map<String, String> recognizeBottomArea(String imageFullPath, Map<String, Object> bottomConfig) {
@ -440,8 +455,9 @@ public class FolderMonitor {
/**
* 处理图片底部区域识别特定关键字
*
* @param imageFullPath 图片完整路径
* @param bottomConfig 底部识别配置
* @param bottomConfig 底部识别配置
* @return 识别结果
*/
@SuppressWarnings("unchecked")
@ -455,7 +471,8 @@ public class FolderMonitor {
int heightPercent = ((Number) bottomConfig.getOrDefault("height_percent", 20)).intValue();
int widthPercent = ((Number) bottomConfig.getOrDefault("width_percent", 100)).intValue();
List<String> keyWords = (List<String>) bottomConfig.getOrDefault("key_words", Collections.emptyList());
Map<String, String> keyMapping = (Map<String, String>) bottomConfig.getOrDefault("key_mapping", Collections.emptyMap());
Map<String, String> keyMapping = (Map<String, String>) bottomConfig.getOrDefault("key_mapping",
Collections.emptyMap());
if (keyWords.isEmpty()) {
logger.warn("未配置需要识别的关键字,跳过底部识别");
@ -481,13 +498,17 @@ public class FolderMonitor {
int startY = imageHeight - bottomHeight; // 底部
// 确保不超出边界
if (startX < 0) startX = 0;
if (startY < 0) startY = 0;
if (bottomWidth > imageWidth) bottomWidth = imageWidth;
if (bottomHeight > imageHeight) bottomHeight = imageHeight;
if (startX < 0)
startX = 0;
if (startY < 0)
startY = 0;
if (bottomWidth > imageWidth)
bottomWidth = imageWidth;
if (bottomHeight > imageHeight)
bottomHeight = imageHeight;
logger.info("截取底部区域 - 起始X: {}px, 起始Y: {}px, 宽度: {}px, 高度: {}px",
startX, startY, bottomWidth, bottomHeight);
startX, startY, bottomWidth, bottomHeight);
// 截取底部区域
BufferedImage bottomArea = image.getSubimage(startX, startY, bottomWidth, bottomHeight);
@ -516,7 +537,8 @@ public class FolderMonitor {
boolean foundAnyMatch = false;
for (String line : lines) {
line = line.trim();
if (line.isEmpty()) continue;
if (line.isEmpty())
continue;
logger.info("处理底部行: {}", line);
@ -609,9 +631,8 @@ public class FolderMonitor {
value = remainingText.substring(0, endPos).trim();
} else {
// 提取前30个字符或者全部如果少于30个字符
value = remainingText.length() > 30 ?
remainingText.substring(0, 30).trim() :
remainingText.trim();
value = remainingText.length() > 30 ? remainingText.substring(0, 30).trim()
: remainingText.trim();
}
// 处理日期时间只保留年月日部分
@ -675,143 +696,165 @@ public class FolderMonitor {
return processImageNormal(imageFullPath);
}
// 新版CG识别逻辑按区域裁剪后分别OCR提取字段
/*
private Map<String, String> processImageWithCG(String imageFullPath) {
Map<String, String> extractedData = new HashMap<>();
try {
BufferedImage image = ImageIO.read(new File(imageFullPath));
int width = image.getWidth();
int height = image.getHeight();
// 1. 标题区和患者信息区分开裁剪
int titleHeight = (int) (height * 0.01); // 标题区5%
int infoHeight = (int) (height * 0.10); // 患者信息区10%
// 跳过标题区只识别患者信息区
BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
String[] infoLines = infoOcr.split("\\r?\\n");
String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)";
String agePattern = "(\\d+)\\s*[岁%]";
String idPattern = "[I1l][D][:]?\\s*([A-Za-z0-9]+)";
for (String line : infoLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("name") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(namePattern).matcher(line);
if (m.find()) {
extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格
extractedData.put("gender", m.group(2));
java.util.regex.Matcher ageM = java.util.regex.Pattern.compile(agePattern).matcher(line);
if (ageM.find()) {
extractedData.put("age", ageM.group(1));
}
}
}
if (extractedData.get("id") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(idPattern).matcher(line);
if (m.find()) {
extractedData.put("id", m.group(1));
}
}
}
// 2. 左侧参数区提取HRPPRQRSQT/QTCP/QRS/TRV5/SV1
int paramWidth = (int) (width * 0.32); // 左侧32%
int paramStartY = titleHeight;
int paramHeight = (int) (height * 0.355); // 参数区高度约38%
BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight);
String paramOcr = tesseract.doOCR(paramArea);
String[] paramLines = paramOcr.split("\\r?\\n");
String hrPattern = "HR\\s*[:.·]?\\s*([\\dOo./]+[bB][pP][mM])";
String pPattern = "P\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String prPattern = "PR\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String qrsPattern = "QRS\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String qtPattern = "QT/QT[cC]?\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
String pqrstPattern = "P/QRS/T\\s*[:.·]?\\s*([\\dOo./]+)";
String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[:.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?";
for (String line : paramLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("HR") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("HR", hrValue);
}
}
if (extractedData.get("P") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("P", pValue);
}
}
if (extractedData.get("PR") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("PR", prValue);
}
}
if (extractedData.get("QRS") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("QRS", qrsValue);
}
}
if (extractedData.get("QT/QTC") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
extractedData.put("QT/QTC", qtValue);
}
}
if (extractedData.get("P/QRS/T") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", "");
extractedData.put("P/QRS/T", pqrstValue);
}
}
if (extractedData.get("RV5/SV1") == null) {
if (line.toLowerCase().contains("sv1")) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
if (m.find()) {
String rv5 = m.group(1).replaceAll("[Oo]", "0");
String sv1 = m.group(2).replaceAll("[Oo]", "0");
String rv5sv1Value = rv5 + "/" + sv1;
extractedData.put("RV5/SV1", rv5sv1Value);
}
}
}
}
// 3. 底部区域提取检查时间
int bottomHeight = (int) (height * 0.05); // 底部12%
int bottomStartY = height - bottomHeight;
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
String bottomOcr = tesseract.doOCR(bottomArea);
String[] bottomLines = bottomOcr.split("\\r?\\n");
String checkTimePattern = "检查[:]?\\s*([\\d-]+ [\\d:]+)";
for (String line : bottomLines) {
String lineNoSpace = line.replaceAll("\\s+", "");
if (extractedData.get("collectionTime") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
if (m.find()) {
String dateTime = m.group(1);
if (!dateTime.contains(" ")) {
dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
}
extractedData.put("collectionTime", dateTime);
}
}
}
} catch (Exception e) {
logger.error("processImageWithCG 区域识别异常", e);
}
return extractedData;
}
*/
/*
* private Map<String, String> processImageWithCG(String imageFullPath) {
* Map<String, String> extractedData = new HashMap<>();
* try {
* BufferedImage image = ImageIO.read(new File(imageFullPath));
* int width = image.getWidth();
* int height = image.getHeight();
*
* // 1. 标题区和患者信息区分开裁剪
* int titleHeight = (int) (height * 0.01); // 标题区5%
* int infoHeight = (int) (height * 0.10); // 患者信息区10%
* // 跳过标题区只识别患者信息区
* BufferedImage infoArea = image.getSubimage(0, titleHeight, width,
* infoHeight);
* String infoOcr = tesseract.doOCR(infoArea);
* String[] infoLines = infoOcr.split("\\r?\\n");
* String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)";
* String agePattern = "(\\d+)\\s*[岁%]";
* String idPattern = "[I1l][D][:]?\\s*([A-Za-z0-9]+)";
* for (String line : infoLines) {
* line = line.replaceAll("\\s+", " ").trim();
* if (extractedData.get("name") == null) {
* java.util.regex.Matcher m =
* java.util.regex.Pattern.compile(namePattern).matcher(line);
* if (m.find()) {
* extractedData.put("name", m.group(1).replaceAll(" ", "")); // 去除姓名中的空格
* extractedData.put("gender", m.group(2));
* java.util.regex.Matcher ageM =
* java.util.regex.Pattern.compile(agePattern).matcher(line);
* if (ageM.find()) {
* extractedData.put("age", ageM.group(1));
* }
* }
* }
* if (extractedData.get("id") == null) {
* java.util.regex.Matcher m =
* java.util.regex.Pattern.compile(idPattern).matcher(line);
* if (m.find()) {
* extractedData.put("id", m.group(1));
* }
* }
* }
*
* // 2. 左侧参数区提取HRPPRQRSQT/QTCP/QRS/TRV5/SV1
* int paramWidth = (int) (width * 0.32); // 左侧32%
* int paramStartY = titleHeight;
* int paramHeight = (int) (height * 0.355); // 参数区高度约38%
* BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth,
* paramHeight);
* String paramOcr = tesseract.doOCR(paramArea);
* String[] paramLines = paramOcr.split("\\r?\\n");
* String hrPattern = "HR\\s*[:.·]?\\s*([\\dOo./]+[bB][pP][mM])";
* String pPattern = "P\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
* String prPattern = "PR\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
* String qrsPattern = "QRS\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
* String qtPattern = "QT/QT[cC]?\\s*[:.·]?\\s*([\\dOo./]+[mM][sS])";
* String pqrstPattern = "P/QRS/T\\s*[:.·]?\\s*([\\dOo./]+)";
* String rv5sv1Pattern =
* "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[:.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?";
* for (String line : paramLines) {
* line = line.replaceAll("\\s+", " ").trim();
* if (extractedData.get("HR") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]",
* "");
* extractedData.put("HR", hrValue);
* }
* }
* if (extractedData.get("P") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
* extractedData.put("P", pValue);
* }
* }
* if (extractedData.get("PR") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]",
* "");
* extractedData.put("PR", prValue);
* }
* }
* if (extractedData.get("QRS") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]",
* "");
* extractedData.put("QRS", qrsValue);
* }
* }
* if (extractedData.get("QT/QTC") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]",
* "");
* extractedData.put("QT/QTC", qtValue);
* }
* }
* if (extractedData.get("P/QRS/T") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern,
* java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
* if (m.find()) {
* String pqrstValue = m.group(1).replaceAll("[Oo]",
* "0").replaceAll("[^\\d/degDEG.]", "");
* extractedData.put("P/QRS/T", pqrstValue);
* }
* }
* if (extractedData.get("RV5/SV1") == null) {
* if (line.toLowerCase().contains("sv1")) {
* java.util.regex.Matcher m =
* java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").
* matcher(line);
* if (m.find()) {
* String rv5 = m.group(1).replaceAll("[Oo]", "0");
* String sv1 = m.group(2).replaceAll("[Oo]", "0");
* String rv5sv1Value = rv5 + "/" + sv1;
* extractedData.put("RV5/SV1", rv5sv1Value);
*
* }
* }
* }
* }
*
* // 3. 底部区域提取检查时间
* int bottomHeight = (int) (height * 0.05); // 底部12%
* int bottomStartY = height - bottomHeight;
* BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width,
* bottomHeight);
* String bottomOcr = tesseract.doOCR(bottomArea);
* String[] bottomLines = bottomOcr.split("\\r?\\n");
* String checkTimePattern = "检查[:]?\\s*([\\d-]+ [\\d:]+)";
* for (String line : bottomLines) {
* String lineNoSpace = line.replaceAll("\\s+", "");
* if (extractedData.get("collectionTime") == null) {
* java.util.regex.Matcher m = java.util.regex.Pattern.compile(
* "检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace)
* ;
* if (m.find()) {
* String dateTime = m.group(1);
* if (!dateTime.contains(" ")) {
* dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
* }
* extractedData.put("collectionTime", dateTime);
* }
* }
* }
* } catch (Exception e) {
* logger.error("processImageWithCG 区域识别异常", e);
* }
* return extractedData;
* }
*/
// 新版CG识别逻辑按区域裁剪后分别OCR提取字段
private Map<String, String> processImageWithCG(String imageFullPath) {
@ -823,7 +866,7 @@ public class FolderMonitor {
// 1. 标题区和患者信息区分开裁剪
int titleHeight = (int) (height * 0.01); // 标题区5%
int infoHeight = (int) (height * 0.10); // 患者信息区10%
int infoHeight = (int) (height * 0.10); // 患者信息区10%
// 跳过标题区只识别患者信息区
BufferedImage infoArea = image.getSubimage(0, titleHeight, width, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
@ -870,42 +913,48 @@ public class FolderMonitor {
for (String line : paramLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("hr") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("hr", hrValue);
}
}
if (extractedData.get("p") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("p", pValue);
}
}
if (extractedData.get("pr") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("pr", prValue);
}
}
if (extractedData.get("qrs") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("qrs", qrsValue);
}
}
if (extractedData.get("qt/qtc") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qtValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
extractedData.put("qt/qtc", qtValue);
}
}
if (extractedData.get("pAxle/qrsAxle/tAxle") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", "");
extractedData.put("pAxle/qrsAxle/tAxle", pqrstValue);
@ -913,7 +962,8 @@ public class FolderMonitor {
}
if (extractedData.get("rv5/sv1") == null) {
if (line.toLowerCase().contains("sv1")) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
if (m.find()) {
String rv5 = m.group(1).replaceAll("[Oo]", "0");
String sv1 = m.group(2).replaceAll("[Oo]", "0");
@ -944,7 +994,8 @@ public class FolderMonitor {
for (String line : bottomLines) {
String lineNoSpace = line.replaceAll("\\s+", "");
if (extractedData.get("collectionTime") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
java.util.regex.Matcher m = java.util.regex.Pattern
.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
if (m.find()) {
String dateTime = m.group(1);
if (!dateTime.contains(" ")) {
@ -992,7 +1043,7 @@ public class FolderMonitor {
} else {
// 如果正则匹配失败尝试直接提取
if (line.contains("姓名") || line.contains("姓名:") || line.contains("姓 名") ||
line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) {
line.contains("姊名") || line.contains("姊名:") || line.contains("姊 名")) {
// 统一处理各种可能的姓名标识
String processedLine = line.replaceAll("[姓姊]\\s*名\\s*[:]?", "姓名:");
String[] parts = processedLine.split("姓名:");
@ -1070,7 +1121,7 @@ public class FolderMonitor {
// 2. 参数区底部1/3适配新模板
int paramStartY = (int) (height * 0.81);
int paramHeight = height - paramStartY;
BufferedImage paramArea = image.getSubimage(0, paramStartY, width-900, paramHeight);
BufferedImage paramArea = image.getSubimage(0, paramStartY, width - 900, paramHeight);
String paramOcr = tesseract.doOCR(paramArea);
String[] paramLines = paramOcr.split("\\r?\\n");
String hrPattern = "(?:心率|心亨)[:]?\\s*([\\dOo./]+)";
@ -1136,7 +1187,8 @@ public class FolderMonitor {
// P轴 - 检查传统格式仅当P轴未识别时
if (pAxleValue == null) {
java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[:]?\\s*([\\dOo.+-]+)").matcher(line);
java.util.regex.Matcher m = java.util.regex.Pattern.compile("P轴[:]?\\s*([\\dOo.+-]+)")
.matcher(line);
if (m.find()) {
pAxleValue = m.group(1).replaceAll("[Oo]", "0");
isPAxleDetected = true; // 标记已识别为P轴
@ -1233,15 +1285,186 @@ public class FolderMonitor {
extractedData.put("pAxle/qrsAxle/tAxle", axleValue.toString());
}
} catch (Exception e) {
logger.error("processImageWithLQXSD 识别异常", e);
}
return extractedData;
}
// 新版察右前旗巴音塔拉中心卫生院识别逻辑按区域裁剪后分别OCR提取字段
private Map<String, String> processImageWithBYTLZX(String imageFullPath) {
Map<String, String> extractedData = new HashMap<>();
try {
BufferedImage image = ImageIO.read(new File(imageFullPath));
int width = image.getWidth();
int height = image.getHeight();
int infoHeight = (int) (height * 0.0427); // 顶部2.5%你可以根据实际图片微调
int infoWidth = (int) (width * 0.25); // 宽度25%只截取左侧1/4
BufferedImage infoArea = image.getSubimage(0, 0, infoWidth, infoHeight);
String infoOcr = tesseract.doOCR(infoArea);
String[] infoLines = infoOcr.split("\\r?\\n");
String namePattern = "^([\\u4e00-\\u9fa5 ]+)\\s+(女|男)";
String agePattern = "(\\d+)\\s*[岁%]";
String idPattern = "[I1l][D][:]?\\s*([A-Za-z0-9]+)";
for (String line : infoLines) {
line = line.replaceAll("\\s+", " ").trim();
// 提取ID
Pattern pattern = Pattern.compile("[I1l][D][:]?\\s*([A-Za-z0-9]*)\\s*(男|女)");
Matcher matcher = pattern.matcher(line);
if (matcher.find()) {
String idValue = matcher.group(1).trim();
String genderValue = matcher.group(2).trim();
if (idValue.isEmpty()) {
idValue = UUID.randomUUID().toString().replace("-", "");
}
extractedData.put("examId", idValue);
extractedData.put("gender", genderValue);
break; // 找到就不用再循环了
}
}
// 2. 左侧参数区提取HRPPRQRSQT/QTCP/QRS/TRV5/SV1
int paramWidth = (int) (width * 0.32); // 左侧32%
int paramStartY = 0;
int paramHeight = (int) (height * 0.21); // 参数区高度约38%
BufferedImage paramArea = image.getSubimage(0, paramStartY, paramWidth, paramHeight);
// 2. 临时保存区域图片调试用
// ImageIO.write(paramArea, "png", new File("test_info.png"));
String paramOcr = tesseract.doOCR(paramArea);
String[] paramLines = paramOcr.split("\\r?\\n");
String hrPattern = "HR\\s*[:.·]?\\s*([\\dOo.]+)\\s*[bB][pP][mM]";
String pPattern = "P\\s*[:.·]?\\s*([\\dOo.]+)\\s*[mM][sS]";
String prPattern = "PR\\s*[:.·]?\\s*([\\dOo.]+)\\s*[mM][sS]";
String qrsPattern = "QRS\\s*[:.·]?\\s*([\\dOo.]+)\\s*[mM][sS]";
String qtPattern = "(?:QT|T)\\s*[:.·]?\\s*(\\d+)\\s*[\"“]?[mM][sS]";
String qtcPattern = "[@Q]Tc\\s*[:.·]?\\s*(\\d+)\\s*[^\\d\\s]?[mM][sS]";
String pqrstPattern = "P/QRS/T\\s*[:.·]?\\s*([\\dOo./]+)";
String rv5sv1Pattern = "([_\\s]*[Rr][Vv5][Ss][\\s/]*[Ss][Vv1Ii])\\s*[:.·]?\\s*([\\dOo./]+)\\s*[mM][vVyY]?";
// 临时变量
String qtValue = null;
String qtcValue = null;
for (String line : paramLines) {
line = line.replaceAll("\\s+", " ").trim();
if (extractedData.get("hr") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(hrPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String hrValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("hr", hrValue);
}
}
if (extractedData.get("p") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(pPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("p", pValue);
}
}
if (extractedData.get("pr") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(prPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String prValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("pr", prValue);
}
}
if (extractedData.get("qrs") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(qrsPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String qrsValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d.]", "");
extractedData.put("qrs", qrsValue);
}
}
// 识别QT
java.util.regex.Matcher mQt = java.util.regex.Pattern
.compile(qtPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (mQt.find()) {
qtValue = mQt.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
}
// 识别QTc
java.util.regex.Matcher mQtc = java.util.regex.Pattern
.compile(qtcPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (mQtc.find()) {
qtcValue = mQtc.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d./]", "");
}
// 只保留 qt/qtc
if (qtValue != null && qtcValue != null) {
extractedData.put("qt/qtc", qtValue + "/" + qtcValue);
} else if (qtValue != null) {
extractedData.put("qt/qtc", qtValue);
} else if (qtcValue != null) {
extractedData.put("qt/qtc", qtcValue);
}
if (extractedData.get("pAxle/qrsAxle/tAxle") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile(pqrstPattern, java.util.regex.Pattern.CASE_INSENSITIVE).matcher(line);
if (m.find()) {
String pqrstValue = m.group(1).replaceAll("[Oo]", "0").replaceAll("[^\\d/degDEG.]", "");
extractedData.put("pAxle/qrsAxle/tAxle", pqrstValue);
}
}
if (extractedData.get("rv5/sv1") == null) {
if (line.toLowerCase().contains("sv1")) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile("([\\dOo.]+)/([\\dOo.]+)\\s*[mM][vVyY]").matcher(line);
if (m.find()) {
String rv5 = m.group(1).replaceAll("[Oo]", "0");
String sv1 = m.group(2).replaceAll("[Oo]", "0");
String rv5sv1Value = rv5 + "/" + sv1;
extractedData.put("rv5/sv1", rv5sv1Value);
// 计算rv5Sv1的值rv5和sv1的和
try {
double rv5Value = Double.parseDouble(rv5);
double sv1Value = Double.parseDouble(sv1);
double sum = rv5Value + sv1Value;
extractedData.put("rv5Sv1", String.format("%.3f", sum));
} catch (NumberFormatException e) {
logger.error("计算rv5Sv1时发生数字格式错误: rv5={}, sv1={}", rv5, sv1, e);
}
}
}
}
}
// 3. 底部区域提取检查时间
int bottomHeight = (int) (height * 0.05); // bottom 5% of the image height
int bottomStartY = height - bottomHeight;
BufferedImage bottomArea = image.getSubimage(0, bottomStartY, width, bottomHeight);
String bottomOcr = tesseract.doOCR(bottomArea);
String[] bottomLines = bottomOcr.split("\\r?\\n");
String checkTimePattern = "检查[:]?\\s*([\\d-]+ [\\d:]+)";
for (String line : bottomLines) {
String lineNoSpace = line.replaceAll("\\s+", "");
if (extractedData.get("collectionTime") == null) {
java.util.regex.Matcher m = java.util.regex.Pattern
.compile("检查[:]?(\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2})").matcher(lineNoSpace);
if (m.find()) {
String dateTime = m.group(1);
if (!dateTime.contains(" ")) {
dateTime = dateTime.substring(0, 10) + " " + dateTime.substring(10);
}
extractedData.put("collectionTime", dateTime);
}
}
}
} catch (Exception e) {
logger.error("processImageWithCG 区域识别异常", e);
}
return extractedData;
}
/**
* 处理图片将图片逆时针旋转90度
*
* @param imageFullPath 图片完整路径
* @return 识别结果
*/
@ -1277,7 +1500,8 @@ public class FolderMonitor {
/**
* 处理OCR识别结果
* @param ocrResult OCR结果文本
*
* @param ocrResult OCR结果文本
* @param keyMapping 关键词映射
* @return 提取的数据
*/
@ -1296,7 +1520,7 @@ public class FolderMonitor {
Map<String, String> timeMapping = new HashMap<>();
for (Map.Entry<String, String> entry : keyMapping.entrySet()) {
if (entry.getValue().contains("time") || entry.getValue().contains("Time") ||
entry.getValue().contains("日期") || entry.getValue().contains("collectionTime")) {
entry.getValue().contains("日期") || entry.getValue().contains("collectionTime")) {
timeMapping.put(entry.getKey(), entry.getValue());
}
}
@ -1315,7 +1539,7 @@ public class FolderMonitor {
Map<String, String> idMapping = new HashMap<>();
for (Map.Entry<String, String> entry : keyMapping.entrySet()) {
if (entry.getValue().contains("id") || entry.getValue().contains("Id") ||
entry.getValue().equals("ID") || entry.getValue().equals("examId")) {
entry.getValue().equals("ID") || entry.getValue().equals("examId")) {
idMapping.put(entry.getKey(), entry.getValue());
}
}
@ -1349,17 +1573,19 @@ public class FolderMonitor {
if (matcher.find()) {
String name = matcher.group(1).replaceAll("\\s+", "");
// 去除名字中的引号字符
name = name.replaceAll("\"", "") // 双引号
.replaceAll("\u201C", "") // 左双引号
.replaceAll("\u201D", ""); // 右双引号
name = name.replaceAll("\"", "") // 双引号
.replaceAll("\u201C", "") // 左双引号
.replaceAll("\u201D", ""); // 右双引号
extractedData.put(nameField, name);
logger.info("从第三行提取姓名: {}", name);
} else {
// 尝试使用另一种提取方法
int nameStart = thirdLine.indexOf("姓名");
if (nameStart < 0) nameStart = thirdLine.indexOf("姓 名");
if (nameStart < 0)
nameStart = thirdLine.indexOf("姓 名");
int ageStart = thirdLine.indexOf("年龄");
if (ageStart < 0) ageStart = thirdLine.indexOf("年 龄");
if (ageStart < 0)
ageStart = thirdLine.indexOf("年 龄");
if (nameStart >= 0 && ageStart > nameStart) {
String nameSection = thirdLine.substring(nameStart + 2, ageStart).trim();
@ -1383,7 +1609,7 @@ public class FolderMonitor {
Map<String, String> genderAgeMapping = new HashMap<>();
for (Map.Entry<String, String> entry : keyMapping.entrySet()) {
if (entry.getValue().contains("gender") || entry.getValue().contains("性别") ||
entry.getValue().contains("age") || entry.getValue().contains("年龄")) {
entry.getValue().contains("age") || entry.getValue().contains("年龄")) {
genderAgeMapping.put(entry.getKey(), entry.getValue());
}
}
@ -1394,7 +1620,8 @@ public class FolderMonitor {
// 如果姓名未提取成功尝试手动提取
if (!extractedData.keySet().stream().anyMatch(k -> k.contains("name") || k.contains("姓名"))) {
// 针对特定的行结构"姓 名 : " : 27"
if (thirdLine.contains("") && thirdLine.contains("") && thirdLine.contains("") && thirdLine.contains("")) {
if (thirdLine.contains("") && thirdLine.contains("") && thirdLine.contains("")
&& thirdLine.contains("")) {
// 获取"姓名""年龄"之间的内容
int nameStart = thirdLine.indexOf("");
int ageStart = thirdLine.indexOf("年龄");
@ -1419,9 +1646,10 @@ public class FolderMonitor {
// 处理剩余行 - 使用完整的关键字映射
for (int i = 3; i < lines.length; i++) {
String line = lines[i].trim().replaceAll("\\s+", " ");
if (line.isEmpty()) continue;
if (line.isEmpty())
continue;
logger.info("处理行 {}: {}", i+1, line);
logger.info("处理行 {}: {}", i + 1, line);
processLine(line, keyMapping, extractedData);
}
@ -1433,8 +1661,9 @@ public class FolderMonitor {
/**
* 处理单行文本提取关键字和对应的值
* @param line 要处理的文本行
* @param keyMapping 关键字映射
*
* @param line 要处理的文本行
* @param keyMapping 关键字映射
* @param extractedData 用于存储提取的数据的Map
*/
private void processLine(String line, Map<String, String> keyMapping, Map<String, String> extractedData) {
@ -1492,7 +1721,8 @@ public class FolderMonitor {
}
}
}
} else if (value.contains("collectionTime") || value.contains("exam_time") || value.contains("time") || value.contains("日期")) {
} else if (value.contains("collectionTime") || value.contains("exam_time") || value.contains("time")
|| value.contains("日期")) {
// 处理日期时间格式
extractedValue = extractedValue.replaceAll("\\s+", " ");
@ -1524,7 +1754,8 @@ public class FolderMonitor {
int day = Integer.parseInt(dayPart);
// 日期有效性检查
if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1 && day <= 31) {
if (year > 1900 && year <= currentYear && month >= 1 && month <= 12 && day >= 1
&& day <= 31) {
extractedValue = String.format("%04d-%02d-%02d", year, month, day);
logger.info("格式化后的日期: {}", extractedValue);
} else {
@ -1542,7 +1773,7 @@ public class FolderMonitor {
// 检查姓名是否包含"医院""卫生院"等机构名称如果包含则可能是误提取
if (extractedValue.contains("医院") || extractedValue.contains("卫生院") ||
extractedValue.contains("诊所") || extractedValue.contains("中心")) {
extractedValue.contains("诊所") || extractedValue.contains("中心")) {
// 这可能是医院名称而不是患者姓名不进行存储
logger.warn("疑似将机构名称误识别为姓名: {}", extractedValue);
continue; // 跳过这个字段不添加到结果中
@ -1564,6 +1795,7 @@ public class FolderMonitor {
/**
* 处理已提取的时间格式
*
* @param extractedData 已提取的数据
*/
private void processTimeFields(Map<String, String> extractedData) {
@ -1574,8 +1806,9 @@ public class FolderMonitor {
String key = entry.getKey();
String value = entry.getValue();
if (key.contains("time") || key.contains("Time") || key.contains("collection") || key.contains("collectionTime") ||
key.contains("时间") || key.contains("日期")) {
if (key.contains("time") || key.contains("Time") || key.contains("collection")
|| key.contains("collectionTime") ||
key.contains("时间") || key.contains("日期")) {
logger.info("处理时间字段: {} = {}", key, value);
// 处理日期时间格式
value = value.replaceAll("\\s+", " ");
@ -1595,7 +1828,7 @@ public class FolderMonitor {
int second = Integer.parseInt(matcher.group(6));
logger.info("原始日期时间值: 年={}, 月={}, 日={}, 时={}, 分={}, 秒={}",
year, month, day, hour, minute, second);
year, month, day, hour, minute, second);
// 获取当前年份
int currentYear = java.time.LocalDate.now().getYear();
@ -1653,7 +1886,7 @@ public class FolderMonitor {
extractedData.put(key, formattedDate);
} else {
logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}",
key, year, month, day);
key, year, month, day);
}
} catch (NumberFormatException e) {
logger.warn("日期时间解析错误: {}, 异常: {}", value, e.getMessage());
@ -1703,7 +1936,7 @@ public class FolderMonitor {
extractedData.put(key, formattedDate);
} else {
logger.warn("字段 {} 的日期在有效性检查后仍然无效: 年={}, 月={}, 日={}",
key, year, month, day);
key, year, month, day);
}
} catch (NumberFormatException e) {
logger.warn("日期解析错误: {}, 异常: {}", value, e.getMessage());