# OCR/config.yaml
# OCR configuration file
# Output locations for recognition results.
# The three paths are nested under `output`; left flat they would parse as
# unrelated top-level keys and `output` itself would be null.
output:
  all_results: "./ocr_results/all_results.json"  # all recognition results
  current_results: "./ocr_results/current_results.json"  # recognition results of the current batch
  processed_files: "./ocr_results/processed_files.txt"  # list of files already processed
# Backend API configuration.
# NOTE(review): the original nesting of this key could not be recovered; it is
# kept at the top level - confirm whether the consumer expects `upload_url`
# here or under `output`.
upload_url: "https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData"
# Local alternative, disabled:
# upload_url: "http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData"
# Bottom-region recognition configuration.
bottom_recognition:
  enable: true  # whether bottom-region recognition is enabled
  height_percent: 20  # height of the bottom region, in percent
  width_percent: 100  # width of the bottom region, in percent
  # Keywords to recognize.
  # NOTE(review): "时间" and "日期" are substrings of several longer keywords
  # in this list; confirm the consumer resolves such overlaps as intended.
  key_words:
  - "检查日期"
  - "检查时间"
  - "时间"
  - "日期"
  - "报告日期"
  - "报告时间"
  - "记录时间"
  - "诊断日期"
  - "Date"
  - "Time"
  - "测试日期"
  # Maps each keyword to a field name.
  key_mapping:
    "检查日期": "collectDate"
    "检查时间": "collectTime"
    "时间": "collectTime"
    "日期": "collectDate"
    "报告日期": "reportDate"
    "报告时间": "reportTime"
    "记录时间": "recordTime"
    "诊断日期": "diagnosisDate"
    "Date": "date"
    "Time": "time"
    "测试日期": "testDate"
# Image directory configuration: a list with one entry per image directory.
# Each entry's settings are nested under its `- path:` item so that repeated
# keys such as `key_mapping` do not collide at the top level.
directories:
- path: "./test_images"
  recognition_type: "normal"
  # Maps each recognized label to a field name.
  key_mapping:
    "编 号": "number"
    "姓 名": "name"
    "性 别": "gender"
    "年 龄": "age"
    "科 室": "department"
    "床 号": "bed_number"
    "HR": "HR"
    "PR": "PR"
    "QRS": "QRS"
    "QT/QTC": "QT/QTC"
    "P/QRS/T": "P/QRS/T"
    "RV5/SV1": "RV5/SV1"
    "RV5+SV1": "RV5+SV1"
  # Directory-specific bottom keyword configuration.
  bottom_key_words:
  - "检查日期"
  - "日期"
  # Directory-specific bottom keyword mapping.
  bottom_key_mapping:
    "检查日期": "checkDate"
    "日期": "date"
# Directory entry for "./ocr_images"; settings nested under the list item.
- path: "./ocr_images"
  recognition_type: "normal"
  # Maps each recognized label to a field name.
  key_mapping:
    "ID": "id"
    "申请科室": "department"
    "病床号": "bed_number"
    "HR": "HR"
    "P": "P"
    "QRS": "QRS"
    "QT/QTc": "QT/QTc"
    "P/QRS/T": "P/QRS/T"
    "RV5/SV1": "RV5/SV1"
  # Bottom keywords for this directory and their field names.
  bottom_key_words:
  - "检查日期"
  - "时间"
  bottom_key_mapping:
    "检查日期": "examDate"
    "时间": "examTime"
# Split-block recognition directory with 3 blocks.
- path: "./礼泉县裴寨卫生院"
  recognition_type: "split"
  # Recognition area configuration.
  recognition_area:
    start_x: 0  # starting X coordinate, in percent
    start_y: 0  # starting Y coordinate, in percent
    width: 80  # width, in percent
    height: 30  # height, in percent
  # Split-block configuration; the three width_percent values sum to 100.
  split_blocks:
  - width_percent: 25  # width of the first block, in percent
    key_mapping:
      "编 号": "examId"
      "姓 名": "name"
      "性 别": "gender"
      "年 龄": "age"
      "科 室": "department"
      "床 号": "bed_number"
  - width_percent: 30  # width of the second block, in percent
    key_mapping:
      "HR": "hr"
      "PR": "pr"
      "QRS": "qrs"
      "QT/QTC": "qt/qtc"
      "P/QRS/T": "pAxle/qrsAxle/tAxle"
      "RV5/SV1": "rv5/sv1"
      "RV5+SV1": "rv5Sv1"
  - width_percent: 45  # width of the third block, in percent
    key_mapping:
      "备注": "notes"
      "医生": "doctor"
      "日期": "date"
  # Directory-specific bottom keyword configuration.
  bottom_key_words:
  - "检查时间"
  # Directory-specific bottom keyword mapping.
  bottom_key_mapping:
    "检查时间": "collectionTime"
# Split-block recognition directory with 4 blocks.
# NOTE(review): described as split-block recognition, yet recognition_type is
# "templateA" rather than "split" - confirm this is intended.
- path: "./special_images"
  recognition_type: "templateA"
  # Recognition area; the values cover the full 0-100 range on both axes.
  recognition_area:
    start_x: 0
    start_y: 0
    width: 100
    height: 100
  # Four blocks of equal width (4 x 25 = 100), each with its own label mapping.
  split_blocks:
  - width_percent: 25
    key_mapping:
      "患者": "patient"
      "ID": "id"
  - width_percent: 25
    key_mapping:
      "检查项目": "exam_item"
      "检查日期": "exam_date"
  - width_percent: 25
    key_mapping:
      "结果1": "result1"
      "结果2": "result2"
  - width_percent: 25
    key_mapping:
      "医师": "doctor"
      "结论": "conclusion"
# Directory of images that need to be rotated by 90 degrees.
- path: "./建陵卫生院"
  recognition_type: "rotate90"  # rotation recognition type
  # Recognition area applied after rotation.
  recognition_area:
    start_x: 0
    start_y: 0
    width: 100  # width, in percent
    height: 25  # height, in percent
  # Maps each recognized label to a field name.
  key_mapping:
    "ID": "examId"
    "姓 名": "name"
    "年 龄": "age"
    "性 别": "gender"
    "时 间": "collectionTime"
# Directory entry using recognition type "CG".
- path: "./礼泉县城关卫生院"
  recognition_type: "CG"
  # Maps each recognized label to a field name.
  # NOTE(review): "P" maps to upper-case "P" while its neighbours map to
  # lower-case names - confirm against the backend field list.
  key_mapping:
    "姓名": "name"
    "性别": "gender"
    "年龄": "age"
    "ID": "examId"
    "HR": "hr"
    "P": "P"
    "PR": "pr"
    "QRS": "qrs"
    "QT/QTC": "qt/qtc"
    "P/QRS/T": "pAxle/qrsAxle/tAxle"
    "RV5/SV1": "rv5/sv1"
    "RV5+SV1": "rv5Sv1"
  # Bottom keyword for this directory and its field name.
  bottom_key_words:
  - "检查时间"
  bottom_key_mapping:
    "检查时间": "collectionTime"
# Directory entry using recognition type "LQXSD".
- path: "./史德卫生院"
  recognition_type: "LQXSD"
  # Maps each recognized label to a field name.
  # NOTE(review): "P" maps to upper-case "P" while its neighbours map to
  # lower-case names - confirm against the backend field list.
  key_mapping:
    "姓名": "name"
    "性别": "gender"
    "年龄": "age"
    "ID": "examId"
    "HR": "hr"
    "P": "P"
    "PR": "pr"
    "QRS": "qrs"
    "QT/QTC": "qt/qtc"
    "P/QRS/T": "pAxle/qrsAxle/tAxle"
    "RV5/SV1": "rv5/sv1"
    "RV5+SV1": "rv5Sv1"
  # Bottom keyword for this directory and its field name.
  bottom_key_words:
  - "检查时间"
  bottom_key_mapping:
    "检查时间": "collectionTime"
# OCR program and language-pack path configuration (Tesseract).
# bin_path:  optional; fill it in only when the path to tesseract.exe must be
#            specified, otherwise it can be omitted.
# data_path: required; the original note asks for the absolute path of the
#            tessdata directory.
#            NOTE(review): the active value below is a relative path - confirm
#            it resolves as intended.
# language:  required; specifies the language pack(s).
tesseract:
  bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
  data_path: "./tessdata"
  # data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
  language: "chi_sim+eng"