# OCR/config.yaml
#
# NOTE: the repository viewer flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.
# If that is intentional the warning can be ignored; otherwise inspect keys and
# values for look-alike characters before editing them.

# OCR configuration file
#
# Result file paths. The entries are nested under `output` so that they no
# longer parse as unrelated top-level keys next to a null `output`.
output:
  # All recognition results
  all_results: "./ocr_results/all_results.json"
  # Recognition results of the current batch
  current_results: "./ocr_results/current_results.json"
  # List of files that have already been processed
  processed_files: "./ocr_results/processed_files.txt"
  # Paths of files that are missing a required key
  missing_key_files: "./ocr_results/missing_key_files.txt"
  # Recognition results that are missing a required key
  missing_key_results: "./ocr_results/missing_key_results.json"
# Keys that need to be checked for.
# NOTE(review): assumed to be a top-level key — confirm against the loader.
required_keys:
# patient name
- "name"
# examination number
- "examId"
# age
- "age"
# gender
- "gender"
# heart rate
- "hr"
# Backend API configuration.
# The commented-out entry below is an alternative (non-local) endpoint.
# upload_url: https://pacs.gw12320.com/adminecg/admin-api/tblist/ecganalysisparas/parsePhotoCreateData
upload_url: "http://localhost:48080/admin-api/tblist/ecganalysisparas/parsePhotoCreateData"
# Bottom-region recognition configuration.
# All settings are nested under `bottom_recognition`; without the nesting the
# `key_mapping` key here collides with the per-directory `key_mapping` keys.
bottom_recognition:
  # Whether bottom-region recognition is enabled
  enable: true
  # Height of the bottom region, as a percentage
  height_percent: 20
  # Width of the bottom region, as a percentage
  width_percent: 100
  # Keywords that should be recognised
  key_words:
  - "检查日期"
  - "检查时间"
  - "时间"
  - "日期"
  - "报告日期"
  - "报告时间"
  - "记录时间"
  - "诊断日期"
  - "Date"
  - "Time"
  - "测试日期"
  # Mapping from recognised keyword to output field
  key_mapping:
    "检查日期": "collectDate"
    "检查时间": "collectTime"
    "时间": "collectTime"
    "日期": "collectDate"
    "报告日期": "reportDate"
    "报告时间": "reportTime"
    "记录时间": "recordTime"
    "诊断日期": "diagnosisDate"
    "Date": "date"
    "Time": "time"
    "测试日期": "testDate"
# Image directory configuration.
# Each list item describes one image directory; every setting of an entry is
# nested under its `- path:` item so that entries do not overwrite each other.
directories:
- path: "../ecgimage/北屯中心卫生院"
  recognition_type: "split"
  recognition_area:
    start_x: 0
    start_y: 0
    width: 60
    height: 20
  # NOTE(review): the three width_percent values below add up to 110 —
  # confirm whether overlapping blocks are intended.
  split_blocks:
  - width_percent: 40
    key_mapping:
      "编 号": "examId"
      "姓 名": "name"
      "性 别": "gender"
      "年 龄": "age"
      "科 室": "department"
      "床 号": "bed_number"
  - width_percent: 35
    key_mapping:
      "HR": "hr"
      "PR": "pr"
      "QRS": "qrs"
      "QT/QTC": "qt/qtc"
      "P/QRS/T": "pAxle/qrsAxle/tAxle"
      "RV5/SV1": "rv5/sv1"
      "RV5+SV1": "rv5Sv1"
  - width_percent: 35
    key_mapping:
      "备注": "notes"
  bottom_key_words:
  - "检查日期"
  # NOTE(review): "时间" is mapped here but is not listed in bottom_key_words
  # above — confirm whether it should be added there.
  bottom_key_mapping:
    "检查日期": "examDate"
    "时间": "examTime"
# Split-recognition directory with 3 blocks. All settings are nested under
# this list item so they belong to this directory only.
- path: "../ecgimage/礼泉县裴寨卫生院"
  recognition_type: "split"
  # Recognition area
  recognition_area:
    # Starting X coordinate, as a percentage
    start_x: 0
    # Starting Y coordinate, as a percentage
    start_y: 0
    # Width, as a percentage
    width: 80
    # Height, as a percentage
    height: 30
  # Split-block configuration
  split_blocks:
  # Width percentage of the first block
  - width_percent: 25
    key_mapping:
      "编 号": "examId"
      "姓 名": "name"
      "性 别": "gender"
      "年 龄": "age"
      "科 室": "department"
      "床 号": "bed_number"
  # Width percentage of the second block
  - width_percent: 30
    key_mapping:
      "HR": "hr"
      "PR": "pr"
      "QRS": "qrs"
      "QT/QTC": "qt/qtc"
      "P/QRS/T": "pAxle/qrsAxle/tAxle"
      "RV5/SV1": "rv5/sv1"
      "RV5+SV1": "rv5Sv1"
  # Width percentage of the third block
  - width_percent: 45
    key_mapping:
      "备注": "notes"
      "医生": "doctor"
      "日期": "date"
  # Bottom keywords specific to this directory
  bottom_key_words:
  - "检查时间"
  # Bottom keyword mapping specific to this directory
  bottom_key_mapping:
    "检查时间": "collectionTime"
# Split-recognition directory. All settings are nested under this list item.
# NOTE(review): the original comment announced 4 blocks, but only 3
# split_blocks are configured below — confirm whether one is missing.
- path: "../ecgimage/药王洞卫生院"
  recognition_type: "split"
  recognition_area:
    start_x: 0
    start_y: 0
    width: 50
    height: 20
  # NOTE(review): the width_percent values below add up to 105 — confirm
  # whether overlapping blocks are intended.
  split_blocks:
  - width_percent: 37
    key_mapping:
      "编 号": "examId"
      "姓 名": "name"
      "性 别": "gender"
      "年 龄": "age"
      "科 室": "department"
      "床 号": "bed_number"
  - width_percent: 33
    key_mapping:
      "HR": "hr"
      "PR": "pr"
      "QRS": "qrs"
      # NOTE(review): spelled "QT/QTc" here but "QT/QTC" in the other
      # directory entries — confirm this matches the text printed on the images.
      "QT/QTc": "qt/qtc"
      "P/QRS/T": "pAxle/qrsAxle/tAxle"
      "RV5/SV1": "rv5/sv1"
      "RV5+SV1": "rv5Sv1"
  - width_percent: 35
    key_mapping:
      "结果1": "result1"
# Directory entry using recognition type "CG". All settings are nested under
# this list item so they belong to this directory only.
- path: "../ecgimage/礼泉县城关卫生院"
  recognition_type: "CG"
  # Mapping from recognised label to output field
  key_mapping:
    "姓名": "name"
    "性别": "gender"
    "年龄": "age"
    "ID": "examId"
    "HR": "hr"
    # NOTE(review): target field "P" is capitalised, unlike the other fields — confirm.
    "P": "P"
    "PR": "pr"
    "QRS": "qrs"
    "QT/QTC": "qt/qtc"
    "P/QRS/T": "pAxle/qrsAxle/tAxle"
    "RV5/SV1": "rv5/sv1"
    "RV5+SV1": "rv5Sv1"
  bottom_key_words:
  - "检查时间"
  bottom_key_mapping:
    "检查时间": "collectionTime"
# Directory of images that need to be rotated by 90 degrees. All settings are
# nested under this list item so they belong to this directory only.
- path: "../ecgimage/建陵卫生院"
  # Rotation recognition type (added later, per the original comment)
  recognition_type: "rotate90"
  # Recognition area applied after rotation
  recognition_area:
    start_x: 0
    start_y: 0
    # Width, as a percentage
    width: 100
    # Height, as a percentage
    height: 25
  key_mapping:
    "ID": "examId"
    "姓 名": "name"
    "年 龄": "age"
    "性 别": "gender"
    "时 间": "collectionTime"
# Directory entry using recognition type "LQXSD". All settings are nested
# under this list item so they belong to this directory only.
# NOTE(review): this path is rooted at ./images, unlike the ../ecgimage/...
# entries in this list — confirm it is correct.
- path: "./images/史德卫生院"
  recognition_type: "LQXSD"
  # Mapping from recognised label to output field
  key_mapping:
    "姓名": "name"
    "性别": "gender"
    "年龄": "age"
    "ID": "examId"
    "HR": "hr"
    # NOTE(review): target field "P" is capitalised, unlike the other fields — confirm.
    "P": "P"
    "PR": "pr"
    "QRS": "qrs"
    "QT/QTC": "qt/qtc"
    "P/QRS/T": "pAxle/qrsAxle/tAxle"
    "RV5/SV1": "rv5/sv1"
    "RV5+SV1": "rv5Sv1"
  bottom_key_words:
  - "检查时间"
  bottom_key_mapping:
    "检查时间": "collectionTime"
# Directory entry using recognition type "BYTLZX". All settings are nested
# under this list item so they belong to this directory only.
# NOTE(review): this path is relative to the current directory, unlike the
# ../ecgimage/... entries in this list — confirm it is correct.
- path: "./察右前旗巴音塔拉中心卫生院"
  recognition_type: "BYTLZX"
  # Mapping from recognised label to output field
  key_mapping:
    "姓名": "name"
    "性别": "gender"
    "年龄": "age"
    "ID": "examId"
    "HR": "hr"
    # NOTE(review): target field "P" is capitalised, unlike the other fields — confirm.
    "P": "P"
    "PR": "pr"
    "QRS": "qrs"
    "QT/QTC": "qt/qtc"
    "P/QRS/T": "pAxle/qrsAxle/tAxle"
    "RV5/SV1": "rv5/sv1"
    "RV5+SV1": "rv5Sv1"
  bottom_key_words:
  - "检查时间"
  bottom_key_mapping:
    "检查时间": "collectionTime"
# OCR program and language-pack path configuration (Tesseract settings).
#   bin_path:  optional; fill it in only when the path to tesseract.exe has to
#              be specified, otherwise it can be omitted.
#   data_path: required; per the original note, the absolute path of the
#              tessdata directory.
#   language:  required; the language pack(s) to use.
# The three settings are nested under `tesseract` so they no longer parse as
# unrelated top-level keys.
tesseract:
  bin_path: "D:/Program Files/Tesseract-OCR/tesseract.exe"
  # NOTE(review): this value is relative although the note above asks for an
  # absolute path — confirm which one the loader expects.
  data_path: "./tessdata"
  # data_path: "F:/陕西省咸阳市礼泉县心电图FTP/ecgimage/tessdata"
  language: "chi_sim+eng"