149 lines
4.8 KiB
Python
Executable File
149 lines
4.8 KiB
Python
Executable File
import cv2
|
||
import time
|
||
import numpy as np
|
||
import onnxruntime
|
||
|
||
# sigmoid函数
|
||
def sigmoid(x):
|
||
return 1. / (1 + np.exp(-x))
|
||
|
||
# tanh函数
|
||
def tanh(x):
|
||
return 2. / (1 + np.exp(-2 * x)) - 1
|
||
|
||
# 数据预处理
|
||
def preprocess(src_img, size):
|
||
output = cv2.resize(src_img,(size[0], size[1]),interpolation=cv2.INTER_AREA)
|
||
output = output.transpose(2,0,1)
|
||
output = output.reshape((1, 3, size[1], size[0])) / 255
|
||
|
||
return output.astype('float32')
|
||
|
||
# nms算法
|
||
def nms(dets, thresh=0.35):
|
||
# dets:N*M,N是bbox的个数,M的前4位是对应的(x1,y1,x2,y2),第5位是对应的分数
|
||
# #thresh:0.3,0.5....
|
||
x1 = dets[:, 0]
|
||
y1 = dets[:, 1]
|
||
x2 = dets[:, 2]
|
||
y2 = dets[:, 3]
|
||
scores = dets[:, 4]
|
||
areas = (x2 - x1 + 1) * (y2 - y1 + 1) # 求每个bbox的面积
|
||
order = scores.argsort()[::-1] # 对分数进行倒排序
|
||
keep = [] # 用来保存最后留下来的bboxx下标
|
||
|
||
while order.size > 0:
|
||
i = order[0] # 无条件保留每次迭代中置信度最高的bbox
|
||
keep.append(i)
|
||
|
||
# 计算置信度最高的bbox和其他剩下bbox之间的交叉区域
|
||
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||
|
||
# 计算置信度高的bbox和其他剩下bbox之间交叉区域的面积
|
||
w = np.maximum(0.0, xx2 - xx1 + 1)
|
||
h = np.maximum(0.0, yy2 - yy1 + 1)
|
||
inter = w * h
|
||
|
||
# 求交叉区域的面积占两者(置信度高的bbox和其他bbox)面积和的必烈
|
||
ovr = inter / (areas[i] + areas[order[1:]] - inter)
|
||
|
||
# 保留ovr小于thresh的bbox,进入下一次迭代。
|
||
inds = np.where(ovr <= thresh)[0]
|
||
|
||
# 因为ovr中的索引不包括order[0]所以要向后移动一位
|
||
order = order[inds + 1]
|
||
|
||
output = []
|
||
for i in keep:
|
||
output.append(dets[i].tolist())
|
||
|
||
return output
|
||
|
||
# 人脸检测
|
||
def detection(session, img, input_width, input_height, thresh):
|
||
pred = []
|
||
|
||
# 输入图像的原始宽高
|
||
H, W, _ = img.shape
|
||
|
||
# 数据预处理: resize, 1/255
|
||
data = preprocess(img, [input_width, input_height])
|
||
|
||
# 模型推理
|
||
input_name = session.get_inputs()[0].name
|
||
feature_map = session.run([], {input_name: data})[0][0]
|
||
|
||
# 输出特征图转置: CHW, HWC
|
||
feature_map = feature_map.transpose(1, 2, 0)
|
||
# 输出特征图的宽高
|
||
feature_map_height = feature_map.shape[0]
|
||
feature_map_width = feature_map.shape[1]
|
||
|
||
# 特征图后处理
|
||
for h in range(feature_map_height):
|
||
for w in range(feature_map_width):
|
||
data = feature_map[h][w]
|
||
|
||
# 解析检测框置信度
|
||
obj_score, cls_score = data[0], data[5:].max()
|
||
score = obj_score * cls_score
|
||
|
||
# 阈值筛选
|
||
if score > thresh:
|
||
# 检测框类别
|
||
cls_index = np.argmax(data[5:])
|
||
# 检测框中心点偏移
|
||
x_offset, y_offset = tanh(data[1]), tanh(data[2])
|
||
# 检测框归一化后的宽高
|
||
box_width, box_height = sigmoid(data[3]), sigmoid(data[4])
|
||
# 检测框归一化后中心点
|
||
box_cx = (w + x_offset) / feature_map_width
|
||
box_cy = (h + y_offset) / feature_map_height
|
||
|
||
# cx,cy,w,h => x1, y1, x2, y2
|
||
x1, y1 = box_cx - 0.5 * box_width, box_cy - 0.5 * box_height
|
||
x2, y2 = box_cx + 0.5 * box_width, box_cy + 0.5 * box_height
|
||
x1, y1, x2, y2 = int(x1 * W), int(y1 * H), int(x2 * W), int(y2 * H)
|
||
|
||
pred.append([x1, y1, x2, y2, score, cls_index])
|
||
|
||
return nms(np.array(pred))
|
||
|
||
if __name__ == '__main__':
|
||
# 读取图片
|
||
img = cv2.imread("1.jpg")
|
||
# 模型输入的宽高
|
||
input_width, input_height = 352, 352
|
||
# 加载模型
|
||
session = onnxruntime.InferenceSession('FastestDet.onnx')
|
||
# 目标检测
|
||
start = time.perf_counter()
|
||
bboxes = detection(session, img, input_width, input_height, 0.8)
|
||
end = time.perf_counter()
|
||
time = (end - start) * 1000.
|
||
print("forward time:%fms"%time)
|
||
|
||
# 加载label names
|
||
names = []
|
||
with open("coco.names", 'r') as f:
|
||
for line in f.readlines():
|
||
names.append(line.strip())
|
||
|
||
print("=================box info===================")
|
||
for b in bboxes:
|
||
print(b)
|
||
obj_score, cls_index = b[4], int(b[5])
|
||
x1, y1, x2, y2 = int(b[0]), int(b[1]), int(b[2]), int(b[3])
|
||
|
||
#绘制检测框
|
||
cv2.rectangle(img, (x1,y1), (x2, y2), (255, 255, 0), 2)
|
||
cv2.putText(img, '%.2f' % obj_score, (x1, y1 - 5), 0, 0.7, (0, 255, 0), 2)
|
||
cv2.putText(img, names[cls_index], (x1, y1 - 25), 0, 0.7, (0, 255, 0), 2)
|
||
|
||
cv2.imwrite("result.jpg", img)
|
||
|
||
|