|
1 |
| -import cv2 |
2 | 1 | import numpy as np
|
| 2 | +import cv2 |
3 | 3 | import argparse
|
4 |
| -import time |
5 |
| -from NanodetPlus import NanoDet |
| 4 | + |
| 5 | +from nanodet import NanoDet |
| 6 | + |
def str2bool(v):
    """Convert a command-line flag value to a bool (usable as an argparse ``type``).

    Accepted spellings are compared case-insensitively after stripping
    surrounding whitespace:

    * True:  'on', 'yes', 'true', 'y', 't'
    * False: 'off', 'no', 'false', 'n', 'f'

    A value that is already a ``bool`` is returned unchanged.

    Raises:
        argparse.ArgumentTypeError: if the value is not one of the accepted
            spellings. argparse reports this as a normal usage error instead
            of an uncaught traceback.
    """
    if isinstance(v, bool):
        return v

    value = str(v).strip().lower()
    if value in ('on', 'yes', 'true', 'y', 't'):
        return True
    if value in ('off', 'no', 'false', 'n', 'f'):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (on/off, yes/no, true/false, y/n, t/f), got {!r}".format(v))
6 | 14 |
|
7 | 15 | backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
|
8 | 16 | targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
|
|
15 | 23 | help_msg_backends += "; {:d}: TIMVX"
|
16 | 24 | help_msg_targets += "; {:d}: NPU"
|
17 | 25 | except:
|
18 |
| - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/Sidd1609/5bb321c8733110ed613ec120c7c02e41 for more information.') |
19 |
| - |
20 |
| -classes = ( 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', |
21 |
| - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', |
22 |
| - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', |
23 |
| - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', |
24 |
| - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', |
25 |
| - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', |
26 |
| - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', |
27 |
| - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', |
28 |
| - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', |
29 |
| - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', |
30 |
| - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', |
31 |
| - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', |
32 |
| - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', |
33 |
| - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' |
34 |
| - ) |
35 |
| - |
36 |
| -def vis(preds, res_img): |
37 |
| - if preds is not None: |
38 |
| - image_shape = (416, 416) |
39 |
| - top, left, newh, neww = 0, 0, image_shape[0], image_shape[1] |
40 |
| - hw_scale = res_img.shape[0] / res_img.shape[1] |
| 26 | + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') |
| 27 | + |
| 28 | +classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', |
| 29 | + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', |
| 30 | + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', |
| 31 | + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', |
| 32 | + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', |
| 33 | + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', |
| 34 | + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', |
| 35 | + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', |
| 36 | + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', |
| 37 | + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', |
| 38 | + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', |
| 39 | + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', |
| 40 | + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', |
| 41 | + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') |
| 42 | + |
def letterbox(srcimg, target_size=(416, 416)):
    """Resize an image to ``target_size`` while keeping its aspect ratio.

    The image is scaled so that it fits inside ``target_size`` and the
    remaining area is filled with a constant black border, split evenly on
    both sides. A square input is simply resized to ``target_size``.

    Args:
        srcimg: input image; only ``shape[0]`` (height) and ``shape[1]``
            (width) are read here. The input is copied, not modified.
        target_size: ``(height, width)`` of the output.
            NOTE(review): the aspect-ratio branch test (``hw_scale > 1``)
            assumes a square target, as in the default - confirm before
            using a non-square size.

    Returns:
        ``(img, letterbox_scale)`` where ``letterbox_scale`` is
        ``[top, left, newh, neww]``: the padding offsets and the size of the
        resized image content inside the letterboxed output.
    """
    img = srcimg.copy()
    target_h, target_w = target_size[0], target_size[1]

    top, left, newh, neww = 0, 0, target_h, target_w
    src_h, src_w = img.shape[0], img.shape[1]
    if src_h != src_w:
        hw_scale = src_h / src_w
        if hw_scale > 1:
            # Taller than wide: fill the height, pad left and right.
            # max(1, ...) keeps cv2.resize from receiving a zero width on
            # extremely narrow inputs.
            newh, neww = target_h, max(1, int(target_w / hw_scale))
            img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
            left = int((target_w - neww) * 0.5)
            img = cv2.copyMakeBorder(img, 0, 0, left, target_w - neww - left,
                                     cv2.BORDER_CONSTANT, value=0)
        else:
            # Wider than tall: fill the width, pad top and bottom.
            newh, neww = max(1, int(target_h * hw_scale)), target_w
            img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
            top = int((target_h - newh) * 0.5)
            img = cv2.copyMakeBorder(img, top, target_h - newh - top, 0, 0,
                                     cv2.BORDER_CONSTANT, value=0)
    else:
        # cv2.resize takes dsize as (width, height), while target_size is
        # (height, width) everywhere else in this function.
        img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_AREA)

    letterbox_scale = [top, left, newh, neww]
    return img, letterbox_scale
| 64 | + |
def unletterbox(bbox, original_image_shape, letterbox_scale):
    """Map a box from letterboxed coordinates back to the original image.

    Args:
        bbox: four values ``(xmin, ymin, xmax, ymax)`` in the letterboxed
            image's pixel coordinates. The input is not modified.
        original_image_shape: ``(height, width)`` of the original image.
        letterbox_scale: ``[top, left, newh, neww]`` - the padding offsets and
            the size of the resized content inside the letterboxed image.

    Returns:
        ``np.ndarray`` of dtype ``int32`` holding ``(xmin, ymin, xmax, ymax)``
        in original-image pixels. The minimum corner is clamped to be >= 0 and
        the maximum corner to be <= width/height.
    """
    h, w = original_image_shape
    top, left, newh, neww = letterbox_scale

    # Work on a float copy so the scaling is not truncated for integer input.
    ret = np.array(bbox, dtype=np.float64)

    # Square images take the same path as every other image: with zero
    # padding and equal ratios the result is the plain rescale, but it is
    # clamped and integer-typed like the non-square case, so callers can pass
    # it straight to drawing functions.
    ratioh, ratiow = h / newh, w / neww
    ret[0] = max((ret[0] - left) * ratiow, 0)
    ret[1] = max((ret[1] - top) * ratioh, 0)
    ret[2] = min((ret[2] - left) * ratiow, w)
    ret[3] = min((ret[3] - top) * ratioh, h)

    return ret.astype(np.int32)
| 83 | + |
def vis(preds, res_img, letterbox_scale, fps=None):
    """Draw detections (and optionally an FPS counter) on a copy of an image.

    Args:
        preds: iterable of detections; for each one the first four values are
            read as the box, the second-to-last as the confidence and the
            last as the class index into ``classes``. ``None`` is treated as
            "no detections".
        res_img: image to annotate; it is copied, not modified.
        letterbox_scale: ``[top, left, newh, neww]`` as produced by
            ``letterbox``; forwarded to ``unletterbox`` to map boxes back to
            ``res_img`` coordinates.
        fps: if not ``None``, drawn in the top-left corner.

    Returns:
        The annotated copy of ``res_img``.
    """
    ret = res_img.copy()

    # draw FPS
    if fps is not None:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    if preds is None:
        return ret

    # draw bboxes and labels
    for pred in preds:
        bbox = pred[:4]
        conf = float(pred[-2])
        classid = int(pred[-1])

        # bbox: OpenCV drawing functions need plain integer coordinates, so
        # convert here instead of depending on the dtype unletterbox returns.
        xmin, ymin, xmax, ymax = (int(c) for c in unletterbox(bbox, ret.shape[:2], letterbox_scale))
        cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)

        # label: keep the text baseline inside the image when the box touches
        # the top edge (25 is the baseline already used for the FPS text at
        # the same font scale).
        label = "{:s}: {:.2f}".format(classes[classid], conf)
        label_y = max(ymin - 10, 25)
        cv2.putText(ret, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)

    return ret
70 | 107 |
|
if __name__=='__main__':
    # Demo entry point: run NanoDet on a single image (--input) or, when no
    # input is given, on frames from the default camera.
    parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
    parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
    parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
    parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
    parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
    parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
    parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
    parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
    parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
    args = parser.parse_args()

    model = NanoDet(modelPath=args.model,
                    prob_threshold=args.confidence,
                    iou_threshold=args.nms,
                    backend_id=args.backend,
                    target_id=args.target)

    tm = cv2.TickMeter()
    tm.reset()
    if args.input is not None:
        image = cv2.imread(args.input)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            parser.error('Could not read input image: {}'.format(args.input))
        # The image is converted from OpenCV's BGR order to RGB before inference.
        input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Letterbox transformation
        input_blob, letterbox_scale = letterbox(input_blob)

        # Inference
        tm.start()
        preds = model.infer(input_blob)
        tm.stop()
        print("Inference time: {:.2f} ms".format(tm.getTimeMilli()))

        # Draw on the original BGR image so it can be shown/saved directly.
        img = vis(preds, image, letterbox_scale)

        if args.save:
            print('Results saved to result.jpg\n')
            cv2.imwrite('result.jpg', img)

        if args.vis:
            cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
            cv2.imshow(args.input, img)
            cv2.waitKey(0)

    else:
        print("Press any key to stop video capture")
        deviceId = 0
        cap = cv2.VideoCapture(deviceId)
        if not cap.isOpened():
            parser.error('Could not open camera device {}'.format(deviceId))

        try:
            while cv2.waitKey(1) < 0:
                hasFrame, frame = cap.read()
                if not hasFrame:
                    print('No frames grabbed!')
                    break

                # Same preprocessing as the image path: BGR -> RGB, then letterbox.
                input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                input_blob, letterbox_scale = letterbox(input_blob)

                # Inference
                tm.start()
                preds = model.infer(input_blob)
                tm.stop()

                img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

                cv2.imshow("NanoDet Demo", img)

                # Reset so the FPS shown reflects only the latest frame.
                tm.reset()
        finally:
            cap.release()
0 commit comments