Skip to content

Commit 640f164

Browse files
committed
changes before merge
1 parent d4c27bd commit 640f164

11 files changed

+190
-188
lines changed

.DS_Store

-6 KB
Binary file not shown.

models/.DS_Store

-6 KB
Binary file not shown.

models/object_detection_nanodet/README.md

+38-59
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,35 @@
22

33
Nanodet: NanoDet is an FCOS-style one-stage anchor-free object detection model which uses Generalized Focal Loss as its classification and regression loss. In NanoDet-Plus, we propose a novel label assignment strategy with a simple assign guidance module (AGM) and a dynamic soft label assigner (DSLA) to solve the optimal label assignment problem in lightweight model training.
44

5-
#### Model metrics:
6-
Average Precision and Recall values observed for COCO dataset classes are showed below
5+
Note:
6+
- This version of nanodet: Nanodet-m-plus-1.5x_416
7+
8+
## Demo
9+
10+
Run the following command to try the demo:
11+
```shell
12+
# detect on camera input
13+
python demo.py
14+
# detect on an image
15+
python demo.py --input /path/to/image
16+
```
17+
Note:
18+
- image result saved as "result.jpg"
19+
20+
21+
## Results
22+
23+
Here are some sample results observed using the model:
24+
25+
![test1_res.jpg](./examples/results/test1_res.jpg)
26+
![test2_res.jpg](./examples/results/test2_res.jpg)
27+
28+
Video inference result:
29+
![WebCamR.gif](./examples/results/WebCamR.gif)
30+
31+
## Model metrics:
32+
33+
The model is evaluated on [COCO 2017 val](https://cocodataset.org/#download). Results are shown below:
734

835
<table>
936
<tr><th>Average Precision </th><th>Average Recall</th></tr>
@@ -30,63 +57,6 @@ Average Precision and Recall values observed for COCO dataset classes are showed
3057
| large | 0.50:0.95 | 0.702 |
3158
</td></tr> </table>
3259

33-
34-
## Demo
35-
36-
Run the following command to try the demo:
37-
```shell
38-
# Nanodet inference on image input
39-
python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/
40-
41-
# Nanodet inference on video input
42-
python demo.py --model /path/to/model/ --input_type video
43-
44-
#Saving outputs
45-
#Image output
46-
python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/ --save True
47-
48-
#Video output
49-
python demo.py --model /path/to/model/ --input_type video --save True
50-
```
51-
Note:
52-
- By default input_type: image
53-
- image result saved as "result.jpg"
54-
- webcam result saved as "Webcam_result.mp4"
55-
56-
57-
## Results
58-
59-
Here are some of the sample results that were observed using the model,
60-
61-
<p float="left">
62-
<img src="./examples/results/TestResult1.png" width="450" height="450">
63-
<img src="./examples/results/TestResult2.png" width="450" height="450">
64-
</p>
65-
66-
Video inference result,
67-
<p align="center">
68-
<img src="https://github.com/Sidd1609/opencv_zoo/blob/master/models/object_detection_nanodet/examples/results/WebCamR.gif" width="650" height="450">
69-
</p>
70-
71-
72-
## License
73-
74-
All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
75-
76-
77-
## Reference
78-
79-
- Nanodet: https://zhuanlan.zhihu.com/p/306530300
80-
- Nanodet Plus: https://zhuanlan.zhihu.com/p/449912627
81-
- Nanodet weight and scripts for training: https://github.com/RangiLyu/nanodet
82-
83-
84-
#### Note:
85-
86-
- This version of nanodet: Nanodet-m-plus-1.5x_416
87-
- The model was trained on COCO 2017 dataset, link to dataset: https://cocodataset.org/#download
88-
- Below, we have results of COCO data inference
89-
9060
| class | AP50 | mAP | class | AP50 | mAP |
9161
|:--------------|:-------|:------|:---------------|:-------|:------|
9262
| person | 67.5 | 41.8 | bicycle | 35.4 | 18.8 |
@@ -130,6 +100,9 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
130100
| scissors | 27.8 | 17.8 | teddy bear | 54.1 | 35.4 |
131101
| hair drier | 2.9 | 1.1 | toothbrush | 13.1 | 8.2 |
132102

103+
## License
104+
105+
All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
133106

134107
#### Contributor Details
135108

@@ -138,3 +111,9 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
138111
- Github Profile: https://github.com/Sidd1609
139112
- Organisation: OpenCV
140113
- Project: Lightweight object detection models using OpenCV
114+
115+
## Reference
116+
117+
- Nanodet: https://zhuanlan.zhihu.com/p/306530300
118+
- Nanodet Plus: https://zhuanlan.zhihu.com/p/449912627
119+
- Nanodet weight and scripts for training: https://github.com/RangiLyu/nanodet
+127-99
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
1-
import cv2
21
import numpy as np
2+
import cv2
33
import argparse
4-
import time
5-
from NanodetPlus import NanoDet
4+
5+
from nanodet import NanoDet
6+
7+
def str2bool(v):
8+
if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9+
return True
10+
elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11+
return False
12+
else:
13+
raise NotImplementedError
614

715
backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
816
targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
@@ -15,131 +23,151 @@
1523
help_msg_backends += "; {:d}: TIMVX"
1624
help_msg_targets += "; {:d}: NPU"
1725
except:
18-
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/Sidd1609/5bb321c8733110ed613ec120c7c02e41 for more information.')
19-
20-
classes = ( 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21-
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
22-
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
23-
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
24-
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
25-
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
26-
'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
27-
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
28-
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
29-
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
30-
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
31-
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
32-
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
33-
'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
34-
)
35-
36-
def vis(preds, res_img):
37-
if preds is not None:
38-
image_shape = (416, 416)
39-
top, left, newh, neww = 0, 0, image_shape[0], image_shape[1]
40-
hw_scale = res_img.shape[0] / res_img.shape[1]
26+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
27+
28+
classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29+
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
30+
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
31+
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
32+
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
33+
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
34+
'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
35+
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
36+
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
37+
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
38+
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
39+
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
40+
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
41+
'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
42+
43+
def letterbox(srcimg, target_size=(416, 416)):
44+
img = srcimg.copy()
45+
46+
top, left, newh, neww = 0, 0, target_size[0], target_size[1]
47+
if img.shape[0] != img.shape[1]:
48+
hw_scale = img.shape[0] / img.shape[1]
4149
if hw_scale > 1:
42-
newh, neww = image_shape[0], int(image_shape[1] / hw_scale)
43-
left = int((image_shape[1] - neww) * 0.5)
50+
newh, neww = target_size[0], int(target_size[1] / hw_scale)
51+
img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
52+
left = int((target_size[1] - neww) * 0.5)
53+
img = cv2.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv2.BORDER_CONSTANT, value=0) # add border
4454
else:
45-
newh, neww = int(image_shape[0] * hw_scale), image_shape[1]
46-
top = int((image_shape[0] - newh) * 0.5)
47-
48-
ratioh,ratiow = res_img.shape[0]/newh,res_img.shape[1]/neww
49-
50-
det_bboxes = preds[0]
51-
det_conf = preds[1]
52-
det_classid = preds[2]
53-
54-
for i in range(det_bboxes.shape[0]):
55-
xmin, ymin, xmax, ymax = max(int((det_bboxes[i,0] - left) * ratiow), 0), max(int((det_bboxes[i,1] - top) * ratioh), 0), min(
56-
int((det_bboxes[i,2] - left) * ratiow), res_img.shape[1]), min(int((det_bboxes[i,3] - top) * ratioh), res_img.shape[0])
57-
cv2.rectangle(res_img, (xmin, ymin), (xmax, ymax), (0, 0, 0), thickness=2)
58-
#label = '%.2f' % det_conf[i]
59-
label=''
60-
label = '%s%s' % (classes[det_classid[i]], label)
61-
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
62-
top = max(top, labelSize[1])
63-
# cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
64-
cv2.putText(res_img, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
65-
55+
newh, neww = int(target_size[0] * hw_scale), target_size[1]
56+
img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
57+
top = int((target_size[0] - newh) * 0.5)
58+
img = cv2.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
6659
else:
67-
print('No detections')
60+
img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
61+
62+
letterbox_scale = [top, left, newh, neww]
63+
return img, letterbox_scale
64+
65+
def unletterbox(bbox, original_image_shape, letterbox_scale):
66+
ret = bbox.copy()
67+
68+
h, w = original_image_shape
69+
top, left, newh, neww = letterbox_scale
70+
71+
if h == w:
72+
ratio = h / newh
73+
ret = ret * ratio
74+
return ret
75+
76+
ratioh, ratiow = h / newh, w / neww
77+
ret[0] = max((ret[0] - left) * ratiow, 0)
78+
ret[1] = max((ret[1] - top) * ratioh, 0)
79+
ret[2] = min((ret[2] - left) * ratiow, w)
80+
ret[3] = min((ret[3] - top) * ratioh, h)
81+
82+
return ret.astype(np.int32)
83+
84+
def vis(preds, res_img, letterbox_scale, fps=None):
85+
ret = res_img.copy()
6886

69-
return res_img
87+
# draw FPS
88+
if fps is not None:
89+
fps_label = "FPS: %.2f" % fps
90+
cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
91+
92+
# draw bboxes and labels
93+
for pred in preds:
94+
bbox = pred[:4]
95+
conf = pred[-2]
96+
classid = pred[-1].astype(np.int32)
97+
98+
# bbox
99+
xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
100+
cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
101+
102+
# label
103+
label = "{:s}: {:.2f}".format(classes[classid], conf)
104+
cv2.putText(ret, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
105+
106+
return ret
70107

71108
if __name__=='__main__':
72109
parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
73-
parser.add_argument('--model', type=str, default='object_detection_nanodet-plus-m-1.5x-416.onnx', help="Path to the model")
74-
parser.add_argument('--input_type', type=str, default='image', help="Input types: image or video")
75-
parser.add_argument('--image_path', type=str, default='test2.jpg', help="Image path")
110+
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
111+
parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
112+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
113+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
76114
parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
77115
parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
78-
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
116+
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
117+
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
79118
args = parser.parse_args()
80-
model_net = NanoDet(modelPath= args.model ,prob_threshold=args.confidence, iou_threshold=args.nms)
81119

82-
if (args.input_type=="image"):
83-
image = cv2.imread(args.image_path)
84-
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
120+
model = NanoDet(modelPath= args.model,
121+
prob_threshold=args.confidence,
122+
iou_threshold=args.nms,
123+
backend_id=args.backend,
124+
target_id=args.target)
125+
126+
tm = cv2.TickMeter()
127+
tm.reset()
128+
if args.input is not None:
129+
image = cv2.imread(args.input)
130+
input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
85131

86-
a = time.time()
87-
preds = model_net.infer(image)
88-
b = time.time()
89-
print('Inference_Time:'+str(b-a)+' secs')
132+
# Letterbox transformation
133+
input_blob, letterbox_scale = letterbox(input_blob)
90134

91-
srcimg = vis(preds, image)
135+
# Inference
136+
tm.start()
137+
preds = model.infer(input_blob)
138+
tm.stop()
139+
print("Inference time: {:.2f} ms".format(tm.getTimeMilli()))
92140

93-
srcimg = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
94-
cv2.namedWindow(args.image_path, cv2.WINDOW_AUTOSIZE)
95-
cv2.imshow(args.image_path, srcimg)
96-
cv2.waitKey(0)
141+
img = vis(preds, image, letterbox_scale)
97142

98143
if args.save:
99144
print('Resutls saved to result.jpg\n')
100-
cv2.imwrite('result.jpg', srcimg)
145+
cv2.imwrite('result.jpg', img)
101146

102-
else:
103-
print("Press 1 to stop video capture")
104-
cap = cv2.VideoCapture(0)
105-
tm = cv2.TickMeter()
106-
frame_width = int(cap.get(3))
107-
frame_height = int(cap.get(4))
108-
size = (frame_width, frame_height)
109-
total_frames = 0
147+
if args.vis:
148+
cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
149+
cv2.imshow(args.input, img)
150+
cv2.waitKey(0)
110151

111-
if(args.save):
112-
result = cv2.VideoWriter('Webcam_result.avi', cv2.VideoWriter_fourcc(*'MJPG'),10, size)
152+
else:
153+
print("Press any key to stop video capture")
154+
deviceId = 0
155+
cap = cv2.VideoCapture(deviceId)
113156

114157
while cv2.waitKey(1) < 0:
115158
hasFrame, frame = cap.read()
116159
if not hasFrame:
117160
print('No frames grabbed!')
118161
break
119162

120-
frame = cv2.flip(frame, 1)
121-
#frame = cv2.resize(frame, [args.width, args.height])
163+
input_blob, letterbox_scale = letterbox(frame)
122164
# Inference
123165
tm.start()
124-
preds = model_net.infer(frame)
166+
preds = model.infer(input_blob)
125167
tm.stop()
126168

127-
srcimg = vis(preds, frame)
128-
129-
total_frames += 1
130-
fps=tm.getFPS()
131-
132-
if fps > 0:
133-
fps_label = "FPS: %.2f" % fps
134-
cv2.putText(srcimg, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
135-
136-
cv2.imshow("output", srcimg)
137-
138-
if cv2.waitKey(1) < 0:
139-
print("Stream terminated")
140-
break
169+
img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
141170

142-
if(args.save):
143-
result.write(frame)
171+
cv2.imshow("NanoDet Demo", img)
144172

145-
print("Total frames: " + str(total_frames))
173+
tm.reset()
Binary file not shown.
Binary file not shown.
Loading
Loading

0 commit comments

Comments
 (0)