opencv · fengyuentau · Jan 11, 2023 · Nov 13, 2022
diff --git a/README.md b/README.md
@@ -15,11 +15,13 @@ Guidelines:
 
 ## Models & Benchmark Results
 
-| Model                                                | Task                          | Input Size | INTEL-CPU (ms) | RPI-CPU (ms) | JETSON-GPU (ms) | KV3-NPU (ms) | D1-CPU (ms) |
-| ---------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- |
+| Model                                                   | Task                          | Input Size | INTEL-CPU (ms) | RPI-CPU (ms) | JETSON-GPU (ms) | KV3-NPU (ms) | D1-CPU (ms) |
+| ------------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- |
 | [YuNet](./models/face_detection_yunet)                  | Face Detection                | 160x120    | 1.45           | 6.22         | 12.18           | 4.04         | 86.69       |
 | [SFace](./models/face_recognition_sface)                | Face Recognition              | 112x112    | 8.65           | 99.20        | 24.88           | 46.25        | ---         |
-| [LPD-YuNet](./models/license_plate_detection_yunet/)    | License Plate Detection       | 320x240    | ---            | 168.03       | 56.12           | 29.53        |             |
+| [LPD-YuNet](./models/license_plate_detection_yunet/)    | License Plate Detection       | 320x240    | ---            | 168.03       | 56.12           | 29.53        | ---         |
+| [YOLOX](./models/object_detection_yolox/)               | Object Detection              | 640x640    | 176.68         | 1496.70      | 388.95          | 420.98       | ---         |
+| [NanoDet](./models/object_detection_nanodet/)           | Object Detection              | 416x416    | 157.91         | 220.36       | 64.94           | 116.64       | ---         |
 | [DB-IC15](./models/text_detection_db)                   | Text Detection                | 640x480    | 142.91         | 2835.91      | 208.41          | ---          | ---         |
 | [DB-TD500](./models/text_detection_db)                  | Text Detection                | 640x480    | 142.91         | 2841.71      | 210.51          | ---          | ---         |
 | [CRNN-EN](./models/text_recognition_crnn)               | Text Recognition              | 100x32     | 50.21          | 234.32       | 196.15          | 125.30       | ---         |
@@ -68,6 +70,12 @@ Some examples are listed below. You can find more in the directory of each model
 
 ![license plate detection](./models/license_plate_detection_yunet/examples/lpd_yunet_demo.gif)
 
+### Object Detection with [NanoDet](./models/object_detection_nanodet/) & [YOLOX](./models/object_detection_yolox/)
+
+![nanodet demo](./models/object_detection_nanodet/samples/1_res.jpg)
+
+![yolox demo](./models/object_detection_yolox/samples/3_res.jpg)
+
 ### Object Tracking with [DaSiamRPN](./models/object_tracking_dasiamrpn/)
 
 ![webcam demo](./models/object_tracking_dasiamrpn/examples/dasiamrpn_demo.gif)

diff --git a/benchmark/config/object_detection_nanodet.yaml b/benchmark/config/object_detection_nanodet.yaml
@@ -0,0 +1,21 @@
+Benchmark:
+  name: "Object Detection Benchmark"
+  type: "Detection"
+  data:
+    path: "benchmark/data/object_detection"
+    files: ["1.png", "2.png", "3.png"]
+    sizes:
+      - [416, 416]
+  metric:
+    warmup: 30
+    repeat: 10
+    reduction: "median"
+  backend: "default"
+  target: "cpu"
+
+Model:
+  name: "NanoDet"
+  modelPath: "models/object_detection_nanodet/object_detection_nanodet_2022nov.onnx"
+  prob_threshold: 0.35
+  iou_threshold: 0.6
+
diff --git a/benchmark/config/object_detection_yolox.yaml b/benchmark/config/object_detection_yolox.yaml
@@ -0,0 +1,22 @@
+Benchmark:
+  name: "Object Detection Benchmark"
+  type: "Detection"
+  data:
+    path: "benchmark/data/object_detection"
+    files: ["1.png", "2.png", "3.png"]
+    sizes:
+      - [640, 640]
+  metric:
+    warmup: 30
+    repeat: 10
+    reduction: "median"
+  backend: "default"
+  target: "cpu"
+
+Model:
+  name: "YoloX"
+  modelPath: "models/object_detection_yolox/object_detection_yolox_2022nov.onnx"
+  confThreshold: 0.35
+  nmsThreshold: 0.5
+  objThreshold: 0.5
+
diff --git a/benchmark/download_data.py b/benchmark/download_data.py
@@ -205,6 +205,10 @@ def get_confirm_token(response):  # in case of large files
         url='https://drive.google.com/u/0/uc?id=1cf9MEyUqMMy8lLeDGd1any6tM_SsSmny&export=download',
         sha='997acb143ddc4531e6e41365fb7ad4722064564c',
         filename='license_plate_detection.zip'),
+    object_detection=Downloader(name='object_detection',
+        url='https://drive.google.com/u/0/uc?id=1LUUrQIWYYtiGoNAL_twZvdw5NkC39Swe&export=download',
+        sha='4161a5cd3b0be1f51484abacf19dc9a2231e9894',
+        filename='object_detection.zip'),
 )
 
 if __name__ == '__main__':

diff --git a/models/__init__.py b/models/__init__.py
@@ -12,6 +12,8 @@
 from .palm_detection_mediapipe.mp_palmdet import MPPalmDet
 from .handpose_estimation_mediapipe.mp_handpose import MPHandPose
 from .license_plate_detection_yunet.lpd_yunet import LPD_YuNet
+from .object_detection_nanodet.nanodet import NanoDet
+from .object_detection_yolox.yolox import YoloX
 
 class Registery:
     def __init__(self, name):
@@ -39,3 +41,6 @@ def register(self, item):
 MODELS.register(MPPalmDet)
 MODELS.register(MPHandPose)
 MODELS.register(LPD_YuNet)
+MODELS.register(NanoDet)
+MODELS.register(YoloX)
+
diff --git a/models/object_detection_nanodet/README.md b/models/object_detection_nanodet/README.md
@@ -24,7 +24,9 @@ Here are some of the sample results that were observed using the model,
 
 ![test1_res.jpg](./samples/1_res.jpg)
 ![test2_res.jpg](./samples/2_res.jpg)
-
+
+The original images can be downloaded with [benchmark/download_data.py](../../benchmark/download_data.py).
+
 Video inference result,
 ![WebCamR.gif](./samples/WebCamR.gif)
 

diff --git a/models/object_detection_nanodet/nanodet.py b/models/object_detection_nanodet/nanodet.py
@@ -37,7 +37,7 @@ def __init__(self, modelPath, prob_threshold=0.35, iou_threshold=0.6, backend_id
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backenId):
+    def setBackend(self, backendId):
         self.backend_id = backendId
         self.net.setPreferableBackend(self.backend_id)
 

diff --git a/models/object_detection_yolox/README.md b/models/object_detection_yolox/README.md
@@ -33,10 +33,7 @@ Here are some of the sample results that were observed using the model (**yolox_
 ![2_res.jpg](./samples/2_res.jpg)
 ![3_res.jpg](./samples/3_res.jpg)
 
-<!--  
-Video inference result,
-![WebCamR.gif](./examples/results/WebCamR.gif)
--->
+The original images can be downloaded with [benchmark/download_data.py](../../benchmark/download_data.py).
 
 ## Model metrics: