
Commit ae8b92e

Adding docs and fixing keypoint and mask.
1 parent 60318d9 commit ae8b92e

File tree

5 files changed: 23 additions, 6 deletions

test/test_onnx.py

Lines changed: 7 additions & 4 deletions
@@ -197,7 +197,7 @@ def _init_test_rpn(self):
         rpn_pre_nms_top_n = dict(training=2000, testing=1000)
         rpn_post_nms_top_n = dict(training=2000, testing=1000)
         rpn_nms_thresh = 0.7
-        rpn_score_thresh = 0.05
+        rpn_score_thresh = 0.0

         rpn = RegionProposalNetwork(
             rpn_anchor_generator, rpn_head,
@@ -368,7 +368,8 @@ def get_test_images(self):
     def test_faster_rcnn(self):
         images, test_images = self.get_test_images()
         dummy_image = [torch.ones(3, 100, 100) * 0.3]
-        model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
+        model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300,
+                                                                     rpn_score_thresh=0.0)
         model.eval()
         model(images)
         # Test exported model on images of different size, or dummy input
@@ -419,7 +420,8 @@ def test_paste_mask_in_image(self):
     def test_mask_rcnn(self):
         images, test_images = self.get_test_images()
         dummy_image = [torch.ones(3, 100, 100) * 0.3]
-        model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
+        model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300,
+                                                                 rpn_score_thresh=0.0)
         model.eval()
         model(images)
         # Test exported model on images of different size, or dummy input
@@ -469,7 +471,8 @@ def test_heatmaps_to_keypoints(self):
     def test_keypoint_rcnn(self):
         images, test_images = self.get_test_images()
         dummy_images = [torch.ones(3, 100, 100) * 0.3]
-        model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
+        model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300,
+                                                                         rpn_score_thresh=0.0)
         model.eval()
         model(images)
         self.run_model(model, [(images,), (test_images,), (dummy_images,)],
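
Note: the tests pin rpn_score_thresh=0.0 so that score-based filtering in the RPN cannot change the number of proposals between tracing and later inputs. A minimal sketch of the export pattern these tests exercise (the opset version and the use of io.BytesIO here are assumptions, not taken from this commit):

import io
import torch
from torchvision import models

# Build the detection model with RPN score filtering disabled so the traced
# ONNX graph keeps shape-stable proposal counts (assumption: opset 11).
model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(
    pretrained=True, min_size=200, max_size=300, rpn_score_thresh=0.0)
model.eval()

dummy_image = [torch.ones(3, 100, 100) * 0.3]
onnx_io = io.BytesIO()
torch.onnx.export(model, (dummy_image,), onnx_io,
                  do_constant_folding=True, opset_version=11)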

torchvision/models/detection/_utils.py

Lines changed: 6 additions & 2 deletions
@@ -173,10 +173,14 @@ def decode(self, rel_codes, boxes):
         box_sum = 0
         for val in boxes_per_image:
             box_sum += val
+        if box_sum > 0:
+            rel_codes = rel_codes.reshape(box_sum, -1)
         pred_boxes = self.decode_single(
-            rel_codes.reshape(box_sum, -1), concat_boxes
+            rel_codes, concat_boxes
         )
-        return pred_boxes.reshape(box_sum, -1, 4)
+        if box_sum > 0:
+            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)
+        return pred_boxes

     def decode_single(self, rel_codes, boxes):
         """

torchvision/models/detection/faster_rcnn.py

Lines changed: 2 additions & 0 deletions
@@ -79,6 +79,8 @@ class FasterRCNN(GeneralizedRCNN):
             for computing the loss
         rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
             of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than rpn_score_thresh
         box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
             the locations indicated by the bounding boxes
         box_head (nn.Module): module that takes the cropped feature maps as input
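
The documented behaviour amounts to dropping low-scoring proposals before they reach the box heads; a simplified illustration of that filtering step (this is not the RegionProposalNetwork implementation, just the semantics described by the docstring):

import torch

def filter_proposals_by_score(proposals, objectness_scores, score_thresh):
    # Keep proposals whose classification score is greater than score_thresh,
    # as stated in the rpn_score_thresh docstring; 0.0 effectively keeps all.
    keep = objectness_scores > score_thresh
    return proposals[keep], objectness_scores[keep]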

torchvision/models/detection/keypoint_rcnn.py

Lines changed: 4 additions & 0 deletions
@@ -74,6 +74,8 @@ class KeypointRCNN(FasterRCNN):
             for computing the loss
         rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
             of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than rpn_score_thresh
         box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
             the locations indicated by the bounding boxes
         box_head (nn.Module): module that takes the cropped feature maps as input
@@ -158,6 +160,7 @@ def __init__(self, backbone, num_classes=None,
                  rpn_nms_thresh=0.7,
                  rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                  rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+                 rpn_score_thresh=0.05,
                  # Box parameters
                  box_roi_pool=None, box_head=None, box_predictor=None,
                  box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
@@ -204,6 +207,7 @@ def __init__(self, backbone, num_classes=None,
             rpn_nms_thresh,
             rpn_fg_iou_thresh, rpn_bg_iou_thresh,
             rpn_batch_size_per_image, rpn_positive_fraction,
+            rpn_score_thresh,
             # Box parameters
             box_roi_pool, box_head, box_predictor,
             box_score_thresh, box_nms_thresh, box_detections_per_img,

torchvision/models/detection/mask_rcnn.py

Lines changed: 4 additions & 0 deletions
@@ -78,6 +78,8 @@ class MaskRCNN(FasterRCNN):
             for computing the loss
         rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
             of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than rpn_score_thresh
         box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
             the locations indicated by the bounding boxes
         box_head (nn.Module): module that takes the cropped feature maps as input
@@ -161,6 +163,7 @@ def __init__(self, backbone, num_classes=None,
                  rpn_nms_thresh=0.7,
                  rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                  rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+                 rpn_score_thresh=0.05,
                  # Box parameters
                  box_roi_pool=None, box_head=None, box_predictor=None,
                  box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
@@ -207,6 +210,7 @@ def __init__(self, backbone, num_classes=None,
             rpn_nms_thresh,
             rpn_fg_iou_thresh, rpn_bg_iou_thresh,
             rpn_batch_size_per_image, rpn_positive_fraction,
+            rpn_score_thresh,
             # Box parameters
             box_roi_pool, box_head, box_predictor,
             box_score_thresh, box_nms_thresh, box_detections_per_img,
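
Because KeypointRCNN and MaskRCNN simply forward the new keyword to the FasterRCNN constructor, the pretrained model builders accept it as well, as the updated ONNX tests already show; a short usage sketch (0.05 mirrors the default added above):

from torchvision import models

# Both builders pass rpn_score_thresh through to the underlying FasterRCNN.
mask_model = models.detection.maskrcnn_resnet50_fpn(
    pretrained=True, rpn_score_thresh=0.05)
keypoint_model = models.detection.keypointrcnn_resnet50_fpn(
    pretrained=True, rpn_score_thresh=0.05)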
