Commit 2e0949e

beat-buesser, fmassa, and datumbox authored
Allow gradient backpropagation through GeneralizedRCNNTransform to inputs (#4327)
* Allow gradient backpropagation through GeneralizedRCNNTransform to inputs
  Signed-off-by: Beat Buesser <[email protected]>
* Add unit tests for gradient backpropagation to inputs
  Signed-off-by: Beat Buesser <[email protected]>
* Update torchvision/models/detection/transform.py
  Co-authored-by: Francisco Massa <[email protected]>
* Update _check_input_backprop
  Signed-off-by: Beat Buesser <[email protected]>
* Account for tests requiring cuda
  Signed-off-by: Beat Buesser <[email protected]>

Co-authored-by: Francisco Massa <[email protected]>
Co-authored-by: Vasilis Vryniotis <[email protected]>
1 parent 526a69e commit 2e0949e
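The end-to-end effect of the commit, sketched below under stated assumptions: a detection model built on GeneralizedRCNNTransform now lets gradients flow back to the raw input images. The model choice, image size, and the box_score_thresh=0.0 setting (to keep all detections so a randomly initialized model returns a non-empty, differentiable "scores" tensor) are illustrative, not part of the commit.

import torch
import torchvision

# Sketch of the behavior this commit enables: gradients flowing through
# GeneralizedRCNNTransform back to the input images. Model and sizes are
# illustrative; box_score_thresh=0.0 is an assumption to avoid an empty
# "scores" tensor from random weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=False, box_score_thresh=0.0
)
model.eval()

images = [torch.rand(3, 300, 400, requires_grad=True)]
out = model(images)  # eval mode: list of dicts with "boxes"/"scores"/"labels"

# Before this commit, batching inside the transform broke the autograd path;
# now the backward pass reaches the original input tensors.
out[0]["scores"].sum().backward()
assert images[0].grad is not None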

File tree: 2 files changed (+50, -2 lines)

test/test_models.py
Lines changed: 47 additions & 0 deletions

@@ -148,6 +148,35 @@ def _check_fx_compatible(model, inputs):
     torch.testing.assert_close(out, out_fx)
 
 
+def _check_input_backprop(model, inputs):
+    if isinstance(inputs, list):
+        requires_grad = list()
+        for inp in inputs:
+            requires_grad.append(inp.requires_grad)
+            inp.requires_grad_(True)
+    else:
+        requires_grad = inputs.requires_grad
+        inputs.requires_grad_(True)
+
+    out = model(inputs)
+
+    if isinstance(out, dict):
+        out["out"].sum().backward()
+    else:
+        if isinstance(out[0], dict):
+            out[0]["scores"].sum().backward()
+        else:
+            out[0].sum().backward()
+
+    if isinstance(inputs, list):
+        for i, inp in enumerate(inputs):
+            assert inputs[i].grad is not None
+            inp.requires_grad_(requires_grad[i])
+    else:
+        assert inputs.grad is not None
+        inputs.requires_grad_(requires_grad)
+
+
 # If 'unwrapper' is provided it will be called with the script model outputs
 # before they are compared to the eager model outputs. This is useful if the
 # model outputs are different between TorchScript / Eager mode
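To see what the helper checks in isolation, here is a minimal sketch using a stand-in module (torch.nn.Linear is illustrative; the real tests pass torchvision models):

import torch
import torch.nn as nn

# Minimal stand-in model; the tests below use torchvision models instead.
model = nn.Linear(4, 2)
x = torch.rand(3, 4)

# The helper flips requires_grad on the inputs, runs a forward pass,
# backpropagates a scalar reduction of the output, asserts that input
# gradients were populated, and restores the original requires_grad flags.
_check_input_backprop(model, x)
print(x.grad is not None)  # True: flags were restored, gradients remain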
@@ -263,6 +292,9 @@ def test_memory_efficient_densenet(model_name):
     assert num_params == num_grad
     torch.testing.assert_close(out1, out2, rtol=0.0, atol=1e-5)
 
+    _check_input_backprop(model1, x)
+    _check_input_backprop(model2, x)
+
 
 @pytest.mark.parametrize('dilate_layer_2', (True, False))
 @pytest.mark.parametrize('dilate_layer_3', (True, False))
@@ -312,6 +344,7 @@ def test_inception_v3_eval():
     model = model.eval()
     x = torch.rand(1, 3, 299, 299)
     _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
+    _check_input_backprop(model, x)
 
 
 def test_fasterrcnn_double():
@@ -327,6 +360,7 @@ def test_fasterrcnn_double():
     assert "boxes" in out[0]
     assert "scores" in out[0]
     assert "labels" in out[0]
+    _check_input_backprop(model, model_input)
 
 
 def test_googlenet_eval():
@@ -343,6 +377,7 @@ def test_googlenet_eval():
     model = model.eval()
     x = torch.rand(1, 3, 224, 224)
     _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
+    _check_input_backprop(model, x)
 
 
 @needs_cuda
@@ -369,13 +404,17 @@ def checkOut(out):
 
     checkOut(out)
 
+    _check_input_backprop(model, model_input)
+
     # now switch to cpu and make sure it works
     model.cpu()
     x = x.cpu()
     out_cpu = model([x])
 
     checkOut(out_cpu)
 
+    _check_input_backprop(model, [x])
+
 
 def test_generalizedrcnn_transform_repr():
 
@@ -426,6 +465,8 @@ def test_classification_model(model_name, dev):
     _assert_expected(out.cpu(), model_name, prec=0.1)
     assert out.shape[-1] == 50
 
+    _check_input_backprop(model, x)
+
 
 @pytest.mark.parametrize('model_name', get_available_segmentation_models())
 @pytest.mark.parametrize('dev', cpu_and_gpu())
@@ -483,6 +524,8 @@ def check_out(out):
         warnings.warn(msg, RuntimeWarning)
         pytest.skip(msg)
 
+    _check_input_backprop(model, x)
+
 
 @pytest.mark.parametrize('model_name', get_available_detection_models())
 @pytest.mark.parametrize('dev', cpu_and_gpu())
@@ -574,6 +617,8 @@ def compute_mean_std(tensor):
         warnings.warn(msg, RuntimeWarning)
         pytest.skip(msg)
 
+    _check_input_backprop(model, model_input)
+
 
 @pytest.mark.parametrize('model_name', get_available_detection_models())
 def test_detection_model_validation(model_name):
@@ -625,6 +670,8 @@ def test_video_model(model_name, dev):
     out = model(x)
     assert out.shape[-1] == 50
 
+    _check_input_backprop(model, x)
+
 
 @pytest.mark.skipif(not ('fbgemm' in torch.backends.quantized.supported_engines and
                          'qnnpack' in torch.backends.quantized.supported_engines),
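The branching on output type inside _check_input_backprop mirrors the torchvision conventions exercised above: segmentation models return a dict with an "out" tensor, detection models take a list of images and return a list of per-image dicts with a "scores" tensor, and classification/video models return a plain tensor. A hedged sketch with hypothetical stand-in modules (not part of the commit):

import torch
import torch.nn as nn

class SegLike(nn.Module):
    # mimics segmentation models, which return {"out": Tensor}
    def forward(self, x):
        return {"out": x * 2.0}

class DetLike(nn.Module):
    # mimics detection models: a list of images in, a list of dicts out
    def forward(self, images):
        return [{"scores": (img * 2.0).flatten()} for img in images]

_check_input_backprop(SegLike(), torch.rand(1, 3, 8, 8))   # dict branch
_check_input_backprop(DetLike(), [torch.rand(3, 8, 8)])    # list-of-dicts branch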

torchvision/models/detection/transform.py
Lines changed: 3 additions & 2 deletions

@@ -214,8 +214,9 @@ def batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor
 
         batch_shape = [len(images)] + max_size
         batched_imgs = images[0].new_full(batch_shape, 0)
-        for img, pad_img in zip(images, batched_imgs):
-            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+        for i in range(batched_imgs.shape[0]):
+            img = images[i]
+            batched_imgs[i, : img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
 
         return batched_imgs
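Why the rewrite matters: iterating directly over batched_imgs (which is what zip does under the hood) goes through Tensor.unbind, and autograd forbids in-place writes such as copy_ on views produced by multi-output view ops, raising a RuntimeError once the source images require gradients. Indexing the batch tensor instead yields an ordinary view that accepts in-place copy_ and keeps the graph intact. A minimal sketch of the same pattern (shapes and values are illustrative):

import torch

src = torch.rand(2, 3, requires_grad=True)
batch = torch.zeros(2, 3)

# Broken pattern (pre-commit): iterating a tensor unbinds it, and in-place
# copy_ on unbind outputs raises a RuntimeError when gradients are tracked:
#     for row, s in zip(batch, src):
#         row.copy_(s)
#
# Fixed pattern (this commit): index the batch tensor directly.
for i in range(batch.shape[0]):
    batch[i].copy_(src[i])

batch.sum().backward()
assert src.grad is not None  # gradients flow back through the padded batch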
