From 0c43c5946abb1d3a0c51bff3523891b2ff5eb31d Mon Sep 17 00:00:00 2001
From: Yosua Michael Maranatha <yosuamichael@fb.com>
Date: Thu, 19 Jan 2023 18:09:30 +0000
Subject: [PATCH 1/2] Use float64 to reduce differences between cpu and gpu
 result

---
 test/test_models.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/test_models.py b/test/test_models.py
index d4dab1bbc9d..c6d85c0160e 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -683,8 +683,9 @@ def test_classification_model(model_fn, dev):
     real_image = kwargs.pop("real_image", False)
 
     model = model_fn(**kwargs)
-    model.eval().to(device=dev)
-    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev)
+    # We use float64 (.double()) to reduce differences between cpu and gpu result
+    model.eval().to(device=dev).double()
+    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev).double()
     out = model(x)
     _assert_expected(out.cpu(), model_name, prec=1e-3)
     assert out.shape[-1] == num_classes
@@ -782,8 +783,9 @@ def test_detection_model(model_fn, dev):
     real_image = kwargs.pop("real_image", False)
 
     model = model_fn(**kwargs)
-    model.eval().to(device=dev)
-    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev)
+    # We use float64 (.double()) to reduce differences between cpu and gpu result
+    model.eval().to(device=dev).double()
+    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev).double()
     model_input = [x]
     with torch.no_grad(), freeze_rng_state():
         out = model(model_input)

From 55a25eba21c56799820a67d4fe97fb4d1522b8a9 Mon Sep 17 00:00:00 2001
From: Yosua Michael Maranatha <yosuamichael@fb.com>
Date: Thu, 26 Jan 2023 15:56:19 +0000
Subject: [PATCH 2/2] Use torch.no_grad to save memory

---
 test/test_models.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/test_models.py b/test/test_models.py
index c6d85c0160e..8ace17cdb99 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -686,14 +686,15 @@ def test_classification_model(model_fn, dev):
     # We use float64 (.double()) to reduce differences between cpu and gpu result
     model.eval().to(device=dev).double()
     x = _get_image(input_shape=input_shape, real_image=real_image, device=dev).double()
-    out = model(x)
+    with torch.no_grad(), freeze_rng_state():
+        out = model(x)
     _assert_expected(out.cpu(), model_name, prec=1e-3)
     assert out.shape[-1] == num_classes
     _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)
     _check_fx_compatible(model, x, eager_out=out)
 
     if dev == "cuda":
-        with torch.cuda.amp.autocast():
+        with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state():
             out = model(x)
             # See autocast_flaky_numerics comment at top of file.
             if model_name not in autocast_flaky_numerics: