From 67a172656956190bf76f9e1c41872136b7188c0b Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Thu, 18 Jul 2024 19:56:22 -0700
Subject: [PATCH] Revert "Parallel test with pytest-xdist (#518)"

This reverts commit cbaff6c128d97ff4d26ab60fa5b06c56cd23ba2a.
---
 .github/workflows/regression_test.yml | 3 +--
 dev-requirements.txt                  | 1 -
 pytest.ini                            | 3 ---
 test/dtypes/test_nf4.py               | 1 -
 test/integration/test_integration.py  | 9 +++------
 test/prototype/test_low_bit_optim.py  | 1 -
 6 files changed, 4 insertions(+), 14 deletions(-)
 delete mode 100644 pytest.ini

diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml
index 9fdebeecd7..191fb6fe6d 100644
--- a/.github/workflows/regression_test.yml
+++ b/.github/workflows/regression_test.yml
@@ -68,5 +68,4 @@ jobs:
           pip install ${{ matrix.torch-spec }}
           pip install -r dev-requirements.txt
           pip install .
-          pytest test --verbose -s -m "not multi_gpu" --dist load --tx popen//env:CUDA_VISIBLE_DEVICES=0 --tx popen//env:CUDA_VISIBLE_DEVICES=1 --tx popen//env:CUDA_VISIBLE_DEVICES=2 --tx popen//env:CUDA_VISIBLE_DEVICES=3
-          pytest test --verbose -s -m "multi_gpu"
+          pytest test --verbose -s
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 615fa6cede..a400b1c1ee 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,7 +7,6 @@ transformers
 hypothesis # Avoid test derandomization warning
 sentencepiece # for gpt-fast tokenizer
 expecttest
-pytest-xdist

 # For prototype features and benchmarks
 bitsandbytes #needed for testing triton quant / dequant ops for 8-bit optimizers
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index d00bfa4aef..0000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[pytest]
-markers =
-    multi_gpu: marks tests as require multi GPUs (deselect with '-m "not multi_gpu"')
diff --git a/test/dtypes/test_nf4.py b/test/dtypes/test_nf4.py
index 81d2d2d290..1cdf2708a0 100644
--- a/test/dtypes/test_nf4.py
+++ b/test/dtypes/test_nf4.py
@@ -486,7 +486,6 @@ class TestQLoRA(FSDPTest):
     def world_size(self) -> int:
         return 2

-    @pytest.mark.multi_gpu
     @pytest.mark.skipif(
         version.parse(torch.__version__).base_version < "2.4.0",
         reason="torch >= 2.4 required",
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index 7d7d9ddd13..fd6fa89f15 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -985,10 +985,7 @@ def forward(self, x):
         # save quantized state_dict
         api(model)

-        # unique filename to avoid collision in parallel tests
-        ckpt_name = f"{api.__name__}_{test_device}_{test_dtype}_test.pth"
-
-        torch.save(model.state_dict(), ckpt_name)
+        torch.save(model.state_dict(), "test.pth")
         # get quantized reference
         model_qc = torch.compile(model, mode="max-autotune")
         ref_q = model_qc(x).detach()
@@ -1001,8 +998,8 @@ def forward(self, x):
         api(model)

         # load quantized state_dict
-        state_dict = torch.load(ckpt_name, mmap=True)
-        os.remove(ckpt_name)
+        state_dict = torch.load("test.pth", mmap=True)
+        os.remove("test.pth")
         model.load_state_dict(state_dict, assign=True)
         model = model.to(device=test_device, dtype=test_dtype).eval()

diff --git a/test/prototype/test_low_bit_optim.py b/test/prototype/test_low_bit_optim.py
index 80bea3197e..94cfe34096 100644
--- a/test/prototype/test_low_bit_optim.py
+++ b/test/prototype/test_low_bit_optim.py
@@ -163,7 +163,6 @@ class TestFSDP2(FSDPTest):
     def world_size(self) -> int:
         return 2

-    @pytest.mark.multi_gpu
     @pytest.mark.skipif(not TORCH_VERSION_AFTER_2_4, reason="torch >= 2.4 required")
     @skip_if_lt_x_gpu(2)
     def test_fsdp2(self):