huggingface · ydshieh · Apr 7, 2025 · Mar 31, 2025 · Apr 1, 2025 · Apr 1, 2025
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
@@ -202,9 +202,11 @@
 
     IS_ROCM_SYSTEM = torch.version.hip is not None
     IS_CUDA_SYSTEM = torch.version.cuda is not None
+    IS_XPU_SYSTEM = torch.version.xpu is not None
 else:
     IS_ROCM_SYSTEM = False
     IS_CUDA_SYSTEM = False
+    IS_XPU_SYSTEM = False
 
 logger = transformers_logging.get_logger(__name__)
 
@@ -3094,6 +3096,13 @@ def get_device_properties() -> DeviceProperties:
             return ("rocm", major)
         else:
             return ("cuda", major)
+    elif IS_XPU_SYSTEM:
+        import torch
+
+        arch = torch.xpu.get_device_capability()["architecture"]
+        gen_mask = 0x000000FF00000000
+        gen = (arch & gen_mask) >> 32
+        return ("xpu", gen)
     else:
         return (torch_device, None)
 

diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
@@ -22,6 +22,7 @@
 from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
 from transformers.generation.configuration_utils import GenerationConfig
 from transformers.testing_utils import (
+    Expectations,
     cleanup,
     require_read_token,
     require_torch,
@@ -539,16 +540,6 @@ def _reinitialize_config(base_config, new_kwargs):
 
 @require_torch_accelerator
 class LlamaIntegrationTest(unittest.TestCase):
-    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
-    # Depending on the hardware we get different logits / generations
-    cuda_compute_capability_major_version = None
-
-    @classmethod
-    def setUpClass(cls):
-        if is_torch_available() and torch.cuda.is_available():
-            # 8 is for A100 / A10 and 7 for T4
-            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
-
     @slow
     @require_read_token
     def test_llama_3_1_hard(self):
@@ -593,30 +584,35 @@ def test_model_7b_logits_bf16(self):
         # Expected mean on dim = -1
 
         # fmt: off
-        EXPECTED_MEAN = {
-            7: torch.tensor([[-6.5061, -4.1147, -4.9669, -3.2038, 0.8069, -2.9694, 1.2864, -3.3786]]),
-            8: torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536,  0.8127, -2.9811,  1.2918, -3.3848]])
-        }
-
+        expected_means = Expectations(
+            {
+            ("xpu", 3): torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536,  0.8127, -2.9811,  1.2918, -3.3848]]),
+            ("cuda", 7): torch.tensor([[-6.5061, -4.1147, -4.9669, -3.2038, 0.8069, -2.9694, 1.2864, -3.3786]]),
+            ("cuda", 8): torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536,  0.8127, -2.9811,  1.2918, -3.3848]])
+         })
+
+        expected_mean = expected_means.get_expectation()
         self.assertTrue(
             torch.allclose(
-                EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device),
+                expected_mean.to(torch_device),
                 out.logits.float().mean(-1),
                 atol=1e-2,
                 rtol=1e-2
             )
         )
 
         # slicing logits[0, 0, 0:15]
-        EXPECTED_SLICE = {
-            7: torch.tensor([[-12.5000, -7.0625, -0.6289, -7.8750, -6.9688, -7.8125, -6.4688, -7.4375, -7.6875, -6.9375, -6.0312, -7.0000, -1.8594, 1.8438, -8.5000]]),
-            8: torch.tensor([[-12.5625,  -7.1250,  -0.6289,  -7.8750,  -6.9688,  -7.8125,  -6.5000, -7.4375,  -7.6562,  -6.9688,  -6.0312,  -7.0312,  -1.8203,   1.8750, -8.5000]])
-        }
+        expected_slices = Expectations(
+            {
+            ("xpu", 3): torch.tensor([[-12.5625,  -7.1250,  -0.6289,  -7.8750,  -6.9688,  -7.8125,  -6.5000, -7.4375,  -7.6562,  -6.9688,  -6.0312,  -7.0312,  -1.8203,   1.8750, -8.5000]]),
+            ("cuda", 7): torch.tensor([[-12.5000, -7.0625, -0.6289, -7.8750, -6.9688, -7.8125, -6.4688, -7.4375, -7.6875, -6.9375, -6.0312, -7.0000, -1.8594, 1.8438, -8.5000]]),
+            ("cuda", 8): torch.tensor([[-12.5625,  -7.1250,  -0.6289,  -7.8750,  -6.9688,  -7.8125,  -6.5000, -7.4375,  -7.6562,  -6.9688,  -6.0312,  -7.0312,  -1.8203,   1.8750, -8.5000]])
+        })
         # fmt: on
-
+        expected_slice = expected_slices.get_expectation()
         self.assertTrue(
             torch.allclose(
-                EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device),
+                expected_slice.to(torch_device),
                 out.logits[0, 0, :15].float(),
                 atol=1e-2,
                 rtol=1e-2,
@@ -637,30 +633,36 @@ def test_model_7b_logits(self):
 
         # fmt: off
         # Expected mean on dim = -1
-        EXPECTED_MEAN = {
-            7: torch.tensor([[-6.6420, -4.1227, -4.9809, -3.2041, 0.8261, -3.0052, 1.2957, -3.3648]]),
-            8: torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456,  0.8261, -3.0124,  1.2971, -3.3641]])
-        }
-
+        expected_means = Expectations(
+          {
+            ("xpu", 3): torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456,  0.8261, -3.0124,  1.2971, -3.3641]]),
+            ("cuda", 7): torch.tensor([[-6.6420, -4.1227, -4.9809, -3.2041, 0.8261, -3.0052, 1.2957, -3.3648]]),
+            ("cuda", 8): torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456,  0.8261, -3.0124,  1.2971, -3.3641]]),
+        })
+
+        expected_mean = expected_means.get_expectation()
         self.assertTrue(
             torch.allclose(
-                EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device),
+                expected_mean.to(torch_device),
                 out.logits.float().mean(-1),
                 atol=1e-2,
                 rtol=1e-2
             )
         )
 
         # slicing logits[0, 0, 0:15]
-        EXPECTED_SLICE = {
-            7: torch.tensor([-12.8125, -7.3359, -0.4846, -8.0234, -7.2383, -7.9922, -6.4805, -7.7344, -7.8125, -7.0078, -6.1797, -7.1094, -1.8633, 1.9736, -8.6016]),
-            8: torch.tensor([-12.8281,  -7.4609,  -0.4668,  -8.0703,  -7.2539,  -8.0078,  -6.4961, -7.7734,  -7.8516,  -7.0352,  -6.2188,  -7.1367,  -1.8564,   1.9922, -8.6328])
-        }
+        expected_slices = Expectations(
+            {
+              ("xpu", 3): torch.tensor([-12.8281,  -7.4609,  -0.4668,  -8.0703,  -7.2539,  -8.0078,  -6.4961, -7.7734,  -7.8516,  -7.0352,  -6.2188,  -7.1367,  -1.8564,   1.9922, -8.6328]),
+              ("cuda", 7): torch.tensor([-12.8125, -7.3359, -0.4846, -8.0234, -7.2383, -7.9922, -6.4805, -7.7344, -7.8125, -7.0078, -6.1797, -7.1094, -1.8633, 1.9736, -8.6016]),
+              ("cuda", 8): torch.tensor([-12.8281,  -7.4609,  -0.4668,  -8.0703,  -7.2539,  -8.0078,  -6.4961, -7.7734,  -7.8516,  -7.0352,  -6.2188,  -7.1367,  -1.8564,   1.9922, -8.6328])
+        })
         # fmt: on
 
+        expected_slice = expected_slices.get_expectation()
         self.assertTrue(
             torch.allclose(
-                EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device),
+                expected_slice.to(torch_device),
                 out.logits[0, 0, :15].float(),
                 atol=1e-2,
                 rtol=1e-2,

diff --git a/tests/utils/test_expectations.py b/tests/utils/test_expectations.py
@@ -13,14 +13,16 @@ def test_expectations(self):
                 ("rocm", 8): 4,
                 ("rocm", None): 5,
                 ("cpu", None): 6,
+                ("xpu", 3): 7,
             }
         )
 
         def check(value, key):
             assert expectations.find_expectation(key) == value
 
-        # xpu has no matches so should find default expectation
-        check(1, ("xpu", None))
+        # npu has no matches so should find default expectation
+        check(1, ("npu", None))
+        check(7, ("xpu", 3))
         check(2, ("cuda", 8))
         check(3, ("cuda", 7))
         check(4, ("rocm", 9))