Skip to content
9 changes: 9 additions & 0 deletions src/transformers/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,11 @@

IS_ROCM_SYSTEM = torch.version.hip is not None
IS_CUDA_SYSTEM = torch.version.cuda is not None
IS_XPU_SYSTEM = torch.version.xpu is not None
else:
IS_ROCM_SYSTEM = False
IS_CUDA_SYSTEM = False
IS_XPU_SYSTEM = False

logger = transformers_logging.get_logger(__name__)

Expand Down Expand Up @@ -3094,6 +3096,13 @@ def get_device_properties() -> DeviceProperties:
return ("rocm", major)
else:
return ("cuda", major)
elif IS_XPU_SYSTEM:
import torch

arch = torch.xpu.get_device_capability()["architecture"]
gen_mask = 0x000000FF00000000
gen = (arch & gen_mask) >> 32
return ("xpu", gen)
else:
return (torch_device, None)

Expand Down
68 changes: 35 additions & 33 deletions tests/models/llama/test_modeling_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.generation.configuration_utils import GenerationConfig
from transformers.testing_utils import (
Expectations,
cleanup,
require_read_token,
require_torch,
Expand Down Expand Up @@ -539,16 +540,6 @@ def _reinitialize_config(base_config, new_kwargs):

@require_torch_accelerator
class LlamaIntegrationTest(unittest.TestCase):
# This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
cuda_compute_capability_major_version = None

@classmethod
def setUpClass(cls):
if is_torch_available() and torch.cuda.is_available():
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

@slow
@require_read_token
def test_llama_3_1_hard(self):
Expand Down Expand Up @@ -593,30 +584,35 @@ def test_model_7b_logits_bf16(self):
# Expected mean on dim = -1

# fmt: off
EXPECTED_MEAN = {
7: torch.tensor([[-6.5061, -4.1147, -4.9669, -3.2038, 0.8069, -2.9694, 1.2864, -3.3786]]),
8: torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536, 0.8127, -2.9811, 1.2918, -3.3848]])
}

expected_means = Expectations(
{
("xpu", 3): torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536, 0.8127, -2.9811, 1.2918, -3.3848]]),
("cuda", 7): torch.tensor([[-6.5061, -4.1147, -4.9669, -3.2038, 0.8069, -2.9694, 1.2864, -3.3786]]),
("cuda", 8): torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536, 0.8127, -2.9811, 1.2918, -3.3848]])
})

expected_mean = expected_means.get_expectation()
self.assertTrue(
torch.allclose(
EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device),
expected_mean.to(torch_device),
out.logits.float().mean(-1),
atol=1e-2,
rtol=1e-2
)
)

# slicing logits[0, 0, 0:15]
EXPECTED_SLICE = {
7: torch.tensor([[-12.5000, -7.0625, -0.6289, -7.8750, -6.9688, -7.8125, -6.4688, -7.4375, -7.6875, -6.9375, -6.0312, -7.0000, -1.8594, 1.8438, -8.5000]]),
8: torch.tensor([[-12.5625, -7.1250, -0.6289, -7.8750, -6.9688, -7.8125, -6.5000, -7.4375, -7.6562, -6.9688, -6.0312, -7.0312, -1.8203, 1.8750, -8.5000]])
}
expected_slices = Expectations(
{
("xpu", 3): torch.tensor([[-12.5625, -7.1250, -0.6289, -7.8750, -6.9688, -7.8125, -6.5000, -7.4375, -7.6562, -6.9688, -6.0312, -7.0312, -1.8203, 1.8750, -8.5000]]),
("cuda", 7): torch.tensor([[-12.5000, -7.0625, -0.6289, -7.8750, -6.9688, -7.8125, -6.4688, -7.4375, -7.6875, -6.9375, -6.0312, -7.0000, -1.8594, 1.8438, -8.5000]]),
("cuda", 8): torch.tensor([[-12.5625, -7.1250, -0.6289, -7.8750, -6.9688, -7.8125, -6.5000, -7.4375, -7.6562, -6.9688, -6.0312, -7.0312, -1.8203, 1.8750, -8.5000]])
})
# fmt: on

expected_slice = expected_slices.get_expectation()
self.assertTrue(
torch.allclose(
EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device),
expected_slice.to(torch_device),
out.logits[0, 0, :15].float(),
atol=1e-2,
rtol=1e-2,
Expand All @@ -637,30 +633,36 @@ def test_model_7b_logits(self):

# fmt: off
# Expected mean on dim = -1
EXPECTED_MEAN = {
7: torch.tensor([[-6.6420, -4.1227, -4.9809, -3.2041, 0.8261, -3.0052, 1.2957, -3.3648]]),
8: torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456, 0.8261, -3.0124, 1.2971, -3.3641]])
}

expected_means = Expectations(
{
("xpu", 3): torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456, 0.8261, -3.0124, 1.2971, -3.3641]]),
("cuda", 7): torch.tensor([[-6.6420, -4.1227, -4.9809, -3.2041, 0.8261, -3.0052, 1.2957, -3.3648]]),
("cuda", 8): torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456, 0.8261, -3.0124, 1.2971, -3.3641]]),
})

expected_mean = expected_means.get_expectation()
self.assertTrue(
torch.allclose(
EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device),
expected_mean.to(torch_device),
out.logits.float().mean(-1),
atol=1e-2,
rtol=1e-2
)
)

# slicing logits[0, 0, 0:15]
EXPECTED_SLICE = {
7: torch.tensor([-12.8125, -7.3359, -0.4846, -8.0234, -7.2383, -7.9922, -6.4805, -7.7344, -7.8125, -7.0078, -6.1797, -7.1094, -1.8633, 1.9736, -8.6016]),
8: torch.tensor([-12.8281, -7.4609, -0.4668, -8.0703, -7.2539, -8.0078, -6.4961, -7.7734, -7.8516, -7.0352, -6.2188, -7.1367, -1.8564, 1.9922, -8.6328])
}
expected_slices = Expectations(
{
("xpu", 3): torch.tensor([-12.8281, -7.4609, -0.4668, -8.0703, -7.2539, -8.0078, -6.4961, -7.7734, -7.8516, -7.0352, -6.2188, -7.1367, -1.8564, 1.9922, -8.6328]),
("cuda", 7): torch.tensor([-12.8125, -7.3359, -0.4846, -8.0234, -7.2383, -7.9922, -6.4805, -7.7344, -7.8125, -7.0078, -6.1797, -7.1094, -1.8633, 1.9736, -8.6016]),
("cuda", 8): torch.tensor([-12.8281, -7.4609, -0.4668, -8.0703, -7.2539, -8.0078, -6.4961, -7.7734, -7.8516, -7.0352, -6.2188, -7.1367, -1.8564, 1.9922, -8.6328])
})
# fmt: on

expected_slice = expected_slices.get_expectation()
self.assertTrue(
torch.allclose(
EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device),
expected_slice.to(torch_device),
out.logits[0, 0, :15].float(),
atol=1e-2,
rtol=1e-2,
Expand Down
6 changes: 4 additions & 2 deletions tests/utils/test_expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@ def test_expectations(self):
("rocm", 8): 4,
("rocm", None): 5,
("cpu", None): 6,
("xpu", 3): 7,
}
)

def check(value, key):
assert expectations.find_expectation(key) == value

# xpu has no matches so should find default expectation
check(1, ("xpu", None))
# npu has no matches so should find default expectation
check(1, ("npu", None))
check(7, ("xpu", 3))
check(2, ("cuda", 8))
check(3, ("cuda", 7))
check(4, ("rocm", 9))
Expand Down