Commit 4dc60fd

[Quantization] Removing misleading int8 quantization in Finegrained FP8 (#42945)
* rm misleading
* add comment
1 parent: af91c0b

2 files changed: +6, -12 lines changed


src/transformers/integrations/finegrained_fp8.py

Lines changed: 4 additions & 11 deletions
@@ -34,14 +34,10 @@
     _FP8_DTYPE = torch.float8_e4m3fn
     _FP8_MIN = torch.finfo(_FP8_DTYPE).min
     _FP8_MAX = torch.finfo(_FP8_DTYPE).max
-    _FP8_IS_INT = False
 except AttributeError:
-    _FP8_DTYPE = torch.int8
-    _FP8_MIN, _FP8_MAX = -127, 127
-    _FP8_IS_INT = True
-    logger.warning_once(
-        "torch.float8_e4m3fn not available; falling back to int8 emulation for Fp8Quantize operations."
-    )
+    _FP8_DTYPE = None
+    _FP8_MIN, _FP8_MAX = -448, 448
+    logger.warning_once("torch.float8_e4m3fn not available")
 
 
 # Copied from https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/inference/kernel.py
@@ -701,10 +697,7 @@ def convert(self, input_dict: torch.Tensor, **kwargs) -> dict[str, torch.Tensor]
         scales_broadcast = scales.unsqueeze(-1).unsqueeze(-3)  # -> (..., rows_tiles, 1, cols_tiles, 1)
         scaled = reshaped * scales_broadcast
 
-        if _FP8_IS_INT:
-            quantized = torch.clamp(scaled.round(), min=_FP8_MIN, max=_FP8_MAX).to(_FP8_DTYPE)
-        else:
-            quantized = torch.clamp(scaled, min=_FP8_MIN, max=_FP8_MAX).to(_FP8_DTYPE)
+        quantized = torch.clamp(scaled, min=_FP8_MIN, max=_FP8_MAX).to(_FP8_DTYPE)
 
         quantized = quantized.reshape(original_shape)
 
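With the int8 fallback removed, the quantize path always clamps the scaled tiles to the native e4m3fn range and casts to the FP8 dtype. Below is a minimal sketch of that block-wise quantization step, assuming `torch.float8_e4m3fn` is available; the `quantize_block` helper, the 128x128 tile size, and the multiplicative scale convention are illustrative, not the exact internals of `finegrained_fp8.py`.

```python
# Minimal sketch of per-tile FP8 quantization along the lines of the hunk above.
# Assumptions: torch.float8_e4m3fn exists; quantize_block and the 128x128 tile
# size are illustrative, not the actual Fp8Quantize implementation.
import torch

FP8_DTYPE = torch.float8_e4m3fn
FP8_MAX = torch.finfo(FP8_DTYPE).max  # 448.0 for e4m3fn

def quantize_block(weight: torch.Tensor, block: int = 128):
    """Quantize a 2D weight per (block x block) tile; return (fp8 weight, per-tile scales)."""
    rows, cols = weight.shape
    tiled = weight.reshape(rows // block, block, cols // block, block)
    # One multiplicative scale per tile so the largest magnitude maps to FP8_MAX,
    # mirroring the `reshaped * scales_broadcast` step in the diff.
    amax = tiled.abs().amax(dim=(1, 3), keepdim=True).clamp(min=1e-12)
    scale = FP8_MAX / amax
    # Clamp to the native FP8 range and cast -- no int8 rounding emulation.
    quantized = torch.clamp(tiled * scale, min=-FP8_MAX, max=FP8_MAX).to(FP8_DTYPE)
    return quantized.reshape(rows, cols), scale.squeeze(-1).squeeze(1)

w_fp8, scales = quantize_block(torch.randn(256, 256))
print(w_fp8.dtype, scales.shape)  # torch.float8_e4m3fn torch.Size([2, 2])
```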
src/transformers/quantizers/quantizer_finegrained_fp8.py

Lines changed: 2 additions & 1 deletion
@@ -48,7 +48,8 @@ def validate_environment(self, *args, **kwargs):
         if (major < 8) or (major == 8 and minor < 9):
             logger.warning_once(
                 "FP8 quantized models is only supported on GPUs with compute capability >= 8.9 (e.g 4090/H100)"
-                f", actual = `{major}.{minor}`. We will default to dequantizing the model to bf16 "
+                f", actual = `{major}.{minor}`. We will default to dequantizing the model to bf16. Feel free "
+                f"to use a different quantization method like bitsandbytes or torchao"
             )
             self.quantization_config.dequantize = True
             return
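The warning path above fires on GPUs below compute capability 8.9, in which case the model is dequantized to bf16. A quick local check, assuming at least one CUDA device is visible to PyTorch (this snippet is not part of the commit):

```python
# Not part of this commit: a quick local compute-capability check.
# Assumes a CUDA device is visible to PyTorch.
import torch

major, minor = torch.cuda.get_device_capability()
if (major, minor) < (8, 9):
    print(f"sm_{major}{minor}: no native FP8 support, the FP8 model would be dequantized to bf16")
else:
    print(f"sm_{major}{minor}: native float8_e4m3fn is supported")
```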
