Skip to content

Commit 135d584

Browse files
committed
fix bits calculation
1 parent 47f88d2 commit 135d584

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <type_traits>
+
 #include <cub/cub.cuh>
 #include <cublas_v2.h>
 #include <cuda_fp16.h>
@@ -139,7 +141,7 @@ bool TryMatMulBnb4(
 int ldc = n;
 int num_blocks = (n + 3) / 4;
 
-constexpr int bits = ::cuda::std::is_same_v<T, half> ? 16 : 32;
+constexpr int bits = std::is_same_v<T, half> ? 16 : 32;
 kgemm_4bit_inference_naive<T, 128, bits><<<num_blocks, 128, 0, stream>>>(
 m, n, k, a_data, b_data_quant, absmax, quant_map, output, lda, ldb, ldc, block_size);
 
0 commit comments

Comments
 (0)