File tree Expand file tree Collapse file tree 1 file changed +12
-1
lines changed
python/sglang/srt/layers/quantization Expand file tree Collapse file tree 1 file changed +12
-1
lines changed Original file line number Diff line number Diff line change 15
15
if is_cuda ():
16
16
import deep_gemm
17
17
from deep_gemm import get_num_sms
18
+ from deep_gemm .jit .compiler import get_nvcc_compiler
18
19
from deep_gemm .jit_kernels .gemm import get_best_configs
19
20
from deep_gemm .jit_kernels .runtime import FP8GemmRuntime , GemmType
20
21
from deep_gemm .jit_kernels .tuner import jit_tuner
@@ -48,7 +49,17 @@ def get_enable_jit_deepgemm():
48
49
# Refer to https://github.com/deepseek-ai/DeepGEMM/commit/d75b218b7b8f4a5dd5406ac87905039ead3ae42f
49
50
# NVRTC may cause a performance loss in some cases.
50
51
# Also, NVCC JIT compilation is 9x faster as of the referenced commit.
51
- os .environ ["DG_JIT_USE_NVRTC" ] = os .getenv ("SGL_DG_USE_NVRTC" , "0" )
52
+ _USE_NVRTC_DEFAULT = "0"
53
+ if _ENABLE_JIT_DEEPGEMM :
54
+ try :
55
+ get_nvcc_compiler ()
56
+ except :
57
+ logger .warning (
58
+ "NVCC Compiler not found, use NVRTC for DeepGEMM JIT "
59
+ "and may have performance loss with some cases."
60
+ )
61
+ _USE_NVRTC_DEFAULT = "1"
62
+ os .environ ["DG_JIT_USE_NVRTC" ] = os .getenv ("SGL_DG_USE_NVRTC" , _USE_NVRTC_DEFAULT )
52
63
53
64
54
65
def update_deep_gemm_config (gpu_id : int , server_args : ServerArgs ):
You can’t perform that action at this time.
0 commit comments