Closed
Description
Update:
See the latest benchmark results in another post: #5611 (comment).
# Launch the SGLang server for deepseek-ai/DeepSeek-V3.
# First, warm up for DeepGEMM (JIT DeepGEMM is enabled below through the
# SGL_ENABLE_JIT_DEEPGEMM=1 environment variable).
# No attention backend flag is passed: SGLang uses the FA3 backend by default
# since v0.4.5.post1.
# DP attention with --dp-size 8 is used for the offline use case.
SGL_ENABLE_JIT_DEEPGEMM=1 python3 -m sglang.launch_server \
  --model deepseek-ai/DeepSeek-V3 \
  --tp 8 \
  --trust-remote-code \
  --enable-dp-attention \
  --dp-size 8
# Benchmark sweep against the running server using the random dataset.
# Every run sends 50 prompts at request rate 10 with --random-range-ratio 1;
# only the input/output lengths change between runs.
# Each entry is "<random-input-len> <random-output-len>", run in this order:
#   Random 1k, 2k    -> "1000 2000"
#   Random 5k, 1k    -> "5000 1000"
#   Random 10k, 500  -> "10000 500"
#   Random 30k, 100  -> "30000 100"
for lens in "1000 2000" "5000 1000" "10000 500" "30000 100"; do
  # ${lens% *} is the part before the space (input length);
  # ${lens#* } is the part after it (output length).
  python3 -m sglang.bench_serving \
    --backend sglang-oai \
    --num-prompts 50 \
    --request-rate 10 \
    --dataset-name random \
    --random-input-len "${lens% *}" \
    --random-output-len "${lens#* }" \
    --random-range-ratio 1
done