File tree: 2 files changed, +2 -2 lines changed
Columns: original file line number | diff line number | diff line change
@@ -100,7 +100,7 @@ struct RuntimeConfig {
100
100
101
101
// These defaults are overridden by InferenceArgs::CopyTo(*this):
102
102
// Max tokens per batch during prefill.
103
- size_t prefill_tbatch_size = 32 ;
103
+ size_t prefill_tbatch_size = 256 ;
104
104
// Max queries per batch (one token from each) during decode.
105
105
size_t decode_qbatch_size = 16 ;
106
106
Columns: original file line number | diff line number | diff line change
@@ -273,7 +273,7 @@ struct InferenceArgs : public ArgsBase<InferenceArgs> {
273
273
visitor (max_generated_tokens, " max_generated_tokens" , size_t {2048 },
274
274
" Maximum number of tokens to generate." );
275
275
276
- visitor (prefill_tbatch_size, " prefill_tbatch" , size_t {64 },
276
+ visitor (prefill_tbatch_size, " prefill_tbatch" , size_t {256 },
277
277
" Prefill: max tokens per batch." );
278
278
visitor (decode_qbatch_size, " decode_qbatch" , size_t {16 },
279
279
" Decode: max queries per batch." );
You can’t perform that action at this time.
0 commit comments