We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e548164 commit 308858a · Copy full SHA for 308858a
1 file changed
onnxruntime/contrib_ops/cuda/bert/paged_attention.cc
@@ -51,6 +51,7 @@ struct THEvent {
51
};
52
53
struct InputMetadata {
54
+ int64_t schedule_type; // 0: vllm, 1: sarathi, 2: custom, 3: self-build
55
int64_t block_tables;
56
int64_t max_num_blocks_per_seq;
57
int64_t context_lens;
0 commit comments