Skip to content

Commit 3f75e61

Browse files
larryliu0820facebook-github-bot
authored andcommitted
Add index check for embedding kernel
Summary: index should always be smaller than weight.size(0). Adding this check in `op_embedding`. This is to avoid wild-addr-read error: ``` AddressSanitizer:DEADLYSIGNAL ================================================================= ==3544359==ERROR: AddressSanitizer: SEGV on unknown address 0x7fce2364bc00 (pc 0x000002d225a0 bp 0x7ffffc792a40 sp 0x7ffffc792990 T0) ==3544359==The signal is caused by a READ memory access. SCARINESS: 20 (wild-addr-read) #0 0x2d225a0 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel<signed char, c10::Half, float>(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175 #1 0x2d22367 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #2 0x2d2223d in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #3 0x2d21d37 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #4 0x2d21bca in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #5 0x2d20f8f in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #6 0x2d20e13 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #7 0x2d20d06 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #8 0x2d226b7 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::KernelRuntimeContext&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:329 #9 0x2d09bef in torch::executor::function::(anonymous namespace)::$_7::operator()(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) const buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:322 #10 0x2d09a70 in torch::executor::function::(anonymous namespace)::$_7::__invoke(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:297 #11 0x27d769b in executorch::runtime::Method::execute_instruction() xplat/executorch/runtime/executor/method.cpp:1306 #12 0x27d8c55 in executorch::runtime::Method::execute() xplat/executorch/runtime/executor/method.cpp:1550 #13 0x27b1e25 in executorch::extension::Module::execute(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, std::vector<executorch::runtime::EValue, std::allocator<executorch::runtime::EValue>> const&) xplat/executorch/extension/module/module.cpp:261 #14 0x27afe43 in executorch::extension::Module::forward(std::vector<executorch::runtime::EValue, std::allocator<executorch::runtime::EValue>> const&) xplat/executorch/extension/module/module.h:340 #15 0x27e0519 in executorch::extension::llm::LlmBackboneRunner::run(std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&) xplat/executorch/examples/models/fb/llama4/runner/llm_backbone_runner.cpp:58 #16 0x27a35c9 in executorch::extension::llm::Llama4Runner::prefill_tokens(std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&) xplat/executorch/examples/models/fb/llama4/runner/llama4_runner.cpp:133 #17 0x885774 in main (/data/users/larryliu/fbsource/buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/cria/benchmark/llama4/__generation_main__/generation_main+0x885774) #18 0x7fce2122c656 in __libc_start_call_main /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/nptl/libc_start_call_main.h:58:16 #19 0x7fce2122c717 in __libc_start_main@GLIBC_2.2.5 /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../csu/libc-start.c:409:3 #20 0x884c20 in _start /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/x86_64/start.S:116 AddressSanitizer can not provide additional info. AddressSanitizer: SEGV xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel<signed char, c10::Half, float>(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) ==3544359==ABORTING ``` Differential Revision: D75982682
1 parent 1f52982 commit 3f75e61

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

kernels/quantized/cpu/op_embedding.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,20 @@ void embedding_byte_per_channel(
153153

154154
for (int i = 0; i < indices.numel(); i++) {
155155
int64_t index = indices_ptr[i];
156+
157+
// Check if index is out of bounds for both weight and weight_scales
158+
ET_CHECK_MSG(
159+
index >= 0 && index < weight.size(0),
160+
"Index out of bounds for weight: index %" PRId64 " must be in range [0, %zd)",
161+
index,
162+
weight.size(0));
163+
164+
ET_CHECK_MSG(
165+
index >= 0 && index < weight_scales.size(0),
166+
"Index out of bounds for weight_scales: index %" PRId64 " must be in range [0, %zd)",
167+
index,
168+
weight_scales.size(0));
169+
156170
// If using groupwise embedding
157171
int32_t qparams_index = index * num_groups_per_channel;
158172
CTYPE_PARAMS zp = 0.0;

kernels/quantized/test/op_embedding_test.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,3 +373,37 @@ TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath5) {
373373
out),
374374
"");
375375
}
376+
377+
TEST(OpQuantizedEmbeddingTest, TestOutOfBoundsIndex) {
378+
et_pal_init();
379+
TensorFactory<ScalarType::Float> tf;
380+
TensorFactory<ScalarType::Long> tf_l;
381+
382+
int64_t quant_min = 0;
383+
int64_t quant_max = 255;
384+
385+
// Create a weight tensor with 3 rows
386+
TensorFactory<ScalarType::Byte> tfo;
387+
Tensor qweight = tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12});
388+
389+
// Create weight_scales with the same number of rows
390+
Tensor weight_scales = tf.make({3, 1}, {0.5, 1.0, 1.5});
391+
Tensor weight_zero_points = tf.make({3, 1}, {1, 5, 7});
392+
393+
// Create indices with an out-of-bounds index (3, which is >= weight.size(0))
394+
Tensor indices = tf_l.make({2}, {1, 3});
395+
396+
Tensor out = tf.zeros({2, 4});
397+
398+
// Expect death when accessing an out-of-bounds index
399+
ET_EXPECT_DEATH(
400+
quantized_embedding_byte_out(
401+
qweight,
402+
weight_scales,
403+
weight_zero_points,
404+
quant_min,
405+
quant_max,
406+
indices,
407+
out),
408+
"Index out of bounds for weight");
409+
}

0 commit comments

Comments
 (0)