From 3b45e88e4c6a61ea482a7ee4634fed28979800a6 Mon Sep 17 00:00:00 2001
From: Zonglin Peng
Date: Mon, 3 Mar 2025 16:08:50 -0800
Subject: [PATCH 1/2] init

---
 backends/cadence/aot/functions.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/backends/cadence/aot/functions.yaml b/backends/cadence/aot/functions.yaml
index f1a5b6a50b0..173538cc2ee 100644
--- a/backends/cadence/aot/functions.yaml
+++ b/backends/cadence/aot/functions.yaml
@@ -214,6 +214,11 @@
     - arg_meta: null
       kernel_name: impl::reference::quantized_relu_out
 
+- func: cadence::quantized_relu.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_relu_per_tensor_out
+
 - func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

From c29492079a12b83235a5f41ad0f901831e0dc831 Mon Sep 17 00:00:00 2001
From: Zonglin Peng
Date: Mon, 3 Mar 2025 16:09:33 -0800
Subject: [PATCH 2/2] init

---
 .../operators/quantized_relu_out.cpp | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/backends/cadence/reference/operators/quantized_relu_out.cpp b/backends/cadence/reference/operators/quantized_relu_out.cpp
index 19b971405c9..7a385849aee 100644
--- a/backends/cadence/reference/operators/quantized_relu_out.cpp
+++ b/backends/cadence/reference/operators/quantized_relu_out.cpp
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/cadence/reference/kernels/kernels.h>
+#include <executorch/backends/cadence/reference/operators/operators.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
 namespace impl {
@@ -75,6 +76,59 @@ void quantized_relu_out(
   }
 }
 
+template <typename T>
+void quantized_relu_per_tensor_out_(
+    __ET_UNUSED KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const int64_t in_zero_point,
+    const int64_t out_zero_point,
+    const int64_t out_multiplier,
+    const int64_t out_shift,
+    Tensor& output) {
+  const T* __restrict__ in = input.const_data_ptr<T>();
+  T* __restrict__ out = output.mutable_data_ptr<T>();
+
+  // Compute the out_scale from out_multiplier and out_shift
+  const float out_scale = -out_multiplier * 1.0 / (1 << 31) * pow(2, out_shift);
+
+  for (size_t i = 0, e = input.numel(); i < e; ++i) {
+    const float temp = in[i] > in_zero_point ? (in[i] - in_zero_point) : 0;
+    out[i] = kernels::quantize<T>(temp, out_scale, out_zero_point);
+  }
+}
+
+void quantized_relu_per_tensor_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const int64_t in_zero_point,
+    const int64_t out_zero_point,
+    const int64_t out_multiplier,
+    const int64_t out_shift,
+    Tensor& output) {
+#define typed_quantized_relu(ctype, dtype)    \
+  case executorch::aten::ScalarType::dtype: { \
+    quantized_relu_per_tensor_out_<ctype>(    \
+        ctx,                                  \
+        input,                                \
+        in_zero_point,                        \
+        out_zero_point,                       \
+        out_multiplier,                       \
+        out_shift,                            \
+        output);                              \
+    break;                                    \
+  }
+
+  executorch::aten::ScalarType dtype = input.scalar_type();
+  switch (dtype) {
+    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_relu)
+    default:
+      ET_DCHECK_MSG(
+          false, "Unhandled dtype %s", torch::executor::toString(dtype));
+  }
+
+#undef typed_quantized_relu
+}
+
 }; // namespace native
 }; // namespace reference
 }; // namespace impl
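
Note (illustrative, not part of the patch): the per-tensor kernel above reconstructs a float out_scale from the Q31 fixed-point out_multiplier (stored negated, per the convention in this diff) and the power-of-two out_shift, clamps the input at in_zero_point (which is ReLU in the quantized domain, since in_zero_point represents real zero), and then requantizes. Below is a minimal standalone C++ sketch of that math on a raw int8 buffer; quantize_i8 is a hypothetical stand-in for impl::reference::kernels::quantize, and all parameter values in main are made up for demonstration.

// Hypothetical stand-in for impl::reference::kernels::quantize<int8_t>:
// divide by scale, add the zero point, round, and saturate to int8.
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int8_t quantize_i8(float x, float scale, int64_t zero_point) {
  float q = std::round(x / scale) + static_cast<float>(zero_point);
  q = q < -128.f ? -128.f : (q > 127.f ? 127.f : q);
  return static_cast<int8_t>(q);
}

// Mirrors the per-tensor kernel: clamp at in_zero_point, then requantize
// with out_scale = -out_multiplier / 2^31 * 2^out_shift (the patch's
// convention).
void quantized_relu_per_tensor_i8(
    const int8_t* in, size_t n,
    int64_t in_zero_point, int64_t out_zero_point,
    int64_t out_multiplier, int64_t out_shift,
    int8_t* out) {
  const float out_scale =
      -out_multiplier * 1.0f / (1u << 31) * std::pow(2.0f, (float)out_shift);
  for (size_t i = 0; i < n; ++i) {
    const float temp = in[i] > in_zero_point ? (in[i] - in_zero_point) : 0;
    out[i] = quantize_i8(temp, out_scale, out_zero_point);
  }
}

int main() {
  // out_multiplier = -2^31 and out_shift = 0 give out_scale == 1.0 under
  // the convention above; the zero points here are arbitrary examples.
  const int8_t in[4] = {-20, -3, 5, 90};
  int8_t out[4];
  quantized_relu_per_tensor_i8(
      in, 4, /*in_zero_point=*/-3, /*out_zero_point=*/-128,
      /*out_multiplier=*/-(int64_t{1} << 31), /*out_shift=*/0, out);
  for (int i = 0; i < 4; ++i) {
    printf("%d ", out[i]); // prints: -128 -128 -120 -35
  }
  printf("\n");
  return 0;
}

Inputs at or below in_zero_point collapse to out_zero_point (quantized zero), while values above it are shifted and rescaled, which is why no explicit max(x, 0) appears in the kernel.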