Skip to content

Commit d7dce4a

Browse files
SS-JIA authored and facebook-github-bot committed
Implement max_pool2d_with_indices
Summary: Implements `max_pool2d_with_indices.out`. Reviewed By: guangy10, kirklandsign Differential Revision: D48405101 fbshipit-source-id: 3bc62b0337f101debfff0a44f47a90d8a4ad8f99
1 parent d8bb5d6 commit d7dce4a

7 files changed

+697
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <cstring>
10+
11+
#include <executorch/kernels/portable/cpu/util/kernel_ops_util.h>
12+
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
13+
#include <executorch/runtime/kernel/kernel_includes.h>
14+
15+
namespace torch {
16+
namespace executor {
17+
namespace native {
18+
19+
using Tensor = exec_aten::Tensor;
20+
using ScalarType = exec_aten::ScalarType;
21+
using IntArrayRef = exec_aten::ArrayRef<int64_t>;
22+
23+
/// Computes 2-D max pooling over `in`, writing the pooled values to `out`
/// and the flat index (within each input plane) of each selected element
/// to `indices`.
///
/// @param ctx         Kernel runtime context used for error reporting.
/// @param in          Input tensor; 3-D (C, H, W) or 4-D (N, C, H, W) —
///                    assumed, based on the batch-dim handling in the size
///                    helper; confirm against check_* for the full contract.
/// @param kernel_size Pooling window size (2 elements).
/// @param stride      Window stride; may be empty (defaults to kernel_size
///                    per the ATen contract — presumed, handled downstream).
/// @param padding     Implicit zero padding on each spatial side.
/// @param dilation    Spacing between window elements; may be empty.
/// @param ceil_mode   If true, use ceil instead of floor for output sizes.
/// @param out         Output tensor; resized to the computed target size.
/// @param indices     Long tensor of argmax indices; resized like `out`.
/// @return A tuple referencing (out, indices).
std::tuple<Tensor&, Tensor&> max_pool2d_with_indices_out(
    RuntimeContext& ctx,
    const Tensor& in,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode,
    Tensor& out,
    Tensor& indices) {
  std::tuple<Tensor&, Tensor&> ret_val(out, indices);

  ET_KERNEL_CHECK(
      ctx,
      check_max_pool2d_with_indices_args(
          in, kernel_size, stride, padding, dilation, ceil_mode, out, indices),
      InvalidArgument,
      ret_val);

  size_t output_ndim = 0;
  exec_aten::SizesType output_sizes[kTensorDimensionLimit];
  get_max_pool2d_with_indices_out_target_size(
      in,
      kernel_size,
      stride,
      padding,
      dilation,
      ceil_mode,
      output_sizes,
      &output_ndim);

  // Fix: on failure this must return `ret_val`, the function's declared
  // std::tuple<Tensor&, Tensor&> return value, consistent with every other
  // check in this kernel. The original returned `out` (a lone Tensor&) here.
  ET_KERNEL_CHECK(
      ctx,
      output_size_is_valid({output_sizes, output_ndim}),
      InvalidArgument,
      ret_val);

  ET_KERNEL_CHECK(
      ctx,
      resize_tensor(out, {output_sizes, output_ndim}) == Error::Ok,
      InvalidArgument,
      ret_val);

  ET_KERNEL_CHECK(
      ctx,
      resize_tensor(indices, {output_sizes, output_ndim}) == Error::Ok,
      InvalidArgument,
      ret_val);

  ScalarType in_type = in.scalar_type();
  ET_SWITCH_REAL_TYPES(in_type, ctx, __func__, CTYPE, [&]() {
    // Reduction: keep the larger value and its index. Strict `>` means ties
    // keep the earliest index encountered, matching ATen behavior.
    apply_kernel_2d_reduce_fn<CTYPE>(
        [](const CTYPE in_val, int64_t in_idx, CTYPE accum, int64_t accum_idx) {
          if (in_val > accum) {
            return std::tuple<CTYPE, int64_t>(in_val, in_idx);
          }
          return std::tuple<CTYPE, int64_t>(accum, accum_idx);
        },
        in,
        kernel_size,
        stride,
        padding,
        dilation,
        out,
        {indices});
  });

  return ret_val;
}
92+
93+
} // namespace native
94+
} // namespace executor
95+
} // namespace torch

kernels/portable/cpu/targets.bzl

+6
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,12 @@ _ATEN_OPS = (
480480
"//executorch/kernels/portable/cpu/util:reduce_util",
481481
],
482482
),
483+
# Portable max_pool2d_with_indices.out kernel; depends on the shared
# pooling/convolution helpers in kernel_ops_util.
op_target(
    name = "op_max_pool2d_with_indices",
    deps = [
        "//executorch/kernels/portable/cpu/util:kernel_ops_util",
    ],
),
483489
op_target(
484490
name = "op_mean",
485491
deps = [

kernels/portable/cpu/util/kernel_ops_util.cpp

+63
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ bool int_array_all_ge(IntArrayRef array, int64_t val) {
4040
return true;
4141
}
4242

43+
/// Validates a pooling/convolution kernel-size argument: it must have exactly
/// `kernel_ndim` elements, each >= 1.
///
/// @param kernel_size The user-supplied kernel size array.
/// @param kernel_ndim Expected number of spatial dims (e.g. 2 for 2-D ops).
/// @return true if valid; false (after logging) otherwise.
bool kernel_size_is_valid(IntArrayRef kernel_size, size_t kernel_ndim) {
  // Fix: size() returns an unsigned size_t, so the format specifier must be
  // %zu; the original used %zd (signed), a printf specifier mismatch.
  ET_LOG_MSG_AND_RETURN_IF_FALSE(
      kernel_size.size() == kernel_ndim,
      "Expected kernel_size to have size %zu but got %zu",
      kernel_ndim,
      kernel_size.size());
  ET_LOG_AND_RETURN_IF_FALSE(int_array_all_ge(kernel_size, 1));
  return true;
}
52+
4353
bool stride_is_valid(IntArrayRef stride, size_t kernel_ndim) {
4454
ET_LOG_MSG_AND_RETURN_IF_FALSE(
4555
stride.size() > 0 && stride.size() <= kernel_ndim,
@@ -267,5 +277,58 @@ void get_convolution_out_target_size(
267277
in, {kernel_size, kernel_ndim}, stride, padding, dilation, out_sizes);
268278
}
269279

280+
bool check_max_pool2d_with_indices_args(
281+
const Tensor& in,
282+
IntArrayRef kernel_size,
283+
IntArrayRef stride,
284+
IntArrayRef padding,
285+
IntArrayRef dilation,
286+
bool ceil_mode,
287+
Tensor& out,
288+
Tensor& indices) {
289+
ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
290+
ET_LOG_MSG_AND_RETURN_IF_FALSE(
291+
indices.scalar_type() == ScalarType::Long,
292+
"Expected indices to have type of Long, but found %s",
293+
toString(indices.scalar_type()));
294+
295+
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in));
296+
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out));
297+
298+
ET_LOG_AND_RETURN_IF_FALSE(kernel_size_is_valid(kernel_size, 2));
299+
if (stride.size() > 0) {
300+
ET_LOG_AND_RETURN_IF_FALSE(stride_is_valid(kernel_size, 2));
301+
}
302+
ET_LOG_AND_RETURN_IF_FALSE(padding_is_valid(padding, kernel_size, 2, true));
303+
if (dilation.size() > 0) {
304+
ET_LOG_AND_RETURN_IF_FALSE(dilation_is_valid(dilation, 2));
305+
}
306+
307+
return true;
308+
}
309+
310+
void get_max_pool2d_with_indices_out_target_size(
311+
const Tensor& in,
312+
IntArrayRef kernel_size,
313+
IntArrayRef stride,
314+
IntArrayRef padding,
315+
IntArrayRef dilation,
316+
bool ceil_mode,
317+
exec_aten::SizesType* out_sizes,
318+
size_t* out_ndim) {
319+
*out_ndim = in.dim();
320+
321+
// Batch dim is optional, so in can be either 3 or 4 dim.
322+
if (in.dim() == 4) {
323+
out_sizes[0] = in.size(0);
324+
out_sizes[1] = in.size(1);
325+
} else {
326+
out_sizes[0] = in.size(0);
327+
}
328+
329+
calculate_kernel_output_sizes(
330+
in, kernel_size, stride, padding, dilation, out_sizes, ceil_mode);
331+
}
332+
270333
} // namespace executor
271334
} // namespace torch

0 commit comments

Comments
 (0)