[mlir][memref] Remove redundant memref.tensor_store op (#71010)

matthias-springer · web-flow · commit 437c62178c78 · 2023-11-05T12:47:18.000+09:00
`bufferization.materialize_in_destination` should be used instead. Both
ops bufferize to a memcpy. This change also conceptually cleans up the
memref dialect a bit: the memref dialect no longer contains ops that
operate on tensor values.
diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
@@ -77,10 +77,12 @@ func.func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf3
 
   // Allocate addressable "buffers" and copy tensors %A and %B into them.
   %A_m = memref.alloc(%n) : memref<100x?xf32>
-  memref.tensor_store %A to %A_m : memref<100x?xf32>
+  bufferization.materialize_in_destination %A in writable %A_m
+      : (tensor<100x?xf32>, memref<100x?xf32>) -> ()
 
   %B_m = memref.alloc(%n) : memref<?x50xf32>
-  memref.tensor_store %B to %B_m : memref<?x50xf32>
+  bufferization.materialize_in_destination %B in writable %B_m
+      : (tensor<?x50xf32>, memref<?x50xf32>) -> ()
 
   // Call function @multiply passing memrefs as arguments,
   // and getting returned the result of the multiplication.
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -103,7 +103,7 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
 
     ```
     %alloc = memref.alloc() : memref<10xf32>
-    memref.tensor_store %dest, %alloc : memref<10xf32>
+    bufferization.materialize_in_destination %dest in %alloc
     memref.store %f, %alloc[%pos] : memref<10xf32>
     %0 = bufferization.to_tensor %alloc restrict writable : memref<10xf32>
     ```
@@ -118,15 +118,16 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
     An optional memory space attribute can be specified for the materialized
     buffer allocation.
 
-    If a memory copy is needed, a "memref.tensor_store" is used when possible.
-    This is an op with tensor semantics that will bufferize to a memory copy
-    later. Which concrete op will be used for the memory copy is up to the
-    bufferization framework. Alternatively, a custom memcpy op can be specified
-    via `memcpy_op`. Currently supported are "memref.copy" and "linalg.copy".
-    In that case, the source of each memcpy must not have a custom memory space.
-    Furthermore, because the future buffer layout unknown for a given tensor,
-    a fully dynamic layout is assumed for best compatibility. Users should use
-    "memref.tensor_store" when possible.
+    If a memory copy is needed, a "bufferization.materialize_in_destination" is
+    used when possible. This is an op with tensor semantics that will bufferize
+    to a memory copy later. Which concrete op will be used for the memory copy
+    is up to the bufferization framework. Alternatively, a custom memcpy op can
+    be specified via `memcpy_op`. Currently supported are "memref.copy" and
+    "linalg.copy". In that case, the source of each memcpy must not have a
+    custom memory space. Furthermore, because the future buffer layout unknown
+    for a given tensor, a fully dynamic layout is assumed for best
+    compatibility. Users should use "bufferization.materialize_in_destination"
+    when possible.
 
     "memref.alloc" is used for new buffer allocations. The buffer is deallocated
     at the end of the block if the "emit_dealloc" attribute is present. If this
@@ -148,7 +149,8 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
 
   let arguments = (ins TransformHandleTypeInterface:$target,
                        OptionalAttr<AnyAttr>:$memory_space,
-                       DefaultValuedAttr<StrAttr, "\"memref.tensor_store\"">:
+                       DefaultValuedAttr<StrAttr,
+                              "\"bufferization.materialize_in_destination\"">:
                            $memcpy_op,
                        DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
                            $alloc_op,
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -52,8 +52,12 @@ struct BufferizeToAllocationOptions {
   enum class AllocOp { MemrefAlloc = 0, MemrefAlloca = 1 };
   AllocOp allocOp = AllocOp::MemrefAlloc;
 
-  enum class MemcpyOp { MemrefTensorStore = 0, MemrefCopy = 1, LinalgCopy = 2 };
-  MemcpyOp memcpyOp = MemcpyOp::MemrefTensorStore;
+  enum class MemcpyOp {
+    MaterializeInDestination = 0,
+    MemrefCopy = 1,
+    LinalgCopy = 2
+  };
+  MemcpyOp memcpyOp = MemcpyOp::MaterializeInDestination;
 
   /// If set to "true", only the destination tensor operands are bufferized to
   /// a new allocation (and wrapped in "bufferization.to_tensor"), but not the
@@ -68,7 +72,8 @@ struct BufferizeToAllocationOptions {
 };
 
 /// Materialize a buffer allocation for the given tensor.pad op and lower the
-/// op to linalg.fill/linalg.generic + memref.tensor_store. E.g.:
+/// op to linalg.fill/linalg.generic + bufferization.materialize_in_destination.
+/// E.g.:
 ///
 /// %0 = tensor.pad low[%l] high[%h] %t ...
 ///
@@ -77,7 +82,7 @@ struct BufferizeToAllocationOptions {
 /// %alloc = memref.alloc
 /// linalg.fill ... outs(%alloc)
 /// %subview = memref.subview %alloc [%l] [...] [1]
-/// memref.tensor_store %t, %subview
+/// bufferization.materialize_in_destination %t in %subview
 /// %0 = bufferization.to_tensor %alloc restrict writable
 ///
 /// In addition to rewriting the IR as shown above, this function returns the
@@ -98,7 +103,7 @@ Value bufferizeToAllocation(RewriterBase &rewriter,
 /// is lowered to:
 ///
 /// %alloc = memref.alloc
-/// memref.tensor_store %t, %subview
+/// bufferization.materialize_in_destination %t in %subview
 /// vector.mask {
 ///   vector.transfer_write %arg0, %alloc : vector<16xf32>, memref<?xf32>
 /// } : vector<16xi1>
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -2095,37 +2095,6 @@ def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [
   let hasVerifier = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// TensorStoreOp
-//===----------------------------------------------------------------------===//
-
-def TensorStoreOp : MemRef_Op<"tensor_store",
-    [SameOperandsShape, SameOperandsElementType,
-     TypesMatchWith<"type of 'value' matches tensor equivalent of 'memref'",
-                    "memref", "tensor",
-                    "getTensorTypeFromMemRefType($_self)">]> {
-  let summary = "tensor store operation";
-  let description = [{
-    Stores the contents of a tensor into a memref. The first operand is a value
-    of tensor type, the second operand is a value of memref type. The shapes and
-    element types of these must match, and are specified by the memref type.
-
-    Example:
-
-    ```mlir
-    %9 = dim %8, 1 : tensor<4x?xf32>
-    %10 = memref.alloc(%9) : memref<4x?xf32, #layout, memspace0>
-    memref.tensor_store %8, %10 : memref<4x?xf32, #layout, memspace0>
-    ```
-  }];
-
-  let arguments = (ins AnyTensor:$tensor, Arg<AnyRankedOrUnrankedMemRef,
-                       "the reference to store to",
-                       [MemWriteAt<0, FullEffect>]>:$memref);
-
-  let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)";
-}
-
 //===----------------------------------------------------------------------===//
 // TransposeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
@@ -53,7 +53,6 @@
 #include "mlir/Dialect/MemRef/IR/MemRefMemorySlot.h"
 #include "mlir/Dialect/MemRef/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/MemRef/Transforms/AllocationOpInterfaceImpl.h"
-#include "mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/MemRef/Transforms/RuntimeOpVerification.h"
 #include "mlir/Dialect/Mesh/IR/MeshOps.h"
 #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
@@ -157,7 +156,6 @@ inline void registerAllDialects(DialectRegistry &registry) {
   linalg::registerTilingInterfaceExternalModels(registry);
   linalg::registerValueBoundsOpInterfaceExternalModels(registry);
   memref::registerAllocationOpInterfaceExternalModels(registry);
-  memref::registerBufferizableOpInterfaceExternalModels(registry);
   memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry);
   memref::registerValueBoundsOpInterfaceExternalModels(registry);
   memref::registerMemorySlotExternalModels(registry);
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -585,7 +585,11 @@ MaterializeInDestinationOp::bufferize(RewriterBase &rewriter,
     assert(isa<BaseMemRefType>(getDest().getType()) && "expected memref type");
     buffer = getDest();
   }
-  rewriter.create<memref::TensorStoreOp>(getLoc(), getSource(), buffer);
+  auto srcBuffer = getBuffer(rewriter, getSource(), options);
+  if (failed(srcBuffer))
+    return failure();
+  if (failed(options.createMemCpy(rewriter, getLoc(), *srcBuffer, buffer)))
+    return failure();
   replaceOpWithBufferizedValues(rewriter, getOperation(),
                                 tensorDest ? ValueRange(buffer) : ValueRange());
   return success();
@@ -682,8 +686,9 @@ LogicalResult MaterializeInDestinationOp::verify() {
 void MaterializeInDestinationOp::build(OpBuilder &builder,
                                        OperationState &state, Value source,
                                        Value dest) {
-  assert(isa<TensorType>(dest.getType()) && "expected tensor type");
-  build(builder, state, /*result=*/dest.getType(), source, dest);
+  auto destTensorType = dyn_cast<TensorType>(dest.getType());
+  build(builder, state, /*result=*/destTensorType ? destTensorType : Type(),
+        source, dest);
 }
 
 bool MaterializeInDestinationOp::isWritable(Value value,
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -241,9 +241,9 @@ DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
   rewriter.setListener(&newOpsListener);
 
   linalg::BufferizeToAllocationOptions options;
-  if (getMemcpyOp() == "memref.tensor_store") {
-    options.memcpyOp =
-        linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore;
+  if (getMemcpyOp() == "bufferization.materialize_in_destination") {
+    options.memcpyOp = linalg::BufferizeToAllocationOptions::MemcpyOp::
+        MaterializeInDestination;
   } else if (getMemcpyOp() == "memref.copy") {
     options.memcpyOp =
         linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy;
@@ -296,7 +296,7 @@ void transform::BufferizeToAllocationOp::getEffects(
 }
 
 LogicalResult transform::BufferizeToAllocationOp::verify() {
-  if (getMemcpyOp() != "memref.tensor_store" &&
+  if (getMemcpyOp() != "bufferization.materialize_in_destination" &&
       getMemcpyOp() != "memref.copy" && getMemcpyOp() != "linalg.copy")
     return emitOpError() << "unsupported memcpy op";
   if (getAllocOp() != "memref.alloc" && getAllocOp() != "memref.alloca")
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -63,11 +63,14 @@ static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource,
   assert(memrefDest.getType().isa<MemRefType>() && "expected ranked memref");
 
   switch (options.memcpyOp) {
-  case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore:
+  case linalg::BufferizeToAllocationOptions::MemcpyOp::
+      MaterializeInDestination: {
     // Note: This is the preferred way of memcpy'ing because no layout map
     // and/or memory space must be specified for the source.
-    b.create<memref::TensorStoreOp>(loc, tensorSource, memrefDest);
-    break;
+    auto materializeOp = b.create<bufferization::MaterializeInDestinationOp>(
+        loc, tensorSource, memrefDest);
+    materializeOp.setWritable(true);
+  } break;
   case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy: {
     // TODO: Support custom memory space on source.
     // We do not know the layout map of the source yet, so use a fully dynamic
@@ -238,7 +241,7 @@ Value linalg::bufferizeToAllocation(
     rewriter.setInsertionPointAfter(fillOp);
   }
 
-  // Create memref.tensor_store.
+  // Create memcpy.
   SmallVector<OpFoldResult> sizes =
       getMixedSizes(rewriter, loc, padOp.getSource());
   SmallVector<OpFoldResult> strides(padOp.getResultType().getRank(),
diff --git a/mlir/lib/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.cpp
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_mlir_dialect_library(MLIRMemRefTransforms
   AllocationOpInterfaceImpl.cpp
-  BufferizableOpInterfaceImpl.cpp
   ComposeSubView.cpp
   ExpandOps.cpp
   ExpandRealloc.cpp
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -15,7 +15,7 @@
 //       CHECK:   linalg.fill ins(%[[c50]] : index) outs(%[[alloc]] : memref<?x?xindex>)
 //       CHECK:   %[[dim0:.*]] = tensor.dim %[[t]], %[[c0]]
 //       CHECK:   %[[subview:.*]] = memref.subview %[[alloc]][5, %[[l2]]] [%[[dim0]], 10] [1, 1]
-//       CHECK:   memref.tensor_store %[[t]], %[[subview]]
+//       CHECK:   bufferization.materialize_in_destination %[[t]] in writable %[[subview]]
 //       CHECK:   %[[r:.*]] = bufferization.to_tensor %[[alloc]] restrict writable : memref<?x?xindex>
 //       CHECK:   memref.dealloc %[[alloc]]
 //       CHECK:   return %[[r]]
@@ -40,17 +40,17 @@ module attributes {transform.with_named_sequence} {
     transform.test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op
 
     // Ensure that one linalg.copy was generated.
-    %tensor_store = transform.select "memref.tensor_store" in %new : (!transform.any_op) -> !transform.any_op
+    %mat = transform.select "bufferization.materialize_in_destination" in %new : (!transform.any_op) -> !transform.any_op
     // expected-remark @below{{1}}
-    transform.test_print_number_of_associated_payload_ir_ops %tensor_store : !transform.any_op
+    transform.test_print_number_of_associated_payload_ir_ops %mat : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
 // CHECK-LABEL: func @tensor_pad_constant_with_custom_copy(
-//   CHECK-NOT:   memref.tensor_store
+//   CHECK-NOT:   bufferization.materialize_in_destination
 //   CHECK-NOT:   memref.copy
 //       CHECK:   memref.alloca
 //       CHECK:   linalg.copy
@@ -194,7 +194,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-LABEL: func @vector_mask(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>,
 //       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref<?xf32, 4>
-//       CHECK:   memref.tensor_store %[[t]], %[[alloc]]
+//       CHECK:   bufferization.materialize_in_destination %[[t]] in writable %[[alloc]]
 //       CHECK:   vector.mask %{{.*}} { vector.transfer_write %{{.*}}, %[[alloc]]
 //       CHECK:   %[[r:.*]] = bufferization.to_tensor %[[alloc]] restrict writable
 //       CHECK:   memref.dealloc %[[alloc]]
@@ -217,7 +217,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-LABEL: func @tensor_insert_destination(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?x10xindex>
 //       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref<?x10xindex, 4>
-//       CHECK:   memref.tensor_store %[[t]], %[[alloc]]
+//       CHECK:   bufferization.materialize_in_destination %[[t]] in writable %[[alloc]]
 //       CHECK:   %[[t2:.*]] = bufferization.to_tensor %[[alloc]] restrict writable
 //       CHECK:   %[[inserted:.*]] = tensor.insert %{{.*}} into %[[t2]]
 //       CHECK:   memref.dealloc %[[alloc]]
@@ -240,7 +240,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-LABEL: func @scf_for_destination(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?x10xindex>
 //       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref<?x10xindex, 4>
-//       CHECK:   memref.tensor_store %[[t]], %[[alloc]]
+//       CHECK:   bufferization.materialize_in_destination %[[t]] in writable %[[alloc]]
 //       CHECK:   %[[t2:.*]] = bufferization.to_tensor %[[alloc]] restrict writable
 //       CHECK:   %[[for:.*]] = scf.for {{.*}} iter_args(%{{.*}} = %[[t2]])
 //       CHECK:   memref.dealloc %[[alloc]]
diff --git a/mlir/test/Dialect/MemRef/bufferize.mlir b/mlir/test/Dialect/MemRef/bufferize.mlir
diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir