[Impeller] add compute pass API for memory barriers, re-enable for Vulkan. #49946
```diff
@@ -104,11 +104,7 @@ fml::Status ComputePassVK::Compute(const ISize& grid_size) {
   // Special case for linear processing.
   if (height == 1) {
-    int64_t minimum = 1;
-    int64_t threadGroups = std::max(
-        static_cast<int64_t>(std::ceil(width * 1.0 / max_wg_size_[0] * 1.0)),
-        minimum);
-    command_buffer_vk.dispatch(threadGroups, 1, 1);
+    command_buffer_vk.dispatch(width, 1, 1);
   } else {
     while (width > max_wg_size_[0]) {
       width = std::max(static_cast<int64_t>(1), width / 2);
```

**Review comment:** This logic was incorrect...
```diff
@@ -216,8 +212,53 @@ bool ComputePassVK::BindResource(size_t binding,
   return true;
 }
 
+// Note:
+// https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples
+// Seems to suggest that anything more finely grained than a global memory
+// barrier is likely to be weakened into a global barrier. Confirming this on
+// mobile devices will require some experimentation.
+
+// |ComputePass|
+void ComputePassVK::AddBufferMemoryBarrier() {
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
+}
+
+// |ComputePass|
+void ComputePassVK::AddTextureMemoryBarrier() {
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
+}
+
+// |ComputePass|
+bool ComputePassVK::EncodeCommands() const {
+  // Since we only use a global memory barrier, we don't have to worry about
+  // compute-to-compute dependencies across cmd buffers. Instead, we pessimize
+  // here and assume that we wrote to a storage image or buffer and that a
+  // render pass will read from it. If there are ever scenarios where we end up
+  // with compute-to-compute dependencies this should be revisited.
+
+  // This does not currently handle image barriers as we do not use them
+  // for anything.
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask =
+      vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {});
+
+  return true;
+}
```

**Review discussion (on the synchronization note):**
- Reviewer: I don't see how that is the case. Though you do need the buffer and texture references to specify the barriers on. So this approach is definitely easier to use, if coarse.
- Author: We could easily add these to the API and create the more specific barriers, but given that we don't have any way to evaluate this yet, I'd prefer to follow the recommendations.
- Reviewer: sgtm

**Review discussion (on the access masks):**
- Reviewer: These to me seem underspecified. Right now, this says "all commands before the barrier must have buffer writes in their compute shaders be visible to buffer reads in the compute shaders of commands after the barrier." The same can apply to transfer read/write (…). You can just (…). This would also match up with the Metal semantics implemented above.
- Author: Why would I need to specify transfer dependencies in a barrier API that is specifically for compute-compute dependencies? I think I clarified in the API docs that it is only for compute-compute dependencies, but I can add a caveat that it does not cover other cases.
- Reviewer: Fair. But the first compute pass still needs to wait for, say, transfer writes on the buffers it works on to be visible. So my expectation would be that compute-compute dependencies, while necessary, are not sufficient.
- Author: We don't use transfer commands anywhere yet, though. My expectation is that if we needed this, we would add blit/transfer -> compute barrier functionality to the blit pass and not to the compute pass.
```diff
@@ -308,6 +308,7 @@ TEST_P(ComputeTest, MultiStageInputAndOutput) {
     CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
 
     ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
+    pass->AddBufferMemoryBarrier();
   }
 
   {
```

**Review comment:** Since we're now running concurrently, we need a memory barrier here so that step 2 sees the output of step 1.
**Review comment:** By default we were creating a serial encoder, which acts like it has a global memory barrier between each command. Since we need to specify the barriers for Vulkan, we might as well use the concurrent dispatcher for iOS too.