Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit d27b18d

Browse files
[Impeller] add compute pass API for memory barriers, re-enable for Vulkan. (#49946)
Adds two new APIs that insert memory barriers for compute -> compute dependencies.

```c++
// |ComputePass|
void AddBufferMemoryBarrier() override;

// |ComputePass|
void AddTextureMemoryBarrier() override;
```

Also makes the ComputePassVK automatically insert a compute -> vertex dependency when encoding. This change is sufficient to let the GPU compute implementation of draw points work on Pixel and Samsung Android devices. For more explanation of these specific barriers, see the documentation added in this PR. Fixes #49946
1 parent d33b2dd commit d27b18d

9 files changed

+92
-24
lines changed

impeller/entity/entity_unittests.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2545,11 +2545,6 @@ TEST_P(EntityTest, PointFieldGeometryCoverage) {
25452545
Rect::MakeLTRB(35, 15, 135, 205));
25462546
}
25472547

2548-
TEST_P(EntityTest, PointFieldCanUseCompute) {
2549-
EXPECT_EQ(PointFieldGeometry::CanUseCompute(*GetContentContext()),
2550-
GetContext()->GetBackendType() == Context::BackendType::kMetal);
2551-
}
2552-
25532548
TEST_P(EntityTest, ColorFilterContentsWithLargeGeometry) {
25542549
Entity entity;
25552550
entity.SetTransform(Matrix::MakeScale(GetContentScale()));

impeller/entity/geometry/point_field_geometry.cc

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ GeometryResult PointFieldGeometry::GetPositionBuffer(
1717
const ContentContext& renderer,
1818
const Entity& entity,
1919
RenderPass& pass) const {
20-
if (CanUseCompute(renderer)) {
20+
if (renderer.GetDeviceCapabilities().SupportsCompute()) {
2121
return GetPositionBufferGPU(renderer, entity, pass);
2222
}
2323
auto vtx_builder = GetPositionBufferCPU(renderer, entity, pass);
@@ -40,7 +40,7 @@ GeometryResult PointFieldGeometry::GetPositionUVBuffer(
4040
const ContentContext& renderer,
4141
const Entity& entity,
4242
RenderPass& pass) const {
43-
if (CanUseCompute(renderer)) {
43+
if (renderer.GetDeviceCapabilities().SupportsCompute()) {
4444
return GetPositionBufferGPU(renderer, entity, pass, texture_coverage,
4545
effect_transform);
4646
}
@@ -200,6 +200,7 @@ GeometryResult PointFieldGeometry::GetPositionBufferGPU(
200200

201201
using UV = UvComputeShader;
202202

203+
compute_pass->AddBufferMemoryBarrier();
203204
compute_pass->SetCommandLabel("UV Geometry");
204205
compute_pass->SetPipeline(renderer.GetUvComputePipeline());
205206

@@ -264,14 +265,6 @@ GeometryVertexType PointFieldGeometry::GetVertexType() const {
264265
return GeometryVertexType::kPosition;
265266
}
266267

267-
// Compute is disabled for Vulkan because the barriers are incorrect, see
268-
// also: https://github.com/flutter/flutter/issues/140798 .
269-
bool PointFieldGeometry::CanUseCompute(const ContentContext& renderer) {
270-
return renderer.GetDeviceCapabilities().SupportsCompute() &&
271-
renderer.GetContext()->GetBackendType() ==
272-
Context::BackendType::kMetal;
273-
}
274-
275268
// |Geometry|
276269
std::optional<Rect> PointFieldGeometry::GetCoverage(
277270
const Matrix& transform) const {

impeller/entity/geometry/point_field_geometry.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ class PointFieldGeometry final : public Geometry {
1717

1818
static size_t ComputeCircleDivisions(Scalar scaled_radius, bool round);
1919

20-
/// If the platform can use compute safely.
21-
static bool CanUseCompute(const ContentContext& renderer);
22-
2320
private:
2421
// |Geometry|
2522
GeometryResult GetPositionBuffer(const ContentContext& renderer,

impeller/renderer/backend/metal/compute_pass_mtl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ class ComputePassMTL final : public ComputePass {
6666
// |ComputePass|
6767
bool EncodeCommands() const override;
6868

69+
// |ComputePass|
70+
void AddBufferMemoryBarrier() override;
71+
72+
// |ComputePass|
73+
void AddTextureMemoryBarrier() override;
74+
6975
ComputePassMTL(const ComputePassMTL&) = delete;
7076

7177
ComputePassMTL& operator=(const ComputePassMTL&) = delete;

impeller/renderer/backend/metal/compute_pass_mtl.mm

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
if (!buffer_) {
3333
return;
3434
}
35-
encoder_ = [buffer_ computeCommandEncoder];
35+
encoder_ = [buffer_ computeCommandEncoderWithDispatchType:
36+
MTLDispatchType::MTLDispatchTypeConcurrent];
3637
if (!encoder_) {
3738
return;
3839
}
@@ -67,6 +68,16 @@
6768
ComputePipelineMTL::Cast(*pipeline).GetMTLComputePipelineState());
6869
}
6970

71+
// |ComputePass|
72+
void ComputePassMTL::AddBufferMemoryBarrier() {
73+
[encoder_ memoryBarrierWithScope:MTLBarrierScopeBuffers];
74+
}
75+
76+
// |ComputePass|
77+
void ComputePassMTL::AddTextureMemoryBarrier() {
78+
[encoder_ memoryBarrierWithScope:MTLBarrierScopeTextures];
79+
}
80+
7081
// |ComputePass|
7182
bool ComputePassMTL::BindResource(ShaderStage stage,
7283
DescriptorType type,

impeller/renderer/backend/vulkan/compute_pass_vk.cc

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,7 @@ fml::Status ComputePassVK::Compute(const ISize& grid_size) {
104104

105105
// Special case for linear processing.
106106
if (height == 1) {
107-
int64_t minimum = 1;
108-
int64_t threadGroups = std::max(
109-
static_cast<int64_t>(std::ceil(width * 1.0 / max_wg_size_[0] * 1.0)),
110-
minimum);
111-
command_buffer_vk.dispatch(threadGroups, 1, 1);
107+
command_buffer_vk.dispatch(width, 1, 1);
112108
} else {
113109
while (width > max_wg_size_[0]) {
114110
width = std::max(static_cast<int64_t>(1), width / 2);
@@ -216,8 +212,53 @@ bool ComputePassVK::BindResource(size_t binding,
216212
return true;
217213
}
218214

215+
// Note:
216+
// https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples
217+
// Seems to suggest that anything more finely grained than a global memory
218+
// barrier is likely to be weakened into a global barrier. Confirming this on
219+
// mobile devices will require some experimentation.
220+
221+
// |ComputePass|
222+
void ComputePassVK::AddBufferMemoryBarrier() {
223+
vk::MemoryBarrier barrier;
224+
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
225+
barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
226+
227+
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
228+
vk::PipelineStageFlagBits::eComputeShader,
229+
vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
230+
}
231+
232+
// |ComputePass|
233+
void ComputePassVK::AddTextureMemoryBarrier() {
234+
vk::MemoryBarrier barrier;
235+
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
236+
barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
237+
238+
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
239+
vk::PipelineStageFlagBits::eComputeShader,
240+
vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
241+
}
242+
219243
// |ComputePass|
220244
bool ComputePassVK::EncodeCommands() const {
245+
// Since we only use global memory barrier, we don't have to worry about
246+
// compute to compute dependencies across cmd buffers. Instead, we pessimize
247+
// here and assume that we wrote to a storage image or buffer and that a
248+
// render pass will read from it. If there are ever scenarios where we end up
249+
// with compute to compute dependencies this should be revisited.
250+
251+
// This does not currently handle image barriers as we do not use them
252+
// for anything.
253+
vk::MemoryBarrier barrier;
254+
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
255+
barrier.dstAccessMask =
256+
vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead;
257+
258+
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
259+
vk::PipelineStageFlagBits::eComputeShader,
260+
vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {});
261+
221262
return true;
222263
}
223264

impeller/renderer/backend/vulkan/compute_pass_vk.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ class ComputePassVK final : public ComputePass {
5757
void SetPipeline(const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>&
5858
pipeline) override;
5959

60+
// |ComputePass|
61+
void AddBufferMemoryBarrier() override;
62+
63+
// |ComputePass|
64+
void AddTextureMemoryBarrier() override;
65+
6066
// |ComputePass|
6167
fml::Status Compute(const ISize& grid_size) override;
6268

impeller/renderer/compute_pass.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@ class ComputePass : public ResourceBinder {
3535

3636
virtual fml::Status Compute(const ISize& grid_size) = 0;
3737

38+
/// @brief Ensures all previously encoded compute commands' buffer writes are
39+
/// visible to any subsequent compute commands.
40+
///
41+
/// On Vulkan, it does not matter if the compute command is in a
42+
/// different command buffer, only that it is executed later in queue
43+
/// order.
44+
virtual void AddBufferMemoryBarrier() = 0;
45+
46+
/// @brief Ensures all previously encoded compute commands' texture writes are
47+
/// visible to any subsequent compute commands.
48+
///
49+
/// On Vulkan, it does not matter if the compute command is in a
50+
/// different command buffer, only that it is executed later in queue
51+
/// order.
52+
virtual void AddTextureMemoryBarrier() = 0;
53+
3854
//----------------------------------------------------------------------------
3955
/// @brief Encode the recorded commands to the underlying command buffer.
4056
///
@@ -43,6 +59,8 @@ class ComputePass : public ResourceBinder {
4359
///
4460
virtual bool EncodeCommands() const = 0;
4561

62+
const Context& GetContext() const { return *context_; }
63+
4664
protected:
4765
const std::shared_ptr<const Context> context_;
4866

impeller/renderer/compute_unittests.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ TEST_P(ComputeTest, MultiStageInputAndOutput) {
308308
CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
309309

310310
ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
311+
pass->AddBufferMemoryBarrier();
311312
}
312313

313314
{

0 commit comments

Comments
 (0)