Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

[Impeller] add compute pass API for memory barriers, re-enable for Vulkan. #49946

Merged
merged 15 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions impeller/entity/entity_unittests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2545,11 +2545,6 @@ TEST_P(EntityTest, PointFieldGeometryCoverage) {
Rect::MakeLTRB(35, 15, 135, 205));
}

TEST_P(EntityTest, PointFieldCanUseCompute) {
EXPECT_EQ(PointFieldGeometry::CanUseCompute(*GetContentContext()),
GetContext()->GetBackendType() == Context::BackendType::kMetal);
}

TEST_P(EntityTest, ColorFilterContentsWithLargeGeometry) {
Entity entity;
entity.SetTransform(Matrix::MakeScale(GetContentScale()));
Expand Down
13 changes: 3 additions & 10 deletions impeller/entity/geometry/point_field_geometry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ GeometryResult PointFieldGeometry::GetPositionBuffer(
const ContentContext& renderer,
const Entity& entity,
RenderPass& pass) const {
if (CanUseCompute(renderer)) {
if (renderer.GetDeviceCapabilities().SupportsCompute()) {
return GetPositionBufferGPU(renderer, entity, pass);
}
auto vtx_builder = GetPositionBufferCPU(renderer, entity, pass);
Expand All @@ -40,7 +40,7 @@ GeometryResult PointFieldGeometry::GetPositionUVBuffer(
const ContentContext& renderer,
const Entity& entity,
RenderPass& pass) const {
if (CanUseCompute(renderer)) {
if (renderer.GetDeviceCapabilities().SupportsCompute()) {
return GetPositionBufferGPU(renderer, entity, pass, texture_coverage,
effect_transform);
}
Expand Down Expand Up @@ -200,6 +200,7 @@ GeometryResult PointFieldGeometry::GetPositionBufferGPU(

using UV = UvComputeShader;

compute_pass->AddBufferMemoryBarrier();
compute_pass->SetCommandLabel("UV Geometry");
compute_pass->SetPipeline(renderer.GetUvComputePipeline());

Expand Down Expand Up @@ -264,14 +265,6 @@ GeometryVertexType PointFieldGeometry::GetVertexType() const {
return GeometryVertexType::kPosition;
}

// Compute is disabled for Vulkan because the barriers are incorrect, see
// also: https://github.com/flutter/flutter/issues/140798 .
bool PointFieldGeometry::CanUseCompute(const ContentContext& renderer) {
return renderer.GetDeviceCapabilities().SupportsCompute() &&
renderer.GetContext()->GetBackendType() ==
Context::BackendType::kMetal;
}

// |Geometry|
std::optional<Rect> PointFieldGeometry::GetCoverage(
const Matrix& transform) const {
Expand Down
3 changes: 0 additions & 3 deletions impeller/entity/geometry/point_field_geometry.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ class PointFieldGeometry final : public Geometry {

static size_t ComputeCircleDivisions(Scalar scaled_radius, bool round);

/// If the platform can use compute safely.
static bool CanUseCompute(const ContentContext& renderer);

private:
// |Geometry|
GeometryResult GetPositionBuffer(const ContentContext& renderer,
Expand Down
6 changes: 6 additions & 0 deletions impeller/renderer/backend/metal/compute_pass_mtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ class ComputePassMTL final : public ComputePass {
// |ComputePass|
bool EncodeCommands() const override;

// |ComputePass|
void AddBufferMemoryBarrier() override;

// |ComputePass|
void AddTextureMemoryBarrier() override;

ComputePassMTL(const ComputePassMTL&) = delete;

ComputePassMTL& operator=(const ComputePassMTL&) = delete;
Expand Down
13 changes: 12 additions & 1 deletion impeller/renderer/backend/metal/compute_pass_mtl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
if (!buffer_) {
return;
}
encoder_ = [buffer_ computeCommandEncoder];
encoder_ = [buffer_ computeCommandEncoderWithDispatchType:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default we were creating a serial encoder, which acts like it has a global memory barrier between each command. Since we need to specify the barriers for Vulkan, we might as well use the concurrent dispatcher for iOS too.

MTLDispatchType::MTLDispatchTypeConcurrent];
if (!encoder_) {
return;
}
Expand Down Expand Up @@ -67,6 +68,16 @@
ComputePipelineMTL::Cast(*pipeline).GetMTLComputePipelineState());
}

// |ComputePass|
void ComputePassMTL::AddBufferMemoryBarrier() {
  // Make all buffer writes from prior dispatches visible to subsequent
  // dispatches in this encoder. Required because the encoder is created with
  // MTLDispatchTypeConcurrent, which removes the implicit serial ordering
  // between commands.
  [encoder_ memoryBarrierWithScope:MTLBarrierScopeBuffers];
}

// |ComputePass|
void ComputePassMTL::AddTextureMemoryBarrier() {
  // Make all texture writes from prior dispatches visible to subsequent
  // dispatches in this encoder. Required because the encoder is created with
  // MTLDispatchTypeConcurrent, which removes the implicit serial ordering
  // between commands.
  [encoder_ memoryBarrierWithScope:MTLBarrierScopeTextures];
}

// |ComputePass|
bool ComputePassMTL::BindResource(ShaderStage stage,
DescriptorType type,
Expand Down
51 changes: 46 additions & 5 deletions impeller/renderer/backend/vulkan/compute_pass_vk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,7 @@ fml::Status ComputePassVK::Compute(const ISize& grid_size) {

// Special case for linear processing.
if (height == 1) {
int64_t minimum = 1;
int64_t threadGroups = std::max(
static_cast<int64_t>(std::ceil(width * 1.0 / max_wg_size_[0] * 1.0)),
minimum);
command_buffer_vk.dispatch(threadGroups, 1, 1);
command_buffer_vk.dispatch(width, 1, 1);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic was incorrect...

} else {
while (width > max_wg_size_[0]) {
width = std::max(static_cast<int64_t>(1), width / 2);
Expand Down Expand Up @@ -216,8 +212,53 @@ bool ComputePassVK::BindResource(size_t binding,
return true;
}

// Note:
// https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how that is the case. Though you do need the buffer and texture references to specify the barriers on. So this approach is definitely more easy to use if coarse.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could easily add these to the API and create the more specific barriers, but given that we don't have any way to evaluate this yet, I'd prefer to follow the recommendations.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sgtm

// Seems to suggest that anything more finely grained than a global memory
// barrier is likely to be weakened into a global barrier. Confirming this on
// mobile devices will require some experimentation.

// |ComputePass|
void ComputePassVK::AddBufferMemoryBarrier() {
vk::MemoryBarrier barrier;
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These to me seem underspecified. Right now, this says "All commands before the barrier must have buffer-writes in their compute shaders be visible to buffer-reads in also the compute shaders of commands after the barrier".

The same can apply to transfer read-write (VK_ACCESS_TRANSFER_READ|WRITE_BIT in VK_PIPELINE_STAGE_TRANSFER_BIT) in the transfer stage in addition to any compute-render dependencies.

You can just or them up. But I suggest a comment indicating each value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also match up with Metal semantics implemented above.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would I need to specify transfer dependencies in a barrier API that is specifically for compute-compute dependencies?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I clarified on the API docs that it is only for compute-compute dependencies, but I can add a caveat that it does not cover other cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair. But the first compute pass still needs to wait for say transfer writes on the buffers it works on to be visible. So my expectation would be that compute-compute dependencies while necessary are not sufficient.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use transfer commands anywhere yet though. My expectation is that if we needed this, we would add the blit/transfer -> compute barrier functionality to the blit pass and not to the compute pass.

barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;

command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
}

// |ComputePass|
void ComputePassVK::AddTextureMemoryBarrier() {
  // A coarse global memory barrier is used instead of a per-image
  // vk::ImageMemoryBarrier (see the note above AddBufferMemoryBarrier); as a
  // consequence no image layout transition is performed here.
  // NOTE(review): this assumes any required layout transitions happened
  // elsewhere — confirm against the texture binding path.
  vk::MemoryBarrier barrier;
  // Compute-shader writes before the barrier ...
  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
  // ... must be visible to compute-shader reads after it.
  barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;

  // Compute -> compute execution dependency within queue order.
  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
      vk::PipelineStageFlagBits::eComputeShader,
      vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
}

// |ComputePass|
bool ComputePassVK::EncodeCommands() const {
  // Since we only use a global memory barrier, we don't have to worry about
  // compute-to-compute dependencies across command buffers. Instead, we
  // pessimize here and assume that we wrote to a storage image or buffer and
  // that a render pass will read from it. If there are ever scenarios where we
  // end up with compute-to-compute dependencies across command buffers, this
  // should be revisited.
  //
  // This does not currently handle image barriers as we do not use them
  // for anything.
  vk::MemoryBarrier barrier;
  // Compute-shader writes before this point ...
  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
  // ... must be visible to the vertex-input stage of a later render pass
  // (index fetch and vertex attribute fetch).
  barrier.dstAccessMask =
      vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead;

  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
      vk::PipelineStageFlagBits::eComputeShader,
      vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {});

  return true;
}

Expand Down
6 changes: 6 additions & 0 deletions impeller/renderer/backend/vulkan/compute_pass_vk.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class ComputePassVK final : public ComputePass {
void SetPipeline(const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>&
pipeline) override;

// |ComputePass|
void AddBufferMemoryBarrier() override;

// |ComputePass|
void AddTextureMemoryBarrier() override;

// |ComputePass|
fml::Status Compute(const ISize& grid_size) override;

Expand Down
18 changes: 18 additions & 0 deletions impeller/renderer/compute_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,22 @@ class ComputePass : public ResourceBinder {

virtual fml::Status Compute(const ISize& grid_size) = 0;

/// @brief Ensures all previously encoded compute commands' buffer writes are
///        visible to any subsequent compute commands.
///
///        On Vulkan, it does not matter if the compute command is in a
///        different command buffer, only that it is executed later in queue
///        order.
virtual void AddBufferMemoryBarrier() = 0;

/// @brief Ensures all previously encoded compute commands' texture writes are
///        visible to any subsequent compute commands.
///
///        On Vulkan, it does not matter if the compute command is in a
///        different command buffer, only that it is executed later in queue
///        order.
virtual void AddTextureMemoryBarrier() = 0;

//----------------------------------------------------------------------------
/// @brief Encode the recorded commands to the underlying command buffer.
///
Expand All @@ -43,6 +59,8 @@ class ComputePass : public ResourceBinder {
///
virtual bool EncodeCommands() const = 0;

const Context& GetContext() const { return *context_; }

protected:
const std::shared_ptr<const Context> context_;

Expand Down
1 change: 1 addition & 0 deletions impeller/renderer/compute_unittests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ TEST_P(ComputeTest, MultiStageInputAndOutput) {
CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));

ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
pass->AddBufferMemoryBarrier();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're now running concurrently, we need a memory barrier here so that step 2 sees the output of step 1.

}

{
Expand Down