Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 32 additions & 26 deletions impeller/entity/contents/filters/gaussian_blur_filter_contents.cc
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ DownsamplePassArgs CalculateDownsamplePassArgs(
fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
const ContentContext& renderer,
const std::shared_ptr<CommandBuffer>& command_buffer,
std::shared_ptr<Texture> input_texture,
const std::shared_ptr<Texture>& input_texture,
const SamplerDescriptor& sampler_descriptor,
const DownsamplePassArgs& pass_args,
Entity::TileMode tile_mode) {
Expand All @@ -345,7 +345,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(

TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
frame_info.texture_sampler_y_coord_scale = 1.0;
frame_info.texture_sampler_y_coord_scale =
input_texture->GetYCoordScale();

TextureFillFragmentShader::FragInfo frag_info;
frag_info.alpha = 1.0;
Expand Down Expand Up @@ -398,7 +399,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(

TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
frame_info.texture_sampler_y_coord_scale = 1.0;
frame_info.texture_sampler_y_coord_scale =
input_texture->GetYCoordScale();

TextureDownsampleFragmentShader::FragInfo frag_info;
frag_info.edge = edge;
Expand Down Expand Up @@ -447,16 +449,18 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
return input_pass;
}

std::shared_ptr<Texture> input_texture = input_pass.GetRenderTargetTexture();
const std::shared_ptr<Texture>& input_texture =
input_pass.GetRenderTargetTexture();

// TODO(gaaclarke): This blurs the whole image, but because we know the clip
// region we could focus on just blurring that.
ISize subpass_size = input_texture->GetSize();
ContentContext::SubpassCallback subpass_callback =
[&](const ContentContext& renderer, RenderPass& pass) {
GaussianBlurVertexShader::FrameInfo frame_info{
.mvp = Matrix::MakeOrthographic(ISize(1, 1)),
.texture_sampler_y_coord_scale = 1.0};
// Per-draw vertex uniforms for the blur pass.
GaussianBlurVertexShader::FrameInfo frame_info;
// Terminate with ';' so each field assignment is its own statement rather
// than being chained through the comma operator.
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
// Use the input texture's own Y coordinate scale when sampling it.
frame_info.texture_sampler_y_coord_scale =
    input_texture->GetYCoordScale();

HostBuffer& host_buffer = renderer.GetTransientsBuffer();

Expand All @@ -481,11 +485,9 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
linear_sampler_descriptor));
GaussianBlurVertexShader::BindFrameInfo(
pass, host_buffer.EmplaceUniform(frame_info));
GaussianBlurPipeline::FragmentShader::KernelSamples kernel_samples =
LerpHackKernelSamples(GenerateBlurInfo(blur_info));
FML_CHECK(kernel_samples.sample_count <= kGaussianBlurMaxKernelSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please bring back this check. It's important because we'll crash anyway if we violate this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is checked in LerpHackKernelSamples too, and I don't think the count can change after that point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, yeah. In that case I'd rather keep this one here, since it is the final point before we'd actually get the error. We can remove the one in the lerp hack.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the kernel samples UBO has a std::array of a fixed size. It cannot be greater than kGaussianBlurMaxKernelSize.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could static assert this though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah, the sample_count isn't even consequential here because emplace isn't looking at it. Let's keep the runtime DCHECK in the lerp hack; a static assert sounds good to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

GaussianBlurFragmentShader::BindKernelSamples(
pass, host_buffer.EmplaceUniform(kernel_samples));
GaussianBlurFragmentShader::BindBlurInfo(
pass, host_buffer.EmplaceUniform(
LerpHackKernelSamples(GenerateBlurInfo(blur_info))));
return pass.Draw().ok();
};
if (destination_target.has_value()) {
Expand Down Expand Up @@ -898,7 +900,7 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {
Scalar tally = 0.0f;
for (int i = 0; i < result.sample_count; ++i) {
int x = x_offset + (i * parameters.step_size) - parameters.blur_radius;
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
result.samples[i] = KernelSample{
.uv_offset = parameters.blur_uv_offset * x,
.coefficient = expf(-0.5f * (x * x) /
(parameters.blur_sigma * parameters.blur_sigma)) /
Expand All @@ -917,27 +919,31 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {

// This works by roughly halving the kernel size and relying on lerp to read
// between the samples.
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
GaussianBlurPipeline::FragmentShader::BlurInfo LerpHackKernelSamples(
KernelSamples parameters) {
GaussianBlurPipeline::FragmentShader::KernelSamples result;
GaussianBlurPipeline::FragmentShader::BlurInfo result = {};
result.sample_count = ((parameters.sample_count - 1) / 2) + 1;
int32_t middle = result.sample_count / 2;
int32_t j = 0;
FML_DCHECK(result.sample_count <= kGaussianBlurMaxKernelSize);

for (int i = 0; i < result.sample_count; i++) {
if (i == middle) {
result.samples[i] = parameters.samples[j++];
result.sample_data[i].x = parameters.samples[j].uv_offset.x;
result.sample_data[i].y = parameters.samples[j].uv_offset.y;
result.sample_data[i].z = parameters.samples[j].coefficient;
j++;
} else {
GaussianBlurPipeline::FragmentShader::KernelSample left =
parameters.samples[j];
GaussianBlurPipeline::FragmentShader::KernelSample right =
parameters.samples[j + 1];
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
.uv_offset = (left.uv_offset * left.coefficient +
right.uv_offset * right.coefficient) /
(left.coefficient + right.coefficient),
.coefficient = left.coefficient + right.coefficient,
};
KernelSample left = parameters.samples[j];
KernelSample right = parameters.samples[j + 1];

result.sample_data[i].z = left.coefficient + right.coefficient;

auto uv = (left.uv_offset * left.coefficient +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto uv = (left.uv_offset * left.coefficient +
Point uv = (left.uv_offset * left.coefficient +

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

right.uv_offset * right.coefficient) /
(left.coefficient + right.coefficient);
result.sample_data[i].x = uv.x;
result.sample_data[i].y = uv.y;
j += 2;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ struct BlurParameters {
int step_size;
};

/// One tap of the blur kernel, as filled in by GenerateBlurInfo.
struct KernelSample {
// Offset added to the texture coordinates when taking this sample.
Vector2 uv_offset;
// Weight that the sampled color is multiplied by.
float coefficient;
};

/// A larger mirror of GaussianBlurPipeline::FragmentShader::KernelSamples.
///
/// This is a mirror of GaussianBlurPipeline::FragmentShader::KernelSamples that
Expand All @@ -30,14 +35,14 @@ struct BlurParameters {
struct KernelSamples {
static constexpr int kMaxKernelSize = kGaussianBlurMaxKernelSize * 2;
int sample_count;
GaussianBlurPipeline::FragmentShader::KernelSample samples[kMaxKernelSize];
KernelSample samples[kMaxKernelSize];
};

KernelSamples GenerateBlurInfo(BlurParameters parameters);

/// This will shrink the size of a kernel by roughly half by sampling between
/// samples and relying on linear interpolation between the samples.
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
GaussianBlurPipeline::FragmentShader::BlurInfo LerpHackKernelSamples(
KernelSamples samples);

/// Performs a bidirectional Gaussian blur.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,27 +508,24 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
},
};

GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples =
GaussianBlurPipeline::FragmentShader::BlurInfo blur_info =
LerpHackKernelSamples(kernel_samples);
EXPECT_EQ(fast_kernel_samples.sample_count, 3);
EXPECT_EQ(blur_info.sample_count, 3);

GaussianBlurPipeline::FragmentShader::KernelSample* samples =
kernel_samples.samples;
GaussianBlurPipeline::FragmentShader::KernelSample* fast_samples =
fast_kernel_samples.samples;
KernelSample* samples = kernel_samples.samples;

//////////////////////////////////////////////////////////////////////////////
// Check output kernel.

EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333);
EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3);
EXPECT_FLOAT_EQ(blur_info.sample_data[0].x, -1.3333333);
EXPECT_FLOAT_EQ(blur_info.sample_data[0].y, 0);
EXPECT_FLOAT_EQ(blur_info.sample_data[0].z, 0.3);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EXPECT_FLOAT_EQ(blur_info.sample_data[0].z, 0.3);
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[0]), 0.3);

I think we would be better off not losing the names of values. It's very confusing to have to remember that .z means "get the coefficient". Can we codify that into some tiny functions?

Same thing for uv_offset.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

EXPECT_FLOAT_EQ(blur_info.sample_data[1].x, 0);
EXPECT_FLOAT_EQ(blur_info.sample_data[1].y, 0);
EXPECT_FLOAT_EQ(blur_info.sample_data[1].z, 0.4);
EXPECT_FLOAT_EQ(blur_info.sample_data[2].x, 1.3333333);
EXPECT_FLOAT_EQ(blur_info.sample_data[2].y, 0);
EXPECT_FLOAT_EQ(blur_info.sample_data[2].z, 0.3);

//////////////////////////////////////////////////////////////////////////////
// Check output of fast kernel versus original kernel.
Expand All @@ -549,11 +546,11 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
}
};
Scalar fast_output =
/*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) *
fast_samples[0].coefficient +
/*2nd*/ data[2] * fast_samples[1].coefficient +
/*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) *
fast_samples[2].coefficient;
/*1st*/ lerp(blur_info.sample_data[0].xy(), data[0], data[1]) *
blur_info.sample_data[0].z +
/*2nd*/ data[2] * blur_info.sample_data[1].z +
/*3rd*/ lerp(blur_info.sample_data[2].xy(), data[3], data[4]) *
blur_info.sample_data[2].z;

EXPECT_NEAR(original_output, fast_output, 0.01);
}
Expand All @@ -568,7 +565,7 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) {
.step_size = 1};
KernelSamples kernel_samples = GenerateBlurInfo(parameters);
EXPECT_EQ(kernel_samples.sample_count, 33);
GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples =
GaussianBlurPipeline::FragmentShader::BlurInfo fast_kernel_samples =
LerpHackKernelSamples(kernel_samples);
EXPECT_EQ(fast_kernel_samples.sample_count, 17);
float data[33];
Expand Down Expand Up @@ -604,9 +601,9 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) {
}

Scalar fast_output = 0.0;
for (int i = 0; i < fast_kernel_samples.sample_count; ++i) {
auto sample = fast_kernel_samples.samples[i];
fast_output += sample.coefficient * sampler(sample.uv_offset);
for (int i = 0; i < fast_kernel_samples.sample_count; i++) {
fast_output += fast_kernel_samples.sample_data[i].z *
sampler(fast_kernel_samples.sample_data[i].xy());
}

EXPECT_NEAR(output, fast_output, 0.1);
Expand All @@ -621,7 +618,7 @@ TEST(GaussianBlurFilterContentsTest, ChopHugeBlurs) {
.blur_radius = blur_radius,
.step_size = 1};
KernelSamples kernel_samples = GenerateBlurInfo(parameters);
GaussianBlurPipeline::FragmentShader::KernelSamples frag_kernel_samples =
GaussianBlurPipeline::FragmentShader::BlurInfo frag_kernel_samples =
LerpHackKernelSamples(kernel_samples);
EXPECT_TRUE(frag_kernel_samples.sample_count <= kGaussianBlurMaxKernelSize);
}
Expand Down
21 changes: 9 additions & 12 deletions impeller/entity/shaders/filters/gaussian.frag
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ uniform f16sampler2D texture_sampler;

layout(constant_id = 0) const float supports_decal = 1.0;

struct KernelSample {
vec2 uv_offset;
float coefficient;
};

uniform KernelSamples {
int sample_count;
KernelSample samples[50];
uniform BlurInfo {
float sample_count;

// X, Y are uv offset and Z is Coefficient. W is padding.
vec4 sample_data[50];
}
blur_info;

Expand All @@ -36,11 +33,11 @@ out f16vec4 frag_color;
void main() {
f16vec4 total_color = f16vec4(0.0hf);

for (int i = 0; i < blur_info.sample_count; ++i) {
float16_t coefficient = float16_t(blur_info.samples[i].coefficient);
for (int i = 0; i < int(blur_info.sample_count); i++) {
float16_t coefficient = float16_t(blur_info.sample_data[i].z);
total_color +=
coefficient * Sample(texture_sampler,
v_texture_coords + blur_info.samples[i].uv_offset);
coefficient *
Sample(texture_sampler, v_texture_coords + blur_info.sample_data[i].xy);
}

frag_color = total_color;
Expand Down
2 changes: 2 additions & 0 deletions impeller/geometry/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ struct Vector4 {
return *this + (v - *this) * t;
}

/// Returns the x and y components of this vector as a Vector2.
constexpr Vector2 xy() const { return Vector2(x, y); }

std::string ToString() const;
};

Expand Down
15 changes: 7 additions & 8 deletions impeller/renderer/backend/gles/buffer_bindings_gles.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,20 +279,19 @@ bool BufferBindingsGLES::BindUniformBuffer(const ProcTableGLES& gl,
auto* buffer_data =
reinterpret_cast<const GLfloat*>(buffer_ptr + member.offset);

std::vector<uint8_t> array_element_buffer;
if (element_count > 1) {
// When binding uniform arrays, the elements must be contiguous. Copy
// the uniforms to a temp buffer to eliminate any padding needed by the
// other backends.
array_element_buffer.resize(member.size * element_count);
// When binding uniform arrays, the elements must be contiguous. Copy
// the uniforms to a temp buffer to eliminate any padding needed by the
// other backends if the array elements have padding.
if (element_count > 1 && element_stride != member.size) {
array_element_buffer_.resize(member.size * element_count);
for (size_t element_i = 0; element_i < element_count; element_i++) {
std::memcpy(array_element_buffer.data() + element_i * member.size,
std::memcpy(array_element_buffer_.data() + element_i * member.size,
reinterpret_cast<const char*>(buffer_data) +
element_i * element_stride,
member.size);
}
buffer_data =
reinterpret_cast<const GLfloat*>(array_element_buffer.data());
reinterpret_cast<const GLfloat*>(array_element_buffer_.data());
}

switch (member.type) {
Expand Down
1 change: 1 addition & 0 deletions impeller/renderer/backend/gles/buffer_bindings_gles.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class BufferBindingsGLES {
std::vector<VertexAttribPointer> vertex_attrib_arrays_;

std::unordered_map<std::string, GLint> uniform_locations_;
std::vector<uint8_t> array_element_buffer_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we adding this as an instance variable? I'd rather keep it localized to where it is used to avoid stale data bugs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to avoid creating the empty vector, since I can't safely conditionally initialize it. But that is probably not a worthwhile change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed


using BindingMap = std::unordered_map<std::string, std::vector<GLint>>;
BindingMap binding_map_ = {};
Expand Down
13 changes: 7 additions & 6 deletions impeller/tools/malioc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2581,9 +2581,9 @@
"arith_cvt"
],
"shortest_path_cycles": [
0.109375,
0.09375,
0.0,
0.109375,
0.09375,
0.0,
0.0,
0.0,
Expand All @@ -2593,11 +2593,11 @@
"load_store"
],
"total_cycles": [
0.3125,
0.265625,
0.09375,
0.3125,
0.265625,
0.0,
2.0,
1.0,
0.25,
0.25
]
Expand Down Expand Up @@ -2641,10 +2641,11 @@
0.0
],
"total_bound_pipelines": [
"arithmetic",
"load_store"
],
"total_cycles": [
1.6666666269302368,
2.0,
2.0,
1.0
]
Expand Down