Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 22d3abd

Browse files
author
Jonah Williams
authored
[Impeller] Fix GLES gaussian implementation. (#55329)
Fixes flutter/flutter#142355 problems: no uniform structs, no int uniforms, buffer bindings dont work when the struct type doesn't match the uniform name �
1 parent e6dd280 commit 22d3abd

7 files changed

Lines changed: 99 additions & 75 deletions

File tree

impeller/entity/contents/filters/gaussian_blur_filter_contents.cc

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "impeller/entity/texture_downsample.frag.h"
1313
#include "impeller/entity/texture_fill.frag.h"
1414
#include "impeller/entity/texture_fill.vert.h"
15+
#include "impeller/geometry/color.h"
1516
#include "impeller/renderer/render_pass.h"
1617
#include "impeller/renderer/vertex_buffer_builder.h"
1718

@@ -316,7 +317,7 @@ DownsamplePassArgs CalculateDownsamplePassArgs(
316317
fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
317318
const ContentContext& renderer,
318319
const std::shared_ptr<CommandBuffer>& command_buffer,
319-
std::shared_ptr<Texture> input_texture,
320+
const std::shared_ptr<Texture>& input_texture,
320321
const SamplerDescriptor& sampler_descriptor,
321322
const DownsamplePassArgs& pass_args,
322323
Entity::TileMode tile_mode) {
@@ -338,7 +339,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
338339

339340
TextureFillVertexShader::FrameInfo frame_info;
340341
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
341-
frame_info.texture_sampler_y_coord_scale = 1.0;
342+
frame_info.texture_sampler_y_coord_scale =
343+
input_texture->GetYCoordScale();
342344

343345
TextureFillFragmentShader::FragInfo frag_info;
344346
frag_info.alpha = 1.0;
@@ -391,7 +393,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
391393

392394
TextureFillVertexShader::FrameInfo frame_info;
393395
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
394-
frame_info.texture_sampler_y_coord_scale = 1.0;
396+
frame_info.texture_sampler_y_coord_scale =
397+
input_texture->GetYCoordScale();
395398

396399
TextureDownsampleFragmentShader::FragInfo frag_info;
397400
frag_info.edge = edge;
@@ -442,16 +445,18 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
442445
return input_pass;
443446
}
444447

445-
std::shared_ptr<Texture> input_texture = input_pass.GetRenderTargetTexture();
448+
const std::shared_ptr<Texture>& input_texture =
449+
input_pass.GetRenderTargetTexture();
446450

447451
// TODO(gaaclarke): This blurs the whole image, but because we know the clip
448452
// region we could focus on just blurring that.
449453
ISize subpass_size = input_texture->GetSize();
450454
ContentContext::SubpassCallback subpass_callback =
451455
[&](const ContentContext& renderer, RenderPass& pass) {
452-
GaussianBlurVertexShader::FrameInfo frame_info{
453-
.mvp = Matrix::MakeOrthographic(ISize(1, 1)),
454-
.texture_sampler_y_coord_scale = 1.0};
456+
GaussianBlurVertexShader::FrameInfo frame_info;
457+
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1)),
458+
frame_info.texture_sampler_y_coord_scale =
459+
input_texture->GetYCoordScale();
455460

456461
HostBuffer& host_buffer = renderer.GetTransientsBuffer();
457462

@@ -476,11 +481,9 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
476481
linear_sampler_descriptor));
477482
GaussianBlurVertexShader::BindFrameInfo(
478483
pass, host_buffer.EmplaceUniform(frame_info));
479-
GaussianBlurPipeline::FragmentShader::KernelSamples kernel_samples =
480-
LerpHackKernelSamples(GenerateBlurInfo(blur_info));
481-
FML_CHECK(kernel_samples.sample_count <= kGaussianBlurMaxKernelSize);
482484
GaussianBlurFragmentShader::BindKernelSamples(
483-
pass, host_buffer.EmplaceUniform(kernel_samples));
485+
pass, host_buffer.EmplaceUniform(
486+
LerpHackKernelSamples(GenerateBlurInfo(blur_info))));
484487
return pass.Draw().ok();
485488
};
486489
if (destination_target.has_value()) {
@@ -893,7 +896,7 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {
893896
Scalar tally = 0.0f;
894897
for (int i = 0; i < result.sample_count; ++i) {
895898
int x = x_offset + (i * parameters.step_size) - parameters.blur_radius;
896-
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
899+
result.samples[i] = KernelSample{
897900
.uv_offset = parameters.blur_uv_offset * x,
898901
.coefficient = expf(-0.5f * (x * x) /
899902
(parameters.blur_sigma * parameters.blur_sigma)) /
@@ -914,25 +917,31 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {
914917
// between the samples.
915918
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
916919
KernelSamples parameters) {
917-
GaussianBlurPipeline::FragmentShader::KernelSamples result;
920+
GaussianBlurPipeline::FragmentShader::KernelSamples result = {};
918921
result.sample_count = ((parameters.sample_count - 1) / 2) + 1;
919922
int32_t middle = result.sample_count / 2;
920923
int32_t j = 0;
921924
FML_DCHECK(result.sample_count <= kGaussianBlurMaxKernelSize);
925+
static_assert(sizeof(result.sample_data) ==
926+
sizeof(std::array<Vector4, kGaussianBlurMaxKernelSize>));
927+
922928
for (int i = 0; i < result.sample_count; i++) {
923929
if (i == middle) {
924-
result.samples[i] = parameters.samples[j++];
930+
result.sample_data[i].x = parameters.samples[j].uv_offset.x;
931+
result.sample_data[i].y = parameters.samples[j].uv_offset.y;
932+
result.sample_data[i].z = parameters.samples[j].coefficient;
933+
j++;
925934
} else {
926-
GaussianBlurPipeline::FragmentShader::KernelSample left =
927-
parameters.samples[j];
928-
GaussianBlurPipeline::FragmentShader::KernelSample right =
929-
parameters.samples[j + 1];
930-
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
931-
.uv_offset = (left.uv_offset * left.coefficient +
932-
right.uv_offset * right.coefficient) /
933-
(left.coefficient + right.coefficient),
934-
.coefficient = left.coefficient + right.coefficient,
935-
};
935+
KernelSample left = parameters.samples[j];
936+
KernelSample right = parameters.samples[j + 1];
937+
938+
result.sample_data[i].z = left.coefficient + right.coefficient;
939+
940+
Point uv = (left.uv_offset * left.coefficient +
941+
right.uv_offset * right.coefficient) /
942+
(left.coefficient + right.coefficient);
943+
result.sample_data[i].x = uv.x;
944+
result.sample_data[i].y = uv.y;
936945
j += 2;
937946
}
938947
}

impeller/entity/contents/filters/gaussian_blur_filter_contents.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,28 @@
99
#include "impeller/entity/contents/content_context.h"
1010
#include "impeller/entity/contents/filters/filter_contents.h"
1111
#include "impeller/entity/geometry/geometry.h"
12+
#include "impeller/geometry/color.h"
1213

1314
namespace impeller {
1415

1516
// Comes from gaussian.frag.
1617
static constexpr int32_t kGaussianBlurMaxKernelSize = 50;
1718

19+
static_assert(sizeof(GaussianBlurPipeline::FragmentShader::KernelSamples) ==
20+
sizeof(Vector4) * kGaussianBlurMaxKernelSize + sizeof(Vector4));
21+
1822
struct BlurParameters {
1923
Point blur_uv_offset;
2024
Scalar blur_sigma;
2125
int blur_radius;
2226
int step_size;
2327
};
2428

29+
struct KernelSample {
30+
Vector2 uv_offset;
31+
float coefficient;
32+
};
33+
2534
/// A larger mirror of GaussianBlurPipeline::FragmentShader::KernelSamples.
2635
///
2736
/// This is a mirror of GaussianBlurPipeline::FragmentShader::KernelSamples that
@@ -30,7 +39,7 @@ struct BlurParameters {
3039
struct KernelSamples {
3140
static constexpr int kMaxKernelSize = kGaussianBlurMaxKernelSize * 2;
3241
int sample_count;
33-
GaussianBlurPipeline::FragmentShader::KernelSample samples[kMaxKernelSize];
42+
KernelSample samples[kMaxKernelSize];
3443
};
3544

3645
KernelSamples GenerateBlurInfo(BlurParameters parameters);

impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "impeller/entity/contents/filters/gaussian_blur_filter_contents.h"
1010
#include "impeller/entity/contents/texture_contents.h"
1111
#include "impeller/entity/entity_playground.h"
12+
#include "impeller/geometry/color.h"
1213
#include "impeller/geometry/geometry_asserts.h"
1314
#include "impeller/renderer/testing/mocks.h"
1415

@@ -51,6 +52,14 @@ fml::StatusOr<float> LowerBoundNewtonianMethod(
5152
return x;
5253
}
5354

55+
Scalar GetCoefficient(const Vector4& vec) {
56+
return vec.z;
57+
}
58+
59+
Vector2 GetUVOffset(const Vector4& vec) {
60+
return vec.xy();
61+
}
62+
5463
fml::StatusOr<Scalar> CalculateSigmaForBlurRadius(
5564
Scalar radius,
5665
const Matrix& effect_transform) {
@@ -508,27 +517,24 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
508517
},
509518
};
510519

511-
GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples =
520+
GaussianBlurPipeline::FragmentShader::KernelSamples blur_info =
512521
LerpHackKernelSamples(kernel_samples);
513-
EXPECT_EQ(fast_kernel_samples.sample_count, 3);
522+
EXPECT_EQ(blur_info.sample_count, 3);
514523

515-
GaussianBlurPipeline::FragmentShader::KernelSample* samples =
516-
kernel_samples.samples;
517-
GaussianBlurPipeline::FragmentShader::KernelSample* fast_samples =
518-
fast_kernel_samples.samples;
524+
KernelSample* samples = kernel_samples.samples;
519525

520526
//////////////////////////////////////////////////////////////////////////////
521527
// Check output kernel.
522528

523-
EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333);
524-
EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0);
525-
EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3);
526-
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0);
527-
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0);
528-
EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4);
529-
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333);
530-
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0);
531-
EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3);
529+
EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[0]),
530+
Point(-1.3333333, 0));
531+
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[0]), 0.3);
532+
533+
EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[1]), Point(0, 0));
534+
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[1]), 0.4);
535+
536+
EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[2]), Point(1.333333, 0));
537+
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[2]), 0.3);
532538

533539
//////////////////////////////////////////////////////////////////////////////
534540
// Check output of fast kernel versus original kernel.
@@ -549,11 +555,11 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
549555
}
550556
};
551557
Scalar fast_output =
552-
/*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) *
553-
fast_samples[0].coefficient +
554-
/*2nd*/ data[2] * fast_samples[1].coefficient +
555-
/*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) *
556-
fast_samples[2].coefficient;
558+
/*1st*/ lerp(GetUVOffset(blur_info.sample_data[0]), data[0], data[1]) *
559+
GetCoefficient(blur_info.sample_data[0]) +
560+
/*2nd*/ data[2] * GetCoefficient(blur_info.sample_data[1]) +
561+
/*3rd*/ lerp(GetUVOffset(blur_info.sample_data[2]), data[3], data[4]) *
562+
GetCoefficient(blur_info.sample_data[2]);
557563

558564
EXPECT_NEAR(original_output, fast_output, 0.01);
559565
}
@@ -604,9 +610,9 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) {
604610
}
605611

606612
Scalar fast_output = 0.0;
607-
for (int i = 0; i < fast_kernel_samples.sample_count; ++i) {
608-
auto sample = fast_kernel_samples.samples[i];
609-
fast_output += sample.coefficient * sampler(sample.uv_offset);
613+
for (int i = 0; i < fast_kernel_samples.sample_count; i++) {
614+
fast_output += GetCoefficient(fast_kernel_samples.sample_data[i]) *
615+
sampler(GetUVOffset(fast_kernel_samples.sample_data[i]));
610616
}
611617

612618
EXPECT_NEAR(output, fast_output, 0.1);

impeller/entity/shaders/filters/gaussian.frag

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,13 @@ uniform f16sampler2D texture_sampler;
1111

1212
layout(constant_id = 0) const float supports_decal = 1.0;
1313

14-
struct KernelSample {
15-
vec2 uv_offset;
16-
float coefficient;
17-
};
18-
1914
uniform KernelSamples {
20-
int sample_count;
21-
KernelSample samples[50];
15+
float sample_count;
16+
17+
// X, Y are uv offset and Z is Coefficient. W is padding.
18+
vec4 sample_data[50];
2219
}
23-
blur_info;
20+
kernel_samples;
2421

2522
f16vec4 Sample(f16sampler2D tex, vec2 coords) {
2623
if (supports_decal == 1.0) {
@@ -36,11 +33,11 @@ out f16vec4 frag_color;
3633
void main() {
3734
f16vec4 total_color = f16vec4(0.0hf);
3835

39-
for (int i = 0; i < blur_info.sample_count; ++i) {
40-
float16_t coefficient = float16_t(blur_info.samples[i].coefficient);
41-
total_color +=
42-
coefficient * Sample(texture_sampler,
43-
v_texture_coords + blur_info.samples[i].uv_offset);
36+
for (int i = 0; i < int(kernel_samples.sample_count); i++) {
37+
float16_t coefficient = float16_t(kernel_samples.sample_data[i].z);
38+
total_color += coefficient *
39+
Sample(texture_sampler,
40+
v_texture_coords + kernel_samples.sample_data[i].xy);
4441
}
4542

4643
frag_color = total_color;

impeller/geometry/vector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ struct Vector4 {
310310
return *this + (v - *this) * t;
311311
}
312312

313+
constexpr Vector2 xy() const { return Vector2(x, y); }
314+
313315
std::string ToString() const;
314316
};
315317

impeller/renderer/backend/gles/buffer_bindings_gles.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -279,20 +279,20 @@ bool BufferBindingsGLES::BindUniformBuffer(const ProcTableGLES& gl,
279279
auto* buffer_data =
280280
reinterpret_cast<const GLfloat*>(buffer_ptr + member.offset);
281281

282-
std::vector<uint8_t> array_element_buffer;
283-
if (element_count > 1) {
284-
// When binding uniform arrays, the elements must be contiguous. Copy
285-
// the uniforms to a temp buffer to eliminate any padding needed by the
286-
// other backends.
287-
array_element_buffer.resize(member.size * element_count);
282+
// When binding uniform arrays, the elements must be contiguous. Copy
283+
// the uniforms to a temp buffer to eliminate any padding needed by the
284+
// other backends if the array elements have padding.
285+
std::vector<uint8_t> array_element_buffer_;
286+
if (element_count > 1 && element_stride != member.size) {
287+
array_element_buffer_.resize(member.size * element_count);
288288
for (size_t element_i = 0; element_i < element_count; element_i++) {
289-
std::memcpy(array_element_buffer.data() + element_i * member.size,
289+
std::memcpy(array_element_buffer_.data() + element_i * member.size,
290290
reinterpret_cast<const char*>(buffer_data) +
291291
element_i * element_stride,
292292
member.size);
293293
}
294294
buffer_data =
295-
reinterpret_cast<const GLfloat*>(array_element_buffer.data());
295+
reinterpret_cast<const GLfloat*>(array_element_buffer_.data());
296296
}
297297

298298
switch (member.type) {

impeller/tools/malioc.json

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2581,9 +2581,9 @@
25812581
"arith_cvt"
25822582
],
25832583
"shortest_path_cycles": [
2584-
0.109375,
2584+
0.09375,
25852585
0.0,
2586-
0.109375,
2586+
0.09375,
25872587
0.0,
25882588
0.0,
25892589
0.0,
@@ -2593,11 +2593,11 @@
25932593
"load_store"
25942594
],
25952595
"total_cycles": [
2596-
0.3125,
2596+
0.265625,
25972597
0.09375,
2598-
0.3125,
2598+
0.265625,
25992599
0.0,
2600-
2.0,
2600+
1.0,
26012601
0.25,
26022602
0.25
26032603
]
@@ -2641,10 +2641,11 @@
26412641
0.0
26422642
],
26432643
"total_bound_pipelines": [
2644+
"arithmetic",
26442645
"load_store"
26452646
],
26462647
"total_cycles": [
2647-
1.6666666269302368,
2648+
2.0,
26482649
2.0,
26492650
1.0
26502651
]

0 commit comments

Comments
 (0)