Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 33 additions & 24 deletions impeller/entity/contents/filters/gaussian_blur_filter_contents.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "impeller/entity/texture_downsample.frag.h"
#include "impeller/entity/texture_fill.frag.h"
#include "impeller/entity/texture_fill.vert.h"
#include "impeller/geometry/color.h"
#include "impeller/renderer/render_pass.h"
#include "impeller/renderer/vertex_buffer_builder.h"

Expand Down Expand Up @@ -325,7 +326,7 @@ DownsamplePassArgs CalculateDownsamplePassArgs(
fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
const ContentContext& renderer,
const std::shared_ptr<CommandBuffer>& command_buffer,
std::shared_ptr<Texture> input_texture,
const std::shared_ptr<Texture>& input_texture,
const SamplerDescriptor& sampler_descriptor,
const DownsamplePassArgs& pass_args,
Entity::TileMode tile_mode) {
Expand All @@ -345,7 +346,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(

TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
frame_info.texture_sampler_y_coord_scale = 1.0;
frame_info.texture_sampler_y_coord_scale =
input_texture->GetYCoordScale();

TextureFillFragmentShader::FragInfo frag_info;
frag_info.alpha = 1.0;
Expand Down Expand Up @@ -398,7 +400,8 @@ fml::StatusOr<RenderTarget> MakeDownsampleSubpass(

TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
frame_info.texture_sampler_y_coord_scale = 1.0;
frame_info.texture_sampler_y_coord_scale =
input_texture->GetYCoordScale();

TextureDownsampleFragmentShader::FragInfo frag_info;
frag_info.edge = edge;
Expand Down Expand Up @@ -447,16 +450,18 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
return input_pass;
}

std::shared_ptr<Texture> input_texture = input_pass.GetRenderTargetTexture();
const std::shared_ptr<Texture>& input_texture =
input_pass.GetRenderTargetTexture();

// TODO(gaaclarke): This blurs the whole image, but because we know the clip
// region we could focus on just blurring that.
ISize subpass_size = input_texture->GetSize();
ContentContext::SubpassCallback subpass_callback =
[&](const ContentContext& renderer, RenderPass& pass) {
GaussianBlurVertexShader::FrameInfo frame_info{
.mvp = Matrix::MakeOrthographic(ISize(1, 1)),
.texture_sampler_y_coord_scale = 1.0};
// Vertex-stage uniforms for the blur pass. Each field is assigned in its own
// statement; the y-coordinate scale is read from the input texture rather
// than being hard-coded.
GaussianBlurVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
frame_info.texture_sampler_y_coord_scale =
input_texture->GetYCoordScale();

HostBuffer& host_buffer = renderer.GetTransientsBuffer();

Expand All @@ -481,11 +486,9 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
linear_sampler_descriptor));
GaussianBlurVertexShader::BindFrameInfo(
pass, host_buffer.EmplaceUniform(frame_info));
GaussianBlurPipeline::FragmentShader::KernelSamples kernel_samples =
LerpHackKernelSamples(GenerateBlurInfo(blur_info));
FML_CHECK(kernel_samples.sample_count <= kGaussianBlurMaxKernelSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please bring back this check. It's important because we'll crash anyway if we violate this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is checked in the LerpHackFunction too; I don't think the count can change after that point?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, yeah. In that case I'd rather keep this one here, since it is the final point before we'd actually get the error. We can remove the lerp hack one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the kernel samples UBO has a std::array of a fixed size, so it cannot be greater than kGaussianBlurMaxKernelSize.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could static assert this though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah, the sample_count isn't even consequential here because emplace isn't looking at it. Let's keep the runtime DCHECK in the lerp hack, and a static assert sounds good to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

GaussianBlurFragmentShader::BindKernelSamples(
pass, host_buffer.EmplaceUniform(kernel_samples));
pass, host_buffer.EmplaceUniform(
LerpHackKernelSamples(GenerateBlurInfo(blur_info))));
return pass.Draw().ok();
};
if (destination_target.has_value()) {
Expand Down Expand Up @@ -898,7 +901,7 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {
Scalar tally = 0.0f;
for (int i = 0; i < result.sample_count; ++i) {
int x = x_offset + (i * parameters.step_size) - parameters.blur_radius;
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
result.samples[i] = KernelSample{
.uv_offset = parameters.blur_uv_offset * x,
.coefficient = expf(-0.5f * (x * x) /
(parameters.blur_sigma * parameters.blur_sigma)) /
Expand All @@ -919,25 +922,31 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) {
// between the samples.
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
KernelSamples parameters) {
GaussianBlurPipeline::FragmentShader::KernelSamples result;
GaussianBlurPipeline::FragmentShader::KernelSamples result = {};
result.sample_count = ((parameters.sample_count - 1) / 2) + 1;
int32_t middle = result.sample_count / 2;
int32_t j = 0;
FML_DCHECK(result.sample_count <= kGaussianBlurMaxKernelSize);
static_assert(sizeof(result.sample_data) ==
sizeof(std::array<Vector4, kGaussianBlurMaxKernelSize>));

for (int i = 0; i < result.sample_count; i++) {
if (i == middle) {
result.samples[i] = parameters.samples[j++];
result.sample_data[i].x = parameters.samples[j].uv_offset.x;
result.sample_data[i].y = parameters.samples[j].uv_offset.y;
result.sample_data[i].z = parameters.samples[j].coefficient;
j++;
} else {
GaussianBlurPipeline::FragmentShader::KernelSample left =
parameters.samples[j];
GaussianBlurPipeline::FragmentShader::KernelSample right =
parameters.samples[j + 1];
result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
.uv_offset = (left.uv_offset * left.coefficient +
right.uv_offset * right.coefficient) /
(left.coefficient + right.coefficient),
.coefficient = left.coefficient + right.coefficient,
};
KernelSample left = parameters.samples[j];
KernelSample right = parameters.samples[j + 1];

result.sample_data[i].z = left.coefficient + right.coefficient;

Point uv = (left.uv_offset * left.coefficient +
right.uv_offset * right.coefficient) /
(left.coefficient + right.coefficient);
result.sample_data[i].x = uv.x;
result.sample_data[i].y = uv.y;
j += 2;
}
}
Expand Down
11 changes: 10 additions & 1 deletion impeller/entity/contents/filters/gaussian_blur_filter_contents.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,28 @@
#include "impeller/entity/contents/content_context.h"
#include "impeller/entity/contents/filters/filter_contents.h"
#include "impeller/entity/geometry/geometry.h"
#include "impeller/geometry/color.h"

namespace impeller {

// Comes from gaussian.frag.
static constexpr int32_t kGaussianBlurMaxKernelSize = 50;

static_assert(sizeof(GaussianBlurPipeline::FragmentShader::KernelSamples) ==
sizeof(Vector4) * kGaussianBlurMaxKernelSize + sizeof(Vector4));

/// Inputs consumed by GenerateBlurInfo() to build the kernel for one blur
/// pass.
struct BlurParameters {
// Per-tap UV step; GenerateBlurInfo multiplies it by each tap's integer
// position to obtain that tap's uv_offset.
Point blur_uv_offset;
// Sigma of the gaussian used to compute each tap's coefficient.
Scalar blur_sigma;
// Subtracted from each tap's position when the taps are laid out.
// NOTE(review): presumably expressed in texels - confirm against callers.
int blur_radius;
// Distance between the positions of consecutive taps.
int step_size;
};

/// A single tap of the blur kernel as produced on the CPU side.
///
/// LerpHackKernelSamples copies these fields into the x/y (uv_offset) and z
/// (coefficient) components of the Vector4 entries uploaded to the shader.
struct KernelSample {
// Offset added to the texture coordinates when this tap is sampled.
Vector2 uv_offset;
// Weight applied to the color sampled at this tap.
float coefficient;
};
/// A larger mirror of GaussianBlurPipeline::FragmentShader::KernelSamples.
///
/// This is a mirror of GaussianBlurPipeline::FragmentShader::KernelSamples that
Expand All @@ -30,7 +39,7 @@ struct BlurParameters {
struct KernelSamples {
static constexpr int kMaxKernelSize = kGaussianBlurMaxKernelSize * 2;
int sample_count;
GaussianBlurPipeline::FragmentShader::KernelSample samples[kMaxKernelSize];
KernelSample samples[kMaxKernelSize];
};

KernelSamples GenerateBlurInfo(BlurParameters parameters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "impeller/entity/contents/filters/gaussian_blur_filter_contents.h"
#include "impeller/entity/contents/texture_contents.h"
#include "impeller/entity/entity_playground.h"
#include "impeller/geometry/color.h"
#include "impeller/geometry/geometry_asserts.h"
#include "impeller/renderer/testing/mocks.h"

Expand Down Expand Up @@ -51,6 +52,14 @@ fml::StatusOr<float> LowerBoundNewtonianMethod(
return x;
}

// Extracts the blur coefficient from a packed kernel sample, where it is
// stored in the z component.
Scalar GetCoefficient(const Vector4& vec) {
  const Scalar coefficient = vec.z;
  return coefficient;
}

// Extracts the UV offset from a packed kernel sample, where it is stored in
// the x and y components.
Vector2 GetUVOffset(const Vector4& vec) {
  const Vector2 uv_offset = vec.xy();
  return uv_offset;
}

fml::StatusOr<Scalar> CalculateSigmaForBlurRadius(
Scalar radius,
const Matrix& effect_transform) {
Expand Down Expand Up @@ -508,27 +517,24 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
},
};

GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples =
GaussianBlurPipeline::FragmentShader::KernelSamples blur_info =
LerpHackKernelSamples(kernel_samples);
EXPECT_EQ(fast_kernel_samples.sample_count, 3);
EXPECT_EQ(blur_info.sample_count, 3);

GaussianBlurPipeline::FragmentShader::KernelSample* samples =
kernel_samples.samples;
GaussianBlurPipeline::FragmentShader::KernelSample* fast_samples =
fast_kernel_samples.samples;
KernelSample* samples = kernel_samples.samples;

//////////////////////////////////////////////////////////////////////////////
// Check output kernel.

EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333);
EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3);
EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[0]),
Point(-1.3333333, 0));
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[0]), 0.3);

EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[1]), Point(0, 0));
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[1]), 0.4);

EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[2]), Point(1.333333, 0));
EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[2]), 0.3);

//////////////////////////////////////////////////////////////////////////////
// Check output of fast kernel versus original kernel.
Expand All @@ -549,11 +555,11 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
}
};
Scalar fast_output =
/*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) *
fast_samples[0].coefficient +
/*2nd*/ data[2] * fast_samples[1].coefficient +
/*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) *
fast_samples[2].coefficient;
/*1st*/ lerp(GetUVOffset(blur_info.sample_data[0]), data[0], data[1]) *
GetCoefficient(blur_info.sample_data[0]) +
/*2nd*/ data[2] * GetCoefficient(blur_info.sample_data[1]) +
/*3rd*/ lerp(GetUVOffset(blur_info.sample_data[2]), data[3], data[4]) *
GetCoefficient(blur_info.sample_data[2]);

EXPECT_NEAR(original_output, fast_output, 0.01);
}
Expand Down Expand Up @@ -604,9 +610,9 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) {
}

Scalar fast_output = 0.0;
for (int i = 0; i < fast_kernel_samples.sample_count; ++i) {
auto sample = fast_kernel_samples.samples[i];
fast_output += sample.coefficient * sampler(sample.uv_offset);
for (int i = 0; i < fast_kernel_samples.sample_count; i++) {
fast_output += GetCoefficient(fast_kernel_samples.sample_data[i]) *
sampler(GetUVOffset(fast_kernel_samples.sample_data[i]));
}

EXPECT_NEAR(output, fast_output, 0.1);
Expand Down
23 changes: 10 additions & 13 deletions impeller/entity/shaders/filters/gaussian.frag
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,13 @@ uniform f16sampler2D texture_sampler;

layout(constant_id = 0) const float supports_decal = 1.0;

struct KernelSample {
vec2 uv_offset;
float coefficient;
};

uniform KernelSamples {
int sample_count;
KernelSample samples[50];
float sample_count;

// X, Y are uv offset and Z is Coefficient. W is padding.
vec4 sample_data[50];
}
blur_info;
kernel_samples;

f16vec4 Sample(f16sampler2D tex, vec2 coords) {
if (supports_decal == 1.0) {
Expand All @@ -36,11 +33,11 @@ out f16vec4 frag_color;
void main() {
f16vec4 total_color = f16vec4(0.0hf);

for (int i = 0; i < blur_info.sample_count; ++i) {
float16_t coefficient = float16_t(blur_info.samples[i].coefficient);
total_color +=
coefficient * Sample(texture_sampler,
v_texture_coords + blur_info.samples[i].uv_offset);
for (int i = 0; i < int(kernel_samples.sample_count); i++) {
float16_t coefficient = float16_t(kernel_samples.sample_data[i].z);
total_color += coefficient *
Sample(texture_sampler,
v_texture_coords + kernel_samples.sample_data[i].xy);
}

frag_color = total_color;
Expand Down
2 changes: 2 additions & 0 deletions impeller/geometry/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ struct Vector4 {
return *this + (v - *this) * t;
}

// Returns the x and y components of this vector as a Vector2; z and w are
// dropped.
constexpr Vector2 xy() const { return Vector2(x, y); }

std::string ToString() const;
};

Expand Down
16 changes: 8 additions & 8 deletions impeller/renderer/backend/gles/buffer_bindings_gles.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,20 +279,20 @@ bool BufferBindingsGLES::BindUniformBuffer(const ProcTableGLES& gl,
auto* buffer_data =
reinterpret_cast<const GLfloat*>(buffer_ptr + member.offset);

std::vector<uint8_t> array_element_buffer;
if (element_count > 1) {
// When binding uniform arrays, the elements must be contiguous. Copy
// the uniforms to a temp buffer to eliminate any padding needed by the
// other backends.
array_element_buffer.resize(member.size * element_count);
// When binding uniform arrays, the elements must be contiguous. Copy
// the uniforms to a temp buffer to eliminate any padding needed by the
// other backends if the array elements have padding.
std::vector<uint8_t> array_element_buffer_;
if (element_count > 1 && element_stride != member.size) {
array_element_buffer_.resize(member.size * element_count);
for (size_t element_i = 0; element_i < element_count; element_i++) {
std::memcpy(array_element_buffer.data() + element_i * member.size,
std::memcpy(array_element_buffer_.data() + element_i * member.size,
reinterpret_cast<const char*>(buffer_data) +
element_i * element_stride,
member.size);
}
buffer_data =
reinterpret_cast<const GLfloat*>(array_element_buffer.data());
reinterpret_cast<const GLfloat*>(array_element_buffer_.data());
}

switch (member.type) {
Expand Down
13 changes: 7 additions & 6 deletions impeller/tools/malioc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2581,9 +2581,9 @@
"arith_cvt"
],
"shortest_path_cycles": [
0.109375,
0.09375,
0.0,
0.109375,
0.09375,
0.0,
0.0,
0.0,
Expand All @@ -2593,11 +2593,11 @@
"load_store"
],
"total_cycles": [
0.3125,
0.265625,
0.09375,
0.3125,
0.265625,
0.0,
2.0,
1.0,
0.25,
0.25
]
Expand Down Expand Up @@ -2641,10 +2641,11 @@
0.0
],
"total_bound_pipelines": [
"arithmetic",
"load_store"
],
"total_cycles": [
1.6666666269302368,
2.0,
2.0,
1.0
]
Expand Down