[Impeller] Use 32 bit Gaussian function in the 2-pass blur (flutter#42069)

bdero · bdero · commit 5fa79221192d · 2023-05-22T12:17:15.000-07:00
Resolves flutter/flutter#126487. Increases the 2-pass blur quality and distribution limit. It turns out sigma was breaking down beyond ~255 (moderately high, but not unreasonable for users to rely on). The Gaussian function computes sigma^2, and half precision floats only have 5 bit exponents and overflow for numbers above 65k. Coincidentally, this also returns us to a state where we look a lot more like Skia's blurs for larger blur sigmas. Medium blurs have much less visual banding (although it's still there if you look closely). I suspect half precision isn't really enough for tracking the integral. Unfortunately, this means our SIMD pipelining isn't going to be as good. I'll be interested in watching the blur-driven benchmarks for the perf hit. (cherry picked from commit 87a03e1)
diff --git a/impeller/compiler/shader_lib/impeller/gaussian.glsl b/impeller/compiler/shader_lib/impeller/gaussian.glsl
@@ -9,7 +9,13 @@
 #include <impeller/types.glsl>
 
 /// Gaussian distribution function.
-float16_t IPGaussian(float16_t x, float16_t sigma) {
+float IPGaussian(float x, float sigma) {
+  float variance = sigma * sigma;  // fp16 overflowed here for sigma > ~255.
+  return exp(-0.5f * x * x / variance) / (kSqrtTwoPi * sigma);
+}
+
+/// Half precision Gaussian function; sigma^2 overflows for sigma > ~255.
+float16_t IPHalfGaussian(float16_t x, float16_t sigma) {
   float16_t variance = sigma * sigma;
   return exp(-0.5hf * x * x / variance) / (float16_t(kSqrtTwoPi) * sigma);
 }
diff --git a/impeller/entity/entity_unittests.cc b/impeller/entity/entity_unittests.cc
@@ -997,7 +997,8 @@ TEST_P(EntityTest, GaussianBlurFilter) {
     static Color input_color = Color::Black();
     static int selected_blur_type = 0;
     static int selected_pass_variation = 0;
-    static float blur_amount[2] = {10, 10};
+    static float blur_amount_coarse[2] = {0, 0};
+    static float blur_amount_fine[2] = {10, 10};
     static int selected_blur_style = 0;
     static int selected_tile_mode = 3;
     static Color cover_color(1, 0, 0, 0.2);
@@ -1027,7 +1028,8 @@ TEST_P(EntityTest, GaussianBlurFilter) {
                      pass_variation_names,
                      sizeof(pass_variation_names) / sizeof(char*));
       }
-      ImGui::SliderFloat2("Sigma", blur_amount, 0, 10);
+      ImGui::SliderFloat2("Sigma (coarse)", blur_amount_coarse, 0, 1000);
+      ImGui::SliderFloat2("Sigma (fine)", blur_amount_fine, 0, 10);
       ImGui::Combo("Blur style", &selected_blur_style, blur_style_names,
                    sizeof(blur_style_names) / sizeof(char*));
       ImGui::Combo("Tile mode", &selected_tile_mode, tile_mode_names,
@@ -1044,6 +1046,9 @@ TEST_P(EntityTest, GaussianBlurFilter) {
     }
     ImGui::End();
 
+    auto blur_sigma_x = Sigma{blur_amount_coarse[0] + blur_amount_fine[0]};
+    auto blur_sigma_y = Sigma{blur_amount_coarse[1] + blur_amount_fine[1]};
+
     std::shared_ptr<Contents> input;
     Size input_size;
 
@@ -1071,18 +1076,17 @@ TEST_P(EntityTest, GaussianBlurFilter) {
     std::shared_ptr<FilterContents> blur;
     if (selected_pass_variation == 0) {
       blur = FilterContents::MakeGaussianBlur(
-          FilterInput::Make(input), Sigma{blur_amount[0]},
-          Sigma{blur_amount[1]}, blur_styles[selected_blur_style],
-          tile_modes[selected_tile_mode]);
+          FilterInput::Make(input), blur_sigma_x, blur_sigma_y,
+          blur_styles[selected_blur_style], tile_modes[selected_tile_mode]);
     } else {
-      Vector2 blur_vector(blur_amount[0], blur_amount[1]);
+      Vector2 blur_vector(blur_sigma_x.sigma, blur_sigma_y.sigma);
       blur = FilterContents::MakeDirectionalGaussianBlur(
           FilterInput::Make(input), Sigma{blur_vector.GetLength()},
           blur_vector.Normalize());
     }
 
     auto mask_blur = FilterContents::MakeBorderMaskBlur(
-        FilterInput::Make(input), Sigma{blur_amount[0]}, Sigma{blur_amount[1]},
+        FilterInput::Make(input), blur_sigma_x, blur_sigma_y,
         blur_styles[selected_blur_style]);
 
     auto ctm = Matrix::MakeScale(GetContentScale()) *
diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl
@@ -27,7 +27,7 @@ uniform BlurInfo {
   // host-side, but both are useful controls here. Sigma (pixels per standard
   // deviation) is used to define the gaussian function itself, whereas the
   // radius is used to limit how much of the function is integrated.
-  float16_t blur_sigma;
+  float blur_sigma;
   float16_t blur_radius;
 }
 blur_info;
@@ -62,7 +62,11 @@ void main() {
 
   for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius;
        i += 2.0hf) {
-    float16_t gaussian = IPGaussian(i, blur_info.blur_sigma);
+    // Use the 32 bit Gaussian function because the 16 bit variation results in
+    // quality loss/visible banding. Also, the 16 bit variation breaks down at
+    // a moderately high (but still reasonable) blur sigma of >255: computing
+    // sigma^2 overflows because the half float exponent only has 5 bits.
+    float16_t gaussian = float16_t(IPGaussian(float(i), blur_info.blur_sigma));
     gaussian_integral += gaussian;
     total_color +=
         gaussian * Sample(texture_sampler,  // sampler
diff --git a/impeller/entity/shaders/rrect_blur.frag b/impeller/entity/shaders/rrect_blur.frag
@@ -62,7 +62,7 @@ float16_t RRectShadow(f16vec2 sample_position, f16vec2 half_size) {
     float16_t y = begin_y + interval * (float16_t(sample_i) + 0.5hf);
     result += RRectShadowX(f16vec2(sample_position.x, sample_position.y - y),
                            half_size) *
-              IPGaussian(y, frag_info.blur_sigma) * interval;
+              IPHalfGaussian(y, frag_info.blur_sigma) * interval;
   }
 
   return result;

Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ float16_t RRectShadow(f16vec2 sample_position, f16vec2 half_size) {`
`62`	`62`	`float16_t y = begin_y + interval * (float16_t(sample_i) + 0.5hf);`
`63`	`63`	`result += RRectShadowX(f16vec2(sample_position.x, sample_position.y - y),`
`64`	`64`	`half_size) *`
`65`		`- IPGaussian(y, frag_info.blur_sigma) * interval;`
	`65`	`+ IPHalfGaussian(y, frag_info.blur_sigma) * interval;`
`66`	`66`	`}`
`67`	`67`
`68`	`68`	`return result;`