Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 253076d

Browse files
authored
Refactor Cubic/Quad tests to make sure all threads reach barrier() (#40506)
[Impeller] Refactor Cubic/Quad tests to make sure all threads reach barrier()
1 parent 004d841 commit 253076d

File tree

3 files changed

+124
-87
lines changed

3 files changed

+124
-87
lines changed

impeller/compiler/shader_lib/impeller/path.glsl

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,18 @@ struct CubicData {
2828
vec2 p2;
2929
};
3030

31-
struct Position {
32-
uint index;
33-
uint count;
31+
/// Per-quadratic values built by DecomposeQuad() and read back by
/// GenerateLineFromQuad().
struct QuadDecomposition {
32+
// ApproximateParabolaIntegral(x0), as computed in DecomposeQuad().
float a0;
33+
// ApproximateParabolaIntegral(x2), as computed in DecomposeQuad().
float a2;
34+
// ApproximateParabolaIntegral(a0); subtracted before scaling by u_scale.
float u0;
35+
// 1 / (u2 - u0), where u2 = ApproximateParabolaIntegral(a2).
float u_scale;
36+
// Step count produced by DecomposeQuad(); always at least 2 there.
uint line_count;
37+
// 1 / line_count.
float steps;
38+
};
39+
40+
/// Identifies one path component by where its points start and how many
/// points it has. Which buffer `index` refers to is not shown here.
struct PathComponent {
41+
uint index; // Location in buffer
42+
uint count; // Number of points. 4 = cubic, 3 = quad, 2 = line.
3443
};
3544

3645
/// Solve for point on a quadratic Bezier curve defined by starting point `p1`,
@@ -65,4 +74,92 @@ float Cross(vec2 p1, vec2 p2) {
6574
return p1.x * p2.y - p1.y * p2.x;
6675
}
6776

77+
/// Builds the quadratic that stands in for one slice of `cubic`.
///
/// The parameter range is cut into `quad_count` equal slices and slice number
/// `index` (zero-based) is used, i.e. t in [index / quad_count,
/// (index + 1) / quad_count]. `quad_count` is a float, so both divisions are
/// floating-point.
///
/// Returns a QuadData whose end points are the cubic evaluated at the slice
/// boundaries and whose middle point is the average of two control-point
/// estimates, one from each end of the slice.
QuadData GenerateQuadraticFromCubic(CubicData cubic,
78+
uint index,
79+
float quad_count) {
80+
float t0 = index / quad_count;
81+
float t1 = (index + 1) / quad_count;
82+
83+
// calculate the subsegment
84+
vec2 sub_p1 = CubicSolve(cubic, t0);
85+
vec2 sub_p2 = CubicSolve(cubic, t1);
// Three times the successive control-point differences; presumably the
// cubic's derivative expressed as a quadratic (assumes CubicSolve evaluates a
// standard cubic Bezier -- confirm).
86+
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
87+
3.0 * (cubic.cp2 - cubic.cp1), //
88+
3.0 * (cubic.p2 - cubic.cp2));
89+
float sub_scale = (t1 - t0) * (1.0 / 3.0);
// Control points of the slice: step from each end point along `quad`
// evaluated at that end, scaled by a third of the slice width.
90+
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
91+
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);
92+
93+
// Each value is twice a candidate quadratic control point (hence "x2");
// dividing their sum by 4 below averages the two candidates.
vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
94+
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;
95+
96+
return QuadData(sub_p1, //
97+
((quad_p1x2 + quad_p2x2) / 4.0), //
98+
sub_p2);
99+
}
100+
101+
/// Returns how many quadratics `cubic` should be split into for the requested
/// `accuracy`: the sixth root of (err / max_hypot2), rounded up and never less
/// than 1.
///
/// NOTE(review): there is no guard for accuracy == 0, which makes max_hypot2
/// zero and the division below non-finite -- confirm callers never pass it.
uint EstimateQuadraticCount(CubicData cubic, float accuracy) {
102+
// The maximum error, as a vector from the cubic to the best approximating
103+
// quadratic, is proportional to the third derivative, which is constant
104+
// across the segment. Thus, the error scales down as the third power of
105+
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
106+
//
107+
// This is an overestimate of the error because only the component
108+
// perpendicular to the first derivative is important. But the simplicity is
109+
// appealing.
110+
111+
// This magic number is the square of 36 / sqrt(3).
112+
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
113+
float max_hypot2 = 432.0 * accuracy * accuracy;
114+
115+
vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
116+
// Squared length of err_v; compared against the squared bound max_hypot2.
float err = dot(err_v, err_v);
117+
return uint(max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0))));
118+
}
119+
120+
/// Precomputes the values GenerateLineFromQuad() needs to place points along
/// `quad`.
///
/// `tolerance` is used only through its square root.
///
/// Returns QuadDecomposition(a0, a2, u0, u_scale, line_count, steps).
/// line_count is always at least 2, because 1 is added to a value that is
/// clamped to at least 1; steps is 1 / line_count.
QuadDecomposition DecomposeQuad(QuadData quad, float tolerance) {
121+
float sqrt_tolerance = sqrt(tolerance);
122+
123+
vec2 d01 = quad.cp - quad.p1;
124+
vec2 d12 = quad.p2 - quad.cp;
125+
vec2 dd = d01 - d12;
126+
float c = Cross(quad.p2 - quad.p1, dd);
127+
float x0 = dot(d01, dd) * 1. / c;
128+
float x2 = dot(d12, dd) * 1. / c;
129+
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));
130+
131+
float a0 = ApproximateParabolaIntegral(x0);
132+
float a2 = ApproximateParabolaIntegral(x2);
133+
// val stays 0 when scale is not finite, which yields the minimum line_count.
float val = 0.f;
134+
if (isfinite(scale)) {
135+
float da = abs(a2 - a0);
136+
float sqrt_scale = sqrt(scale);
137+
// True when x0 and x2 fall on the same side of zero (zero counts as
// non-negative).
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
138+
val = da * sqrt_scale;
139+
} else {
140+
// cusp case
141+
float xmin = sqrt_tolerance / sqrt_scale;
142+
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
143+
}
144+
}
145+
// NOTE(review): u0/u2 apply ApproximateParabolaIntegral to values that are
// already its outputs (a0/a2). The published quadratic-flattening algorithm
// this resembles uses the inverse of the integral approximation at this
// step -- confirm this is intentional.
float u0 = ApproximateParabolaIntegral(a0);
146+
float u2 = ApproximateParabolaIntegral(a2);
147+
float u_scale = 1. / (u2 - u0);
148+
149+
uint line_count = uint(max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.);
150+
float steps = 1. / line_count;
151+
152+
return QuadDecomposition(a0, a2, u0, u_scale, line_count, steps);
153+
}
154+
155+
/// Returns the point on `quad` for step number `index` of `decomposition`.
///
/// `decomposition` is expected to be the result of DecomposeQuad() for the
/// same `quad`; this function only reads its a0, a2, u0, u_scale and steps
/// fields.
vec2 GenerateLineFromQuad(QuadData quad,
156+
uint index,
157+
QuadDecomposition decomposition) {
158+
// Fraction of the way through the steps.
float u = index * decomposition.steps;
159+
// Linear interpolation between a0 and a2 at that fraction.
float a = decomposition.a0 + (decomposition.a2 - decomposition.a0) * u;
160+
// Offset by u0 and scale by u_scale to obtain the curve parameter.
float t = (ApproximateParabolaIntegral(a) - decomposition.u0) *
161+
decomposition.u_scale;
162+
return QuadraticSolve(quad, t);
163+
}
164+
68165
#endif

impeller/fixtures/cubic_to_quads.comp

Lines changed: 12 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -30,54 +30,24 @@ shared uint count_sums[512];
3030

3131
// Entry point. Each invocation whose ident is below cubics.count estimates
// the quadratic count for its cubic, records it in quad_counts, and then
// writes that many quadratics into quads.data at an offset derived from
// count_sums.
void main() {
3232
uint ident = gl_GlobalInvocationID.x;
33-
if (ident >= cubics.count) {
34-
return;
33+
// quad_count stays 0 for invocations at or past cubics.count, so they skip
// the per-cubic work but still fall through to barrier() before returning.
CubicData cubic;
34+
uint quad_count = 0;
35+
if (ident < cubics.count) {
36+
cubic = cubics.data[ident];
37+
quad_count = EstimateQuadraticCount(cubic, config.accuracy);
38+
quad_counts[ident] = quad_count;
3539
}
3640

37-
// The maximum error, as a vector from the cubic to the best approximating
38-
// quadratic, is proportional to the third derivative, which is constant
39-
// across the segment. Thus, the error scales down as the third power of
40-
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
41-
//
42-
// This is an overestimate of the error because only the component
43-
// perpendicular to the first derivative is important. But the simplicity is
44-
// appealing.
45-
46-
// This magic number is the square of 36 / sqrt(3).
47-
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
48-
float max_hypot2 = 432.0 * config.accuracy * config.accuracy;
49-
50-
CubicData cubic = cubics.data[ident];
51-
52-
vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
53-
float err = dot(err_v, err_v);
54-
float quad_count = max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0)));
55-
56-
quad_counts[ident] = uint(quad_count);
57-
5841
barrier();
42+
// Invocations with nothing to emit leave only after the barrier above.
if (quad_count == 0) {
43+
return;
44+
}
45+
5946
count_sums[ident] = subgroupInclusiveAdd(quad_counts[ident]);
6047

6148
// NOTE(review): count_sums[cubics.count - 1] is written by a different
// invocation and no barrier separates that write from this read -- confirm
// the ordering is guaranteed.
quads.count = count_sums[cubics.count - 1];
6249
for (uint i = 0; i < quad_count; i++) {
63-
float t0 = i / quad_count;
64-
float t1 = (i + 1) / quad_count;
65-
66-
// calculate the subsegment
67-
vec2 sub_p1 = CubicSolve(cubic, t0);
68-
vec2 sub_p2 = CubicSolve(cubic, t1);
69-
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
70-
3.0 * (cubic.cp2 - cubic.cp1), //
71-
3.0 * (cubic.p2 - cubic.cp2));
72-
float sub_scale = (t1 - t0) * (1.0 / 3.0);
73-
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
74-
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);
75-
76-
vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
77-
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;
78-
uint offset = count_sums[ident] - uint(quad_count);
79-
quads.data[offset + i] = QuadData(sub_p1, //
80-
((quad_p1x2 + quad_p2x2) / 4.0), //
81-
sub_p2);
50+
// The inclusive sum minus this invocation's own count gives the index of
// its first output slot.
uint offset = count_sums[ident] - quad_count;
51+
quads.data[offset + i] = GenerateQuadraticFromCubic(cubic, i, quad_count);
8252
}
8353
}

impeller/fixtures/quad_polyline.comp

Lines changed: 12 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -30,60 +30,30 @@ shared uint count_sums[512];
3030

3131
// Entry point. Each invocation whose ident is below quads.count decomposes
// its quadratic, records the resulting line_count in point_counts, and then
// writes the generated points into polyline.data at an offset derived from
// count_sums.
void main() {
3232
uint ident = gl_GlobalInvocationID.x;
33-
if (ident >= quads.count) {
34-
return;
33+
QuadData quad;
34+
// NOTE(review): `decomposition` has no initializer and is assigned only when
// ident < quads.count, yet its line_count is tested after the barrier by
// every invocation. DecomposeQuad() itself never yields a line_count of 0,
// so that test appears to rely on the unassigned value being 0 -- confirm,
// or initialize line_count to 0 explicitly.
QuadDecomposition decomposition;
35+
if (ident < quads.count) {
36+
quad = quads.data[ident];
37+
decomposition = DecomposeQuad(quad, config.tolerance);
38+
point_counts[ident] = uint(decomposition.line_count);
3539
}
3640

37-
QuadData quad = quads.data[ident];
38-
float sqrt_tolerance = sqrt(config.tolerance);
39-
40-
vec2 d01 = quad.cp - quad.p1;
41-
vec2 d12 = quad.p2 - quad.cp;
42-
vec2 dd = d01 - d12;
43-
float c = Cross(quad.p2 - quad.p1, dd);
44-
float x0 = dot(d01, dd) * 1. / c;
45-
float x2 = dot(d12, dd) * 1. / c;
46-
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));
41+
barrier();
4742

48-
float a0 = ApproximateParabolaIntegral(x0);
49-
float a2 = ApproximateParabolaIntegral(x2);
50-
float val = 0.f;
51-
if (isfinite(scale)) {
52-
float da = abs(a2 - a0);
53-
float sqrt_scale = sqrt(scale);
54-
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
55-
val = da * sqrt_scale;
56-
} else {
57-
// cusp case
58-
float xmin = sqrt_tolerance / sqrt_scale;
59-
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
60-
}
43+
// Intended to let invocations without work leave only after the barrier.
if (decomposition.line_count == 0) {
44+
return;
6145
}
62-
float u0 = ApproximateParabolaIntegral(a0);
63-
float u2 = ApproximateParabolaIntegral(a2);
64-
float u_scale = 1. / (u2 - u0);
65-
66-
float line_count = max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.;
67-
float steps = 1. / line_count;
68-
69-
point_counts[ident] = uint(line_count);
70-
71-
barrier();
7246
count_sums[ident] = subgroupInclusiveAdd(point_counts[ident]);
73-
barrier();
7447

7548
// NOTE(review): count_sums[quads.count - 1] is written by a different
// invocation and no barrier separates that write from this read -- confirm
// the ordering is guaranteed.
polyline.count = count_sums[quads.count - 1] + 1;
7649
polyline.data[0] = quads.data[0].p1;
7750

7851
// In theory this could be unrolled into a separate shader, but in practice
7952
// line_count is usually pretty low and we currently lack benchmark data to show
8053
// how much it would even help.
81-
for (uint i = 1; i < line_count; i += 1) {
82-
float u = i * steps;
83-
float a = a0 + (a2 - a0) * u;
84-
float t = (ApproximateParabolaIntegral(a) - u0) * u_scale;
85-
uint offset = count_sums[ident] - uint(line_count);
86-
polyline.data[offset + i] = QuadraticSolve(quad, t);
54+
// Starts at 1: step 0 is not generated here.
for (uint i = 1; i < decomposition.line_count; i += 1) {
55+
// The inclusive sum minus this invocation's own count gives the base index
// for its points.
uint offset = count_sums[ident] - uint(decomposition.line_count);
56+
polyline.data[offset + i] = GenerateLineFromQuad(quad, i, decomposition);
8757
}
8858
// The slot at the inclusive sum receives the quadratic's end point.
polyline.data[count_sums[ident]] = quad.p2;
8959
}

0 commit comments

Comments
 (0)