Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

[Impeller] Refactor Cubic/Quad tests to make sure all threads reach barrier() #40506

Merged
merged 1 commit into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 100 additions & 3 deletions impeller/compiler/shader_lib/impeller/path.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,18 @@ struct CubicData {
vec2 p2;
};

struct Position {
uint index;
uint count;
struct QuadDecomposition {
float a0;
float a2;
float u0;
float u_scale;
uint line_count;
float steps;
};

struct PathComponent {
uint index; // Location in buffer
uint count; // Number of points. 4 = cubic, 3 = quad, 2 = line.
};

/// Solve for point on a quadratic Bezier curve defined by starting point `p1`,
Expand Down Expand Up @@ -65,4 +74,92 @@ float Cross(vec2 p1, vec2 p2) {
return p1.x * p2.y - p1.y * p2.x;
}

QuadData GenerateQuadraticFromCubic(CubicData cubic,
uint index,
float quad_count) {
float t0 = index / quad_count;
float t1 = (index + 1) / quad_count;

// calculate the subsegment
vec2 sub_p1 = CubicSolve(cubic, t0);
vec2 sub_p2 = CubicSolve(cubic, t1);
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
3.0 * (cubic.cp2 - cubic.cp1), //
3.0 * (cubic.p2 - cubic.cp2));
float sub_scale = (t1 - t0) * (1.0 / 3.0);
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);

vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;

return QuadData(sub_p1, //
((quad_p1x2 + quad_p2x2) / 4.0), //
sub_p2);
}

uint EstimateQuadraticCount(CubicData cubic, float accuracy) {
// The maximum error, as a vector from the cubic to the best approximating
// quadratic, is proportional to the third derivative, which is constant
// across the segment. Thus, the error scales down as the third power of
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
//
// This is an overestimate of the error because only the component
// perpendicular to the first derivative is important. But the simplicity is
// appealing.

// This magic number is the square of 36 / sqrt(3).
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
float max_hypot2 = 432.0 * accuracy * accuracy;

vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
float err = dot(err_v, err_v);
return uint(max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0))));
}

QuadDecomposition DecomposeQuad(QuadData quad, float tolerance) {
float sqrt_tolerance = sqrt(tolerance);

vec2 d01 = quad.cp - quad.p1;
vec2 d12 = quad.p2 - quad.cp;
vec2 dd = d01 - d12;
float c = Cross(quad.p2 - quad.p1, dd);
float x0 = dot(d01, dd) * 1. / c;
float x2 = dot(d12, dd) * 1. / c;
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));

float a0 = ApproximateParabolaIntegral(x0);
float a2 = ApproximateParabolaIntegral(x2);
float val = 0.f;
if (isfinite(scale)) {
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
val = da * sqrt_scale;
} else {
// cusp case
float xmin = sqrt_tolerance / sqrt_scale;
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
}
}
float u0 = ApproximateParabolaIntegral(a0);
float u2 = ApproximateParabolaIntegral(a2);
float u_scale = 1. / (u2 - u0);

uint line_count = uint(max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.);
float steps = 1. / line_count;

return QuadDecomposition(a0, a2, u0, u_scale, line_count, steps);
}

vec2 GenerateLineFromQuad(QuadData quad,
uint index,
QuadDecomposition decomposition) {
float u = index * decomposition.steps;
float a = decomposition.a0 + (decomposition.a2 - decomposition.a0) * u;
float t = (ApproximateParabolaIntegral(a) - decomposition.u0) *
decomposition.u_scale;
return QuadraticSolve(quad, t);
}

#endif
54 changes: 12 additions & 42 deletions impeller/fixtures/cubic_to_quads.comp
Original file line number Diff line number Diff line change
Expand Up @@ -30,54 +30,24 @@ shared uint count_sums[512];

void main() {
uint ident = gl_GlobalInvocationID.x;
if (ident >= cubics.count) {
return;
CubicData cubic;
uint quad_count = 0;
if (ident < cubics.count) {
cubic = cubics.data[ident];
quad_count = EstimateQuadraticCount(cubic, config.accuracy);
quad_counts[ident] = quad_count;
}

// The maximum error, as a vector from the cubic to the best approximating
// quadratic, is proportional to the third derivative, which is constant
// across the segment. Thus, the error scales down as the third power of
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
//
// This is an overestimate of the error because only the component
// perpendicular to the first derivative is important. But the simplicity is
// appealing.

// This magic number is the square of 36 / sqrt(3).
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
float max_hypot2 = 432.0 * config.accuracy * config.accuracy;

CubicData cubic = cubics.data[ident];

vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
float err = dot(err_v, err_v);
float quad_count = max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0)));

quad_counts[ident] = uint(quad_count);

barrier();
if (quad_count == 0) {
return;
}

count_sums[ident] = subgroupInclusiveAdd(quad_counts[ident]);

quads.count = count_sums[cubics.count - 1];
for (uint i = 0; i < quad_count; i++) {
float t0 = i / quad_count;
float t1 = (i + 1) / quad_count;

// calculate the subsegment
vec2 sub_p1 = CubicSolve(cubic, t0);
vec2 sub_p2 = CubicSolve(cubic, t1);
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
3.0 * (cubic.cp2 - cubic.cp1), //
3.0 * (cubic.p2 - cubic.cp2));
float sub_scale = (t1 - t0) * (1.0 / 3.0);
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);

vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;
uint offset = count_sums[ident] - uint(quad_count);
quads.data[offset + i] = QuadData(sub_p1, //
((quad_p1x2 + quad_p2x2) / 4.0), //
sub_p2);
uint offset = count_sums[ident] - quad_count;
quads.data[offset + i] = GenerateQuadraticFromCubic(cubic, i, quad_count);
}
}
54 changes: 12 additions & 42 deletions impeller/fixtures/quad_polyline.comp
Original file line number Diff line number Diff line change
Expand Up @@ -30,60 +30,30 @@ shared uint count_sums[512];

void main() {
uint ident = gl_GlobalInvocationID.x;
if (ident >= quads.count) {
return;
QuadData quad;
QuadDecomposition decomposition;
if (ident < quads.count) {
quad = quads.data[ident];
decomposition = DecomposeQuad(quad, config.tolerance);
point_counts[ident] = uint(decomposition.line_count);
}

QuadData quad = quads.data[ident];
float sqrt_tolerance = sqrt(config.tolerance);

vec2 d01 = quad.cp - quad.p1;
vec2 d12 = quad.p2 - quad.cp;
vec2 dd = d01 - d12;
float c = Cross(quad.p2 - quad.p1, dd);
float x0 = dot(d01, dd) * 1. / c;
float x2 = dot(d12, dd) * 1. / c;
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));
barrier();

float a0 = ApproximateParabolaIntegral(x0);
float a2 = ApproximateParabolaIntegral(x2);
float val = 0.f;
if (isfinite(scale)) {
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
val = da * sqrt_scale;
} else {
// cusp case
float xmin = sqrt_tolerance / sqrt_scale;
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
}
if (decomposition.line_count == 0) {
return;
}
float u0 = ApproximateParabolaIntegral(a0);
float u2 = ApproximateParabolaIntegral(a2);
float u_scale = 1. / (u2 - u0);

float line_count = max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.;
float steps = 1. / line_count;

point_counts[ident] = uint(line_count);

barrier();
count_sums[ident] = subgroupInclusiveAdd(point_counts[ident]);
barrier();

polyline.count = count_sums[quads.count - 1] + 1;
polyline.data[0] = quads.data[0].p1;

// In theory this could be unrolled into a separate shader, but in practice
// line_count usually pretty low and currently lack benchmark data to show
// how much it would even help.
for (uint i = 1; i < line_count; i += 1) {
float u = i * steps;
float a = a0 + (a2 - a0) * u;
float t = (ApproximateParabolaIntegral(a) - u0) * u_scale;
uint offset = count_sums[ident] - uint(line_count);
polyline.data[offset + i] = QuadraticSolve(quad, t);
for (uint i = 1; i < decomposition.line_count; i += 1) {
uint offset = count_sums[ident] - uint(decomposition.line_count);
polyline.data[offset + i] = GenerateLineFromQuad(quad, i, decomposition);
}
polyline.data[count_sums[ident]] = quad.p2;
}