-
-
Notifications
You must be signed in to change notification settings - Fork 886
Vectorize TrimTransparentPixels in GifEncoderCore #2500
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -412,23 +412,142 @@ private static Buffer2DRegion<byte> TrimTransparentPixels(Buffer2D<byte> buffer, | |
| int bottom = int.MaxValue; | ||
| int left = int.MaxValue; | ||
| int right = int.MinValue; | ||
|
|
||
| // Run through th buffer in a single pass. Use variables to track the min/max values. | ||
| int minY = -1; | ||
| bool isTransparentRow = true; | ||
|
|
||
| // Run through the buffer in a single pass. Use variables to track the min/max values. | ||
| for (int y = 0; y < buffer.Height; y++) | ||
| { | ||
| isTransparentRow = true; | ||
| Span<byte> rowSpan = buffer.DangerousGetRowSpan(y); | ||
| ref byte rowPtr = ref MemoryMarshal.GetReference(rowSpan); | ||
| nint rowLength = (nint)(uint)rowSpan.Length; | ||
| nint x = 0; | ||
|
|
||
| #if NET7_0_OR_GREATER | ||
| if (Vector128.IsHardwareAccelerated && rowLength >= Vector128<byte>.Count) | ||
| { | ||
| Vector256<byte> trimmableVec256 = Vector256.Create(trimmableIndex); | ||
|
|
||
| if (Vector256.IsHardwareAccelerated && rowLength >= Vector256<byte>.Count) | ||
| { | ||
| do | ||
| { | ||
| Vector256<byte> vec = Vector256.LoadUnsafe(ref rowPtr, (nuint)x); | ||
| Vector256<byte> notEquals = ~Vector256.Equals(vec, trimmableVec256); | ||
|
|
||
| if (notEquals != Vector256<byte>.Zero) | ||
| { | ||
| isTransparentRow = false; | ||
| uint mask = notEquals.ExtractMostSignificantBits(); | ||
| nint start = x + (nint)uint.TrailingZeroCount(mask); | ||
| nint end = (nint)uint.LeadingZeroCount(mask); | ||
|
|
||
| // end is from the end, but we need the index from the beginning | ||
| end = x + Vector256<byte>.Count - 1 - end; | ||
|
|
||
| left = Math.Min(left, (int)start); | ||
| right = Math.Max(right, (int)end); | ||
| } | ||
|
|
||
| x += Vector256<byte>.Count; | ||
| } | ||
| while (x <= rowLength - Vector256<byte>.Count); | ||
| } | ||
|
|
||
| Vector128<byte> trimmableVec = Vector256.IsHardwareAccelerated | ||
| ? trimmableVec256.GetLower() | ||
| : Vector128.Create(trimmableIndex); | ||
|
|
||
| while (x <= rowLength - Vector128<byte>.Count) | ||
| { | ||
| Vector128<byte> vec = Vector128.LoadUnsafe(ref rowPtr, (nuint)x); | ||
| Vector128<byte> notEquals = ~Vector128.Equals(vec, trimmableVec); | ||
|
|
||
| if (notEquals != Vector128<byte>.Zero) | ||
| { | ||
| isTransparentRow = false; | ||
| uint mask = notEquals.ExtractMostSignificantBits(); | ||
| nint start = x + (nint)uint.TrailingZeroCount(mask); | ||
| nint end = (nint)uint.LeadingZeroCount(mask) - Vector128<byte>.Count; | ||
|
|
||
| // end is from the end, but we need the index from the beginning | ||
| end = x + Vector128<byte>.Count - 1 - end; | ||
|
|
||
| left = Math.Min(left, (int)start); | ||
| right = Math.Max(right, (int)end); | ||
| } | ||
|
|
||
| x += Vector128<byte>.Count; | ||
| } | ||
| } | ||
| #else | ||
| if (Sse41.IsSupported && rowLength >= Vector128<byte>.Count) | ||
| { | ||
| Vector256<byte> trimmableVec256 = Vector256.Create(trimmableIndex); | ||
|
|
||
| if (Avx2.IsSupported && rowLength >= Vector256<byte>.Count) | ||
| { | ||
| do | ||
| { | ||
| Vector256<byte> vec = Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.Add(ref rowPtr, x)); | ||
| Vector256<byte> notEquals = Avx2.CompareEqual(vec, trimmableVec256); | ||
| notEquals = Avx2.Xor(notEquals, Vector256<byte>.AllBitsSet); | ||
|
|
||
| if (!Avx.TestZ(notEquals, notEquals)) | ||
| { | ||
| isTransparentRow = false; | ||
| int mask = Avx2.MoveMask(notEquals); | ||
| nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask); | ||
| nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask); | ||
|
|
||
| // end is from the end, but we need the index from the beginning | ||
| end = x + Vector256<byte>.Count - 1 - end; | ||
|
|
||
| left = Math.Min(left, (int)start); | ||
| right = Math.Max(right, (int)end); | ||
| } | ||
|
|
||
| x += Vector256<byte>.Count; | ||
| } | ||
| while (x <= rowLength - Vector256<byte>.Count); | ||
| } | ||
|
|
||
| Vector128<byte> trimmableVec = Sse41.IsSupported | ||
| ? trimmableVec256.GetLower() | ||
| : Vector128.Create(trimmableIndex); | ||
|
|
||
| while (x <= rowLength - Vector128<byte>.Count) | ||
| { | ||
| Vector128<byte> vec = Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.Add(ref rowPtr, x)); | ||
| Vector128<byte> notEquals = Sse2.CompareEqual(vec, trimmableVec); | ||
| notEquals = Sse2.Xor(notEquals, Vector128<byte>.AllBitsSet); | ||
|
|
||
| if (!Sse41.TestZ(notEquals, notEquals)) | ||
| { | ||
| isTransparentRow = false; | ||
| int mask = Sse2.MoveMask(notEquals); | ||
| nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask); | ||
| nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask) - Vector128<byte>.Count; | ||
|
|
||
| // TODO: It may be possible to optimize this inner loop using SIMD. | ||
| for (int x = 0; x < rowSpan.Length; x++) | ||
| // end is from the end, but we need the index from the beginning | ||
| end = x + Vector128<byte>.Count - 1 - end; | ||
|
|
||
| left = Math.Min(left, (int)start); | ||
| right = Math.Max(right, (int)end); | ||
| } | ||
|
|
||
| x += Vector128<byte>.Count; | ||
| } | ||
| } | ||
| #endif | ||
| for (; x < rowLength; ++x) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The remainder could be handled vectorized too, by shifting the mask of the most significant bits around by the count of elements left in the final vector. |
||
| { | ||
| if (rowSpan[x] != trimmableIndex) | ||
| if (Unsafe.Add(ref rowPtr, x) != trimmableIndex) | ||
| { | ||
| isTransparentRow = false; | ||
| left = Math.Min(left, x); | ||
| right = Math.Max(right, x); | ||
| left = Math.Min(left, (int)x); | ||
| right = Math.Max(right, (int)x); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment I don't have any idea on how to make this branchless.
`isTransparentRow` could be tracked in a vector, but `left` and `right` could not, as there's a mismatch of vector types, namely `byte` and `int`. A quite complicated approach would be to use
`VectorXYZ<byte>` and track the left and right -- but just before these can overflow, merge them back into the scalar `left`, `right` and start over. But I guess the book-keeping is more work, so I'm not sure if this is actually faster. For sure the code gets painful.