From c3cbacd68911866ba1002754c77b8e704ce0c330 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 12:00:23 +0200 Subject: [PATCH 01/13] Revert "[Mono] Restore old code to solve the recent SpanHelpers regressions (#75917)" This reverts commit 254844a700179bb3b39149db1946691e3696d6fc. --- .../System.Private.CoreLib.Shared.projitems | 3 - .../src/System/MemoryExtensions.cs | 7 +- .../src/System/SpanHelpers.Mono.cs | 2697 ----------------- .../src/System/SpanHelpers.T.cs | 16 - 4 files changed, 1 insertion(+), 2722 deletions(-) delete mode 100644 src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index bbb2871fe815c8..d06769d1805656 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2526,7 +2526,4 @@ - - - \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index 44299c2238ed98..23c3e86ef0ad01 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -1750,7 +1750,6 @@ public static unsafe int IndexOfAny(this ReadOnlySpan span, ReadOnlySpan(this ReadOnlySpan span, ReadOnlySpan(this ReadOnlySpan span, ReadOnlySp Unsafe.Add(ref valueRef, 2), span.Length); -#if !MONO // We don't have a mono overload for 4 values case 4: return SpanHelpers.LastIndexOfAnyValueType( ref spanRef, @@ -2034,7 +2032,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp Unsafe.Add(ref valueRef, 2), Unsafe.Add(ref valueRef, 3), span.Length); -#endif case 5: return SpanHelpers.LastIndexOfAnyValueType( @@ -2075,7 +2072,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp Unsafe.Add(ref valueRef, 2), span.Length); -#if !MONO // We don't have a mono overload for 4 values case 4: return SpanHelpers.LastIndexOfAnyValueType( ref spanRef, @@ -2084,7 +2080,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp Unsafe.Add(ref valueRef, 2), Unsafe.Add(ref valueRef, 3), span.Length); -#endif case 5: return SpanHelpers.LastIndexOfAnyValueType( diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs deleted file mode 100644 index d6a7f09e7465b1..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs +++ /dev/null @@ -1,2697 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; - -namespace System -{ - internal static partial class SpanHelpers // helpers used by Mono - { - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector128.IsHardwareAccelerated) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - - offset += 1; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true; and remain length is greater than Vector length. - // However, we still have the redundant check to allow the JIT to see that the code is unreachable and eliminate it when the platform does not - // have hardware accelerated. After processing Vector lengths we return to SequentialScan to finish any remaining. - if (Vector256.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - if ((((nuint)(uint)Unsafe.AsPointer(ref searchSpace) + offset) & (nuint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.strlen. - // Start with a check on Vector128 to align to Vector256, before moving to processing Vector256. - // This ensures we do not fault across memory pages while searching for an end of string. 
- Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as below - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - lengthToExamine = GetByteVector256SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector256 values = Vector256.Create(value); - do - { - Vector256 search = Vector256.LoadUnsafe(ref searchSpace, offset); - uint matches = Vector256.Equals(values, search).ExtractMostSignificantBits(); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } while (lengthToExamine > offset); - } - - lengthToExamine = GetByteVector128SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector128.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVector128SpanLength(offset, length); - - Vector128 values = Vector128.Create(value); - while (lengthToExamine > offset) - { - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - Vector128 compareResult = Vector128.Equals(values, search); - if (compareResult == Vector128.Zero) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - uint matches = compareResult.ExtractMostSignificantBits(); - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVectorSpanLength(offset, length); - - Vector values = new Vector(value); - - while (lengthToExamine > offset) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)offset + LocateFirstFoundByte(matches); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - 
return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfValueType(ref short searchSpace, short value, int length) - => IndexOfChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfChar(ref char searchSpace, char value, int length) - { - Debug.Assert(length >= 0); - - nint offset = 0; - nint lengthToExamine = length; - - if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0) - { - // Input isn't char aligned, we won't be able to align it to a Vector - } - else if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - // Needs to be double length to allow us to align the data first. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - // Needs to be double length to allow us to align the data first. - if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - - SequentialScan: - // In the non-vector case lengthToExamine is the total length. - // In the vector case lengthToExamine first aligns to Vector, - // then in a second pass after the Vector lengths is the - // remaining data that is shorter than a Vector length. - while (lengthToExamine >= 4) - { - ref char current = ref Unsafe.Add(ref searchSpace, offset); - - if (value == current) - goto Found; - if (value == Unsafe.Add(ref current, 1)) - goto Found1; - if (value == Unsafe.Add(ref current, 2)) - goto Found2; - if (value == Unsafe.Add(ref current, 3)) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - if (value == Unsafe.Add(ref searchSpace, offset)) - goto Found; - - offset++; - lengthToExamine--; - } - - // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Avx2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (nint)offset)) & (nint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256, - // before moving to processing Vector256. - - // If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages - // while searching for an end of string. Specifically that this assumes that the length is either correct - // or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an - // unowned page. If the search is unbounded (e.g. null terminator in wcslen) and the search value is not found, - // again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather - // than ever causing an AV. 
- - // If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this - // method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over - // the misalignment that may occur after; to we default to giving the GC a free hand to relocate and - // its up to the caller whether they are operating over fixed data. - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as below - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - lengthToExamine = GetCharVector256SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector256 values = Vector256.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector256.Count); - - Vector256 search = LoadVector256(ref searchSpace, offset); - int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector256.Count; - lengthToExamine -= Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - // Don't need to change lengthToExamine here as we don't use its current value again. 
- } - else - { - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Sse2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - Vector128 compareResult = AdvSimd.CompareEqual(values, search); - - if (compareResult == Vector128.Zero) - { - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - return (int)(offset + FindFirstMatchedLane(compareResult)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector.Count); - - lengthToExamine = GetCharVectorSpanLength(offset, length); - - if (lengthToExamine > 0) - { - Vector values = new Vector((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector.Count); - - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += Vector.Count; - lengthToExamine -= Vector.Count; - continue; - } - - // Find offset of first match - return (int)(offset + LocateFirstFoundChar(matches)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)(offset); - } - - internal static unsafe int IndexOfValueType(ref T searchSpace, T value, int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0); - - nint index = 0; // Use nint for arithmetic to avoid unnecessary 64->32->64 truncations - if (Vector.IsHardwareAccelerated && Vector.IsSupported && (Vector.Count * 2) <= length) - { - Vector valueVector = new Vector(value); - Vector compareVector; - Vector matchVector; - if ((uint)length % (uint)Vector.Count != 0) - { - // Number of elements is not a multiple of Vector.Count, so do one - // check and 
shift only enough for the remaining set to be a multiple - // of Vector.Count. - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += length % Vector.Count; - length -= length % Vector.Count; - } - while (length > 0) - { - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += Vector.Count; - length -= Vector.Count; - } - goto NotFound; - VectorMatch: - for (int i = 0; i < Vector.Count; i++) - if (compareVector[i].Equals(value)) - return (int)(index + i); - } - - while (length >= 8) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 4))) - goto Found4; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 5))) - goto Found5; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 6))) - goto Found6; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 7))) - goto Found7; - - length -= 8; - index += 8; - } - - while (length >= 4) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - - length -= 4; - index += 4; - } - - while (length > 0) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - - index += 1; - length--; - } - NotFound: - return -1; - - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)index; - Found1: - return (int)(index + 1); - Found2: - return (int)(index + 2); - Found3: - return (int)(index + 3); - Found4: - return (int)(index + 4); - Found5: - return (int)(index + 5); - Found6: - return (int)(index + 6); - Found7: - return (int)(index + 7); - } - - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0, "Expected non-negative length"); - Debug.Assert(value0 is byte or short or int or long, "Expected caller to normalize to one of these types"); - - if (!Vector128.IsHardwareAccelerated || length < Vector128.Count) - { - for (int i = 0; i < length; i++) - { - if (!Unsafe.Add(ref searchSpace, i).Equals(value0)) - { - return i; - } - } - } - else - { - Vector128 notEquals, value0Vector = Vector128.Create(value0); - ref T current = ref searchSpace; - ref T oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128.Count); - - // Loop until either we've finished all elements or there's less than a vector's-worth remaining. - do - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref current)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref current, notEquals); - } - - current = ref Unsafe.Add(ref current, Vector128.Count); - } - while (!Unsafe.IsAddressGreaterThan(ref current, ref oneVectorAwayFromEnd)); - - // If any elements remain, process the last vector in the search space. 
- if ((uint)length % Vector128.Count != 0) - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref oneVectorAwayFromEnd)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref oneVectorAwayFromEnd, notEquals); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static int ComputeIndex(ref T searchSpace, ref T current, Vector128 notEquals) - { - uint notEqualsElements = notEquals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)(Unsafe.ByteOffset(ref searchSpace, ref current) / Unsafe.SizeOf()); - } - } - - return -1; - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int LastIndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values = new Vector(value); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - (nuint)Vector.Count)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 
6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int LastIndexOfValueType(ref short searchSpace, short value, int length) - => LastIndexOfValueType(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref char searchSpace, char value, int length) - { - Debug.Assert(length >= 0); - - fixed (char* pChars = &searchSpace) - { - char* pCh = pChars + length; - char* pEndCh = pChars; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - // Figure out how many characters to read sequentially from the end until we are vector aligned - // This is equivalent to: length = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte - const int elementsPerByte = sizeof(ushort) / sizeof(byte); - length = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; - } - - SequentialScan: - while (length >= 4) - { - length -= 4; - pCh -= 4; - - if (*(pCh + 3) == value) - goto Found3; - if (*(pCh + 2) == value) - goto Found2; - if (*(pCh + 1) == value) - goto Found1; - if (*pCh == value) - goto Found; - } - - while (length > 0) - { - length--; - pCh--; - - if (*pCh == value) - goto Found; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Vector.IsHardwareAccelerated && pCh > pEndCh) - { - // Get the highest multiple of Vector.Count that is within the search space. - // That will be how many times we iterate in the loop below. - // This is equivalent to: length = Vector.Count * ((int)(pCh - pEndCh) / Vector.Count) - length = (int)((pCh - pEndCh) & ~(Vector.Count - 1)); - - // Get comparison Vector - Vector vComparison = new Vector(value); - - while (length > 0) - { - char* pStart = pCh - Vector.Count; - // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh (and hence pSart) is always vector aligned - Debug.Assert(((int)pStart & (Unsafe.SizeOf>() - 1)) == 0); - Vector vMatches = Vector.Equals(vComparison, Unsafe.Read>(pStart)); - if (Vector.Zero.Equals(vMatches)) - { - pCh -= Vector.Count; - length -= Vector.Count; - continue; - } - // Find offset of last match - return (int)(pStart - pEndCh) + LocateLastFoundChar(vMatches); - } - - if (pCh > pEndCh) - { - length = (int)(pCh - pEndCh); - goto SequentialScan; - } - } - - return -1; - Found: - return (int)(pCh - pEndCh); - Found1: - return (int)(pCh - pEndCh) + 1; - Found2: - return (int)(pCh - pEndCh) + 2; - Found3: - return (int)(pCh - pEndCh) + 3; - } - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : IEquatable? 
- => LastIndexOf(ref searchSpace, value, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. - // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - 
Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => IndexOfAnyChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value0), Unsafe.As(ref value1), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyChar(ref char searchStart, char value0, char value1, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. 
- // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. 
- - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - Vector256 values2 = Vector256.Create(value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. 
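- // Reaching here means either Avx2 is unsupported or the remaining data is smaller than a Vector256, so the search finishes with 128-bit vectors.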
- Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
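- // lengthToExamine here is the offset of the last full Vector (length - Vector.Count); when it is 0 the loop is skipped and only the single final compare below runs.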
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, short value3, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - Unsafe.As(ref value3), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, char value3, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
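- // As in the three-value overload above, vectorDiff doubles as the size test and as the seed for lengthToExamine on the vectorised path.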
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar to above for the Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though the path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; if so we will move to the Sse2 path - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - Vector256 values3 = Vector256.Create((ushort)value3); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // On the first iteration this check can be against 0; if it fails we move straight to the final compare. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // We perform the Or at non-Vector level as we are using the maximum number of non-preserved registers, - // and more causes them first to be pushed to stack and then popped on exit to preserve their values. - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values into our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // so the bit position in 'matches' corresponds to the element offset.
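- // Example: a match in char element 2 sets byte-mask bits 4 and 5, TrailingZeroCount returns 4, and the shift right by 1 at IntrinsicsMatch converts that back to element offset 2.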
- if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - Vector128 values3 = Vector128.Create((ushort)value3); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - Vector values3 = new Vector(value3); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
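- // Vector.Equals produces all-ones lanes for matching elements; OR-ing the four comparisons below yields one combined mask that a single Vector.Zero test can reject.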
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) - => LastIndexOfAnyExcept(ref searchSpace, value, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto 
Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - var matches = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - uint uValue2 = value2; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = 
Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - - var matches = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref 
Unsafe.Add(ref start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ref char Add(ref char start, nuint offset) => ref Unsafe.Add(ref start, (nint)offset); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - // Mask to help find the first lane in compareResult that is set. - // MSB 0x10 corresponds to 1st lane, 0x01 corresponds to 0th lane and so forth. - Vector128 mask = Vector128.Create((ushort)0x1001).AsByte(); - - // Find the first lane that is set inside compareResult. - Vector128 maskedSelectedLanes = AdvSimd.And(compareResult, mask); - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(maskedSelectedLanes, maskedSelectedLanes); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - // Find the first lane that is set inside compareResult. - return (uint)BitOperations.TrailingZeroCount(selectedLanes) >> 2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult.AsByte(), compareResult.AsByte()); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - return BitOperations.TrailingZeroCount(selectedLanes) >> 3; - } - - // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(Vector match) - { - var vector64 = Vector.AsVectorUInt64(match); - ulong candidate = 0; - int i = Vector.Count - 1; - - // This pattern is only unrolled by the Jit if the limit is Vector.Count - // As such, we need a dummy iteration variable for that condition to be satisfied - for (int j = 0; j < Vector.Count; j++) - { - candidate = vector64[i]; - if (candidate != 0) - { - break; - } - - i--; - } - - // Single LEA instruction with jitted const (using function result) - return i * 4 + LocateLastFoundChar(candidate); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(ulong match) - => BitOperations.Log2(match) >> 4; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, int length) - { - nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - return (nuint)(uint)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); - } - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 83a72eef3fbe7b..c6f5e6719bc955 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -1446,7 +1446,6 @@ internal static bool ContainsValueType(ref T searchSpace, T value, int length return false; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfChar(ref char searchSpace, char value, int length) => IndexOfValueType(ref Unsafe.As(ref searchSpace), (short)value, length); @@ -1458,7 +1457,6 @@ internal static int IndexOfValueType(ref T searchSpace, T value, int length) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => IndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -1573,7 +1571,6 @@ private static int IndexOfValueType(ref TValue searchSpace, TV return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyChar(ref char searchSpace, char value0, char value1, int length) => IndexOfAnyValueType(ref Unsafe.As(ref searchSpace), (short)value0, (short)value1, length); @@ -1585,7 +1582,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, length); -#endif // having INumber constraint here allows to use == operator and get better perf compared to .Equals [MethodImpl(MethodImplOptions.AggressiveOptimization)] @@ -1724,7 +1720,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); @@ -1732,7 +1727,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length) @@ -1870,7 +1864,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); @@ -1878,7 +1871,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, TValue 
value3, int length) @@ -2114,7 +2106,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); @@ -2122,7 +2113,6 @@ internal static int LastIndexOfValueType(ref T searchSpace, T value, int leng [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -2234,7 +2224,6 @@ private static int LastIndexOfValueType(ref TValue searchSpace return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, length); @@ -2242,7 +2231,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, int length) @@ -2375,7 +2363,6 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); @@ -2383,7 +2370,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length) @@ -2517,7 +2503,6 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); @@ -2525,7 +2510,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue 
value2, TValue value3, int length) From 76d56b2f9eb16796a4d227f95d4e3c5527e9fd99 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 19:58:04 +0200 Subject: [PATCH 02/13] [System.Span] Refactor hot loop code This replaces code like ``` load b.neq add ret load b.neq add ret load .... ``` with ``` load b.eq load b.eq load ... ``` This makes the hot loop more compact and reduces overall code size, which improves performance. The pattern is widely used, and it was also used previously for Span lookups. --- .../src/System/SpanHelpers.Byte.cs | 16 +- .../src/System/SpanHelpers.T.cs | 316 ++++++++++++------ 2 files changed, 235 insertions(+), 97 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 507d27822d5395..cc5e7d4d84f0c5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -986,13 +986,21 @@ public static nuint CommonPrefixLength(ref byte first, ref byte second, nuint le for (; (nint)i <= (nint)length - 4; i += 4) { - if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) return i + 0; - if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) return i + 1; - if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) return i + 2; - if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) return i + 3; + if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) goto Found0; + if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) goto Found1; + if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) goto Found2; + if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) goto Found3; } return length; + Found0: + return i; + Found1: + return i + 1; + Found2: + return i + 2; + Found3: + return i + 3; } Debug.Assert(length >= (uint)Vector128.Count); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index c6f5e6719bc955..66bf0618cab3ab 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -1474,14 +1474,14 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) return (int)offset + 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 4) == value)) return (int)offset + 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 5) == value)) return (int)offset + 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 6) == value)) return (int)offset + 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 7) == value)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) goto Found2; + if
(TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) goto Found3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 4) == value)) goto Found4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 5) == value)) goto Found5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 6) == value)) goto Found6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 7) == value)) goto Found7; offset += 8; } @@ -1490,10 +1490,10 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) goto Found2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) goto Found3; offset += 4; } @@ -1502,10 +1502,27 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1605,21 +1622,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || 
lookUp == value1)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found7; offset += 8; } @@ -1631,13 +1648,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; offset += 4; } @@ -1647,10 +1664,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1749,21 +1783,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || 
lookUp == value2)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found7; offset += 8; } @@ -1775,13 +1809,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; offset += 4; } @@ -1791,10 +1825,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1891,13 +1942,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == 
value3)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found3; offset += 4; } @@ -1907,10 +1958,19 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; offset += 1; } + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2011,13 +2071,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found3; offset += 4; } @@ -2027,10 +2087,20 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found; offset += 1; } + + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2130,14 +2200,14 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2; - if 
(TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 4) == value)) return (int)offset - 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 5) == value)) return (int)offset - 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 6) == value)) return (int)offset - 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 7) == value)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) goto FoundM1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) goto FoundM2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) goto FoundM3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 4) == value)) goto FoundM4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 5) == value)) goto FoundM5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 6) == value)) goto FoundM6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 7) == value)) goto FoundM7; offset -= 8; } @@ -2146,10 +2216,10 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) goto FoundM1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) goto FoundM2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) goto FoundM3; offset -= 4; } @@ -2158,10 +2228,27 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2253,21 +2340,21 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 
(int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3; lookUp = Unsafe.Add(ref current, -4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM4; lookUp = Unsafe.Add(ref current, -5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM5; lookUp = Unsafe.Add(ref current, -6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM6; lookUp = Unsafe.Add(ref current, -7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM7; offset -= 8; } @@ -2279,13 +2366,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3; offset -= 4; } @@ -2295,10 +2382,27 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2392,21 +2496,21 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1; lookUp = 
Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3; lookUp = Unsafe.Add(ref current, -4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM4; lookUp = Unsafe.Add(ref current, -5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM5; lookUp = Unsafe.Add(ref current, -6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM6; lookUp = Unsafe.Add(ref current, -7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM7; offset -= 8; } @@ -2418,13 +2522,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3; offset -= 4; } @@ -2434,10 +2538,27 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2530,13 +2651,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current 
= ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM3; offset -= 4; } @@ -2546,10 +2667,19 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; offset -= 1; } + return -1; + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { From c7d817fdb54915290ff81b5c0f8515750f8ae9a3 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 19:57:17 +0200 Subject: [PATCH 03/13] [mono][interp] Replace compare + brfalse/brtrue with single conditional branch --- src/mono/mono/mini/interp/transform.c | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 831268c14b3d2e..f59f71d7787bfc 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -9449,6 +9449,34 @@ interp_super_instructions (TransformData *td) } } } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode) && is_short_offset (noe, ins->info.target_bb->native_offset_estimate)) { + if (opcode == MINT_BRFALSE_I4 || opcode == MINT_BRTRUE_I4) { + gboolean negate = opcode == MINT_BRFALSE_I4; + int cond_sreg = ins->sregs [0]; + InterpInst *def = td->locals [cond_sreg].def; + if (def != NULL) { + int replace_opcode = -1; + switch (def->opcode) { + case MINT_CEQ_I4: replace_opcode = negate ? MINT_BNE_UN_I4 : MINT_BEQ_I4; break; + case MINT_CEQ_I8: replace_opcode = negate ? MINT_BNE_UN_I8 : MINT_BEQ_I8; break; + // Add more opcodes + default: + break; + } + if (replace_opcode != -1) { + ins->opcode = replace_opcode; + ins->sregs [0] = def->sregs [0]; + ins->sregs [1] = def->sregs [1]; + interp_clear_ins (def); + if (td->verbose_level) { + g_print ("superins: "); + dump_interp_inst (ins); + } + // The newly added opcode could be part of further superinstructions. 
Retry + ins = ins->prev; + continue; + } + } + } InterpInst *prev_ins = interp_prev_ins (ins); if (prev_ins && prev_ins->opcode == MINT_SAFEPOINT) { int condbr_op = get_unop_condbr_sp (opcode); @@ -9460,7 +9488,6 @@ interp_super_instructions (TransformData *td) dump_interp_inst (ins); } } - } } else if (opcode == MINT_STOBJ_VT_NOREF) { int sreg_src = ins->sregs [1]; From 300afdb97ee2d7353e85507315a58fbe3cf7a5d2 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 22:16:40 +0200 Subject: [PATCH 04/13] [mono][interp] Dump in/out links for bblocks during verbose logging --- src/mono/mono/mini/interp/transform.c | 33 +++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index f59f71d7787bfc..c6d804a392a923 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1627,6 +1627,32 @@ dump_interp_inst (InterpInst *ins) g_string_free (str, TRUE); } +static GString* +get_interp_bb_links (InterpBasicBlock *bb) +{ + GString *str = g_string_new (""); + + if (bb->in_count) { + g_string_append_printf (str, "IN (%d", bb->in_bb [0]->index); + for (int i = 1; i < bb->in_count; i++) + g_string_append_printf (str, " %d", bb->in_bb [i]->index); + g_string_append_printf (str, "), "); + } else { + g_string_append_printf (str, "IN (nil), "); + } + + if (bb->out_count) { + g_string_append_printf (str, "OUT (%d", bb->out_bb [0]->index); + for (int i = 1; i < bb->out_count; i++) + g_string_append_printf (str, " %d", bb->out_bb [i]->index); + g_string_append_printf (str, ")"); + } else { + g_string_append_printf (str, "OUT (nil)"); + } + + return str; +} + static void dump_interp_bb (InterpBasicBlock *bb) { @@ -8722,8 +8748,11 @@ interp_cprop (TransformData *td) for (ins = bb->first_ins; ins != NULL; ins = ins->next) foreach_local_var (td, ins, local_defs, clear_local_defs); - if (td->verbose_level) - g_print ("BB%d\n", bb->index); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + g_print ("\nBB%d: %s\n", bb->index, bb_info->str); + g_string_free (bb_info, TRUE); + } for (ins = bb->first_ins; ins != NULL; ins = ins->next) { int opcode = ins->opcode; From e9f50e246a20993f422fe5bfdb2014ec974faaa5 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 23:07:29 +0200 Subject: [PATCH 05/13] [mono][interp] Improve detection of dead bblocks Previously we marked bblocks as dead if their in_count was 0. This is not enough, however, since it doesn't account for loops: every bblock in an unreachable cycle keeps a nonzero in_count. We now do a full traversal of the bblock graph to detect unreachable bblocks.
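As a rough sketch, the detection is a standard worklist walk over the out-edges. This is a trimmed-down form of the interp_mark_reachable_bblocks added below (index names shortened here; the real version also seeds EH bblocks as roots):

```
/* Mark everything reachable from the entry bblock. Bblocks left
 * unmarked are dead, even when a cycle keeps their in_count nonzero. */
queue [next++] = td->entry_bb;
td->entry_bb->reachable = TRUE;
while (cur < next) {
	InterpBasicBlock *current = queue [cur++];
	for (int i = 0; i < current->out_count; i++) {
		InterpBasicBlock *child = current->out_bb [i];
		if (!child->reachable) {
			queue [next++] = child;
			child->reachable = TRUE;
		}
	}
}
```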
--- src/mono/mono/mini/interp/transform.c | 43 +++++++++++++++++++++++++-- src/mono/mono/mini/interp/transform.h | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index c6d804a392a923..4fb8ff9ee82128 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -686,7 +686,6 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock { gboolean needs_cprop = FALSE; - g_assert (!bb->in_count); for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { if (ins->opcode == MINT_LDLOCA_S) { td->locals [ins->sregs [0]].indirects--; @@ -696,6 +695,8 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock } } } + while (bb->in_count) + interp_unlink_bblocks (bb->in_bb [0], bb); while (bb->out_count) interp_unlink_bblocks (bb, bb->out_bb [0]); prev_bb->next_bb = bb->next_bb; @@ -8248,6 +8249,42 @@ generate_compacted_code (TransformData *td) g_ptr_array_free (td->relocs, TRUE); } +static void +interp_mark_reachable_bblocks (TransformData *td) +{ + InterpBasicBlock **queue = mono_mem_manager_alloc0 (td->mem_manager, td->bb_count * sizeof (InterpBasicBlock*)); + InterpBasicBlock *current; + int cur_index = 0; + int next_position = 0; + + // FIXME There is no need to force eh bblocks to remain alive + current = td->entry_bb; + while (current != NULL) { + if (current->eh_block) { + queue [next_position++] = current; + current->reachable = TRUE; + } else { + current->reachable = FALSE; + } + current = current->next_bb; + } + + queue [next_position++] = td->entry_bb; + td->entry_bb->reachable = TRUE; + + // We have the roots, traverse everything else + while (cur_index < next_position) { + current = queue [cur_index++]; + for (int i = 0; i < current->out_count; i++) { + InterpBasicBlock *child = current->out_bb [i]; + if (!child->reachable) { + queue [next_position++] = child; + child->reachable = TRUE; + } + } + } +} + // Traverse the list of basic blocks and merge adjacent blocks static gboolean interp_optimize_bblocks (TransformData *td) @@ -8255,11 +8292,13 @@ interp_optimize_bblocks (TransformData *td) InterpBasicBlock *bb = td->entry_bb; gboolean needs_cprop = FALSE; + interp_mark_reachable_bblocks (td); + while (TRUE) { InterpBasicBlock *next_bb = bb->next_bb; if (!next_bb) break; - if (next_bb->in_count == 0 && !next_bb->eh_block) { + if (!next_bb->reachable) { if (td->verbose_level) g_print ("Removed BB%d\n", next_bb->index); needs_cprop |= interp_remove_bblock (td, next_bb, bb); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 9c78df4fc5dacb..ed2ec0d9f27bbc 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -125,6 +125,7 @@ struct _InterpBasicBlock { SeqPoint **pred_seq_points; guint num_pred_seq_points; + int reachable : 1; // This block has special semantics and it shouldn't be optimized away int eh_block : 1; int dead: 1; From 17c8da03fc2fd5df38c3451c45ded3417caf63a0 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 11:26:55 +0200 Subject: [PATCH 06/13] [mono][interp] Reorder bblocks to facilitate propagation of values Consider for example the following pattern used commonly with conditional branches: ``` br.s [nil <- nil], BB0 ... 
ceq0.i4 [32 <- 40], br.s [nil <- nil], BB1 BB0: ldc.i4.0 [32 <- nil], BB1: brfalse.i4.s [nil <- 32], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` This commit reorders this code to look like: ``` br.s [nil <- nil], BB0 ... ceq0.i4 [32 <- 40], brfalse.i4.s [nil <- 32], BB_EXIT br.s [nil <- nil], BB2 BB0: ldc.i4.0 [32 <- nil], BB1: brfalse.i4.s [nil <- 32], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` This means we will have duplicated brfalse instructions, but every basic block reaching the conditional branch will have information about the condition. For example, ceq0.i4 + brfalse is equivalent to brtrue, and ldc.i4.0 + brfalse is equivalent to an unconditional branch. After other optimizations are later applied on the bblock graph, like bblock removal, merging and propagation of branch targets, the resulting code in this example would look like: ``` br.s [nil <- nil], BB_EXIT ... brtrue.i4.s [nil <- 40], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` This is a great simplification over the original code. --- src/mono/mono/mini/interp/transform.c | 110 ++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 4fb8ff9ee82128..ae41fd689a1f2a 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -280,11 +280,17 @@ interp_clear_ins (InterpInst *ins) ins->opcode = MINT_NOP; } +static gboolean +interp_ins_is_nop (InterpInst *ins) +{ + return ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT; +} + static InterpInst* interp_prev_ins (InterpInst *ins) { ins = ins->prev; - while (ins && (ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT)) + while (ins && interp_ins_is_nop (ins)) ins = ins->prev; return ins; } @@ -300,6 +306,26 @@ check_stack_helper (TransformData *td, int n) return TRUE; } +static InterpInst* +interp_first_ins (InterpBasicBlock *bb) +{ + InterpInst *ins = bb->first_ins; + if (!ins || !interp_ins_is_nop (ins)) + return ins; + while (ins && interp_ins_is_nop (ins)) + ins = ins->next; + return ins; +} + +static InterpInst* +interp_last_ins (InterpBasicBlock *bb) +{ + InterpInst *ins = bb->last_ins; + if (!ins || !interp_ins_is_nop (ins)) + return ins; + return interp_prev_ins (ins); +} + #define CHECK_STACK(td, n) \ do { \ if (!check_stack_helper (td, n)) \ @@ -3617,6 +3643,18 @@ interp_field_from_token (MonoMethod *method, guint32 token, MonoClass **klass, M return field; } +static InterpBasicBlock* +alloc_bb (TransformData *td) +{ + InterpBasicBlock *bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock)); + bb->il_offset = -1; + bb->native_offset = -1; + bb->stack_height = -1; + bb->index = td->bb_count++; + + return bb; +} + static InterpBasicBlock* get_bb (TransformData *td, unsigned char *ip, gboolean make_list) { @@ -3624,13 +3662,10 @@ get_bb (TransformData *td, unsigned char *ip, gboolean make_list) InterpBasicBlock *bb = td->offset_to_bb [offset]; if (!bb) { - bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock)); + bb = alloc_bb (td); + bb->il_offset = offset; - bb->native_offset = -1; - bb->stack_height = -1; - bb->index = td->bb_count++; td->offset_to_bb [offset] = bb; - /* Add the blocks in reverse order */ if (make_list) td->basic_blocks = g_list_prepend_mempool (td->mempool, td->basic_blocks, bb); @@ -8285,6 +8320,67 @@ interp_mark_reachable_bblocks (TransformData *td) } } +static void +interp_reorder_bblocks (TransformData *td) +{ + InterpBasicBlock *bb; + + for (bb = td->entry_bb; 
bb != NULL; bb = bb->next_bb) { + InterpInst *first = interp_first_ins (bb); + if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { + // This means this bblock has a single instruction, the conditional branch + int i = 0; + while (i < bb->in_count) { + InterpBasicBlock *in_bb = bb->in_bb [i]; + InterpInst *last_ins = interp_last_ins (in_bb); + if (last_ins && last_ins->opcode == MINT_BR) { + // This bblock is reached unconditionally from one of its parents + // Move the conditional branch inside the parent to facilitate propagation + // of condition value. + InterpBasicBlock *cond_true_bb = first->info.target_bb; + InterpBasicBlock *next_bb = bb->next_bb; + + // parent bb will do the conditional branch + interp_unlink_bblocks (in_bb, bb); + last_ins->opcode = first->opcode; + last_ins->sregs [0] = first->sregs [0]; + last_ins->sregs [1] = first->sregs [1]; + last_ins->info.target_bb = cond_true_bb; + interp_link_bblocks (td, in_bb, cond_true_bb); + + // Create new fallthrough bb between in_bb and in_bb->next_bb + InterpBasicBlock *new_bb = alloc_bb (td); + new_bb->next_bb = in_bb->next_bb; + in_bb->next_bb = new_bb; + interp_link_bblocks (td, in_bb, new_bb); + + + InterpInst *new_inst = interp_insert_ins_bb (td, new_bb, NULL, MINT_BR); + new_inst->info.target_bb = next_bb; + + interp_link_bblocks (td, new_bb, next_bb); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + GString* in_bb_info = get_interp_bb_links (in_bb); + GString* new_bb_info = get_interp_bb_links (new_bb); + g_print ("Moved cond branch BB%d into BB%d, new BB%d\n", bb->index, in_bb->index, new_bb->index); + g_print ("\tBB%d: %s\n", bb->index, bb_info->str); + g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str); + g_print ("\tBB%d: %s\n", new_bb->index, new_bb_info->str); + g_string_free (bb_info, TRUE); + g_string_free (in_bb_info, TRUE); + g_string_free (new_bb_info, TRUE); + } + // Since we changed links, in_bb might have changed, loop again from the start + i = 0; + } else { + i++; + } + } + } + } +} + // Traverse the list of basic blocks and merge adjacent blocks static gboolean interp_optimize_bblocks (TransformData *td) @@ -8292,6 +8388,8 @@ interp_optimize_bblocks (TransformData *td) InterpBasicBlock *bb = td->entry_bb; gboolean needs_cprop = FALSE; + interp_reorder_bblocks (td); + interp_mark_reachable_bblocks (td); while (TRUE) { From 83b2d059db4431134a1fa4719cab3dddffe6c8bc Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 15 Nov 2022 17:22:33 +0200 Subject: [PATCH 07/13] [mono][interp] Don't optimize out bblocks that are tiering patchpoint targets Even though they can become unreachable in the current method, they can still be entered when the unoptimized method gets tiered at this point. Add an assert to prevent such issues in the future.
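The fix has two halves; a minimal sketch (both snippets mirror the diff below): patchpoint targets are seeded as reachability roots next to EH bblocks, and the tiering lookup asserts on the G_MAXINT32 sentinel (presumably the not-found value) instead of silently computing a bogus resume address:

```
/* transform.c: treat patchpoint targets as reachability roots */
if (current->eh_block || current->patchpoint_data) {
	queue [next_position++] = current;
	current->reachable = TRUE;
}

/* tiering.c: fail loudly if the target bblock was optimized away */
int offset = lookup_patchpoint_data (optimized_method, bb_index);
g_assert (offset != G_MAXINT32);
```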
--- src/mono/mono/mini/interp/tiering.c | 4 +++- src/mono/mono/mini/interp/transform.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/interp/tiering.c b/src/mono/mono/mini/interp/tiering.c index 7830c66c4c1110..1aa02dd341efeb 100644 --- a/src/mono/mono/mini/interp/tiering.c +++ b/src/mono/mono/mini/interp/tiering.c @@ -209,5 +209,7 @@ mono_interp_tier_up_frame_patchpoint (InterpFrame *frame, ThreadContext *context } context->stack_pointer = (guchar*)frame->stack + optimized_method->alloca_size; frame->imethod = optimized_method; - return optimized_method->code + lookup_patchpoint_data (optimized_method, bb_index); + int offset = lookup_patchpoint_data (optimized_method, bb_index); + g_assert (offset != G_MAXINT32); + return optimized_method->code + offset; } diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index ae41fd689a1f2a..2bd2879371006e 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8295,7 +8295,7 @@ interp_mark_reachable_bblocks (TransformData *td) // FIXME There is no need to force eh bblocks to remain alive current = td->entry_bb; while (current != NULL) { - if (current->eh_block) { + if (current->eh_block || current->patchpoint_data) { queue [next_position++] = current; current->reachable = TRUE; } else { @@ -8401,7 +8401,7 @@ interp_optimize_bblocks (TransformData *td) g_print ("Removed BB%d\n", next_bb->index); needs_cprop |= interp_remove_bblock (td, next_bb, bb); continue; - } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block) { + } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block && !next_bb->patchpoint_data) { g_assert (next_bb->in_bb [0] == bb); interp_merge_bblocks (td, bb, next_bb); if (td->verbose_level) From fe8288a55239eead691cd12e1aa8b132830fa6f3 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 15 Nov 2022 17:32:45 +0200 Subject: [PATCH 08/13] [mono][interp] Make bblock reordering more conservative If we are unlikely to gain anything from propagating the condition (if we don't have information about any of the condition operand vars), simply avoid the optimization. --- src/mono/mono/mini/interp/transform.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 2bd2879371006e..1f431534d4d08b 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8320,6 +8320,20 @@ interp_mark_reachable_bblocks (TransformData *td) } } +static gboolean +interp_prev_ins_defines_var (InterpInst *ins, int var1, int var2) +{ + // Check max of 5 instructions + for (int i = 0; i < 5; i++) { + ins = interp_prev_ins (ins); + if (!ins) + return FALSE; + if (mono_interp_op_dregs [ins->opcode] && (ins->dreg == var1 || ins->dreg == var2)) + return TRUE; + } + return FALSE; +} + static void interp_reorder_bblocks (TransformData *td) { @@ -8330,10 +8344,11 @@ interp_reorder_bblocks (TransformData *td) if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { // This means this bblock has a single instruction, the conditional branch int i = 0; + int lookup_var2 = (mono_interp_op_dregs [first->opcode] > 1) ? 
first->sregs [1] : -1; while (i < bb->in_count) { InterpBasicBlock *in_bb = bb->in_bb [i]; InterpInst *last_ins = interp_last_ins (in_bb); - if (last_ins && last_ins->opcode == MINT_BR) { + if (last_ins && last_ins->opcode == MINT_BR && interp_prev_ins_defines_var (last_ins, first->sregs [0], lookup_var2)) { // This bblock is reached unconditionally from one of its parents // Move the conditional branch inside the parent to facilitate propagation // of condition value. From 65feed01276cac6f8ce413586c5c03318cd6d485 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 20:31:52 +0200 Subject: [PATCH 09/13] [mono][interp] Add basic removal of unused defines If we store into a var and this var is redefined by the end of the basic block without being used, then we can clear the original store. --- src/mono/mono/mini/interp/mintops.h | 3 +++ src/mono/mono/mini/interp/transform.c | 19 +++++++++++++++---- src/mono/mono/mini/interp/transform.h | 2 ++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index 974cd3c7f457f3..44ff6c5cbe144a 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -80,6 +80,9 @@ typedef enum { #define MINT_IS_STIND_INT(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_I8) #define MINT_IS_STIND(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_REF) +// TODO Add more +#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_MONO_LDPTR) + #define MINT_CALL_ARGS 2 #define MINT_CALL_ARGS_SREG -2 diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 1f431534d4d08b..ccbd9a90d1b30d 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8455,10 +8455,7 @@ interp_local_deadce (TransformData *td) // Kill instructions that don't use stack and are storing into dead locals for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { - if (MINT_IS_MOV (ins->opcode) || - MINT_IS_LDC_I4 (ins->opcode) || - MINT_IS_LDC_I8 (ins->opcode) || - ins->opcode == MINT_MONO_LDPTR || + if (MINT_NO_SIDE_EFFECTS (ins->opcode) || ins->opcode == MINT_LDLOCA_S) { int dreg = ins->dreg; if (td->locals [dreg].flags & INTERP_LOCAL_FLAG_DEAD) { @@ -8847,6 +8844,7 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de int sreg = *psreg; local_ref_count [sreg]++; + local_defs [sreg].ref_count++; if (local_defs [sreg].type == LOCAL_VALUE_LOCAL) { int cprop_local = local_defs [sreg].local; @@ -8871,6 +8869,7 @@ clear_local_defs (TransformData *td, int var, void *data) LocalValue *local_defs = (LocalValue*) data; local_defs [var].type = LOCAL_VALUE_NONE; local_defs [var].ins = NULL; + local_defs [var].ref_count = 0; } static void @@ -8940,6 +8939,18 @@ interp_cprop (TransformData *td) } if (num_dregs) { + // Check if the previous definition of this var was used at all. 
+ // If it wasn't we can just clear the instruction + if (local_defs [dreg].ins != NULL && + local_defs [dreg].ref_count == 0 && + !td->locals [dreg].indirects) { + InterpInst *prev_def = local_defs [dreg].ins; + if (MINT_NO_SIDE_EFFECTS (prev_def->opcode)) { + for (int i = 0; i < mono_interp_op_sregs [prev_def->opcode]; i++) + local_ref_count [prev_def->sregs [i]]--; + interp_clear_ins (prev_def); + } + } local_defs [dreg].type = LOCAL_VALUE_NONE; local_defs [dreg].ins = ins; local_defs [dreg].def_index = ins_index; diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index ed2ec0d9f27bbc..00869c73ad53d3 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -59,6 +59,8 @@ typedef struct { // The instruction that writes this local. InterpInst *ins; int def_index; + // ref count for ins->dreg + int ref_count; } LocalValue; struct _InterpInst { From 3f7bd3d1e143db3439b50eb7ea0a39c17f4808b7 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 21:40:32 +0200 Subject: [PATCH 10/13] [mono][interp] Clear unused defines of local only vars We detect if a var's value never escapes the bblock where it is defined. We mark such vars and clear unused definitions of that var from other bblocks. --- src/mono/mono/mini/interp/transform.c | 54 ++++++++++++++++++++++++--- src/mono/mono/mini/interp/transform.h | 3 ++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index ccbd9a90d1b30d..a1a92eaac9dbdb 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8440,17 +8440,26 @@ interp_local_deadce (TransformData *td) for (unsigned int i = 0; i < td->locals_size; i++) { g_assert (local_ref_count [i] >= 0); g_assert (td->locals [i].indirects >= 0); - if (!local_ref_count [i] && - !td->locals [i].indirects && - (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD) == 0) { + if (td->locals [i].indirects || (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD)) + continue; + if (!local_ref_count [i]) { needs_dce = TRUE; td->locals [i].flags |= INTERP_LOCAL_FLAG_DEAD; + } else if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_UNKNOWN_USE)) { + if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY)) { + // The value of this var is not passed between multiple basic blocks + td->locals [i].flags |= INTERP_LOCAL_FLAG_LOCAL_ONLY; + if (td->verbose_level) + g_print ("Var %d is local only\n", i); + needs_cprop = TRUE; + } } + td->locals [i].flags &= ~INTERP_LOCAL_FLAG_UNKNOWN_USE; } // Return early if all locals are alive if (!needs_dce) - return FALSE; + return needs_cprop; // Kill instructions that don't use stack and are storing into dead locals for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { @@ -8860,6 +8869,8 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de local_ref_count [cprop_local]++; if (td->verbose_level) dump_interp_inst (ins); + } else if (!local_defs [sreg].ins) { + td->locals [sreg].flags |= INTERP_LOCAL_FLAG_UNKNOWN_USE; } } @@ -8872,6 +8883,34 @@ clear_local_defs (TransformData *td, int var, void *data) local_defs [var].ref_count = 0; } +static void +clear_unused_defs (TransformData *td, int var, void *data) +{ + if (!(td->locals [var].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY)) + return; + if (td->locals [var].indirects) + return; + + LocalValue *local_def = &((LocalValue*) data) [var]; + InterpInst *def_ins = local_def->ins; + if 
(!def_ins) + return; + if (local_def->ref_count) + return; + + // This is a local only var that is defined in this bblock and its value is not used + // at all in this bblock. Clear the definition + if (MINT_NO_SIDE_EFFECTS (def_ins->opcode)) { + for (int i = 0; i < mono_interp_op_sregs [def_ins->opcode]; i++) + td->local_ref_count [def_ins->sregs [i]]--; + if (td->verbose_level) { + g_print ("kill unused local def:\n\t"); + dump_interp_inst (def_ins); + } + interp_clear_ins (def_ins); + } +} + static void interp_cprop (TransformData *td) { @@ -8880,6 +8919,7 @@ interp_cprop (TransformData *td) InterpBasicBlock *bb; gboolean needs_retry; int ins_index; + int iteration_count = 0; td->local_ref_count = local_ref_count; retry: @@ -8887,7 +8927,7 @@ memset (local_ref_count, 0, td->locals_size * sizeof (int)); if (td->verbose_level) - g_print ("\ncprop iteration\n"); + g_print ("\ncprop iteration %d\n", iteration_count++); for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *ins; @@ -9285,8 +9325,12 @@ interp_cprop (TransformData *td) needs_retry = TRUE; } } + ins_index++; } + + for (ins = bb->first_ins; ins != NULL; ins = ins->next) + foreach_local_var (td, ins, local_defs, clear_unused_defs); } needs_retry |= interp_local_deadce (td); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 00869c73ad53d3..7c080406ab5c61 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -21,6 +21,9 @@ #define INTERP_LOCAL_FLAG_GLOBAL 8 #define INTERP_LOCAL_FLAG_NO_CALL_ARGS 16 +#define INTERP_LOCAL_FLAG_UNKNOWN_USE 32 +#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64 + typedef struct _InterpInst InterpInst; typedef struct _InterpBasicBlock InterpBasicBlock; From 7b417393cfd5fc062d93b117d540f2e64cd70a6d Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 22:22:47 +0200 Subject: [PATCH 11/13] [mono][interp] Propagate target branches If a bblock contains only an unconditional br, then all bblocks branching into it can just branch to the br target directly instead. --- src/mono/mono/mini/interp/transform.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index a1a92eaac9dbdb..7820b8ac9715b2 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8341,7 +8341,9 @@ interp_reorder_bblocks (TransformData *td) for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *first = interp_first_ins (bb); - if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { + if (!first) + continue; + if (MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { // This means this bblock has a single instruction, the conditional branch + int i = 0; int lookup_var2 = (mono_interp_op_dregs [first->opcode] > 1) ? 
first->sregs [1] : -1; @@ -8392,6 +8394,36 @@ interp_reorder_bblocks (TransformData *td) i++; } } + } else if (first->opcode == MINT_BR) { + // All bblocks jumping into this bblock can jump directly into the br target + int i = 0; + while (i < bb->in_count) { + InterpBasicBlock *in_bb = bb->in_bb [i]; + InterpInst *last_ins = interp_last_ins (in_bb); + if (last_ins && (MINT_IS_CONDITIONAL_BRANCH (last_ins->opcode) || + MINT_IS_UNCONDITIONAL_BRANCH (last_ins->opcode)) && + last_ins->info.target_bb == bb) { + InterpBasicBlock *target_bb = first->info.target_bb; + last_ins->info.target_bb = target_bb; + interp_unlink_bblocks (in_bb, bb); + interp_link_bblocks (td, in_bb, target_bb); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + GString* in_bb_info = get_interp_bb_links (in_bb); + GString* target_bb_info = get_interp_bb_links (target_bb); + g_print ("Propagated target bb BB%d into BB%d\n", target_bb->index, in_bb->index); + g_print ("\tBB%d: %s\n", bb->index, bb_info->str); + g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str); + g_print ("\tBB%d: %s\n", target_bb->index, target_bb_info->str); + g_string_free (bb_info, TRUE); + g_string_free (in_bb_info, TRUE); + g_string_free (target_bb_info, TRUE); + } + i = 0; + } else { + i++; + } + } } } } From 3558b6dddeace2929c07362a8bd8baf957438dac Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 3 Dec 2022 21:18:38 +0200 Subject: [PATCH 12/13] [mono][interp] Add super instruction for (var + ct1) * ct2 This pattern is used in low-level unsafe code when using (var + ct1) as an index into an array, where ct2 is the size of the array element. Also fix display of two shorts when dumping instructions. --- src/mono/mono/mini/interp/interp.c | 8 ++++++++ src/mono/mono/mini/interp/mintops.def | 3 +++ src/mono/mono/mini/interp/transform.c | 23 ++++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 0976c9bf5e466b..e93261901b51a5 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -5111,6 +5111,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint16)ip [3]; ip += 4; MINT_IN_BREAK; + MINT_IN_CASE(MINT_ADD_MUL_I4_IMM) + LOCAL_VAR (ip [1], gint32) = (LOCAL_VAR (ip [2], gint32) + (gint16)ip [3]) * (gint16)ip [4]; + ip += 5; + MINT_IN_BREAK; + MINT_IN_CASE(MINT_ADD_MUL_I8_IMM) + LOCAL_VAR (ip [1], gint64) = (LOCAL_VAR (ip [2], gint64) + (gint16)ip [3]) * (gint16)ip [4]; + ip += 5; + MINT_IN_BREAK; MINT_IN_CASE(MINT_MUL_R4) BINOP(float, *); MINT_IN_BREAK; diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 56080ef6ca50d9..27b74620b41a2c 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -644,6 +644,9 @@ OPDEF(MINT_RET_I8_IMM, "ret.i8.imm", 2, 0, 0, MintOpShortInt) OPDEF(MINT_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, MintOpShortInt) OPDEF(MINT_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, MintOpShortInt) +OPDEF(MINT_ADD_MUL_I4_IMM, "add.mul.i4.imm", 5, 1, 1, MintOpTwoShorts) +OPDEF(MINT_ADD_MUL_I8_IMM, "add.mul.i8.imm", 5, 1, 1, MintOpTwoShorts) + OPDEF(MINT_MUL_I4_IMM, "mul.i4.imm", 4, 1, 1, MintOpShortInt) OPDEF(MINT_MUL_I8_IMM, "mul.i8.imm", 4, 1, 1, MintOpShortInt) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 7820b8ac9715b2..5c2b9744babffc 100644 --- 
a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1479,7 +1479,7 @@ dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, i g_string_append_printf (str, " %u", *(guint16*)data); break; case MintOpTwoShorts: - g_string_append_printf (str, " %u,%u", *(guint16*)data, *(guint16 *)(data + 1)); + g_string_append_printf (str, " %d,%d", *(gint16*)data, *(gint16 *)(data + 1)); break; case MintOpTwoInts: g_string_append_printf (str, " %u,%u", (guint32)READ32(data), (guint32)READ32(data + 2)); @@ -9586,6 +9586,27 @@ interp_super_instructions (TransformData *td) dump_interp_inst (new_inst); } } + } else if (opcode == MINT_MUL_I4_IMM || opcode == MINT_MUL_I8_IMM) { + int sreg = ins->sregs [0]; + InterpInst *def = td->locals [sreg].def; + if (def != NULL && td->local_ref_count [sreg] == 1) { + gboolean is_i4 = opcode == MINT_MUL_I4_IMM; + if ((is_i4 && def->opcode == MINT_ADD_I4_IMM) || + (!is_i4 && def->opcode == MINT_ADD_I8_IMM)) { + InterpInst *new_inst = interp_insert_ins (td, ins, is_i4 ? MINT_ADD_MUL_I4_IMM : MINT_ADD_MUL_I8_IMM); + new_inst->dreg = ins->dreg; + new_inst->sregs [0] = def->sregs [0]; + new_inst->data [0] = def->data [0]; + new_inst->data [1] = ins->data [0]; + interp_clear_ins (def); + interp_clear_ins (ins); + local_ref_count [sreg]--; + if (td->verbose_level) { + g_print ("superins: "); + dump_interp_inst (new_inst); + } + } + } } else if (MINT_IS_BINOP_SHIFT (opcode)) { // ldc + sh -> sh.imm gint16 imm; From 32b8723666eaabd753ef0b4d83f8d0e57fa8102d Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sun, 4 Dec 2022 19:54:31 +0200 Subject: [PATCH 13/13] [mono][interp] Add new ldind super instruction These new instructions can apply addition and multiplication with constant to the offset var. 
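Concretely, each new opcode folds the add and mul immediates into the load's effective address. A sketch of the computation (mirroring the LDIND_OFFSET_ADD_MUL macro in the diff below, where ip [4] is the add immediate and ip [5] the mul immediate; the base/off/add_imm/mul_imm names here are illustrative only):

```
/* base comes from sreg0, off from sreg1; add_imm and mul_imm are the
 * two gint16 immediates carried by the instruction. */
char *ptr = (char*)base + (off + add_imm) * mul_imm;
/* the .i4 variant then loads *(gint32*)ptr into the dreg */
```

This lets an add.*.imm/mul.*.imm (or the combined add.mul.*.imm) feeding only the offset var be folded into the indirect load itself.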
--- src/mono/mono/mini/interp/interp.c | 35 +++++++++++++++++++++++ src/mono/mono/mini/interp/mintops.def | 7 +++++ src/mono/mono/mini/interp/mintops.h | 1 + src/mono/mono/mini/interp/transform.c | 41 +++++++++++++++++++++++++++ 4 files changed, 84 insertions(+) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index e93261901b51a5..939799f3dc84b8 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -4910,6 +4910,41 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; #endif MINT_IN_BREAK; +#define LDIND_OFFSET_ADD_MUL(datatype,casttype,unaligned) do { \ + MONO_DISABLE_WARNING(4127) \ + gpointer ptr = LOCAL_VAR (ip [2], gpointer); \ + NULL_CHECK (ptr); \ + ptr = (char*)ptr + (LOCAL_VAR (ip [3], mono_i) + (gint16)ip [4]) * (gint16)ip [5]; \ + if (unaligned && ((gsize)ptr % SIZEOF_VOID_P)) \ + memcpy (locals + ip [1], ptr, sizeof (datatype)); \ + else \ + LOCAL_VAR (ip [1], datatype) = *(casttype*)ptr; \ + ip += 6; \ + MONO_RESTORE_WARNING \ +} while (0) + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I1) + LDIND_OFFSET_ADD_MUL(gint32, gint8, FALSE); + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_U1) + LDIND_OFFSET_ADD_MUL(gint32, guint8, FALSE); + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I2) + LDIND_OFFSET_ADD_MUL(gint32, gint16, FALSE); + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_U2) + LDIND_OFFSET_ADD_MUL(gint32, guint16, FALSE); + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I4) + LDIND_OFFSET_ADD_MUL(gint32, gint32, FALSE); + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I8) +#ifdef NO_UNALIGNED_ACCESS + LDIND_OFFSET_ADD_MUL(gint64, gint64, TRUE); +#else + LDIND_OFFSET_ADD_MUL(gint64, gint64, FALSE); +#endif + MINT_IN_BREAK; + #define LDIND_OFFSET_IMM(datatype,casttype,unaligned) do { \ MONO_DISABLE_WARNING(4127) \ gpointer ptr = LOCAL_VAR (ip [2], gpointer); \ diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 27b74620b41a2c..edeac370a61722 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -156,6 +156,13 @@ OPDEF(MINT_LDIND_OFFSET_IMM_U2, "ldind_off_imm.u2", 4, 1, 1, MintOpShortInt) OPDEF(MINT_LDIND_OFFSET_IMM_I4, "ldind_off_imm.i4", 4, 1, 1, MintOpShortInt) OPDEF(MINT_LDIND_OFFSET_IMM_I8, "ldind_off_imm.i8", 4, 1, 1, MintOpShortInt) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I1, "ldind_off_add_mul_imm.i1", 6, 1, 2, MintOpTwoShorts) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_U1, "ldind_off_add_mul_imm.u1", 6, 1, 2, MintOpTwoShorts) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I2, "ldind_off_add_mul_imm.i2", 6, 1, 2, MintOpTwoShorts) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_U2, "ldind_off_add_mul_imm.u2", 6, 1, 2, MintOpTwoShorts) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I4, "ldind_off_add_mul_imm.i4", 6, 1, 2, MintOpTwoShorts) +OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I8, "ldind_off_add_mul_imm.i8", 6, 1, 2, MintOpTwoShorts) + OPDEF(MINT_STIND_I1, "stind.i1", 3, 0, 2, MintOpNoArgs) OPDEF(MINT_STIND_I2, "stind.i2", 3, 0, 2, MintOpNoArgs) OPDEF(MINT_STIND_I4, "stind.i4", 3, 0, 2, MintOpNoArgs) diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index 44ff6c5cbe144a..caea1370af8a1e 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -79,6 +79,7 @@ typedef enum { #define MINT_IS_LDIND(op) ((op) >= MINT_LDIND_I1 && (op) <= MINT_LDIND_R8) #define MINT_IS_STIND_INT(op) ((op) >= 
MINT_STIND_I1 && (op) <= MINT_STIND_I8) #define MINT_IS_STIND(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_REF) +#define MINT_IS_LDIND_OFFSET(op) ((op) >= MINT_LDIND_OFFSET_I1 && (op) <= MINT_LDIND_OFFSET_I8) // TODO Add more #define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_MONO_LDPTR) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 5c2b9744babffc..0716e6c2401807 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -145,6 +145,8 @@ MonoInterpStats mono_interp_stats; #define MINT_STIND_I MINT_STIND_I8 #define MINT_LDELEM_I MINT_LDELEM_I8 #define MINT_STELEM_I MINT_STELEM_I8 +#define MINT_MUL_P_IMM MINT_MUL_I8_IMM +#define MINT_ADD_MUL_P_IMM MINT_ADD_MUL_I8_IMM #else #define MINT_MOV_P MINT_MOV_4 #define MINT_LDNULL MINT_LDC_I4_0 @@ -152,6 +154,8 @@ MonoInterpStats mono_interp_stats; #define MINT_STIND_I MINT_STIND_I4 #define MINT_LDELEM_I MINT_LDELEM_I4 #define MINT_STELEM_I MINT_STELEM_I4 +#define MINT_MUL_P_IMM MINT_MUL_I4_IMM +#define MINT_ADD_MUL_P_IMM MINT_ADD_MUL_I4_IMM #endif static const char *stack_type_string [] = { "I4", "I8", "R4", "R8", "O ", "VT", "MP", "F " }; @@ -9654,6 +9658,43 @@ interp_super_instructions (TransformData *td) } } } + } else if (MINT_IS_LDIND_OFFSET (opcode)) { + int sreg_off = ins->sregs [1]; + InterpInst *def = td->locals [sreg_off].def; + if (def != NULL && td->local_ref_count [sreg_off] == 1) { + if (def->opcode == MINT_MUL_P_IMM || def->opcode == MINT_ADD_P_IMM || def->opcode == MINT_ADD_MUL_P_IMM) { + int ldind_offset_op = MINT_LDIND_OFFSET_ADD_MUL_IMM_I1 + (opcode - MINT_LDIND_OFFSET_I1); + InterpInst *new_inst = interp_insert_ins (td, ins, ldind_offset_op); + new_inst->dreg = ins->dreg; + new_inst->sregs [0] = ins->sregs [0]; // base + new_inst->sregs [1] = def->sregs [0]; // off + + // set the add and mul immediates + switch (def->opcode) { + case MINT_ADD_P_IMM: + new_inst->data [0] = def->data [0]; + new_inst->data [1] = 1; + break; + case MINT_MUL_P_IMM: + new_inst->data [0] = 0; + new_inst->data [1] = def->data [0]; + break; + case MINT_ADD_MUL_P_IMM: + new_inst->data [0] = def->data [0]; + new_inst->data [1] = def->data [1]; + break; + } + + interp_clear_ins (def); + interp_clear_ins (ins); + local_ref_count [sreg_off]--; + mono_interp_stats.super_instructions++; + if (td->verbose_level) { + g_print ("method %s:%s, superins: ", m_class_get_name (td->method->klass), td->method->name); + dump_interp_inst (new_inst); + } + } + } } else if (MINT_IS_STIND_INT (opcode)) { int sreg_base = ins->sregs [0]; InterpInst *def = td->locals [sreg_base].def;