Skip to content

Commit 191485c

Browse files
[release/5.0-preview8] ARM64 HWIntrinsic usage in System.Text.Unicode (#39738)
* AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar (#39050) * AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar * Move using directive outside #if. Improve Arm64MoveMask. * Change overloads * UInt64 in Arm64MoveMask * Build error implicit conversion fix * Rename method and use simpler version * Use ShiftRightArithmetic instead of CompareEqual + And. * Remove unnecessary comment * Add missing shims causing Linux build to fail * AdvSimd support for System.Text.Unicode.Utf8Utility.TranscodeToUtf8 (#39041) * AdvSimd support for System.Text.Unicode.Utf8Utility.TranscodeToUtf8 * Re-add using to prevent build failure. Add AdvSimd equivalent operation to TestZ. * Inverted condition * Address IsSupported order, improve ExtractNarrowingSaturate usage * Rename source to result, second argument utf16Data * Improve CompareTest * Add shims causing failures in Linux * Use unsigned version of ExtractNarrowingSaturate, avoid using MinAcross and use MaxPairwise instead * Missing support check for Sse2.X64 * Add missing case for AdvSimd * Use MinPairwise for short * AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte (#38653) * AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte * Move comment to the top, add shims. * Little endian checks * Use custom MoveMask method for AdvSimd * Address suggestions to improve the AdvSimdMoveMask method * Define initialMask outside MoveMask method * UInt64 in Arm64MoveMask * Add unit test case to verify intrinsics improvement * Avoid casting to smaller integer type * Typo and comment * Use ShiftRightArithmetic instead of CompareEqual + And. Remove test case causing other unit tests to fail. 
* Use AddPairwise version of GetNonAsciiBytes * Add missing shims causing Linux build to fail * Simplify GetNonAsciiBytes to only one AddPairwise call, shorter bitmask * Respect data type returned by masking method * Address suggestions - assert TrailingZeroCount and bring back uint mask * Trailing zeroes in AdvSimd need to be divided by 4, and total number should not be larger than 16 * Avoid declaring static field which causes PNSE in Utf8String.Experimental (S.P.Corelib code is reused there and compiled for NetStandard) * Prefer using nuint for BitOperations.TrailingZeroCount * Fix build failure in net472 debug AdvSimd Utf16Utility (#39652) Co-authored-by: Carlos Sanchez Lopez <[email protected]>
1 parent 498c798 commit 191485c

File tree

4 files changed

+181
-41
lines changed

4 files changed

+181
-41
lines changed

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Diagnostics;
55
using System.Runtime.CompilerServices;
66
using System.Runtime.Intrinsics;
7+
using System.Runtime.Intrinsics.Arm;
78
using System.Runtime.Intrinsics.X86;
89
using System.Numerics;
910

@@ -78,7 +79,7 @@ static Utf16Utility()
7879
long tempUtf8CodeUnitCountAdjustment = 0;
7980
int tempScalarCountAdjustment = 0;
8081

81-
if (Sse2.IsSupported)
82+
if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported)
8283
{
8384
if (inputLength >= Vector128<ushort>.Count)
8485
{
@@ -87,17 +88,34 @@ static Utf16Utility()
8788
Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
8889
Vector128<ushort> vectorZero = Vector128<ushort>.Zero;
8990

91+
Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
92+
Vector128.Create(0x80402010_08040201).AsByte() :
93+
Vector128.Create(0x01020408_10204080).AsByte();
94+
9095
do
9196
{
92-
Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
93-
uint mask;
97+
Vector128<ushort> utf16Data;
98+
if (AdvSimd.Arm64.IsSupported)
99+
{
100+
utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned
101+
}
102+
else
103+
{
104+
utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
105+
}
94106

95107
Vector128<ushort> charIsNonAscii;
96-
if (Sse41.IsSupported)
108+
109+
if (AdvSimd.Arm64.IsSupported)
110+
{
111+
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
112+
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
113+
charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
114+
}
115+
else if (Sse41.IsSupported)
97116
{
98117
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
99118
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
100-
101119
charIsNonAscii = Sse41.Min(utf16Data, vector0080);
102120
}
103121
else
@@ -111,16 +129,34 @@ static Utf16Utility()
111129

112130
#if DEBUG
113131
// Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
114-
uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
132+
uint debugMask;
133+
if (AdvSimd.Arm64.IsSupported)
134+
{
135+
debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte(), bitMask128);
136+
}
137+
else
138+
{
139+
debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
140+
}
115141
Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'.");
116142
#endif // DEBUG
117143

118144
// Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding
119145
// input was 0x0800 <= [value]. This also handles the missing range a few lines above.
120146

121-
Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
147+
Vector128<ushort> charIsThreeByteUtf8Encoded;
148+
uint mask;
122149

123-
mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
150+
if (AdvSimd.IsSupported)
151+
{
152+
charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
153+
mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte(), bitMask128);
154+
}
155+
else
156+
{
157+
charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
158+
mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
159+
}
124160

125161
// Each even bit of mask will be 1 only if the char was >= 0x0080,
126162
// and each odd bit of mask will be 1 only if the char was >= 0x0800.
@@ -151,9 +187,16 @@ static Utf16Utility()
151187
// Surrogates need to be special-cased for two reasons: (a) we need
152188
// to account for the fact that we over-counted in the addition above;
153189
// and (b) they require separate validation.
154-
155-
utf16Data = Sse2.Add(utf16Data, vectorA800);
156-
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
190+
if (AdvSimd.Arm64.IsSupported)
191+
{
192+
utf16Data = AdvSimd.Add(utf16Data, vectorA800);
193+
mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte(), bitMask128);
194+
}
195+
else
196+
{
197+
utf16Data = Sse2.Add(utf16Data, vectorA800);
198+
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
199+
}
157200

158201
if (mask != 0)
159202
{
@@ -178,7 +221,15 @@ static Utf16Utility()
178221
// Since 'mask' already has 00 in these positions (since the corresponding char
179222
// wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.
180223

181-
uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
224+
uint mask2;
225+
if (AdvSimd.Arm64.IsSupported)
226+
{
227+
mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte(), bitMask128);
228+
}
229+
else
230+
{
231+
mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
232+
}
182233

183234
// 'lowSurrogatesMask' has its bits occur in pairs:
184235
// - 01 if the corresponding char was a low surrogate char,
@@ -433,5 +484,20 @@ static Utf16Utility()
433484
scalarCountAdjustment = tempScalarCountAdjustment;
434485
return pInputBuffer;
435486
}
487+
488+
/// <summary>
/// AdvSimd (ARM64) helper that condenses the most significant bit of every byte of
/// <paramref name="value"/> into a 16-bit mask. The callers in this file use it in the
/// AdvSimd branches where the SSE2 branches call <c>Sse2.MoveMask</c>.
/// </summary>
/// <param name="value">The 16-byte vector whose per-byte high bits are collected.</param>
/// <param name="bitMask128">
/// Per-lane bit selectors supplied by the caller. With the little-endian constant built by the
/// caller (0x80402010_08040201 in each 64-bit half), lane i of each half selects bit (i % 8),
/// so the returned bit i corresponds to byte i of <paramref name="value"/>.
/// NOTE(review): any other selector pattern changes the bit-to-lane mapping.
/// </param>
/// <returns>The low 16 bits of the reduced vector, widened to <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
489+
private static uint GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128)
490+
{
491+
// This helper uses AdvSimd.Arm64-only intrinsics (AddPairwise on 128-bit vectors).
Debug.Assert(AdvSimd.Arm64.IsSupported);
492+
493+
// Arithmetic shift by 7 smears each byte's sign bit: 0xFF if the high bit was set, else 0x00.
Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
494+
// Keep only the selector bit of each flagged lane; unflagged lanes become zero.
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128);
495+
496+
// Pairwise-add the vector with itself three times (16 -> 8 -> 4 -> 2 partial sums) so that
// all flags end up in the first two bytes of the vector. Each lane contributes at most its
// bitMask128 byte; with the caller's one-distinct-bit-per-lane mask the sums cannot carry,
// so adding is equivalent to OR-ing.
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
497+
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
498+
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
499+
// Element 0 viewed as UInt16 is made of the first two bytes, i.e. the combined 16-bit mask.
return extractedBits.AsUInt16().ToScalar();
500+
}
436502
}
437503
}

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Numerics;
77
using System.Runtime.CompilerServices;
88
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.Arm;
910
using System.Runtime.Intrinsics.X86;
1011

1112
#if SYSTEM_PRIVATE_CORELIB
@@ -882,7 +883,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
882883
// is not enabled.
883884

884885
Unsafe.SkipInit(out Vector128<short> nonAsciiUtf16DataMask);
885-
if (Sse41.X64.IsSupported)
886+
if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
886887
{
887888
nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char
888889
}
@@ -940,10 +941,8 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
940941
uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2;
941942
uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining);
942943

943-
if (Sse41.X64.IsSupported)
944+
if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
944945
{
945-
Debug.Assert(BitConverter.IsLittleEndian, "SSE41 requires little-endian.");
946-
947946
// Try reading and writing 8 elements per iteration.
948947
uint maxIters = minElementsRemaining / 8;
949948
ulong possibleNonAsciiQWord;
@@ -952,14 +951,30 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
952951
for (i = 0; (uint)i < maxIters; i++)
953952
{
954953
utf16Data = Unsafe.ReadUnaligned<Vector128<short>>(pInputBuffer);
955-
if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))
954+
955+
if (AdvSimd.IsSupported)
956956
{
957-
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
958-
}
957+
Vector128<short> isUtf16DataNonAscii = AdvSimd.CompareTest(utf16Data, nonAsciiUtf16DataMask);
958+
bool hasNonAsciiDataInVector = AdvSimd.Arm64.MinPairwise(isUtf16DataNonAscii, isUtf16DataNonAscii).AsUInt64().ToScalar() != 0;
959959

960-
// narrow and write
960+
if (hasNonAsciiDataInVector)
961+
{
962+
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
963+
}
961964

962-
Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
965+
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
966+
AdvSimd.Store(pOutputBuffer, lower);
967+
}
968+
else
969+
{
970+
if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))
971+
{
972+
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
973+
}
974+
975+
// narrow and write
976+
Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
977+
}
963978

964979
pInputBuffer += 8;
965980
pOutputBuffer += 8;
@@ -978,7 +993,16 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
978993
}
979994

980995
utf16Data = Vector128.CreateScalarUnsafe(possibleNonAsciiQWord).AsInt16();
981-
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
996+
997+
if (AdvSimd.IsSupported)
998+
{
999+
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
1000+
AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
1001+
}
1002+
else
1003+
{
1004+
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
1005+
}
9821006

9831007
pInputBuffer += 4;
9841008
pOutputBuffer += 4;
@@ -990,7 +1014,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
9901014
LoopTerminatedDueToNonAsciiDataInVectorLocal:
9911015

9921016
outputBytesRemaining -= 8 * i;
993-
possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());
1017+
1018+
if (Sse2.X64.IsSupported)
1019+
{
1020+
possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());
1021+
}
1022+
else
1023+
{
1024+
possibleNonAsciiQWord = utf16Data.AsUInt64().ToScalar();
1025+
}
9941026

9951027
// Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
9961028
// then check whether it's all-ASCII. If so, narrow and write to the destination
@@ -1000,7 +1032,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
10001032

10011033
if (Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)) // all chars in first QWORD are ASCII
10021034
{
1003-
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
1035+
if (AdvSimd.IsSupported)
1036+
{
1037+
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
1038+
AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
1039+
}
1040+
else
1041+
{
1042+
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
1043+
}
10041044
pInputBuffer += 4;
10051045
pOutputBuffer += 4;
10061046
outputBytesRemaining -= 4;

0 commit comments

Comments
 (0)