6
6
using System . Numerics ;
7
7
using System . Runtime . CompilerServices ;
8
8
using System . Runtime . Intrinsics ;
9
+ using System . Runtime . Intrinsics . Arm ;
9
10
using System . Runtime . Intrinsics . X86 ;
10
11
11
12
#if SYSTEM_PRIVATE_CORELIB
@@ -882,7 +883,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
882
883
// is not enabled.
883
884
884
885
Unsafe . SkipInit ( out Vector128 < short > nonAsciiUtf16DataMask ) ;
885
- if ( Sse41 . X64 . IsSupported )
886
+ if ( Sse41 . X64 . IsSupported || ( AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian ) )
886
887
{
887
888
nonAsciiUtf16DataMask = Vector128 . Create ( unchecked ( ( short ) 0xFF80 ) ) ; // mask of non-ASCII bits in a UTF-16 char
888
889
}
@@ -940,10 +941,8 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
940
941
uint inputCharsRemaining = ( uint ) ( pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer ) + 2 ;
941
942
uint minElementsRemaining = ( uint ) Math . Min ( inputCharsRemaining , outputBytesRemaining ) ;
942
943
943
- if ( Sse41 . X64 . IsSupported )
944
+ if ( Sse41 . X64 . IsSupported || ( AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian ) )
944
945
{
945
- Debug . Assert ( BitConverter . IsLittleEndian , "SSE41 requires little-endian." ) ;
946
-
947
946
// Try reading and writing 8 elements per iteration.
948
947
uint maxIters = minElementsRemaining / 8 ;
949
948
ulong possibleNonAsciiQWord ;
@@ -952,14 +951,30 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
952
951
for ( i = 0 ; ( uint ) i < maxIters ; i ++ )
953
952
{
954
953
utf16Data = Unsafe . ReadUnaligned < Vector128 < short > > ( pInputBuffer ) ;
955
- if ( ! Sse41 . TestZ ( utf16Data , nonAsciiUtf16DataMask ) )
954
+
955
+ if ( AdvSimd . IsSupported )
956
956
{
957
- goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
958
- }
957
+ Vector128 < short > isUtf16DataNonAscii = AdvSimd . CompareTest ( utf16Data , nonAsciiUtf16DataMask ) ;
958
+ bool hasNonAsciiDataInVector = AdvSimd . Arm64 . MinPairwise ( isUtf16DataNonAscii , isUtf16DataNonAscii ) . AsUInt64 ( ) . ToScalar ( ) != 0 ;
959
959
960
- // narrow and write
960
+ if ( hasNonAsciiDataInVector )
961
+ {
962
+ goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
963
+ }
961
964
962
- Sse2 . StoreScalar ( ( ulong * ) pOutputBuffer /* unaligned */ , Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt64 ( ) ) ;
965
+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
966
+ AdvSimd . Store ( pOutputBuffer , lower ) ;
967
+ }
968
+ else
969
+ {
970
+ if ( ! Sse41 . TestZ ( utf16Data , nonAsciiUtf16DataMask ) )
971
+ {
972
+ goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
973
+ }
974
+
975
+ // narrow and write
976
+ Sse2 . StoreScalar ( ( ulong * ) pOutputBuffer /* unaligned */ , Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt64 ( ) ) ;
977
+ }
963
978
964
979
pInputBuffer += 8 ;
965
980
pOutputBuffer += 8 ;
@@ -978,7 +993,16 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
978
993
}
979
994
980
995
utf16Data = Vector128 . CreateScalarUnsafe ( possibleNonAsciiQWord ) . AsInt16 ( ) ;
981
- Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
996
+
997
+ if ( AdvSimd . IsSupported )
998
+ {
999
+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
1000
+ AdvSimd . StoreSelectedScalar ( ( uint * ) pOutputBuffer , lower . AsUInt32 ( ) , 0 ) ;
1001
+ }
1002
+ else
1003
+ {
1004
+ Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1005
+ }
982
1006
983
1007
pInputBuffer += 4 ;
984
1008
pOutputBuffer += 4 ;
@@ -990,7 +1014,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
990
1014
LoopTerminatedDueToNonAsciiDataInVectorLocal :
991
1015
992
1016
outputBytesRemaining -= 8 * i ;
993
- possibleNonAsciiQWord = Sse2 . X64 . ConvertToUInt64 ( utf16Data . AsUInt64 ( ) ) ;
1017
+
1018
+ if ( Sse2 . X64 . IsSupported )
1019
+ {
1020
+ possibleNonAsciiQWord = Sse2 . X64 . ConvertToUInt64 ( utf16Data . AsUInt64 ( ) ) ;
1021
+ }
1022
+ else
1023
+ {
1024
+ possibleNonAsciiQWord = utf16Data . AsUInt64 ( ) . ToScalar ( ) ;
1025
+ }
994
1026
995
1027
// Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
996
1028
// then check whether it's all-ASCII. If so, narrow and write to the destination
@@ -1000,7 +1032,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
1000
1032
1001
1033
if ( Utf16Utility . AllCharsInUInt64AreAscii ( possibleNonAsciiQWord ) ) // all chars in first QWORD are ASCII
1002
1034
{
1003
- Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1035
+ if ( AdvSimd . IsSupported )
1036
+ {
1037
+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
1038
+ AdvSimd . StoreSelectedScalar ( ( uint * ) pOutputBuffer , lower . AsUInt32 ( ) , 0 ) ;
1039
+ }
1040
+ else
1041
+ {
1042
+ Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1043
+ }
1004
1044
pInputBuffer += 4 ;
1005
1045
pOutputBuffer += 4 ;
1006
1046
outputBytesRemaining -= 4 ;
0 commit comments