@@ -1902,8 +1902,20 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
1902
1902
{
1903
1903
GenTree* lArg = call->gtArgs .GetUserArgByIndex (0 )->GetNode ();
1904
1904
GenTree* rArg = call->gtArgs .GetUserArgByIndex (1 )->GetNode ();
1905
- // TODO: Add SIMD path for [16..128] via GT_HWINTRINSIC nodes
1906
- if (cnsSize <= 16 )
1905
+
1906
+ ssize_t MaxUnrollSize = 16 ;
1907
+ #ifdef FEATURE_SIMD
1908
+ MaxUnrollSize = 32 ;
1909
+ #ifdef TARGET_XARCH
1910
+ if (comp->compOpportunisticallyDependsOn (InstructionSet_Vector256))
1911
+ {
1912
+ MaxUnrollSize = 64 ;
1913
+ }
1914
+ // TODO-XARCH-AVX512: Consider enabling this for AVX512
1915
+ #endif
1916
+ #endif
1917
+
1918
+ if (cnsSize <= MaxUnrollSize)
1907
1919
{
1908
1920
unsigned loadWidth = 1 << BitOperations::Log2 ((unsigned )cnsSize);
1909
1921
var_types loadType;
@@ -1919,11 +1931,25 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
1919
1931
{
1920
1932
loadType = TYP_INT;
1921
1933
}
1922
- else if ((loadWidth == 8 ) || (loadWidth == 16 ))
1934
+ else if ((loadWidth == 8 ) || (MaxUnrollSize == 16 ))
1923
1935
{
1924
1936
loadWidth = 8 ;
1925
1937
loadType = TYP_LONG;
1926
1938
}
1939
+ #ifdef FEATURE_SIMD
1940
+ else if ((loadWidth == 16 ) || (MaxUnrollSize == 32 ))
1941
+ {
1942
+ loadWidth = 16 ;
1943
+ loadType = TYP_SIMD16;
1944
+ }
1945
+ #ifdef TARGET_XARCH
1946
+ else if ((loadWidth == 32 ) || (MaxUnrollSize == 64 ))
1947
+ {
1948
+ loadWidth = 32 ;
1949
+ loadType = TYP_SIMD32;
1950
+ }
1951
+ #endif // TARGET_XARCH
1952
+ #endif // FEATURE_SIMD
1927
1953
else
1928
1954
{
1929
1955
unreached ();
@@ -1932,8 +1958,26 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
1932
1958
1933
1959
GenTree* result = nullptr ;
1934
1960
1961
// newBinaryOp: local helper that builds a two-operand node for `oper`, picking the
// node factory based on whether the first operand is a SIMD value.
//
// Parameters:
//    comp - compiler instance used to allocate the new node
//    oper - operator to build (the visible call sites pass GT_EQ, GT_XOR, GT_OR and GT_ADD)
//    type - result type; only forwarded on the non-SIMD path (asserted to be TYP_INT for
//           SIMD comparisons)
//    op1  - first operand; its type decides which path is taken
//    op2  - second operand
//
// Returns:
//    The newly created node. The helper does not insert it into the block range; the
//    visible call sites do that separately.
//
// The lambda captures nothing, which is why `comp` is passed in explicitly.
+ auto newBinaryOp = [](Compiler* comp, genTreeOps oper, var_types type, GenTree* op1,
1962
+ GenTree* op2) -> GenTree* {
1963
+ #ifdef FEATURE_SIMD
1964
// Only op1 is inspected here. NOTE(review): op2 is presumably expected to have the same SIMD type - confirm.
+ if (varTypeIsSIMD (op1))
1965
+ {
1966
// Comparison operators go through the "cmp-op-all" SIMD factory.
+ if (GenTree::OperIsCmpCompare (oper))
1967
+ {
1968
// The requested scalar type must be TYP_INT; the SIMD compare node itself is created as TYP_BOOL.
+ assert (type == TYP_INT);
1969
+ return comp->gtNewSimdCmpOpAllNode (oper, TYP_BOOL, op1, op2, CORINFO_TYPE_NATIVEUINT,
1970
+ genTypeSize (op1));
1971
+ }
1972
// Any other operator becomes a SIMD binary op typed as op1; the `type` argument is not used here.
+ return comp->gtNewSimdBinOpNode (oper, op1->TypeGet (), op1, op2, CORINFO_TYPE_NATIVEUINT,
1973
+ genTypeSize (op1));
1974
+ }
1975
+ #endif
1976
// Non-SIMD operands (or builds without FEATURE_SIMD): plain operator node of the requested type.
+ return comp->gtNewOperNode (oper, type, op1, op2);
1977
+ };
1978
+
1935
1979
// loadWidth == cnsSize means a single load is enough for both args
1936
- if (( loadWidth == (unsigned )cnsSize) && (loadWidth <= 8 ) )
1980
+ if (loadWidth == (unsigned )cnsSize)
1937
1981
{
1938
1982
// We're going to emit something like the following:
1939
1983
//
@@ -1943,7 +1987,7 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
1943
1987
//
1944
1988
GenTree* lIndir = comp->gtNewIndir (loadType, lArg);
1945
1989
GenTree* rIndir = comp->gtNewIndir (loadType, rArg);
1946
- result = comp-> gtNewOperNode ( GT_EQ, TYP_INT, lIndir, rIndir);
1990
+ result = newBinaryOp (comp, GT_EQ, TYP_INT, lIndir, rIndir);
1947
1991
1948
1992
BlockRange ().InsertAfter (lArg, lIndir);
1949
1993
BlockRange ().InsertAfter (rArg, rIndir);
@@ -1990,17 +2034,17 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
1990
2034
//
1991
2035
GenTree* l1Indir = comp->gtNewIndir (loadType, lArgUse.Def ());
1992
2036
GenTree* r1Indir = comp->gtNewIndir (loadType, rArgUse.Def ());
1993
- GenTree* lXor = comp-> gtNewOperNode ( GT_XOR, actualLoadType, l1Indir, r1Indir);
2037
+ GenTree* lXor = newBinaryOp (comp, GT_XOR, actualLoadType, l1Indir, r1Indir);
1994
2038
GenTree* l2Offs = comp->gtNewIconNode (cnsSize - loadWidth, TYP_I_IMPL);
1995
- GenTree* l2AddOffs = comp-> gtNewOperNode ( GT_ADD, lArg->TypeGet (), lArgClone, l2Offs);
2039
+ GenTree* l2AddOffs = newBinaryOp (comp, GT_ADD, lArg->TypeGet (), lArgClone, l2Offs);
1996
2040
GenTree* l2Indir = comp->gtNewIndir (loadType, l2AddOffs);
1997
2041
GenTree* r2Offs = comp->gtCloneExpr (l2Offs); // offset is the same
1998
- GenTree* r2AddOffs = comp-> gtNewOperNode ( GT_ADD, rArg->TypeGet (), rArgClone, r2Offs);
2042
+ GenTree* r2AddOffs = newBinaryOp (comp, GT_ADD, rArg->TypeGet (), rArgClone, r2Offs);
1999
2043
GenTree* r2Indir = comp->gtNewIndir (loadType, r2AddOffs);
2000
- GenTree* rXor = comp-> gtNewOperNode ( GT_XOR, actualLoadType, l2Indir, r2Indir);
2001
- GenTree* resultOr = comp-> gtNewOperNode ( GT_OR, actualLoadType, lXor, rXor);
2002
- GenTree* zeroCns = comp->gtNewIconNode ( 0 , actualLoadType);
2003
- result = comp-> gtNewOperNode ( GT_EQ, TYP_INT, resultOr, zeroCns);
2044
+ GenTree* rXor = newBinaryOp (comp, GT_XOR, actualLoadType, l2Indir, r2Indir);
2045
+ GenTree* resultOr = newBinaryOp (comp, GT_OR, actualLoadType, lXor, rXor);
2046
+ GenTree* zeroCns = comp->gtNewZeroConNode ( actualLoadType);
2047
+ result = newBinaryOp (comp, GT_EQ, TYP_INT, resultOr, zeroCns);
2004
2048
2005
2049
BlockRange ().InsertAfter (rArgClone, l1Indir, r1Indir, l2Offs, l2AddOffs);
2006
2050
BlockRange ().InsertAfter (l2AddOffs, l2Indir, r2Offs, r2AddOffs, r2Indir);
0 commit comments