Skip to content

Commit ae67560

Browse files
committed
Define the new AVX512 instructions to be exposed
1 parent da1d9da commit ae67560

File tree

14 files changed

+340
-130
lines changed

14 files changed

+340
-130
lines changed

src/coreclr/jit/codegeninterface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ class CodeGenInterface
179179
public:
180180
static bool instIsFP(instruction ins);
181181
#if defined(TARGET_XARCH)
182-
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
182+
bool instIsEmbeddedBroadcastCompatible(instruction ins);
183183
static bool instIsEmbeddedMaskingCompatible(instruction ins);
184184

185185
static unsigned instInputSize(instruction ins);

src/coreclr/jit/codegenxarch.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9552,10 +9552,10 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
95529552
theEmitter->emitIns_R_R(INS_vucomxss, EA_16BYTE, REG_XMM0, REG_XMM1);
95539553

95549554
// VMOVD
9555-
theEmitter->emitIns_R_R(INS_vmovd, EA_16BYTE, REG_XMM0, REG_XMM1);
9555+
theEmitter->emitIns_R_R(INS_vmovd_simd, EA_16BYTE, REG_XMM0, REG_XMM1);
95569556

95579557
// VMOVW
9558-
theEmitter->emitIns_R_R(INS_vmovw, EA_16BYTE, REG_XMM0, REG_XMM1);
9558+
theEmitter->emitIns_R_R(INS_vmovw_simd, EA_16BYTE, REG_XMM0, REG_XMM1);
95599559
}
95609560

95619561
/*****************************************************************************

src/coreclr/jit/emit.h

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -663,8 +663,8 @@ class emitter
663663
private:
664664
// The assembly instruction
665665
#if defined(TARGET_XARCH)
666-
static_assert_no_msg(INS_count <= 1024);
667-
instruction _idIns : 10;
666+
static_assert_no_msg(INS_count <= 2048);
667+
instruction _idIns : 11;
668668
#define MAX_ENCODED_SIZE 15
669669
#elif defined(TARGET_ARM64)
670670
#define INSTR_ENCODED_SIZE 4
@@ -752,8 +752,8 @@ class emitter
752752

753753
////////////////////////////////////////////////////////////////////////
754754
// Space taken up to here:
755-
// x86: 17 bits
756-
// amd64: 17 bits
755+
// x86: 18 bits
756+
// amd64: 18 bits
757757
// arm: 16 bits
758758
// arm64: 21 bits
759759
// loongarch64: 14 bits
@@ -786,21 +786,22 @@ class emitter
786786

787787
// The idReg1 and idReg2 fields hold the first and second register
788788
// operand(s), whenever these are present. Note that currently the
789-
// size of these fields is 6 bits on all targets, and care needs to
790-
// be taken to make sure all of these fields stay reasonably packed.
789+
// size of these fields is 6 bits on most targets, but 7 on others,
790+
// and care needs to be taken to make sure all of these fields stay
791+
// reasonably packed.
791792

792793
// Note that we use the _idReg1 and _idReg2 fields to hold
793794
// the live gcrefReg mask for the call instructions on x86/x64
794795
//
795-
#if !defined(TARGET_AMD64)
796+
#if !defined(TARGET_XARCH)
796797
regNumber _idReg1 : REGNUM_BITS; // register num
797798
regNumber _idReg2 : REGNUM_BITS;
798799
#endif
799800

800801
////////////////////////////////////////////////////////////////////////
801802
// Space taken up to here:
802-
// x86: 38 bits
803-
// amd64: 26 bits
803+
// x86: 27 bits
804+
// amd64: 27 bits
804805
// arm: 32 bits
805806
// arm64: 46 bits
806807
// loongarch64: 28 bits
@@ -818,7 +819,7 @@ class emitter
818819
unsigned _idCustom1 : 1;
819820
unsigned _idCustom2 : 1;
820821
unsigned _idCustom3 : 1;
821-
#if defined(TARGET_AMD64)
822+
#if defined(TARGET_XARCH)
822823
regNumber _idReg1 : REGNUM_BITS; // register num
823824
regNumber _idReg2 : REGNUM_BITS;
824825
#endif
@@ -888,8 +889,8 @@ class emitter
888889

889890
////////////////////////////////////////////////////////////////////////
890891
// Space taken up to here:
891-
// x86: 49 bits
892-
// amd64: 51 bits
892+
// x86: 50 bits
893+
// amd64: 52 bits
893894
// arm: 48 bits
894895
// arm64: 55 bits
895896
// loongarch64: 46 bits
@@ -906,8 +907,10 @@ class emitter
906907
#define ID_EXTRA_BITFIELD_BITS (23)
907908
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
908909
#define ID_EXTRA_BITFIELD_BITS (14)
909-
#elif defined(TARGET_XARCH)
910-
#define ID_EXTRA_BITFIELD_BITS (19)
910+
#elif defined(TARGET_X86)
911+
#define ID_EXTRA_BITFIELD_BITS (18)
912+
#elif defined(TARGET_AMD64)
913+
#define ID_EXTRA_BITFIELD_BITS (20)
911914
#else
912915
#error Unsupported or unset target architecture
913916
#endif
@@ -941,8 +944,8 @@ class emitter
941944

942945
////////////////////////////////////////////////////////////////////////
943946
// Space taken up to here (with/without prev offset, assuming host==target):
944-
// x86: 55/51 bits
945-
// amd64: 58/53 bits
947+
// x86: 56/52 bits
948+
// amd64: 59/54 bits
946949
// arm: 54/50 bits
947950
// arm64: 62/57 bits
948951
// loongarch64: 53/48 bits
@@ -953,12 +956,11 @@ class emitter
953956
/* Use whatever bits are left over for small constants */
954957

955958
#define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
956-
C_ASSERT(ID_BIT_SMALL_CNS > 0);
957959

958960
////////////////////////////////////////////////////////////////////////
959961
// Small constant size (with/without prev offset, assuming host==target):
960-
// x86: 10/14 bits
961-
// amd64: 9/14 bits
962+
// x86: 8/12 bits
963+
// amd64: 5/10 bits
962964
// arm: 10/14 bits
963965
// arm64: 2/7 bits
964966
// loongarch64: 11/16 bits

src/coreclr/jit/emitxarch.cpp

Lines changed: 63 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -90,31 +90,7 @@ bool emitter::IsApxOnlyInstruction(instruction ins)
9090
return (ins >= INS_FIRST_APX_INSTRUCTION) && (ins <= INS_LAST_APX_INSTRUCTION);
9191
}
9292

93-
bool emitter::IsFMAInstruction(instruction ins)
94-
{
95-
return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
96-
}
97-
98-
bool emitter::IsAVXVNNIInstruction(instruction ins)
99-
{
100-
return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION);
101-
}
102-
103-
bool emitter::IsBMIInstruction(instruction ins)
104-
{
105-
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
106-
}
107-
108-
//------------------------------------------------------------------------
109-
// IsPermuteVar2xInstruction: Is this an Avx512 permutex2var instruction?
110-
//
111-
// Arguments:
112-
// ins - The instruction to check.
113-
//
114-
// Returns:
115-
// `true` if it is a permutex2var instruction.
116-
//
117-
bool emitter::IsPermuteVar2xInstruction(instruction ins)
93+
bool emitter::Is3OpRmwInstruction(instruction ins)
11894
{
11995
switch (ins)
12096
{
@@ -136,11 +112,18 @@ bool emitter::IsPermuteVar2xInstruction(instruction ins)
136112

137113
default:
138114
{
139-
return false;
115+
return ((ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION)) ||
116+
((ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION)) ||
117+
((ins >= INS_FIRST_AVXIFMA_INSTRUCTION) && (ins <= INS_LAST_AVXIFMA_INSTRUCTION));
140118
}
141119
}
142120
}
143121

122+
bool emitter::IsBMIInstruction(instruction ins)
123+
{
124+
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
125+
}
126+
144127
//------------------------------------------------------------------------
145128
// IsKMOVInstruction: Is this an Avx512 KMOV instruction?
146129
//
@@ -255,18 +238,6 @@ regNumber emitter::getSseShiftRegNumber(instruction ins)
255238
}
256239
}
257240

258-
bool emitter::HasVexEncoding(instruction ins)
259-
{
260-
insFlags flags = CodeGenInterface::instInfo[ins];
261-
return (flags & Encoding_VEX) != 0;
262-
}
263-
264-
bool emitter::HasEvexEncoding(instruction ins)
265-
{
266-
insFlags flags = CodeGenInterface::instInfo[ins];
267-
return (flags & Encoding_EVEX) != 0;
268-
}
269-
270241
bool emitter::HasRex2Encoding(instruction ins)
271242
{
272243
insFlags flags = CodeGenInterface::instInfo[ins];
@@ -291,7 +262,29 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const
291262
{
292263
return false;
293264
}
294-
return HasVexEncoding(ins);
265+
266+
switch (ins)
267+
{
268+
case INS_vpdpbusd:
269+
case INS_vpdpwssd:
270+
case INS_vpdpbusds:
271+
case INS_vpdpwssds:
272+
{
273+
return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNI);
274+
}
275+
276+
case INS_vpmadd52huq:
277+
case INS_vpmadd52luq:
278+
{
279+
return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXIFMA);
280+
}
281+
282+
default:
283+
{
284+
insFlags flags = CodeGenInterface::instInfo[ins];
285+
return (flags & Encoding_VEX) != 0;
286+
}
287+
}
295288
}
296289

297290
//------------------------------------------------------------------------
@@ -312,14 +305,37 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
312305

313306
switch (ins)
314307
{
308+
case INS_aesdec:
309+
case INS_aesdeclast:
310+
case INS_aesenc:
311+
case INS_aesenclast:
312+
{
313+
return emitComp->compOpportunisticallyDependsOn(InstructionSet_AES_V256);
314+
}
315+
315316
case INS_pclmulqdq:
316317
{
317318
return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
318319
}
319320

321+
case INS_vpdpbusd:
322+
case INS_vpdpwssd:
323+
case INS_vpdpbusds:
324+
case INS_vpdpwssds:
325+
{
326+
return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX512VNNI);
327+
}
328+
329+
case INS_vpmadd52huq:
330+
case INS_vpmadd52luq:
331+
{
332+
return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX512IFMA);
333+
}
334+
320335
default:
321336
{
322-
return HasEvexEncoding(ins);
337+
insFlags flags = CodeGenInterface::instInfo[ins];
338+
return (flags & Encoding_EVEX) != 0;
323339
}
324340
}
325341
}
@@ -2053,7 +2069,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
20532069

20542070
if (IsApxExtendedEvexInstruction(ins))
20552071
{
2056-
if (!HasEvexEncoding(ins))
2072+
if (!IsEvexEncodableInstruction(ins))
20572073
{
20582074
// Legacy-promoted instructions are not labeled with Encoding_EVEX.
20592075
code |= MAP4_IN_BYTE_EVEX_PREFIX;
@@ -10008,7 +10024,7 @@ void emitter::emitIns_SIMD_R_R_R_A(instruction ins,
1000810024
GenTreeIndir* indir,
1000910025
insOpts instOptions)
1001010026
{
10011-
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
10027+
assert(Is3OpRmwInstruction(ins));
1001210028
assert(UseSimdEncoding());
1001310029

1001410030
// Ensure we aren't overwriting op2
@@ -10041,7 +10057,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
1004110057
int offs,
1004210058
insOpts instOptions)
1004310059
{
10044-
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
10060+
assert(Is3OpRmwInstruction(ins));
1004510061
assert(UseSimdEncoding());
1004610062

1004710063
// Ensure we aren't overwriting op2
@@ -10072,7 +10088,7 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins,
1007210088
regNumber op3Reg,
1007310089
insOpts instOptions)
1007410090
{
10075-
if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins))
10091+
if (Is3OpRmwInstruction(ins))
1007610092
{
1007710093
assert(UseSimdEncoding());
1007810094

@@ -10159,7 +10175,7 @@ void emitter::emitIns_SIMD_R_R_R_S(instruction ins,
1015910175
int offs,
1016010176
insOpts instOptions)
1016110177
{
10162-
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
10178+
assert(Is3OpRmwInstruction(ins));
1016310179
assert(UseSimdEncoding());
1016410180

1016510181
// Ensure we aren't overwriting op2
@@ -20883,8 +20899,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
2088320899
case INS_vmovdqu8:
2088420900
case INS_vmovdqu16:
2088520901
case INS_vmovdqu64:
20886-
case INS_vmovd:
20887-
case INS_vmovw:
20902+
case INS_vmovd_simd:
20903+
case INS_vmovw_simd:
2088820904
case INS_movaps:
2088920905
case INS_movups:
2089020906
case INS_movapd:

src/coreclr/jit/emitxarch.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,15 @@ static bool IsSSEInstruction(instruction ins);
120120
static bool IsSSEOrAVXInstruction(instruction ins);
121121
static bool IsAVXOnlyInstruction(instruction ins);
122122
static bool IsAvx512OnlyInstruction(instruction ins);
123-
static bool IsFMAInstruction(instruction ins);
124-
static bool IsPermuteVar2xInstruction(instruction ins);
125123
static bool IsKMOVInstruction(instruction ins);
126-
static bool IsAVXVNNIInstruction(instruction ins);
124+
static bool Is3OpRmwInstruction(instruction ins);
127125
static bool IsBMIInstruction(instruction ins);
128126
static bool IsKInstruction(instruction ins);
129127
static bool IsKInstructionWithLBit(instruction ins);
130128
static bool IsApxOnlyInstruction(instruction ins);
131129

132130
static regNumber getBmiRegNumber(instruction ins);
133131
static regNumber getSseShiftRegNumber(instruction ins);
134-
static bool HasVexEncoding(instruction ins);
135-
static bool HasEvexEncoding(instruction ins);
136132
static bool HasRex2Encoding(instruction ins);
137133
static bool HasApxNdd(instruction ins);
138134
static bool HasApxNf(instruction ins);

src/coreclr/jit/gentree.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20591,15 +20591,15 @@ bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const
2059120591
// Return Value:
2059220592
// true if the intrinsic node lowering instruction has EVEX embedded broadcast support
2059320593
//
20594-
bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic() const
20594+
bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
2059520595
{
2059620596
if (OperIsHWIntrinsic())
2059720597
{
2059820598
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
2059920599
var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType();
2060020600
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr);
2060120601

20602-
if (CodeGenInterface::instIsEmbeddedBroadcastCompatible(ins))
20602+
if (comp->codeGen->instIsEmbeddedBroadcastCompatible(ins))
2060320603
{
2060420604
insTupleType tupleType = emitter::insTupleTypeInfo(ins);
2060520605

src/coreclr/jit/gentree.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,7 +1501,7 @@ struct GenTree
15011501
bool isRMWHWIntrinsic(Compiler* comp);
15021502
#if defined(TARGET_XARCH)
15031503
bool isEvexCompatibleHWIntrinsic(Compiler* comp) const;
1504-
bool isEmbeddedBroadcastCompatibleHWIntrinsic() const;
1504+
bool isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const;
15051505
#endif // TARGET_XARCH
15061506
bool isEmbeddedMaskingCompatibleHWIntrinsic() const;
15071507
#else
@@ -1526,7 +1526,7 @@ struct GenTree
15261526
return false;
15271527
}
15281528

1529-
bool isEmbeddedBroadcastCompatibleHWIntrinsic() const
1529+
bool isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
15301530
{
15311531
return false;
15321532
}

src/coreclr/jit/hwintrinsiclistxarch.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,9 +1043,9 @@ HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncationSaturatio
10431043
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
10441044
HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
10451045
HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
1046-
HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw, INS_vmovw, INS_vmovd, INS_vmovd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
1046+
HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
10471047
HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
1048-
HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw, INS_vmovw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
1048+
HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
10491049
#define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar
10501050

10511051
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************

0 commit comments

Comments
 (0)