Skip to content

Commit 035b16f

Browse files
anthonycaninoBruceForstall
authored and committed
[JIT] Enable conditional chaining for Intel APX (dotnet#111072)
* Enable conditional compare chaining for AMD64. * Reduce duplication from `optSwitchDetectLikely`. * Update src/coreclr/jit/lsrabuild.cpp Co-authored-by: Bruce Forstall <[email protected]> * Update src/coreclr/jit/lowerxarch.cpp Co-authored-by: Bruce Forstall <[email protected]> * Update src/coreclr/jit/lowerxarch.cpp Co-authored-by: Bruce Forstall <[email protected]> * Widen the potential candidates for ccmp folding. Also lifts GenConditionDesc into CodeGenInterface to better check which flag lowerings will produce multiple instructions. * Refactor some common code into lower.cpp. Some code will conflict with latest changes. I've squashed so we can discuss how to merge in properly. * Refactored common code out. * Review edits. * Fix build errors. * Formatting. --------- Co-authored-by: Bruce Forstall <[email protected]>
1 parent fed5b12 commit 035b16f

19 files changed

+463
-189
lines changed

src/coreclr/jit/codegen.h

Lines changed: 4 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,7 @@ class CodeGen final : public CodeGenInterface
969969
void genIntToFloatCast(GenTree* treeNode);
970970
void genCkfinite(GenTree* treeNode);
971971
void genCodeForCompare(GenTreeOp* tree);
972-
#ifdef TARGET_ARM64
972+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
973973
void genCodeForCCMP(GenTreeCCMP* ccmp);
974974
#endif
975975
void genCodeForSelect(GenTreeOp* select);
@@ -1708,53 +1708,13 @@ class CodeGen final : public CodeGenInterface
17081708
static insOpts ShiftOpToInsOpts(genTreeOps op);
17091709
#elif defined(TARGET_XARCH)
17101710
static instruction JumpKindToCmov(emitJumpKind condition);
1711+
static instruction JumpKindToCcmp(emitJumpKind condition);
1712+
static insOpts OptsFromCFlags(insCflags flags);
17111713
#endif
1712-
1713-
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1714-
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
1715-
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
1716-
// certain floating point conditions.
1717-
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
1718-
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
1719-
// and then jump if ZF is 1:
1720-
// JP fallThroughBlock
1721-
// JE jumpDestBlock
1722-
// fallThroughBlock:
1723-
// ...
1724-
// jumpDestBlock:
1725-
//
1726-
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
1727-
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
1728-
// pattern is used to encode the above:
1729-
// { EJ_jnp, GT_AND, EJ_je }
1730-
// { EJ_jp, GT_OR, EJ_jne }
1731-
//
1732-
// For more details check inst_JCC and inst_SETCC functions.
1733-
//
1734-
struct GenConditionDesc
1735-
{
1736-
emitJumpKind jumpKind1;
1737-
genTreeOps oper;
1738-
emitJumpKind jumpKind2;
1739-
char padTo4Bytes;
1740-
1741-
static const GenConditionDesc& Get(GenCondition condition)
1742-
{
1743-
assert(condition.GetCode() < ArrLen(map));
1744-
const GenConditionDesc& desc = map[condition.GetCode()];
1745-
assert(desc.jumpKind1 != EJ_NONE);
1746-
assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
1747-
assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
1748-
return desc;
1749-
}
1750-
1751-
private:
1752-
static const GenConditionDesc map[32];
1753-
};
1754-
17551714
void inst_JCC(GenCondition condition, BasicBlock* target);
17561715
void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg);
17571716

1717+
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
17581718
void genCodeForJcc(GenTreeCC* tree);
17591719
void genCodeForSetcc(GenTreeCC* setcc);
17601720
void genCodeForJTrue(GenTreeOp* jtrue);

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4165,7 +4165,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
41654165
}
41664166

41674167
// clang-format off
4168-
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
4168+
const GenConditionDesc GenConditionDesc::map[32]
41694169
{
41704170
{ }, // NONE
41714171
{ }, // 1

src/coreclr/jit/codegeninterface.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,4 +828,47 @@ class CodeGenInterface
828828
#endif
829829
};
830830

831+
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
832+
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
833+
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
834+
// certain floating point conditions.
835+
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
836+
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
837+
// and then jump if ZF is 1:
838+
// JP fallThroughBlock
839+
// JE jumpDestBlock
840+
// fallThroughBlock:
841+
// ...
842+
// jumpDestBlock:
843+
//
844+
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
845+
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
846+
// pattern is used to encode the above:
847+
// { EJ_jnp, GT_AND, EJ_je }
848+
// { EJ_jp, GT_OR, EJ_jne }
849+
//
850+
// For more details check inst_JCC and inst_SETCC functions.
851+
//
852+
struct GenConditionDesc
853+
{
854+
emitJumpKind jumpKind1;
855+
genTreeOps oper;
856+
emitJumpKind jumpKind2;
857+
char padTo4Bytes;
858+
859+
static const GenConditionDesc& Get(GenCondition condition)
860+
{
861+
assert(condition.GetCode() < ArrLen(map));
862+
const GenConditionDesc& desc = map[condition.GetCode()];
863+
assert(desc.jumpKind1 != EJ_NONE);
864+
assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
865+
assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
866+
return desc;
867+
}
868+
869+
private:
870+
static const GenConditionDesc map[32];
871+
};
872+
#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
873+
831874
#endif // _CODEGEN_INTERFACE_H_

src/coreclr/jit/codegenxarch.cpp

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
15731573
return s_table[condition];
15741574
}
15751575

1576+
//------------------------------------------------------------------------
1577+
// JumpKindToCcmp:
1578+
// Convert an emitJumpKind to the corresponding ccmp instruction.
1579+
//
1580+
// Arguments:
1581+
// condition - the condition
1582+
//
1583+
// Returns:
1584+
// A ccmp instruction.
1585+
//
1586+
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
1587+
{
1588+
static constexpr instruction s_table[EJ_COUNT] = {
1589+
INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
1590+
INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
1591+
};
1592+
1593+
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1594+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1595+
static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
1596+
static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
1597+
static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
1598+
static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
1599+
static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
1600+
static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
1601+
static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
1602+
static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
1603+
static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
1604+
static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
1605+
static_assert_no_msg(s_table[EJ_jp] == INS_none);
1606+
static_assert_no_msg(s_table[EJ_jnp] == INS_none);
1607+
static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
1608+
static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
1609+
static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
1610+
static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
1611+
1612+
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1613+
return s_table[condition];
1614+
}
1615+
15761616
//------------------------------------------------------------------------
15771617
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
15781618
//
@@ -1669,7 +1709,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
16691709
}
16701710

16711711
// clang-format off
1672-
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
1712+
const GenConditionDesc GenConditionDesc::map[32]
16731713
{
16741714
{ }, // NONE
16751715
{ }, // 1
@@ -2260,6 +2300,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
22602300
// Do nothing; these nodes are simply markers for debug info.
22612301
break;
22622302

2303+
#if defined(TARGET_AMD64)
2304+
case GT_CCMP:
2305+
genCodeForCCMP(treeNode->AsCCMP());
2306+
break;
2307+
#endif
2308+
22632309
default:
22642310
{
22652311
#ifdef DEBUG
@@ -8908,6 +8954,84 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
89088954
regSet.verifyRegistersUsed(killMask);
89098955
}
89108956

8957+
//-----------------------------------------------------------------------------------------
8958+
// OptsFromCFlags - Convert condition flags into appropriate insOpts.
8959+
//
8960+
// Arguments:
8961+
// flags - The condition flags to be converted.
8962+
//
8963+
// Return Value:
8964+
// An insOpts value encoding the condition flags.
8965+
//
8966+
// Notes:
8967+
// This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate
8968+
// instruction options used for setting the default flag values in extended EVEX
8969+
// encoding conditional instructions.
8970+
//
8971+
insOpts CodeGen::OptsFromCFlags(insCflags flags)
8972+
{
8973+
unsigned opts = 0x0;
8974+
if (flags & INS_FLAGS_CF)
8975+
opts |= INS_OPTS_EVEX_dfv_cf;
8976+
if (flags & INS_FLAGS_ZF)
8977+
opts |= INS_OPTS_EVEX_dfv_zf;
8978+
if (flags & INS_FLAGS_SF)
8979+
opts |= INS_OPTS_EVEX_dfv_sf;
8980+
if (flags & INS_FLAGS_OF)
8981+
opts |= INS_OPTS_EVEX_dfv_of;
8982+
return (insOpts)opts;
8983+
}
8984+
8985+
#ifdef TARGET_AMD64
8986+
8987+
//-----------------------------------------------------------------------------------------
8988+
// genCodeForCCMP - Generate code for a conditional compare (CCMP) node.
8989+
//
8990+
// Arguments:
8991+
// ccmp - The GenTreeCCMP node representing the conditional compare.
8992+
//
8993+
// Return Value:
8994+
// None.
8995+
//
8996+
// Notes:
8997+
// This function generates code for a conditional compare operation. On X86,
8998+
// the comparison is emitted using the extended EVEX encoding and the ccmp instruction.
8999+
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
9000+
{
9001+
emitter* emit = GetEmitter();
9002+
assert(emit->UsePromotedEVEXEncoding());
9003+
9004+
genConsumeOperands(ccmp);
9005+
GenTree* op1 = ccmp->gtGetOp1();
9006+
GenTree* op2 = ccmp->gtGetOp2();
9007+
var_types op1Type = genActualType(op1->TypeGet());
9008+
var_types op2Type = genActualType(op2->TypeGet());
9009+
emitAttr cmpSize = emitActualTypeSize(op1Type);
9010+
regNumber srcReg1 = op1->GetRegNum();
9011+
9012+
// No float support or swapping op1 and op2 to generate cmp reg, imm.
9013+
assert(!varTypeIsFloating(op2Type));
9014+
assert(!op1->isContainedIntOrIImmed());
9015+
9016+
// For the ccmp flags, invert the condition of the compare.
9017+
// For the condition, use the previous compare.
9018+
const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);
9019+
instruction ccmpIns = JumpKindToCcmp(condDesc.jumpKind1);
9020+
insOpts opts = OptsFromCFlags(ccmp->gtFlagsVal);
9021+
9022+
if (op2->isContainedIntOrIImmed())
9023+
{
9024+
GenTreeIntConCommon* intConst = op2->AsIntConCommon();
9025+
emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
9026+
}
9027+
else
9028+
{
9029+
regNumber srcReg2 = op2->GetRegNum();
9030+
emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts);
9031+
}
9032+
}
9033+
#endif // TARGET_AMD64
9034+
89119035
#if defined(DEBUG) && defined(TARGET_AMD64)
89129036

89139037
/*****************************************************************************

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7003,7 +7003,7 @@ class Compiler
70037003
PhaseStatus optOptimizeBools();
70047004
PhaseStatus optSwitchRecognition();
70057005
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
7006-
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
7006+
bool optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion = false);
70077007

70087008
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
70097009
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication

src/coreclr/jit/emitxarch.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19904,6 +19904,14 @@ emitter::insFormat emitter::ExtractMemoryFormat(insFormat insFmt) const
1990419904
return IF_NONE;
1990519905
}
1990619906

19907+
#ifdef TARGET_AMD64
19908+
// true if this 'imm' can be encoded as an input operand to a ccmp instruction
19909+
/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
19910+
{
19911+
return (((INT32)imm) == imm);
19912+
}
19913+
#endif
19914+
1990719915
#if defined(DEBUG) || defined(LATE_DISASM)
1990819916

1990919917
//----------------------------------------------------------------------------------------

src/coreclr/jit/emitxarch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,4 +1321,9 @@ inline bool HasExtendedGPReg(const instrDesc* id) const;
13211321

13221322
inline bool HasMaskReg(const instrDesc* id) const;
13231323

1324+
#ifdef TARGET_AMD64
1325+
// true if this 'imm' can be encoded as an input operand to a ccmp instruction
1326+
static bool emitIns_valid_imm_for_ccmp(INT64 imm);
1327+
#endif // TARGET_AMD64
1328+
13241329
#endif // TARGET_XARCH

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9410,7 +9410,7 @@ enum insCC : unsigned
94109410
};
94119411
#endif
94129412

9413-
#if defined(TARGET_ARM64)
9413+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
94149414
struct GenTreeCCMP final : public GenTreeOpCC
94159415
{
94169416
insCflags gtFlagsVal;

src/coreclr/jit/gtlist.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
244244
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
245245
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
246246
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
247-
#ifdef TARGET_ARM64
248-
// The arm64 ccmp instruction. If the specified condition is true, compares two
247+
248+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
249+
// The arm64 and x86 ccmp instruction. If the specified condition is true, compares two
249250
// operands and sets the condition flags according to the result. Otherwise
250251
// sets the condition flags to the specified immediate value.
251252
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
253+
#endif
254+
255+
256+
#ifdef TARGET_ARM64
252257
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
253258
// If op2 is null, computes result = condition ? op1 + 1 : op1.
254259
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)

src/coreclr/jit/gtstructs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
116116
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
117117
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
118118
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
119-
#ifdef TARGET_ARM64
119+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
120120
GTSTRUCT_1(CCMP , GT_CCMP)
121+
#endif
122+
#ifdef TARGET_ARM64
121123
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
122124
#else
123125
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)

0 commit comments

Comments
 (0)