Skip to content

Commit cc8b45f

Browse files
committed
[VectorCombine][X86] foldShuffleOfCastops - fold shuffle(cast(x),cast(y)) -> cast(shuffle(x,y)) iff cost efficient
Based off the existing foldShuffleOfBinops fold. Fixes #67803.
1 parent d53b829 commit cc8b45f

File tree

3 files changed

+114
-46
lines changed

3 files changed

+114
-46
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

+75
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class VectorCombine {
112112
bool foldSingleElementStore(Instruction &I);
113113
bool scalarizeLoadExtract(Instruction &I);
114114
bool foldShuffleOfBinops(Instruction &I);
115+
bool foldShuffleOfCastops(Instruction &I);
115116
bool foldShuffleFromReductions(Instruction &I);
116117
bool foldTruncFromReductions(Instruction &I);
117118
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1432,6 +1433,79 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
14321433
return true;
14331434
}
14341435

1436+
/// Try to convert "shuffle (castop), (castop)" with a shared castop operand into
1437+
/// "castop (shuffle)".
1438+
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
1439+
Value *V0, *V1;
1440+
ArrayRef<int> Mask;
1441+
if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
1442+
m_Mask(Mask))))
1443+
return false;
1444+
1445+
auto *C0 = dyn_cast<CastInst>(V0);
1446+
auto *C1 = dyn_cast<CastInst>(V1);
1447+
if (!C0 || !C1)
1448+
return false;
1449+
1450+
Instruction::CastOps Opcode = C0->getOpcode();
1451+
if (Opcode == Instruction::BitCast || C0->getSrcTy() != C1->getSrcTy())
1452+
return false;
1453+
1454+
// Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
1455+
if (Opcode != C1->getOpcode()) {
1456+
auto IsZExtNNeg = [](Instruction *I) {
1457+
auto *NNI = dyn_cast<PossiblyNonNegInst>(I);
1458+
return NNI && NNI->hasNonNeg();
1459+
};
1460+
if (!((C0->getOpcode() == Instruction::SExt && IsZExtNNeg(C1)) ||
1461+
(C1->getOpcode() == Instruction::SExt && IsZExtNNeg(C0))))
1462+
return false;
1463+
Opcode = Instruction::SExt;
1464+
}
1465+
1466+
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
1467+
auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1468+
auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1469+
if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1470+
return false;
1471+
assert(CastDstTy->getElementCount() == CastSrcTy->getElementCount() &&
1472+
"Unexpected src/dst element counts");
1473+
1474+
auto *NewShuffleDstTy =
1475+
FixedVectorType::get(CastSrcTy->getScalarType(), Mask.size());
1476+
1477+
// Try to replace a castop with a shuffle if the shuffle is not costly.
1478+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1479+
1480+
InstructionCost OldCost =
1481+
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
1482+
TTI::CastContextHint::None, CostKind) +
1483+
TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
1484+
TTI::CastContextHint::None, CostKind);
1485+
OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
1486+
CastDstTy, Mask, CostKind);
1487+
1488+
InstructionCost NewCost = TTI.getShuffleCost(
1489+
TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind);
1490+
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
1491+
TTI::CastContextHint::None, CostKind);
1492+
if (NewCost > OldCost)
1493+
return false;
1494+
1495+
Value *Shuf =
1496+
Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), Mask);
1497+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
1498+
1499+
// Intersect flags from the old casts.
1500+
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
1501+
NewInst->copyIRFlags(C0);
1502+
NewInst->andIRFlags(C1);
1503+
}
1504+
1505+
replaceValue(I, *Cast);
1506+
return true;
1507+
}
1508+
14351509
/// Given a commutative reduction, the order of the input lanes does not alter
14361510
/// the results. We can use this to remove certain shuffles feeding the
14371511
/// reduction, removing the need to shuffle at all.
@@ -1986,6 +2060,7 @@ bool VectorCombine::run() {
19862060
break;
19872061
case Instruction::ShuffleVector:
19882062
MadeChange |= foldShuffleOfBinops(I);
2063+
MadeChange |= foldShuffleOfCastops(I);
19892064
MadeChange |= foldSelectShuffle(I);
19902065
break;
19912066
case Instruction::BitCast:

llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll

+1-5
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b
99
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32>
1010
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[Y:%.*]] to <8 x i32>
1111
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP0]], [[TMP1]]
12-
; CHECK-NEXT: [[CMP_I21:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13-
; CHECK-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32>
14-
; CHECK-NEXT: [[CMP_I:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
15-
; CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
16-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[SEXT_I22]], <4 x i32> [[SEXT_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
12+
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
1713
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
1814
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i8> [[TMP5]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1915
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>

llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll

+38-41
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66

77
define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
88
; CHECK-LABEL: @concat_zext_v8i16_v16i32(
9-
; CHECK-NEXT: [[X0:%.*]] = zext <8 x i16> [[A0:%.*]] to <8 x i32>
10-
; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32>
11-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10+
; CHECK-NEXT: [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32>
1211
; CHECK-NEXT: ret <16 x i32> [[R]]
1312
;
1413
%x0 = zext <8 x i16> %a0 to <8 x i32>
@@ -19,9 +18,8 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
1918

2019
define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
2120
; CHECK-LABEL: @concat_zext_nneg_v8i16_v16i32(
22-
; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i16> [[A0:%.*]] to <8 x i32>
23-
; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
24-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
21+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22+
; CHECK-NEXT: [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32>
2523
; CHECK-NEXT: ret <16 x i32> [[R]]
2624
;
2725
%x0 = zext nneg <8 x i16> %a0 to <8 x i32>
@@ -30,13 +28,17 @@ define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
3028
ret <16 x i32> %r
3129
}
3230

33-
; TODO - sext + zext nneg -> sext
3431
define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) {
35-
; CHECK-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
36-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
37-
; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
38-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
39-
; CHECK-NEXT: ret <16 x i32> [[R]]
32+
; SSE-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
33+
; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
34+
; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
35+
; SSE-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
36+
; SSE-NEXT: ret <16 x i32> [[R]]
37+
;
38+
; AVX-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
39+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
40+
; AVX-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
41+
; AVX-NEXT: ret <16 x i32> [[R]]
4042
;
4143
%x0 = sext <8 x i16> %a0 to <8 x i32>
4244
%x1 = zext nneg <8 x i16> %a1 to <8 x i32>
@@ -46,9 +48,8 @@ define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a
4648

4749
define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
4850
; CHECK-LABEL: @concat_sext_v8i16_v16i32(
49-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
50-
; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32>
51-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
51+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
52+
; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
5253
; CHECK-NEXT: ret <16 x i32> [[R]]
5354
;
5455
%x0 = sext <8 x i16> %a0 to <8 x i32>
@@ -59,9 +60,8 @@ define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
5960

6061
define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) {
6162
; CHECK-LABEL: @concat_sext_v4i1_v8i32(
62-
; CHECK-NEXT: [[X0:%.*]] = sext <4 x i1> [[A0:%.*]] to <4 x i32>
63-
; CHECK-NEXT: [[X1:%.*]] = sext <4 x i1> [[A1:%.*]] to <4 x i32>
64-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
64+
; CHECK-NEXT: [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
6565
; CHECK-NEXT: ret <8 x i32> [[R]]
6666
;
6767
%x0 = sext <4 x i1> %a0 to <4 x i32>
@@ -72,9 +72,8 @@ define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) {
7272

7373
define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) {
7474
; CHECK-LABEL: @concat_trunc_v4i32_v8i16(
75-
; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16>
76-
; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16>
77-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
75+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
76+
; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16>
7877
; CHECK-NEXT: ret <8 x i16> [[R]]
7978
;
8079
%x0 = trunc <4 x i32> %a0 to <4 x i16>
@@ -85,9 +84,8 @@ define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) {
8584

8685
define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) {
8786
; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr(
88-
; CHECK-NEXT: [[X0:%.*]] = inttoptr <4 x i32> [[A0:%.*]] to <4 x ptr>
89-
; CHECK-NEXT: [[X1:%.*]] = inttoptr <4 x i32> [[A1:%.*]] to <4 x ptr>
90-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x ptr> [[X0]], <4 x ptr> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
87+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
88+
; CHECK-NEXT: [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr>
9189
; CHECK-NEXT: ret <8 x ptr> [[R]]
9290
;
9391
%x0 = inttoptr <4 x i32> %a0 to <4 x ptr>
@@ -98,9 +96,8 @@ define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) {
9896

9997
define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) {
10098
; CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32(
101-
; CHECK-NEXT: [[X0:%.*]] = ptrtoint <8 x ptr> [[A0:%.*]] to <8 x i64>
102-
; CHECK-NEXT: [[X1:%.*]] = ptrtoint <8 x ptr> [[A1:%.*]] to <8 x i64>
103-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i64> [[X0]], <8 x i64> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
99+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
100+
; CHECK-NEXT: [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64>
104101
; CHECK-NEXT: ret <16 x i64> [[R]]
105102
;
106103
%x0 = ptrtoint <8 x ptr> %a0 to <8 x i64>
@@ -110,11 +107,16 @@ define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) {
110107
}
111108

112109
define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
113-
; CHECK-LABEL: @concat_fpext_v4f32_v8f64(
114-
; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
115-
; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
116-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
117-
; CHECK-NEXT: ret <8 x double> [[R]]
110+
; SSE-LABEL: @concat_fpext_v4f32_v8f64(
111+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
112+
; SSE-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
113+
; SSE-NEXT: ret <8 x double> [[R]]
114+
;
115+
; AVX-LABEL: @concat_fpext_v4f32_v8f64(
116+
; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
117+
; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
118+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
119+
; AVX-NEXT: ret <8 x double> [[R]]
118120
;
119121
%x0 = fpext <4 x float> %a0 to <4 x double>
120122
%x1 = fpext <4 x float> %a1 to <4 x double>
@@ -139,9 +141,8 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double>
139141

140142
define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
141143
; CHECK-LABEL: @rconcat_sext_v8i16_v16i32(
142-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
143-
; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32>
144-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
144+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
145+
; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
145146
; CHECK-NEXT: ret <16 x i32> [[R]]
146147
;
147148
%x0 = sext <8 x i16> %a0 to <8 x i32>
@@ -154,9 +155,8 @@ define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
154155

155156
define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
156157
; CHECK-LABEL: @interleave_fpext_v4f32_v8f64(
157-
; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
158-
; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
159-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
158+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
159+
; CHECK-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
160160
; CHECK-NEXT: ret <8 x double> [[R]]
161161
;
162162
%x0 = fpext <4 x float> %a0 to <4 x double>
@@ -226,6 +226,3 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
226226
%r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
227227
ret <16 x i32> %r
228228
}
229-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
230-
; AVX: {{.*}}
231-
; SSE: {{.*}}

0 commit comments

Comments
 (0)