Skip to content

Commit a8f13db

Browse files
committed
[InstCombine] fold shuffle of fabs
shuffle (fabs X), Mask --> fabs (shuffle X, Mask)
shuffle (fabs X), (fabs Y), Mask --> fabs (shuf X, Y, Mask)

https://alive2.llvm.org/ce/z/JH2nkf

This generalizes the existing fneg transforms to also work with fabs. A likely follow-up would generalize this further to move any unary intrinsic op.
1 parent 73d8343 commit a8f13db

File tree

2 files changed

+60
-35
lines changed

2 files changed

+60
-35
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,37 +2407,51 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
24072407
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
24082408
}
24092409

2410-
/// Canonicalize FP negate after shuffle.
2411-
static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
2412-
InstCombiner::BuilderTy &Builder) {
2413-
Instruction *FNeg0;
2410+
/// Canonicalize FP negate/abs after shuffle.
2411+
static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2412+
InstCombiner::BuilderTy &Builder) {
2413+
auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
24142414
Value *X;
2415-
if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
2416-
m_FNeg(m_Value(X)))))
2415+
if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
24172416
return nullptr;
24182417

2419-
// shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
2420-
if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2418+
bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2419+
2420+
// Match 1-input (unary) shuffle.
2421+
// shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2422+
if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
24212423
Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2422-
return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
2424+
if (IsFNeg)
2425+
return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2426+
2427+
Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2428+
Intrinsic::fabs, Shuf.getType());
2429+
CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2430+
NewF->setFastMathFlags(S0->getFastMathFlags());
2431+
return NewF;
24232432
}
24242433

2425-
Instruction *FNeg1;
2434+
// Match 2-input (binary) shuffle.
2435+
auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
24262436
Value *Y;
2427-
if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
2428-
m_FNeg(m_Value(Y)))))
2437+
if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2438+
S0->getOpcode() != S1->getOpcode() ||
2439+
(!S0->hasOneUse() && !S1->hasOneUse()))
24292440
return nullptr;
24302441

2431-
// shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
2432-
if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
2433-
Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2434-
Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
2435-
NewFNeg->copyIRFlags(FNeg0);
2436-
NewFNeg->andIRFlags(FNeg1);
2437-
return NewFNeg;
2442+
// shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2443+
Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2444+
Instruction *NewF;
2445+
if (IsFNeg) {
2446+
NewF = UnaryOperator::CreateFNeg(NewShuf);
2447+
} else {
2448+
Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2449+
Intrinsic::fabs, Shuf.getType());
2450+
NewF = CallInst::Create(FAbs, {NewShuf});
24382451
}
2439-
2440-
return nullptr;
2452+
NewF->copyIRFlags(S0);
2453+
NewF->andIRFlags(S1);
2454+
return NewF;
24412455
}
24422456

24432457
/// Canonicalize casts after shuffle.
@@ -2815,7 +2829,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
28152829
if (Instruction *I = narrowVectorSelect(SVI, Builder))
28162830
return I;
28172831

2818-
if (Instruction *I = foldFNegShuffle(SVI, Builder))
2832+
if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
28192833
return I;
28202834

28212835
if (Instruction *I = foldCastShuffle(SVI, Builder))

llvm/test/Transforms/InstCombine/vec_shuffle.ll

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,8 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
17971797

17981798
define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
17991799
; CHECK-LABEL: @fabs_unary_shuf(
1800-
; CHECK-NEXT: [[NX:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
1801-
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
1800+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
1801+
; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
18021802
; CHECK-NEXT: ret <2 x float> [[R]]
18031803
;
18041804
%nx = call nsz nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1808,8 +1808,8 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
18081808

18091809
define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
18101810
; CHECK-LABEL: @fabs_unary_shuf_widen(
1811-
; CHECK-NEXT: [[NX:%.*]] = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
1812-
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
1811+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
1812+
; CHECK-NEXT: [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]])
18131813
; CHECK-NEXT: ret <4 x half> [[R]]
18141814
;
18151815
%nx = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -1819,15 +1819,17 @@ define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
18191819

18201820
define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
18211821
; CHECK-LABEL: @fabs_unary_shuf_narrow(
1822-
; CHECK-NEXT: [[NX:%.*]] = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> [[X:%.*]])
1823-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[NX]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
1822+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
1823+
; CHECK-NEXT: [[R:%.*]] = call nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
18241824
; CHECK-NEXT: ret <2 x double> [[R]]
18251825
;
18261826
%nx = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
18271827
%r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
18281828
ret <2 x double> %r
18291829
}
18301830

1831+
; negative test - extra use prevents canonicalization
1832+
18311833
define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
18321834
; CHECK-LABEL: @fabs_unary_shuf_use(
18331835
; CHECK-NEXT: [[NX:%.*]] = call nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -1841,11 +1843,12 @@ define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
18411843
ret <2 x float> %r
18421844
}
18431845

1846+
; intersect FMF
1847+
18441848
define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
18451849
; CHECK-LABEL: @fabs_shuf(
1846-
; CHECK-NEXT: [[NX:%.*]] = call ninf nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
1847-
; CHECK-NEXT: [[NY:%.*]] = call nnan ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
1848-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1850+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1851+
; CHECK-NEXT: [[R:%.*]] = call ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
18491852
; CHECK-NEXT: ret <4 x float> [[R]]
18501853
;
18511854
%nx = call nsz ninf <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1854,12 +1857,14 @@ define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
18541857
ret <4 x float> %r
18551858
}
18561859

1860+
; length-changing shuffle and extra use are ok
1861+
18571862
define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
18581863
; CHECK-LABEL: @fabs_shuf_widen_use1(
18591864
; CHECK-NEXT: [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
18601865
; CHECK-NEXT: call void @use(<2 x float> [[NX]])
1861-
; CHECK-NEXT: [[NY:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[Y:%.*]])
1862-
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
1866+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
1867+
; CHECK-NEXT: [[R:%.*]] = call nnan <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
18631868
; CHECK-NEXT: ret <4 x float> [[R]]
18641869
;
18651870
%nx = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1869,12 +1874,14 @@ define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
18691874
ret <4 x float> %r
18701875
}
18711876

1877+
; length-changing shuffle and extra use are ok
1878+
18721879
define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
18731880
; CHECK-LABEL: @fabs_shuf_narrow_use2(
1874-
; CHECK-NEXT: [[NX:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
18751881
; CHECK-NEXT: [[NY:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
18761882
; CHECK-NEXT: call void @use4(<4 x float> [[NY]])
1877-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <2 x i32> <i32 3, i32 5>
1883+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y]], <2 x i32> <i32 3, i32 5>
1884+
; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
18781885
; CHECK-NEXT: ret <2 x float> [[R]]
18791886
;
18801887
%nx = call nsz nnan <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1884,6 +1891,8 @@ define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
18841891
ret <2 x float> %r
18851892
}
18861893

1894+
; negative test - too many extra uses
1895+
18871896
define <2 x float> @fabs_shuf_use3(<2 x float> %x, <2 x float> %y) {
18881897
; CHECK-LABEL: @fabs_shuf_use3(
18891898
; CHECK-NEXT: [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -2016,6 +2025,8 @@ define <2 x float> @fneg_shuf_use3(<2 x float> %x, <2 x float> %y) {
20162025
ret <2 x float> %r
20172026
}
20182027

2028+
; negative test - mixed opcodes
2029+
20192030
define <4 x float> @fabs_fneg_shuf(<4 x float> %x, <4 x float> %y) {
20202031
; CHECK-LABEL: @fabs_fneg_shuf(
20212032
; CHECK-NEXT: [[NX:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])

0 commit comments

Comments
 (0)