[InstCombine] fold shuffle of fabs

rotateright · rotateright · commit a8f13dbdeb31 · 2023-02-03T14:23:17.000-05:00
shuffle (fabs X), Mask --> fabs (shuffle X, Mask) shuffle (fabs X), (fabs Y), Mask --> fabs (shuf X, Y, Mask) https://alive2.llvm.org/ce/z/JH2nkf This generalizes the existing fneg transforms to also work with fabs. A likely follow-up would generalize this further to move any unary intrinsic op.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2407,37 +2407,51 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
   return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
 }
 
-/// Canonicalize FP negate after shuffle.
-static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
-                                    InstCombiner::BuilderTy &Builder) {
-  Instruction *FNeg0;
+/// Canonicalize FP negate/abs after shuffle.
+static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
+                                          InstCombiner::BuilderTy &Builder) {
+  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
   Value *X;
-  if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
-                                              m_FNeg(m_Value(X)))))
+  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
     return nullptr;
 
-  // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
-  if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
+
+  // Match 1-input (unary) shuffle.
+  // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
+  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
     Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
-    return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
+    if (IsFNeg)
+      return UnaryOperator::CreateFNegFMF(NewShuf, S0);
+
+    Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+                                               Intrinsic::fabs, Shuf.getType());
+    CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
+    NewF->setFastMathFlags(S0->getFastMathFlags());
+    return NewF;
   }
 
-  Instruction *FNeg1;
+  // Match 2-input (binary) shuffle.
+  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
   Value *Y;
-  if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
-                                              m_FNeg(m_Value(Y)))))
+  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
+      S0->getOpcode() != S1->getOpcode() ||
+      (!S0->hasOneUse() && !S1->hasOneUse()))
     return nullptr;
 
-  // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
-  if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
-    Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
-    Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
-    NewFNeg->copyIRFlags(FNeg0);
-    NewFNeg->andIRFlags(FNeg1);
-    return NewFNeg;
+  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
+  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+  Instruction *NewF;
+  if (IsFNeg) {
+    NewF = UnaryOperator::CreateFNeg(NewShuf);
+  } else {
+    Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+                                               Intrinsic::fabs, Shuf.getType());
+    NewF = CallInst::Create(FAbs, {NewShuf});
   }
-
-  return nullptr;
+  NewF->copyIRFlags(S0);
+  NewF->andIRFlags(S1);
+  return NewF;
 }
 
 /// Canonicalize casts after shuffle.
@@ -2815,7 +2829,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (Instruction *I = narrowVectorSelect(SVI, Builder))
     return I;
 
-  if (Instruction *I = foldFNegShuffle(SVI, Builder))
+  if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
     return I;
 
   if (Instruction *I = foldCastShuffle(SVI, Builder))
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1797,8 +1797,8 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
 
 define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
 ; CHECK-LABEL: @fabs_unary_shuf(
-; CHECK-NEXT:    [[NX:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %nx = call nsz nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1808,8 +1808,8 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
 
 define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
 ; CHECK-LABEL: @fabs_unary_shuf_widen(
-; CHECK-NEXT:    [[NX:%.*]] = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]])
 ; CHECK-NEXT:    ret <4 x half> [[R]]
 ;
   %nx = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -1819,15 +1819,17 @@ define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
 
 define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
 ; CHECK-LABEL: @fabs_unary_shuf_narrow(
-; CHECK-NEXT:    [[NX:%.*]] = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x double> [[NX]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x double> [[R]]
 ;
   %nx = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
   %r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
   ret <2 x double> %r
 }
 
+; negative test - extra use prevents canonicalization
+
 define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
 ; CHECK-LABEL: @fabs_unary_shuf_use(
 ; CHECK-NEXT:    [[NX:%.*]] = call nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -1841,11 +1843,12 @@ define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
   ret <2 x float> %r
 }
 
+; intersect FMF
+
 define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fabs_shuf(
-; CHECK-NEXT:    [[NX:%.*]] = call ninf nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
-; CHECK-NEXT:    [[NY:%.*]] = call nnan ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[R:%.*]] = call ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %nx = call nsz ninf <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1854,12 +1857,14 @@ define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
+; length-changing shuffle and extra use are ok
+
 define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fabs_shuf_widen_use1(
 ; CHECK-NEXT:    [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
 ; CHECK-NEXT:    call void @use(<2 x float> [[NX]])
-; CHECK-NEXT:    [[NY:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = call nnan <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %nx = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1869,12 +1874,14 @@ define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
   ret <4 x float> %r
 }
 
+; length-changing shuffle and extra use are ok
+
 define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fabs_shuf_narrow_use2(
-; CHECK-NEXT:    [[NX:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
 ; CHECK-NEXT:    [[NY:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
 ; CHECK-NEXT:    call void @use4(<4 x float> [[NY]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT:    [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %nx = call nsz nnan <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1884,6 +1891,8 @@ define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
   ret <2 x float> %r
 }
 
+; negative test - too many extra uses
+
 define <2 x float> @fabs_shuf_use3(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fabs_shuf_use3(
 ; CHECK-NEXT:    [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -2016,6 +2025,8 @@ define <2 x float> @fneg_shuf_use3(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %r
 }
 
+; negative test - mixed opcodes
+
 define <4 x float> @fabs_fneg_shuf(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fabs_fneg_shuf(
 ; CHECK-NEXT:    [[NX:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])