Skip to content

Reland "[LoopVectorizer] Add support for chaining partial reductions #120272" #124282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into llvm:main on
Jan 28, 2025

Conversation

NickGuy-Arm
Copy link
Contributor

Change getScaledReduction to take an existing vector and append to it, rather than creating and returning a new one on each call.
Rename getScaledReduction to getScaledReductions to more accurately reflect what it's now doing.

@llvmbot
Copy link
Member

llvmbot commented Jan 24, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-vectorizers

Author: Nicholas Guy (NickGuy-Arm)

Changes

Change getScaledReduction to take an existing vector and append to it, rather than creating and returning a new one on each call.
Rename getScaledReduction to getScaledReductions to more accurately reflect what it's now doing.


Patch is 79.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124282.diff

4 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+48-29)
  • (modified) llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (+3-3)
  • (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+4-1)
  • (added) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll (+1025)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e4e87704c1c97a..a68c59209f5d41 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8684,12 +8684,11 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
 /// are valid so recipes can be formed later.
 void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
   // Find all possible partial reductions.
-  SmallVector<std::pair<PartialReductionChain, unsigned>, 1>
+  SmallVector<std::pair<PartialReductionChain, unsigned>>
       PartialReductionChains;
-  for (const auto &[Phi, RdxDesc] : Legal->getReductionVars())
-    if (std::optional<std::pair<PartialReductionChain, unsigned>> Pair =
-            getScaledReduction(Phi, RdxDesc, Range))
-      PartialReductionChains.push_back(*Pair);
+  for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) {
+    getScaledReductions(Phi, RdxDesc.getLoopExitInstr(), Range, PartialReductionChains);
+  }
 
   // A partial reduction is invalid if any of its extends are used by
   // something that isn't another partial reduction. This is because the
@@ -8717,39 +8716,55 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
   }
 }
 
-std::optional<std::pair<PartialReductionChain, unsigned>>
-VPRecipeBuilder::getScaledReduction(PHINode *PHI,
-                                    const RecurrenceDescriptor &Rdx,
-                                    VFRange &Range) {
+bool
+VPRecipeBuilder::getScaledReductions(Instruction *PHI, Instruction *RdxExitInstr,
+                                    VFRange &Range, SmallVector<std::pair<PartialReductionChain, unsigned>> &Chains) {
+
+  if (!CM.TheLoop->contains(RdxExitInstr))
+    return false;
+
   // TODO: Allow scaling reductions when predicating. The select at
   // the end of the loop chooses between the phi value and most recent
   // reduction result, both of which have different VFs to the active lane
   // mask when scaling.
-  if (CM.blockNeedsPredicationForAnyReason(Rdx.getLoopExitInstr()->getParent()))
-    return std::nullopt;
+  if (CM.blockNeedsPredicationForAnyReason(RdxExitInstr->getParent()))
+    return false;
 
-  auto *Update = dyn_cast<BinaryOperator>(Rdx.getLoopExitInstr());
+  auto *Update = dyn_cast<BinaryOperator>(RdxExitInstr);
   if (!Update)
-    return std::nullopt;
+    return false;
 
   Value *Op = Update->getOperand(0);
   Value *PhiOp = Update->getOperand(1);
-  if (Op == PHI) {
-    Op = Update->getOperand(1);
-    PhiOp = Update->getOperand(0);
+  if (Op == PHI)
+    std::swap(Op, PhiOp);
+
+
+  // Try and get a scaled reduction from the first non-phi operand.
+  // If one is found, we use the discovered reduction instruction in
+  // place of the accumulator for costing.
+  if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+    if (getScaledReductions(PHI, OpInst, Range, Chains)) {
+      PHI = Chains.rbegin()->first.Reduction;
+
+      Op = Update->getOperand(0);
+      PhiOp = Update->getOperand(1);
+      if (Op == PHI)
+        std::swap(Op, PhiOp);
+    }
   }
   if (PhiOp != PHI)
-    return std::nullopt;
+    return false;
 
   auto *BinOp = dyn_cast<BinaryOperator>(Op);
   if (!BinOp || !BinOp->hasOneUse())
-    return std::nullopt;
+    return false;
 
   using namespace llvm::PatternMatch;
   Value *A, *B;
   if (!match(BinOp->getOperand(0), m_ZExtOrSExt(m_Value(A))) ||
       !match(BinOp->getOperand(1), m_ZExtOrSExt(m_Value(B))))
-    return std::nullopt;
+    return false;
 
   Instruction *ExtA = cast<Instruction>(BinOp->getOperand(0));
   Instruction *ExtB = cast<Instruction>(BinOp->getOperand(1));
@@ -8759,7 +8774,7 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI,
   TTI::PartialReductionExtendKind OpBExtend =
       TargetTransformInfo::getPartialReductionExtendKind(ExtB);
 
-  PartialReductionChain Chain(Rdx.getLoopExitInstr(), ExtA, ExtB, BinOp);
+  PartialReductionChain Chain(RdxExitInstr, ExtA, ExtB, BinOp);
 
   unsigned TargetScaleFactor =
       PHI->getType()->getPrimitiveSizeInBits().getKnownScalarFactor(
@@ -8773,10 +8788,12 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI,
                 std::make_optional(BinOp->getOpcode()));
             return Cost.isValid();
           },
-          Range))
-    return std::make_pair(Chain, TargetScaleFactor);
+          Range)) {
+    Chains.push_back(std::make_pair(Chain, TargetScaleFactor));
+    return true;
+  }
 
-  return std::nullopt;
+  return false;
 }
 
 VPRecipeBase *
@@ -8871,12 +8888,14 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
          "Unexpected number of operands for partial reduction");
 
   VPValue *BinOp = Operands[0];
-  VPValue *Phi = Operands[1];
-  if (isa<VPReductionPHIRecipe>(BinOp->getDefiningRecipe()))
-    std::swap(BinOp, Phi);
-
-  return new VPPartialReductionRecipe(Reduction->getOpcode(), BinOp, Phi,
-                                      Reduction);
+  VPValue *Accumulator = Operands[1];
+  VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
+  if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
+      isa<VPPartialReductionRecipe>(BinOpRecipe))
+    std::swap(BinOp, Accumulator);
+
+  return new VPPartialReductionRecipe(Reduction->getOpcode(), BinOp,
+                                      Accumulator, Reduction);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 44745bfd46f891..18f3f22579e5a8 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -142,9 +142,9 @@ class VPRecipeBuilder {
   /// Returns null if no scaled reduction was found, otherwise a pair with a
   /// struct containing reduction information and the scaling factor between the
   /// number of elements in the input and output.
-  std::optional<std::pair<PartialReductionChain, unsigned>>
-  getScaledReduction(PHINode *PHI, const RecurrenceDescriptor &Rdx,
-                     VFRange &Range);
+  bool
+  getScaledReductions(Instruction *PHI, Instruction *RdxExitInstr,
+                     VFRange &Range, SmallVector<std::pair<PartialReductionChain, unsigned>> &Chains);
 
 public:
   VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9124905c997176..133719b3e90b9e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2456,7 +2456,10 @@ class VPPartialReductionRecipe : public VPSingleDefRecipe {
       : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
                           ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
         Opcode(Opcode) {
-    assert(isa<VPReductionPHIRecipe>(getOperand(1)->getDefiningRecipe()) &&
+    [[maybe_unused]] auto *AccumulatorRecipe =
+        getOperand(1)->getDefiningRecipe();
+    assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
+            isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
            "Unexpected operand order for partial reduction recipe");
   }
   ~VPPartialReductionRecipe() override = default;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
new file mode 100644
index 00000000000000..bedf8b6b3a9b56
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -0,0 +1,1025 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --mattr=+neon,+dotprod -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NEON
+; RUN: opt --mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-SVE
+; RUN: opt --mattr=+sve -vectorizer-maximize-bandwidth -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-SVE-MAXBW
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-unknown-elf"
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) vscale_range(1,16)
+define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
+; CHECK-NEON-LABEL: define i32 @chained_partial_reduce_add_sub(
+; CHECK-NEON-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEON-NEXT:  entry:
+; CHECK-NEON-NEXT:    [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
+; CHECK-NEON-NEXT:    [[DIV27:%.*]] = lshr i32 [[N]], 1
+; CHECK-NEON-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
+; CHECK-NEON-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
+; CHECK-NEON-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEON:       vector.ph:
+; CHECK-NEON-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
+; CHECK-NEON-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEON-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-NEON:       vector.body:
+; CHECK-NEON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEON-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEON-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEON-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEON-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEON-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEON-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0
+; CHECK-NEON-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEON-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0
+; CHECK-NEON-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEON-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i32 0
+; CHECK-NEON-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEON-NEXT:    [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEON-NEXT:    [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
+; CHECK-NEON-NEXT:    [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
+; CHECK-NEON-NEXT:    [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEON-NEXT:    [[TMP11:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP10]]
+; CHECK-NEON-NEXT:    [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]]
+; CHECK-NEON-NEXT:    [[TMP13]] = sub <16 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEON-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEON-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEON-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEON:       middle.block:
+; CHECK-NEON-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP13]])
+; CHECK-NEON-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEON-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+;
+; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_add_sub(
+; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SVE-NEXT:  entry:
+; CHECK-SVE-NEXT:    [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
+; CHECK-SVE-NEXT:    [[DIV27:%.*]] = lshr i32 [[N]], 1
+; CHECK-SVE-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
+; CHECK-SVE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-SVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
+; CHECK-SVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-SVE:       vector.ph:
+; CHECK-SVE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-SVE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
+; CHECK-SVE-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-SVE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-SVE-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-SVE:       vector.body:
+; CHECK-SVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-SVE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-SVE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-SVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]]
+; CHECK-SVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-SVE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[TMP6]]
+; CHECK-SVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0
+; CHECK-SVE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
+; CHECK-SVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i32 0
+; CHECK-SVE-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP11]], align 1
+; CHECK-SVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i32 0
+; CHECK-SVE-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i8>, ptr [[TMP12]], align 1
+; CHECK-SVE-NEXT:    [[TMP13:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; CHECK-SVE-NEXT:    [[TMP14:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
+; CHECK-SVE-NEXT:    [[TMP15:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD2]] to <vscale x 4 x i32>
+; CHECK-SVE-NEXT:    [[TMP16:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP14]]
+; CHECK-SVE-NEXT:    [[TMP17:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]]
+; CHECK-SVE-NEXT:    [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP15]]
+; CHECK-SVE-NEXT:    [[TMP19]] = sub <vscale x 4 x i32> [[TMP17]], [[TMP18]]
+; CHECK-SVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-SVE-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-SVE-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-SVE:       middle.block:
+; CHECK-SVE-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
+; CHECK-SVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-SVE-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+;
+; CHECK-SVE-MAXBW-LABEL: define i32 @chained_partial_reduce_add_sub(
+; CHECK-SVE-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SVE-MAXBW-NEXT:  entry:
+; CHECK-SVE-MAXBW-NEXT:    [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
+; CHECK-SVE-MAXBW-NEXT:    [[DIV27:%.*]] = lshr i32 [[N]], 1
+; CHECK-SVE-MAXBW-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
+; CHECK-SVE-MAXBW-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-MAXBW-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-SVE-MAXBW-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
+; CHECK-SVE-MAXBW-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-SVE-MAXBW:       vector.ph:
+; CHECK-SVE-MAXBW-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-MAXBW-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
+; CHECK-SVE-MAXBW-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-SVE-MAXBW-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-SVE-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-SVE-MAXBW:       vector.body:
+; CHECK-SVE-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-SVE-MAXBW-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[TMP6]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0
+; CHECK-SVE-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
+; CHECK-SVE-MAXBW-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i32 0
+; CHECK-SVE-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
+; CHECK-SVE-MAXBW-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i32 0
+; CHECK-SVE-MAXBW-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
+; CHECK-SVE-MAXBW-NEXT:    [[TMP13:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
+; CHECK-SVE-MAXBW-NEXT:    [[TMP14:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
+; CHECK-SVE-MAXBW-NEXT:    [[TMP15:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
+; CHECK-SVE-MAXBW-NEXT:    [[TMP16:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP14]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP17:%.*]] = add <vscale x 8 x i32> [[VEC_PHI]], [[TMP16]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP18:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP15]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP19]] = sub <vscale x 8 x i32> [[TMP17]], [[TMP18]]
+; CHECK-SVE-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-SVE-MAXBW-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-SVE-MAXBW-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-SVE-MAXBW:       middle.block:
+; CHECK-SVE-MAXBW-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP19]])
+; CHECK-SVE-MAXBW-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-SVE-MAXBW-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+;
+entry:
+  %cmp28.not = icmp ult i32 %N, 2
+  %div27 = lshr i32 %N, 1
+  %wide.trip.count = zext nneg i32 %div27 to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %res.0.lcssa = phi i32 [ %sub, %for.body ]
+  ret i32 %res.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %res = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+  %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %indvars.iv
+  %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %indvars.iv
+  %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %indvars.iv
+  %a.val = load i8, ptr %a.ptr, align 1
+  %b.val = load i8, ptr %b.ptr, align 1
+  %c.val = load i8, ptr %c.ptr, align 1
+  %a.ext = sext i8 %a.val to i32
+  %b.ext = sext i8 %b.val to i32
+  %c.ext = sext i8 %c.val to i32
+  %mul.ab = mul nsw i32 %a.ext, %b.ext
+  %add = add nsw i32 %res, %mul.ab
+  %mul.ac = mul nsw i32 %a.ext, %c.ext
+  %sub = sub i32 %add, %mul.ac
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+}
+
+define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
+; CHECK-NEON-LABEL: define i32 @chained_partial_reduce_add_add(
+; CHECK-NEON-SAME: ptr [[A:%.*]], ptr...
[truncated]

Copy link
Collaborator

@SamTebbs33 SamTebbs33 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. I like the new getScaledReductions interface.

Comment on lines 142 to 144
/// Returns null if no scaled reduction was found, otherwise a pair with a
/// struct containing reduction information and the scaling factor between the
/// number of elements in the input and output.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment will need updating too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, updated.

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with suggestion inline and please double-check the tests pass with ASan/UBSan

@NickGuy-Arm
Copy link
Contributor Author

LGTM with suggestion inline and please double-check the tests pass with ASan/UBSan

Thanks. I have tested with both sanitizers, and the relevant tests no longer fail with them. However, I am seeing some other test failures, though those fail even without these changes (and there is one seemingly flaky test that comes and goes with or without this PR). Regardless, I'll wait until tomorrow to land this patch just in case it does cause things to fail again.

@NickGuy-Arm NickGuy-Arm merged commit cdea38f into llvm:main Jan 28, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 29, 2025

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/21156

Here is the relevant piece of the build log for reference:
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'ORC-x86_64-linux :: TestCases/Generic/lazy-link.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 6: rm -rf /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp && mkdir -p /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp
+ rm -rf /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp
+ mkdir -p /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp
RUN: at line 7: /build/buildbot/premerge-monolithic-linux/build/./bin/clang   -m64  -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/foo.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/Inputs/foo-ret-42.ll
+ /build/buildbot/premerge-monolithic-linux/build/./bin/clang -m64 -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/foo.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/Inputs/foo-ret-42.ll
warning: overriding the module target triple with x86_64-unknown-linux-gnu [-Woverride-module]
1 warning generated.
RUN: at line 8: /build/buildbot/premerge-monolithic-linux/build/./bin/clang   -m64  -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/x.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/Inputs/var-x-42.ll
+ /build/buildbot/premerge-monolithic-linux/build/./bin/clang -m64 -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/x.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/Inputs/var-x-42.ll
warning: overriding the module target triple with x86_64-unknown-linux-gnu [-Woverride-module]
1 warning generated.
RUN: at line 9: /build/buildbot/premerge-monolithic-linux/build/./bin/clang   -m64  -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
+ /build/buildbot/premerge-monolithic-linux/build/./bin/clang -m64 -c -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
warning: overriding the module target triple with x86_64-unknown-linux-gnu [-Woverride-module]
1 warning generated.
RUN: at line 10: /build/buildbot/premerge-monolithic-linux/build/./bin/llvm-jitlink -orc-runtime=/build/buildbot/premerge-monolithic-linux/build/./lib/../lib/clang/20/lib/x86_64-unknown-linux-gnu/liborc_rt.a -noexec -show-linked-files /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o -lazy /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/foo.o      -lazy /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/x.o | FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
+ /build/buildbot/premerge-monolithic-linux/build/./bin/llvm-jitlink -orc-runtime=/build/buildbot/premerge-monolithic-linux/build/./lib/../lib/clang/20/lib/x86_64-unknown-linux-gnu/liborc_rt.a -noexec -show-linked-files /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o -lazy /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/foo.o -lazy /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/x.o
+ FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
/build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll:18:14: error: CHECK-DAG: expected string not found in input
; CHECK-DAG: Linking {{.*}}x.o
             ^
<stdin>:2:167: note: scanning from here
Linking /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o
                                                                                                                                                                      ^
<stdin>:3:1: note: possible intended match here
Linking __orc_reentry_graph_#1
^

Input file: <stdin>
Check file: /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          1: Linking /build/buildbot/premerge-monolithic-linux/build/./lib/../lib/clang/20/lib/x86_64-unknown-linux-gnu/liborc_rt.a(resolve.cpp.o) 
          2: Linking /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/orc/X86_64LinuxConfig/TestCases/Generic/Output/lazy-link.ll.tmp/main.o 
dag:18'0                                                                                                                                                                           X error: no match found
          3: Linking __orc_reentry_graph_#1 
dag:18'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
dag:18'1     ?                               possible intended match
          4: Linking /build/buildbot/premerge-monolithic-linux/build/./lib/../lib/clang/20/lib/x86_64-unknown-linux-gnu/liborc_rt.a(sysv_reenter.x86-64.S.o) 
dag:18'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          5: Linking <indirect stubs graph #1> 
dag:18'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...

@mikaelholmen
Copy link
Collaborator

Hello @NickGuy-Arm

The following starts crashing with this patch:
opt -passes=loop-vectorize bbi-106371.ll -o /dev/null

It crashes with

opt: ../include/llvm/Support/TypeSize.h:280: ScalarTy llvm::details::FixedOrScalableQuantity<llvm::TypeSize, unsigned long>::getKnownScalarFactor(const FixedOrScalableQuantity<LeafTy, ValueTy> &) const [LeafTy = llvm::TypeSize, ValueTy = unsigned long]: Assertion `hasKnownScalarFactor(RHS) && "Expected RHS to be a known factor!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=loop-vectorize bbi-106371.ll -o /dev/null
1.	Running pass "function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>)" on module "bbi-106371.ll"
2.	Running pass "loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>" on function "foo1"
 #0 0x000055644243be46 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x4625e46)
 #1 0x000055644243988e llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x462388e)
 #2 0x000055644243c6c9 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f78cc84bd10 __restore_rt (/lib64/libpthread.so.0+0x12d10)
 #4 0x00007f78ca1eb52f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f78ca1bee65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f78ca1bed39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f78ca1e3e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000556443a1c5a0 llvm::SmallVectorTemplateBase<std::pair<llvm::PartialReductionChain, unsigned int>, true>::push_back(std::pair<llvm::PartialReductionChain, unsigned int> const&) LoopVectorize.cpp:0:0
 #9 0x0000556443a1c3c7 llvm::VPRecipeBuilder::getScaledReductions(llvm::Instruction*, llvm::Instruction*, llvm::VFRange&, llvm::SmallVectorImpl<std::pair<llvm::PartialReductionChain, unsigned int>>&) (build-all/bin/opt+0x5c063c7)
#10 0x0000556443a1c221 llvm::VPRecipeBuilder::getScaledReductions(llvm::Instruction*, llvm::Instruction*, llvm::VFRange&, llvm::SmallVectorImpl<std::pair<llvm::PartialReductionChain, unsigned int>>&) (build-all/bin/opt+0x5c06221)
#11 0x0000556443a1bdd7 llvm::VPRecipeBuilder::collectScaledReductions(llvm::VFRange&) (build-all/bin/opt+0x5c05dd7)
#12 0x0000556443a1e033 llvm::LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(llvm::VFRange&) (build-all/bin/opt+0x5c08033)
#13 0x0000556443a0fb63 llvm::LoopVectorizationPlanner::buildVPlansWithVPRecipes(llvm::ElementCount, llvm::ElementCount) (build-all/bin/opt+0x5bf9b63)
#14 0x0000556443a0f636 llvm::LoopVectorizationPlanner::plan(llvm::ElementCount, unsigned int) (build-all/bin/opt+0x5bf9636)
#15 0x0000556443a27aa9 llvm::LoopVectorizePass::processLoop(llvm::Loop*) (build-all/bin/opt+0x5c11aa9)
#16 0x0000556443a2fd4b llvm::LoopVectorizePass::runImpl(llvm::Function&) (build-all/bin/opt+0x5c19d4b)
#17 0x0000556443a30606 llvm::LoopVectorizePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x5c1a606)
#18 0x00005564438daddd llvm::detail::PassModel<llvm::Function, llvm::LoopVectorizePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#19 0x0000556442676407 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4860407)
#20 0x00005564438d855d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#21 0x000055644267afde llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4864fde)
#22 0x00005564438d453d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) PassBuilderPipelines.cpp:0:0
#23 0x00005564426750f7 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x485f0f7)
#24 0x0000556443860e8c llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5a4ae8c)
#25 0x00005564423fe23e optMain (build-all/bin/opt+0x45e823e)
#26 0x00007f78ca1d77e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#27 0x00005564423fbd2e _start (build-all/bin/opt+0x45e5d2e)
Abort (core dumped)

bbi-106371.ll.gz

@NickGuy-Arm
Copy link
Contributor Author

Thanks @mikaelholmen, I've opened a PR at #136680 to fix this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

8 participants