apparent infinite loop in InstCombine #59897

regehr · 2023-01-09T15:40:58Z

apologies for the unreduced module but it's not easy to make minimal triggers for this kind of bug. the function below, optimized at -O2, appears to hang us up indefinitely. all of the cases triggering this have contained a bitreverse intrinsic so I suspect that's part of the problem.

I attached a profiler to the process and here's what's up at the top:

Samples: 479K of event 'cycles', Event count (approx.): 561733173365
Overhead  Command  Shared Object                   Symbol
   3.69%  opt      libLLVMAnalysis.so.16git        [.] programUndefinedIfUndefOrPoison                   ▒
   3.60%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBits                                  ◆
   2.82%  opt      libLLVMAnalysis.so.16git        [.] llvm::DataLayout::getTypeSizeInBits               ▒
   2.54%  opt      libLLVMCore.so.16git            [.] llvm::Type::getPrimitiveSizeInBits                ▒
   2.54%  opt      libLLVMAnalysis.so.16git        [.] llvm::mustTriggerUB                               ▒
   2.38%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBitsFromOperator                      ▒
   1.86%  opt      libLLVMInstCombine.so.16git     [.] llvm::InstCombinerImpl::run                       ▒
   1.78%  opt      libLLVMAnalysis.so.16git        [.] llvm::getGuaranteedWellDefinedOps                 ▒
   1.72%  opt      libLLVMInstCombine.so.16git     [.] llvm::InstCombinerImpl::SimplifyDemandedUseBits   ▒
   1.29%  opt      libLLVMCore.so.16git            [.] llvm::Value::setValueName                         ▒
   1.22%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBitsFromAssume                        ▒
   1.19%  opt      libLLVMCore.so.16git            [.] llvm::Type::getScalarSizeInBits                   ▒
   1.18%  opt      libLLVMAnalysis.so.16git        [.] llvm::isGuaranteedToTransferExecutionToSuccessor  ▒

trigger this by running opt -O2 on this code:

source_filename = "M2"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"

define i64 @f-tgt(i64 %0, i64 %1) {
f:
  %X0_1 = freeze i64 %0
  %X0_2 = trunc i64 %X0_1 to i1
  %X0_3 = zext i1 %X0_2 to i32
  %X0_4 = zext i32 %X0_3 to i64
  %X1_1 = freeze i64 %1
  %X1_2 = trunc i64 %X1_1 to i1
  %X1_3 = zext i1 %X1_2 to i32
  %X1_4 = zext i32 %X1_3 to i64
  %X2_3 = or i64 poison, 0
  %X2_4 = freeze i64 %X2_3
  %X3_3 = or i64 poison, 0
  %X3_4 = freeze i64 %X3_3
  %X4_3 = or i64 poison, 0
  %X4_4 = freeze i64 %X4_3
  %X5_3 = or i64 poison, 0
  %X5_4 = freeze i64 %X5_3
  %X6_3 = or i64 poison, 0
  %X6_4 = freeze i64 %X6_3
  %X7_3 = or i64 poison, 0
  %X7_4 = freeze i64 %X7_3
  %X8_3 = or i64 poison, 0
  %X8_4 = freeze i64 %X8_3
  %X9_3 = or i64 poison, 0
  %X9_4 = freeze i64 %X9_3
  %X10_3 = or i64 poison, 0
  %X10_4 = freeze i64 %X10_3
  %X11_3 = or i64 poison, 0
  %X11_4 = freeze i64 %X11_3
  %X12_3 = or i64 poison, 0
  %X12_4 = freeze i64 %X12_3
  %X13_3 = or i64 poison, 0
  %X13_4 = freeze i64 %X13_3
  %X14_3 = or i64 poison, 0
  %X14_4 = freeze i64 %X14_3
  %X15_3 = or i64 poison, 0
  %X15_4 = freeze i64 %X15_3
  %X16_3 = or i64 poison, 0
  %X16_4 = freeze i64 %X16_3
  %X17_3 = or i64 poison, 0
  %X17_4 = freeze i64 %X17_3
  %Q0_3 = or i128 poison, 0
  %Q0_4 = freeze i128 %Q0_3
  %Q1_3 = or i128 poison, 0
  %Q1_4 = freeze i128 %Q1_3
  %Q2_3 = or i128 poison, 0
  %Q2_4 = freeze i128 %Q2_3
  %Q3_3 = or i128 poison, 0
  %Q3_4 = freeze i128 %Q3_3
  %X8_3x1x2x0 = trunc i64 %X1_4 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X8_3x1x2x0)
  %X8_3x3x2x0 = zext i32 %X8_3x2x2x0 to i64
  %X8_4x1x3x0 = trunc i64 %X8_3x3x2x0 to i32
  %X8_4x2x3x0 = xor i32 %X8_4x1x3x0, -1
  %X8_4x3x3x0 = or i32 0, %X8_4x2x3x0
  %X8_4x4x3x0 = zext i32 %X8_4x3x3x0 to i64
  %X0_3x1x4x0 = trunc i64 %X8_4x4x3x0 to i32
  %X0_3x2x4x0 = lshr i32 %X0_3x1x4x0, 31
  %X0_3x3x4x0 = trunc i64 %X0_4 to i32
  %X0_3x4x4x0 = and i32 %X0_3x3x4x0, %X0_3x2x4x0
  %X0_3x5x4x0 = zext i32 %X0_3x4x4x0 to i64
  %X0_3x1x5x0 = trunc i64 %X0_3x5x4x0 to i32
  %X0_3x2x5x0 = zext i32 %X0_3x1x5x0 to i64
  ret i64 %X0_3x2x5x0
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.bitreverse.i32(i32) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

The text was updated successfully, but these errors were encountered:

nikic · 2023-01-09T16:09:02Z

Reduced for opt -passes=instcombine:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %X8_4x2x3x0 = xor i32 %X8_3x2x2x0, -1
  %X0_3x2x4x0 = lshr i32 %X8_4x2x3x0, 31
  %X0_3x2x5x0 = zext i32 %X0_3x2x4x0 to i64
  ret i64 %X0_3x2x5x0
}

declare i32 @llvm.bitreverse.i32(i32)

nikic · 2023-01-09T16:31:22Z

Gets transformed into this first:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %isnotneg = icmp sgt i32 %X8_3x2x2x0, -1
  %X0_3x2x4x0 = zext i1 %isnotneg to i32
  %X0_3x2x5x0 = zext i32 %X0_3x2x4x0 to i64
  ret i64 %X0_3x2x5x0
}

Then into:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %isnotneg = icmp eq i32 %X8_3x2x2x0, 0
  %X0_3x2x4x0 = zext i1 %isnotneg to i32
  %X0_3x2x5x0 = zext i32 %X0_3x2x4x0 to i64
  ret i64 %X0_3x2x5x0
}

Then into:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %isnotneg = icmp eq i32 %X8_3x2x2x0, 0
  %X0_3x2x5x0 = zext i1 %isnotneg to i64
  ret i64 %X0_3x2x5x0
}

And then:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %X8_3x2x2x0.lobit = lshr i32 %X8_3x2x2x0, 31
  %1 = xor i32 %X8_3x2x2x0.lobit, 1
  %X0_3x2x5x0 = zext i32 %1 to i64
  ret i64 %X0_3x2x5x0
}

And then we're back where we started:

define i64 @test(i1 %X1_2) {
  %X1_3 = zext i1 %X1_2 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X1_3)
  %1 = xor i32 %X8_3x2x2x0, -1
  %2 = lshr i32 %1, 31
  %X0_3x2x5x0 = zext i32 %2 to i64
  ret i64 %X0_3x2x5x0
}

nikic · 2023-01-09T16:32:31Z

Without having looked into this too closely, might be related to 21d3871 by @rotateright?

rotateright · 2023-01-09T16:49:11Z

Without having looked into this too closely, might be related to 21d3871 by @rotateright?

Yes, or at least that exposed some other missing/flawed fold. I had a feeling we'd see more inf-loops than only the ones I found before applying that patch. Taking a look now.

https://alive2.llvm.org/ce/z/ZXCtgi This breaks the infinite combine loop for issue #59897, but we may still need more changes to avoid those loops.

rotateright · 2023-01-09T18:00:55Z

The inf-loop for this example should be fixed now, but the more general problem is in InstCombinerImpl::transformZExtICmp(). It can create 3 instructions from 2 as shown in the IR sequence above, so we need to limit it.

In the changed tests, we avoid creating extra instructions, and there are no obvious regressions in IR tests at least. Codegen should be able to create the shift+mask form if that is profitable. This is a more general fix for issue #59897 than 0eedc9e .

rotateright · 2023-01-09T21:45:51Z

We're avoiding the inf-loop in this example 2 different ways now, so I think it's safe to close.
The code in transformZExtICmp() is still potentially buggy (and has been for a very long time). The FIXME comment above that block remains. I'll add more tests for those patterns and try to limit other inf-loop potential.

regehr added llvm:instcombine llvm:optimizations slow-compile labels Jan 9, 2023

nikic added llvm:hang Compiler hang (infinite loop) and removed slow-compile labels Jan 9, 2023

rotateright self-assigned this Jan 9, 2023

rotateright added a commit that referenced this issue Jan 9, 2023

[InstCombine] bitrev (zext i1 X) --> select X, SMinC, 0

0eedc9e

https://alive2.llvm.org/ce/z/ZXCtgi This breaks the infinite combine loop for issue #59897, but we may still need more changes to avoid those loops.

EugeneZelenko removed the llvm:optimizations label Jan 9, 2023

rotateright closed this as completed Jan 9, 2023

vfdff mentioned this issue Oct 8, 2023

[AArch64] Combine cmp + cset into eor #68465

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

apparent infinite loop in InstCombine #59897

apparent infinite loop in InstCombine #59897

regehr commented Jan 9, 2023

nikic commented Jan 9, 2023 •

edited

Loading

nikic commented Jan 9, 2023

nikic commented Jan 9, 2023

rotateright commented Jan 9, 2023

rotateright commented Jan 9, 2023

rotateright commented Jan 9, 2023

apparent infinite loop in InstCombine #59897

apparent infinite loop in InstCombine #59897

Comments

regehr commented Jan 9, 2023

nikic commented Jan 9, 2023 • edited Loading

nikic commented Jan 9, 2023

nikic commented Jan 9, 2023

rotateright commented Jan 9, 2023

rotateright commented Jan 9, 2023

rotateright commented Jan 9, 2023

nikic commented Jan 9, 2023 •

edited

Loading