LoopVectorizer: incorrect FP operation reordering

Repro: https://godbolt.org/z/dbhsnTWG9

LLVM should respect the overflow and precision effects of floating-point operations. For example, the (correctly handled) test from the repro above
```
; Returns (%a + %b) - %a. Neither floating-point operation carries fast-math
; flags, so the intermediate sum is evaluated before the subtraction.
define float @test_single(float %a, float %b) {
    ; Intermediate sum; this is the value the subtraction below operates on.
    %sum = fadd float %a, %b
    ; Subtract %a back out of the intermediate sum.
    %res = fsub float %sum, %a
    ret float %res
}

```
does not get instcombined into `ret float %b`, because `%a + %b` can overflow to infinity, in which case the result is infinity or NaN (depending on the value of `%a`) rather than `%b`. This behavior must be preserved.

However, the Loop Vectorizer seems to ignore these semantics.
Running `opt -passes=loop-vectorize -force-vector-width=2`
on the test
```
; Scalar reduction loop: starts %sum at 0.0 and, for each index %iv, subtracts
; %pa[%iv] * %pb[%iv] from it, one element per iteration. Returns the final
; sum, or 0.0 when %length is zero. The fmul and fsub carry no fast-math flags.
define float @test_vector(ptr %pa, ptr %pb, i32 %length) {
entry:
  ; Skip the loop entirely when there are no elements.
  %should_execute = icmp ne i32 %length, 0
  br i1 %should_execute, label %loop, label %empty

loop:
  ; %iv is the element index; %sum is the running result carried across iterations.
  %iv = phi i32 [0, %entry], [%iv.next, %loop]
  %sum = phi float [0.0, %entry], [%sum.next, %loop]
  %a.gep = getelementptr float, ptr %pa, i32 %iv
  %b.gep = getelementptr float, ptr %pb, i32 %iv
  %a = load float, ptr %a.gep, align 4
  %b = load float, ptr %b.gep, align 4
  ; Each product is subtracted from the running sum in index order.
  %mul = fmul float %a, %b
  %sum.next = fsub float %sum, %mul
  %iv.next = add nuw nsw i32 %iv, 1
  ; Continue while the next index is still below %length.
  %loop.cond = icmp ult i32 %iv.next, %length
  br i1 %loop.cond, label %loop, label %done

done:
  ret float %sum.next

empty:
  ret float 0.0
}
```

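leads to the classic 2-accumulator + add-reduce vectorization, even though neither the `fmul` nor the `fsub` carries any fast-math flag (such as `reassoc`) that would permit reassociation: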
```
; Output of the loop vectorizer for @test_vector with a vector width of 2:
; a two-lane vector loop, a middle block that combines the lanes, and the
; original scalar loop kept for any leftover iterations.
define float @test_vector(ptr %pa, ptr %pb, i32 %length) {
entry:
  %should_execute = icmp ne i32 %length, 0
  br i1 %should_execute, label %loop.preheader, label %empty

loop.preheader:
  ; With fewer than 2 elements, bypass the vector loop and run only the scalar loop.
  %min.iters.check = icmp ult i32 %length, 2
  br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph:
  ; %n.vec is %length rounded down to a multiple of 2: the element count
  ; handled by the vector loop.
  %n.mod.vf = urem i32 %length, 2
  %n.vec = sub i32 %length, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Two independent accumulators, one per lane: lane 0 starts at 0.0 and
  ; lane 1 starts at -0.0.
  %vec.phi = phi <2 x float> [ <float 0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %3, %vector.body ]
  %0 = getelementptr float, ptr %pa, i32 %index
  %1 = getelementptr float, ptr %pb, i32 %index
  ; Load two consecutive elements from each array.
  %wide.load = load <2 x float>, ptr %0, align 4
  %wide.load1 = load <2 x float>, ptr %1, align 4
  %2 = fmul <2 x float> %wide.load, %wide.load1
  ; Lane-wise subtraction: lane 0 accumulates the products at even indices,
  ; lane 1 those at odd indices.
  %3 = fsub <2 x float> %vec.phi, %2
  %index.next = add nuw i32 %index, 2
  %4 = icmp eq i32 %index.next, %n.vec
  br i1 %4, label %middle.block, label %vector.body

middle.block:
  ; Combine the two per-lane partial results into a single scalar.
  %5 = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %3)
  ; If the vector loop covered every element, return directly; otherwise run
  ; the scalar loop for the remainder.
  %cmp.n = icmp eq i32 %length, %n.vec
  br i1 %cmp.n, label %done, label %scalar.ph

scalar.ph:
  ; Starting index and running sum for the scalar loop: resume after the
  ; vector loop, or start from scratch when it was bypassed.
  %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %loop.preheader ]
  %bc.merge.rdx = phi float [ %5, %middle.block ], [ 0.000000e+00, %loop.preheader ]
  br label %loop

loop:
  ; Scalar loop with the same per-iteration body as the input function.
  %iv = phi i32 [ %iv.next, %loop ], [ %bc.resume.val, %scalar.ph ]
  %sum = phi float [ %sum.next, %loop ], [ %bc.merge.rdx, %scalar.ph ]
  %a.gep = getelementptr float, ptr %pa, i32 %iv
  %b.gep = getelementptr float, ptr %pb, i32 %iv
  %a = load float, ptr %a.gep, align 4
  %b = load float, ptr %b.gep, align 4
  %mul = fmul float %a, %b
  %sum.next = fsub float %sum, %mul
  %iv.next = add nuw nsw i32 %iv, 1
  %loop.cond = icmp ult i32 %iv.next, %length
  br i1 %loop.cond, label %loop, label %done

done:
  ; The result comes either from the scalar loop or straight from the reduction.
  %sum.next.lcssa = phi float [ %sum.next, %loop ], [ %5, %middle.block ]
  ret float %sum.next.lcssa

empty:
  ret float 0.000000e+00
}

; Intrinsic called from middle.block to fold the <2 x float> accumulator
; (together with a scalar start value) into a single float.
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) #0

; Attribute group #0, applied to the intrinsic declaration above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```

This is broken in multiple ways. For example, take some huge `x`, let array `a` be `{x, x, x, ..., x}` and array `b` be `{1, -1, 1, -1, ..., 1, -1}`.

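Because the loop subtracts each product, the accumulator for the even-indexed elements can overflow to negative infinity while the accumulator for the odd-indexed elements overflows to positive infinity, so the final reduction yields NaN, whereas the original scalar loop returns `0` (for an even-length array).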

With other input data, it could as well lead to problems with precision.

It seems that the Loop Vectorizer doesn't respect these semantics.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

LoopVectorizer: incorrect FP operation reordering #169289

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

LoopVectorizer: incorrect FP operation reordering #169289

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions