Skip to content

Missing range analysis for llvm.ucmp and llvm.scmp intrinsics #130179

Closed
@scottmcm

Description

@scottmcm

Found while investigating rust-lang/rust#133984.

Given this IR:

; Thin wrapper that returns the signed three-way comparison of two i16 values.
; Its declared return range(i8 -1, 3) is the half-open interval [-1, 3), so it
; admits the value 2 even though llvm.scmp itself only produces -1, 0, or 1.
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
  ; Forward both arguments to the intrinsic and return its result unchanged.
  ; Note that the call site carries no range attribute of its own.
  %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
  ret i8 %7
}

; Reproducer input (before inlining): computes whether the pair (%0, %1) is
; less than the pair (%2, %3). The first elements are compared as signed values
; through @rust_i16_partial_ord; the second elements are compared as unsigned.
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
  ; Three-way comparison of the first elements; the callee is still an
  ; out-of-line call at this point.
  %_3.i4.i = tail call noundef i8 @rust_i16_partial_ord(i16 %0, i16 %2)
  ; Dispatch on the comparison result: 2 jumps straight to the exit block
  ; (where the phi yields false), 0 goes to %bb5.i, and every other value
  ; takes the default edge to %bb4.i.
  switch i8 %_3.i4.i, label %bb4.i [
    i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
    i8 0, label %bb5.i
  ]

; Comparison result was 0 (first elements equal): the answer is the unsigned
; less-than of the second elements.
bb5.i:                                            ; preds = %start
  %_0.i.i = icmp ult i16 %1, %3
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

; Default edge (result neither 0 nor 2): the answer is the signed less-than of
; the first elements.
bb4.i:                                            ; preds = %start
  %4 = icmp slt i16 %0, %2
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

; Merge point: selects the result computed on the incoming edge, or false when
; arriving directly from %start (the `i8 2` switch case).
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %start, %bb5.i, %bb4.i
  %_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %4, %bb4.i ], [ false, %start ]
  ret i1 %_0.sroa.0.0.i
}

Today (https://llvm.godbolt.org/z/Wj8cPnK3n) it optimizes to:

; Optimizer output for the reproducer: the wrapper call has been inlined, so
; the function now calls llvm.scmp directly, but control flow is otherwise the
; same as before (switch with a `2` case, a `0` case, and a default edge).
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
  ; The inlined call site carries range(i8 -1, 3), i.e. [-1, 3), which still
  ; admits 2 even though scmp only returns -1, 0, or 1.
  %4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
  ; The `i8 2` case below can never be taken for an scmp result, yet it has
  ; not been removed from the switch.
  switch i8 %4, label %bb4.i [
    i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
    i8 0, label %bb5.i
  ]

; scmp returned 0 (first elements equal): the answer is the unsigned less-than
; of the second elements.
bb5.i:                                            ; preds = %start
  %_0.i.i = icmp ult i16 %1, %3
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

; Default edge (result neither 0 nor 2): the answer is the signed less-than of
; the first elements.
bb4.i:                                            ; preds = %start
  %5 = icmp slt i16 %0, %2
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

; Merge point: the `false` incoming value from %start corresponds to the
; unreachable `i8 2` switch case.
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %bb4.i, %bb5.i, %start
  %_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %5, %bb4.i ], [ false, %start ]
  ret i1 %_0.sroa.0.0.i
}

Note, in particular, the inlined intrinsic call at the beginning:

start:
  %4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)

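That's an overly-broad range for scmp, which only ever returns -1, 0, or 1, i.e. a value in the half-open interval [-1, 2), whereas range(i8 -1, 3) means [-1, 3) and so still admits 2. (The same result set applies to ucmp.)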

As a result, follow-up passes don't optimize away that dead i8 2 arm of the switch -- I guess they trust that range and don't look at what they know scmp actually returns.

So it would be good if either:

  1. inlining was smarter about the output range it put on a known intrinsic like this, or
  2. some pre-inlining optimization put the correct output range on scmp, as
 define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
-  %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
+  %7 = tail call noundef range(i8 -1, 2) i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
   ret i8 %7
 }

also fixes the problem (https://llvm.godbolt.org/z/h7a55WKxK).

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions