Closed
Description
Found while investigating rust-lang/rust#133984.
Given this IR:
; Wrapper that returns the signed three-way comparison of its two i16
; arguments by calling the llvm.scmp intrinsic.
; The return value is annotated range(i8 -1, 3), i.e. the half-open interval
; [-1, 3), which admits the value 2 even though the scmp call below has no
; range attribute of its own.
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
%7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
ret i8 %7
}
; Input IR (before inlining). Returns an i1 computed from two pairs of i16
; values, (%0, %1) and (%2, %3):
;   - the first components are compared through @rust_i16_partial_ord;
;   - if that result is 0, the answer is the unsigned `%1 < %3`;
;   - if that result is 2, the answer is false;
;   - otherwise the answer is the signed `%0 < %2`.
; NOTE(review): judging by the mangled label name this looks like the inlined
; body of a tuple `PartialOrd::lt` -- confirm against the linked Rust issue.
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
; Three-way comparison of the first components.
%_3.i4.i = tail call noundef i8 @rust_i16_partial_ord(i16 %0, i16 %2)
switch i8 %_3.i4.i, label %bb4.i [
; 2 is inside the callee's declared return range(i8 -1, 3); this arm jumps
; straight to the exit block, where the phi selects `false`.
i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
; 0: fall through to comparing the second components.
i8 0, label %bb5.i
]
bb5.i: ; preds = %start
; Unsigned less-than on the second components.
%_0.i.i = icmp ult i16 %1, %3
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
bb4.i: ; preds = %start
; Default arm (any switch value other than 0 or 2): signed less-than on the
; first components.
%4 = icmp slt i16 %0, %2
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %start, %bb5.i, %bb4.i
; Merge the three paths; the edge coming directly from %start (the `i8 2`
; arm) contributes the constant false.
%_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %4, %bb4.i ], [ false, %start ]
ret i1 %_0.sroa.0.0.i
}
Today (https://llvm.godbolt.org/z/Wj8cPnK3n) it optimizes to:
; Optimizer output for the function above. The call to @rust_i16_partial_ord
; has been replaced by a direct llvm.scmp call, and that call now carries
; range(i8 -1, 3) -- the same range that was declared on the wrapper's return
; value. The control flow is otherwise unchanged: the `i8 2` switch arm and
; its `false` phi input are still present.
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
; The range attribute here still admits 2.
%4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
switch i8 %4, label %bb4.i [
; Arm that the report expects to be removed as dead.
i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
i8 0, label %bb5.i
]
bb5.i: ; preds = %start
; Unsigned less-than on the second components.
%_0.i.i = icmp ult i16 %1, %3
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
bb4.i: ; preds = %start
; Default arm: signed less-than on the first components.
%5 = icmp slt i16 %0, %2
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %bb4.i, %bb5.i, %start
; The `false` input from %start corresponds to the `i8 2` switch arm.
%_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %5, %bb4.i ], [ false, %start ]
ret i1 %_0.sroa.0.0.i
}
Note, in particular, the inlined intrinsic call at the beginning:
start:
%4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
That's an overly-broad range for `scmp`, which is always in [-1, 2).
As a result, follow-up passes don't optimize away that dead `i8 2` arm of the switch -- I guess they trust that range and don't look at what they know `scmp` actually returns.
So it would be good if either:
- inlining was smarter about the output range it put on a known intrinsic like this, or
- some pre-inlining optimization put the correct output range on `scmp`, as
; Diff against the wrapper from the input IR: the only change is adding
; range(i8 -1, 2) to the llvm.scmp call itself; the function's own return
; attribute stays range(i8 -1, 3).
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
- %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
+ %7 = tail call noundef range(i8 -1, 2) i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
ret i8 %7
}
also fixes the problem (https://llvm.godbolt.org/z/h7a55WKxK).