Skip to content

Commit 0854991

Browse files
committed
De-LLVM the unchecked shifts [MCP#693]
This is just one part of the MCP, but it's the one that IMHO removes the most noise from the standard library code. Seems net simpler this way, since MIR already supported heterogeneous shifts anyway, and thus it's not more work for backends than before.
1 parent 69fa40c commit 0854991

File tree

30 files changed

+263
-561
lines changed

30 files changed

+263
-561
lines changed

compiler/rustc_codegen_ssa/src/base.rs

+28-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::back::write::{
55
compute_per_cgu_lto_type, start_async_codegen, submit_codegened_module_to_llvm,
66
submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, ComputedLtoType, OngoingCodegen,
77
};
8-
use crate::common::{IntPredicate, RealPredicate, TypeKind};
8+
use crate::common::{self, IntPredicate, RealPredicate, TypeKind};
99
use crate::errors;
1010
use crate::meth;
1111
use crate::mir;
@@ -33,7 +33,7 @@ use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem};
3333
use rustc_middle::query::Providers;
3434
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
3535
use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
36-
use rustc_session::config::{self, CrateType, EntryFnType, OutputType};
36+
use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
3737
use rustc_session::Session;
3838
use rustc_span::symbol::sym;
3939
use rustc_span::Symbol;
@@ -300,14 +300,32 @@ pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
300300
}
301301
}
302302

303-
pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
303+
/// Shifts in MIR are all allowed to have mismatched LHS & RHS types.
304+
///
305+
/// This does all the appropriate conversions needed to pass it to the builder's
306+
/// shift methods, which are UB for out-of-range shifts.
307+
///
308+
/// If `is_unchecked` is false, this masks the RHS to ensure it stays in-bounds.
309+
/// For 32- and 64-bit types, this matches the semantics
310+
/// of Java. (See related discussion on #1877 and #10183.)
311+
///
312+
/// If `is_unchecked` is true, this does no masking, and adds sufficient `assume`
313+
/// calls or operation flags to preserve as much freedom to optimize as possible.
314+
pub fn build_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
304315
bx: &mut Bx,
305316
lhs: Bx::Value,
306-
rhs: Bx::Value,
317+
mut rhs: Bx::Value,
318+
is_unchecked: bool,
307319
) -> Bx::Value {
308320
// Shifts may have any size int on the rhs
309321
let mut rhs_llty = bx.cx().val_ty(rhs);
310322
let mut lhs_llty = bx.cx().val_ty(lhs);
323+
324+
let mask = common::shift_mask_val(bx, lhs_llty, rhs_llty, false);
325+
if !is_unchecked {
326+
rhs = bx.and(rhs, mask);
327+
}
328+
311329
if bx.cx().type_kind(rhs_llty) == TypeKind::Vector {
312330
rhs_llty = bx.cx().element_type(rhs_llty)
313331
}
@@ -317,6 +335,12 @@ pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
317335
let rhs_sz = bx.cx().int_width(rhs_llty);
318336
let lhs_sz = bx.cx().int_width(lhs_llty);
319337
if lhs_sz < rhs_sz {
338+
if is_unchecked && bx.sess().opts.optimize != OptLevel::No {
339+
// FIXME: Use `trunc nuw` once that's available
340+
let inrange = bx.icmp(IntPredicate::IntULE, rhs, mask);
341+
bx.assume(inrange);
342+
}
343+
320344
bx.trunc(rhs, lhs_llty)
321345
} else if lhs_sz > rhs_sz {
322346
// We zero-extend even if the RHS is signed. So e.g. `(x: i32) << -1i8` will zero-extend the

compiler/rustc_codegen_ssa/src/common.rs

+1-40
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
use rustc_hir::LangItem;
44
use rustc_middle::mir;
55
use rustc_middle::ty::Instance;
6-
use rustc_middle::ty::{self, layout::TyAndLayout, Ty, TyCtxt};
6+
use rustc_middle::ty::{self, layout::TyAndLayout, TyCtxt};
77
use rustc_span::Span;
88

9-
use crate::base;
109
use crate::traits::*;
1110

1211
#[derive(Copy, Clone)]
@@ -128,44 +127,6 @@ pub fn build_langcall<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
128127
(bx.fn_abi_of_instance(instance, ty::List::empty()), bx.get_fn_addr(instance), instance)
129128
}
130129

131-
// To avoid UB from LLVM, these two functions mask RHS with an
132-
// appropriate mask unconditionally (i.e., the fallback behavior for
133-
// all shifts). For 32- and 64-bit types, this matches the semantics
134-
// of Java. (See related discussion on #1877 and #10183.)
135-
136-
pub fn build_masked_lshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
137-
bx: &mut Bx,
138-
lhs: Bx::Value,
139-
rhs: Bx::Value,
140-
) -> Bx::Value {
141-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
142-
// #1877, #10183: Ensure that input is always valid
143-
let rhs = shift_mask_rhs(bx, rhs);
144-
bx.shl(lhs, rhs)
145-
}
146-
147-
pub fn build_masked_rshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
148-
bx: &mut Bx,
149-
lhs_t: Ty<'tcx>,
150-
lhs: Bx::Value,
151-
rhs: Bx::Value,
152-
) -> Bx::Value {
153-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
154-
// #1877, #10183: Ensure that input is always valid
155-
let rhs = shift_mask_rhs(bx, rhs);
156-
let is_signed = lhs_t.is_signed();
157-
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
158-
}
159-
160-
fn shift_mask_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
161-
bx: &mut Bx,
162-
rhs: Bx::Value,
163-
) -> Bx::Value {
164-
let rhs_llty = bx.val_ty(rhs);
165-
let shift_val = shift_mask_val(bx, rhs_llty, rhs_llty, false);
166-
bx.and(rhs, shift_val)
167-
}
168-
169130
pub fn shift_mask_val<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
170131
bx: &mut Bx,
171132
llty: Bx::Type,

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

+5-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use super::place::PlaceRef;
33
use super::{FunctionCx, LocalRef};
44

55
use crate::base;
6-
use crate::common::{self, IntPredicate};
6+
use crate::common::IntPredicate;
77
use crate::traits::*;
88
use crate::MemFlags;
99

@@ -860,14 +860,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
860860
bx.inbounds_gep(llty, lhs, &[rhs])
861861
}
862862
}
863-
mir::BinOp::Shl => common::build_masked_lshift(bx, lhs, rhs),
864-
mir::BinOp::ShlUnchecked => {
865-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
863+
mir::BinOp::Shl | mir::BinOp::ShlUnchecked => {
864+
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShlUnchecked);
866865
bx.shl(lhs, rhs)
867866
}
868-
mir::BinOp::Shr => common::build_masked_rshift(bx, input_ty, lhs, rhs),
869-
mir::BinOp::ShrUnchecked => {
870-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
867+
mir::BinOp::Shr | mir::BinOp::ShrUnchecked => {
868+
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShrUnchecked);
871869
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
872870
}
873871
mir::BinOp::Ne

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -454,9 +454,10 @@ pub fn check_intrinsic_type(
454454
sym::unchecked_div | sym::unchecked_rem | sym::exact_div => {
455455
(1, 0, vec![param(0), param(0)], param(0))
456456
}
457-
sym::unchecked_shl | sym::unchecked_shr | sym::rotate_left | sym::rotate_right => {
458-
(1, 0, vec![param(0), param(0)], param(0))
457+
sym::unchecked_shl | sym::unchecked_shr => {
458+
(1, 0, vec![param(0), tcx.types.u32], param(0))
459459
}
460+
sym::rotate_left | sym::rotate_right => (1, 0, vec![param(0), param(0)], param(0)),
460461
sym::unchecked_add | sym::unchecked_sub | sym::unchecked_mul => {
461462
(1, 0, vec![param(0), param(0)], param(0))
462463
}

library/core/src/intrinsics.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -2224,18 +2224,20 @@ extern "rust-intrinsic" {
22242224
/// Safe wrappers for this intrinsic are available on the integer
22252225
/// primitives via the `checked_shl` method. For example,
22262226
/// [`u32::checked_shl`]
2227+
#[cfg(not(bootstrap))]
22272228
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
22282229
#[rustc_nounwind]
2229-
pub fn unchecked_shl<T: Copy>(x: T, y: T) -> T;
2230+
pub fn unchecked_shl<T: Copy>(x: T, y: u32) -> T;
22302231
/// Performs an unchecked right shift, resulting in undefined behavior when
22312232
/// `y < 0` or `y >= N`, where N is the width of T in bits.
22322233
///
22332234
/// Safe wrappers for this intrinsic are available on the integer
22342235
/// primitives via the `checked_shr` method. For example,
22352236
/// [`u32::checked_shr`]
2237+
#[cfg(not(bootstrap))]
22362238
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
22372239
#[rustc_nounwind]
2238-
pub fn unchecked_shr<T: Copy>(x: T, y: T) -> T;
2240+
pub fn unchecked_shr<T: Copy>(x: T, y: u32) -> T;
22392241

22402242
/// Returns the result of an unchecked addition, resulting in
22412243
/// undefined behavior when `x + y > T::MAX` or `x + y < T::MIN`.

library/core/src/num/int_macros.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -1253,10 +1253,18 @@ macro_rules! int_impl {
12531253
#[inline(always)]
12541254
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
12551255
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
1256-
// SAFETY: the caller must uphold the safety contract for
1257-
// `unchecked_shl`.
1258-
// Any legal shift amount is losslessly representable in the self type.
1259-
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1256+
#[cfg(bootstrap)]
1257+
{
1258+
// For bootstrapping, just use built-in primitive shift.
1259+
// panicking is a legal manifestation of UB
1260+
self << rhs
1261+
}
1262+
#[cfg(not(bootstrap))]
1263+
{
1264+
// SAFETY: the caller must uphold the safety contract for
1265+
// `unchecked_shl`.
1266+
unsafe { intrinsics::unchecked_shl(self, rhs) }
1267+
}
12601268
}
12611269

12621270
/// Checked shift right. Computes `self >> rhs`, returning `None` if `rhs` is
@@ -1336,10 +1344,18 @@ macro_rules! int_impl {
13361344
#[inline(always)]
13371345
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13381346
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
1339-
// SAFETY: the caller must uphold the safety contract for
1340-
// `unchecked_shr`.
1341-
// Any legal shift amount is losslessly representable in the self type.
1342-
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1347+
#[cfg(bootstrap)]
1348+
{
1349+
// For bootstrapping, just use built-in primitive shift.
1350+
// panicking is a legal manifestation of UB
1351+
self >> rhs
1352+
}
1353+
#[cfg(not(bootstrap))]
1354+
{
1355+
// SAFETY: the caller must uphold the safety contract for
1356+
// `unchecked_shr`.
1357+
unsafe { intrinsics::unchecked_shr(self, rhs) }
1358+
}
13431359
}
13441360

13451361
/// Checked absolute value. Computes `self.abs()`, returning `None` if

library/core/src/num/mod.rs

-11
Original file line numberDiff line numberDiff line change
@@ -286,17 +286,6 @@ macro_rules! widening_impl {
286286
};
287287
}
288288

289-
macro_rules! conv_rhs_for_unchecked_shift {
290-
($SelfT:ty, $x:expr) => {{
291-
// If the `as` cast will truncate, ensure we still tell the backend
292-
// that the pre-truncation value was also small.
293-
if <$SelfT>::BITS < 32 {
294-
intrinsics::assume($x <= (<$SelfT>::MAX as u32));
295-
}
296-
$x as $SelfT
297-
}};
298-
}
299-
300289
impl i8 {
301290
int_impl! {
302291
Self = i8,

library/core/src/num/uint_macros.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -1313,10 +1313,18 @@ macro_rules! uint_impl {
13131313
#[inline(always)]
13141314
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13151315
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
1316-
// SAFETY: the caller must uphold the safety contract for
1317-
// `unchecked_shl`.
1318-
// Any legal shift amount is losslessly representable in the self type.
1319-
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1316+
#[cfg(bootstrap)]
1317+
{
1318+
// For bootstrapping, just use built-in primitive shift.
1319+
// panicking is a legal manifestation of UB
1320+
self << rhs
1321+
}
1322+
#[cfg(not(bootstrap))]
1323+
{
1324+
// SAFETY: the caller must uphold the safety contract for
1325+
// `unchecked_shl`.
1326+
unsafe { intrinsics::unchecked_shl(self, rhs) }
1327+
}
13201328
}
13211329

13221330
/// Checked shift right. Computes `self >> rhs`, returning `None`
@@ -1396,10 +1404,18 @@ macro_rules! uint_impl {
13961404
#[inline(always)]
13971405
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13981406
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
1399-
// SAFETY: the caller must uphold the safety contract for
1400-
// `unchecked_shr`.
1401-
// Any legal shift amount is losslessly representable in the self type.
1402-
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1407+
#[cfg(bootstrap)]
1408+
{
1409+
// For bootstrapping, just use built-in primitive shift.
1410+
// panicking is a legal manifestation of UB
1411+
self >> rhs
1412+
}
1413+
#[cfg(not(bootstrap))]
1414+
{
1415+
// SAFETY: the caller must uphold the safety contract for
1416+
// `unchecked_shr`.
1417+
unsafe { intrinsics::unchecked_shr(self, rhs) }
1418+
}
14031419
}
14041420

14051421
/// Checked exponentiation. Computes `self.pow(exp)`, returning `None` if

library/core/src/ptr/mod.rs

+14-4
Original file line numberDiff line numberDiff line change
@@ -1781,9 +1781,19 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
17811781
// FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
17821782
// 1, where the method versions of these operations are not inlined.
17831783
use intrinsics::{
1784-
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl,
1785-
unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
1784+
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_sub,
1785+
wrapping_add, wrapping_mul, wrapping_sub,
17861786
};
1787+
#[cfg(bootstrap)]
1788+
const unsafe fn unchecked_shl(value: usize, shift: u32) -> usize {
1789+
value << shift
1790+
}
1791+
#[cfg(bootstrap)]
1792+
const unsafe fn unchecked_shr(value: usize, shift: u32) -> usize {
1793+
value >> shift
1794+
}
1795+
#[cfg(not(bootstrap))]
1796+
use intrinsics::{unchecked_shl, unchecked_shr};
17871797

17881798
/// Calculate multiplicative modular inverse of `x` modulo `m`.
17891799
///
@@ -1902,8 +1912,8 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
19021912
// SAFETY: a is power-of-two hence non-zero. stride == 0 case is handled above.
19031913
// FIXME(const-hack) replace with min
19041914
let gcdpow = unsafe {
1905-
let x = cttz_nonzero(stride);
1906-
let y = cttz_nonzero(a);
1915+
let x = cttz_nonzero(stride) as u32;
1916+
let y = cttz_nonzero(a) as u32;
19071917
if x < y { x } else { y }
19081918
};
19091919
// SAFETY: gcdpow has an upper-bound that’s at most the number of bits in a usize.

tests/codegen/unchecked_shifts.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub unsafe fn unchecked_shl_unsigned_smaller(a: u16, b: u32) -> u16 {
1919
// This uses -DAG to avoid failing on irrelevant reorderings,
2020
// like emitting the truncation earlier.
2121

22-
// CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 65536
22+
// CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 16
2323
// CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
2424
// CHECK-DAG: %[[TRUNC:.+]] = trunc i32 %b to i16
2525
// CHECK-DAG: shl i16 %a, %[[TRUNC]]
@@ -51,7 +51,7 @@ pub unsafe fn unchecked_shr_signed_smaller(a: i16, b: u32) -> i16 {
5151
// This uses -DAG to avoid failing on irrelevant reorderings,
5252
// like emitting the truncation earlier.
5353

54-
// CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 32768
54+
// CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 16
5555
// CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
5656
// CHECK-DAG: %[[TRUNC:.+]] = trunc i32 %b to i16
5757
// CHECK-DAG: ashr i16 %a, %[[TRUNC]]

tests/mir-opt/inline/unchecked_shifts.rs

+3-16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
//@ compile-flags: -Zmir-opt-level=2 -Zinline-mir
66

7+
// These used to be more interesting when the library had to fix the RHS type.
8+
// After MCP#693, though, that's the backend's problem, not something in MIR.
9+
710
// EMIT_MIR unchecked_shifts.unchecked_shl_unsigned_smaller.Inline.diff
811
// EMIT_MIR unchecked_shifts.unchecked_shl_unsigned_smaller.PreCodegen.after.mir
912
pub unsafe fn unchecked_shl_unsigned_smaller(a: u16, b: u32) -> u16 {
@@ -12,22 +15,6 @@ pub unsafe fn unchecked_shl_unsigned_smaller(a: u16, b: u32) -> u16 {
1215
a.unchecked_shl(b)
1316
}
1417

15-
// EMIT_MIR unchecked_shifts.unchecked_shr_signed_smaller.Inline.diff
16-
// EMIT_MIR unchecked_shifts.unchecked_shr_signed_smaller.PreCodegen.after.mir
17-
pub unsafe fn unchecked_shr_signed_smaller(a: i16, b: u32) -> i16 {
18-
// CHECK-LABEL: fn unchecked_shr_signed_smaller(
19-
// CHECK: (inlined core::num::<impl i16>::unchecked_shr)
20-
a.unchecked_shr(b)
21-
}
22-
23-
// EMIT_MIR unchecked_shifts.unchecked_shl_unsigned_bigger.Inline.diff
24-
// EMIT_MIR unchecked_shifts.unchecked_shl_unsigned_bigger.PreCodegen.after.mir
25-
pub unsafe fn unchecked_shl_unsigned_bigger(a: u64, b: u32) -> u64 {
26-
// CHECK-LABEL: fn unchecked_shl_unsigned_bigger(
27-
// CHECK: (inlined core::num::<impl u64>::unchecked_shl)
28-
a.unchecked_shl(b)
29-
}
30-
3118
// EMIT_MIR unchecked_shifts.unchecked_shr_signed_bigger.Inline.diff
3219
// EMIT_MIR unchecked_shifts.unchecked_shr_signed_bigger.PreCodegen.after.mir
3320
pub unsafe fn unchecked_shr_signed_bigger(a: i64, b: u32) -> i64 {

0 commit comments

Comments
 (0)