From 4fa00c25076c34d7b6ddad9b918c6cd64ae783c3 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 11 Jul 2023 00:51:39 -0700 Subject: [PATCH 1/3] Explain SIGSEGV backtrace handler ...to both compiler contributors and other rustc invokers. The previous error messaging was extremely unfriendly, and potentially both confusing and alarming. The entire modules is extracted into a new file to help ease of reading, and plenty of comments get layered in. The design of the messaging is focused on preventing the overall purpose of the output from being too opaque in common cases, so users understand why it is there. There's not an immediate need for it being actionable, but some suggestions are offered anyways. --- compiler/rustc_driver_impl/src/lib.rs | 75 +++--------------- .../rustc_driver_impl/src/signal_handler.rs | 76 +++++++++++++++++++ 2 files changed, 85 insertions(+), 66 deletions(-) create mode 100644 compiler/rustc_driver_impl/src/signal_handler.rs diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index 2e26ce111f01d..026dfd8ee905a 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -78,6 +78,15 @@ pub mod pretty; #[macro_use] mod print; mod session_diagnostics; +#[cfg(all(unix, any(target_env = "gnu", target_os = "macos")))] +mod signal_handler; + +#[cfg(not(all(unix, any(target_env = "gnu", target_os = "macos"))))] +mod signal_handler { + /// On platforms which don't support our signal handler's requirements, + /// simply use the default signal handler provided by std. + pub(super) fn install() {} +} use crate::session_diagnostics::{ RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch, @@ -1427,72 +1436,6 @@ pub fn init_env_logger(handler: &EarlyErrorHandler, env: &str) { } } -#[cfg(all(unix, any(target_env = "gnu", target_os = "macos")))] -mod signal_handler { - extern "C" { - fn backtrace_symbols_fd( - buffer: *const *mut libc::c_void, - size: libc::c_int, - fd: libc::c_int, - ); - } - - extern "C" fn print_stack_trace(_: libc::c_int) { - const MAX_FRAMES: usize = 256; - static mut STACK_TRACE: [*mut libc::c_void; MAX_FRAMES] = - [std::ptr::null_mut(); MAX_FRAMES]; - unsafe { - let depth = libc::backtrace(STACK_TRACE.as_mut_ptr(), MAX_FRAMES as i32); - if depth == 0 { - return; - } - backtrace_symbols_fd(STACK_TRACE.as_ptr(), depth, 2); - } - } - - /// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the - /// process, print a stack trace and then exit. - pub(super) fn install() { - use std::alloc::{alloc, Layout}; - - unsafe { - let alt_stack_size: usize = min_sigstack_size() + 64 * 1024; - let mut alt_stack: libc::stack_t = std::mem::zeroed(); - alt_stack.ss_sp = alloc(Layout::from_size_align(alt_stack_size, 1).unwrap()).cast(); - alt_stack.ss_size = alt_stack_size; - libc::sigaltstack(&alt_stack, std::ptr::null_mut()); - - let mut sa: libc::sigaction = std::mem::zeroed(); - sa.sa_sigaction = print_stack_trace as libc::sighandler_t; - sa.sa_flags = libc::SA_NODEFER | libc::SA_RESETHAND | libc::SA_ONSTACK; - libc::sigemptyset(&mut sa.sa_mask); - libc::sigaction(libc::SIGSEGV, &sa, std::ptr::null_mut()); - } - } - - /// Modern kernels on modern hardware can have dynamic signal stack sizes. - #[cfg(any(target_os = "linux", target_os = "android"))] - fn min_sigstack_size() -> usize { - const AT_MINSIGSTKSZ: core::ffi::c_ulong = 51; - let dynamic_sigstksz = unsafe { libc::getauxval(AT_MINSIGSTKSZ) }; - // If getauxval couldn't find the entry, it returns 0, - // so take the higher of the "constant" and auxval. - // This transparently supports older kernels which don't provide AT_MINSIGSTKSZ - libc::MINSIGSTKSZ.max(dynamic_sigstksz as _) - } - - /// Not all OS support hardware where this is needed. - #[cfg(not(any(target_os = "linux", target_os = "android")))] - fn min_sigstack_size() -> usize { - libc::MINSIGSTKSZ - } -} - -#[cfg(not(all(unix, any(target_env = "gnu", target_os = "macos"))))] -mod signal_handler { - pub(super) fn install() {} -} - pub fn main() -> ! { let start_time = Instant::now(); let start_rss = get_resident_set_size(); diff --git a/compiler/rustc_driver_impl/src/signal_handler.rs b/compiler/rustc_driver_impl/src/signal_handler.rs new file mode 100644 index 0000000000000..1bf5c5af866e7 --- /dev/null +++ b/compiler/rustc_driver_impl/src/signal_handler.rs @@ -0,0 +1,76 @@ +//! Signal handler for rustc +//! Primarily used to extract a backtrace from stack overflow + +use std::alloc::{alloc, Layout}; +use std::{mem, ptr}; + +extern "C" { + fn backtrace_symbols_fd(buffer: *const *mut libc::c_void, size: libc::c_int, fd: libc::c_int); +} + +/// Signal handler installed for SIGSEGV +extern "C" fn print_stack_trace(_: libc::c_int) { + const MAX_FRAMES: usize = 256; + // Reserve data segment so we don't have to malloc in a signal handler, which might fail + // in incredibly undesirable and unexpected ways due to e.g. the allocator deadlocking + static mut STACK_TRACE: [*mut libc::c_void; MAX_FRAMES] = [ptr::null_mut(); MAX_FRAMES]; + unsafe { + // Collect return addresses + let depth = libc::backtrace(STACK_TRACE.as_mut_ptr(), MAX_FRAMES as i32); + if depth == 0 { + return; + } + // Just a stack trace is cryptic. Explain what we're doing. + write_raw_err("error: rustc interrupted by SIGSEGV, printing stack trace:\n\n"); + // Elaborate return addrs into symbols and write them directly to stderr + backtrace_symbols_fd(STACK_TRACE.as_ptr(), depth, libc::STDERR_FILENO); + if depth > 22 { + // We probably just scrolled that "we got SIGSEGV" message off the terminal + write_raw_err("\nerror: stack trace dumped due to SIGSEGV, possible stack overflow"); + }; + write_raw_err("\nerror: please report a bug to https://github.com/rust-lang/rust\n"); + } +} + +/// Write without locking stderr. +/// +/// Only acceptable because everything will end soon anyways. +fn write_raw_err(input: &str) { + // We do not care how many bytes we actually get out. SIGSEGV comes for our head. + // Splash stderr with letters of our own blood to warn our friends about the monster. + let _ = unsafe { libc::write(libc::STDERR_FILENO, input.as_ptr().cast(), input.len()) }; +} + +/// When SIGSEGV is delivered to the process, print a stack trace and then exit. +pub(super) fn install() { + unsafe { + let alt_stack_size: usize = min_sigstack_size() + 64 * 1024; + let mut alt_stack: libc::stack_t = mem::zeroed(); + alt_stack.ss_sp = alloc(Layout::from_size_align(alt_stack_size, 1).unwrap()).cast(); + alt_stack.ss_size = alt_stack_size; + libc::sigaltstack(&alt_stack, ptr::null_mut()); + + let mut sa: libc::sigaction = mem::zeroed(); + sa.sa_sigaction = print_stack_trace as libc::sighandler_t; + sa.sa_flags = libc::SA_NODEFER | libc::SA_RESETHAND | libc::SA_ONSTACK; + libc::sigemptyset(&mut sa.sa_mask); + libc::sigaction(libc::SIGSEGV, &sa, ptr::null_mut()); + } +} + +/// Modern kernels on modern hardware can have dynamic signal stack sizes. +#[cfg(any(target_os = "linux", target_os = "android"))] +fn min_sigstack_size() -> usize { + const AT_MINSIGSTKSZ: core::ffi::c_ulong = 51; + let dynamic_sigstksz = unsafe { libc::getauxval(AT_MINSIGSTKSZ) }; + // If getauxval couldn't find the entry, it returns 0, + // so take the higher of the "constant" and auxval. + // This transparently supports older kernels which don't provide AT_MINSIGSTKSZ + libc::MINSIGSTKSZ.max(dynamic_sigstksz as _) +} + +/// Not all OS support hardware where this is needed. +#[cfg(not(any(target_os = "linux", target_os = "android")))] +fn min_sigstack_size() -> usize { + libc::MINSIGSTKSZ +} From 1e65b5b741719cc4a7bee80fc4c5299a95f8d631 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 12 Jul 2023 23:08:56 -0700 Subject: [PATCH 2/3] Add recursion detection to signal-safe backtrace This cleans up the formatting of the backtrace considerably, dodging the fact that a backtrace with recursion normally emits hundreds of lines. Instead, simply write repeated symbols, and add how many times the recursion occurred. Also, it makes it look more like the "normal" backtrace. Some fine details in the code are important for reducing the amount of possible panic branches. --- .../rustc_driver_impl/src/signal_handler.rs | 101 ++++++++++++++---- 1 file changed, 83 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_driver_impl/src/signal_handler.rs b/compiler/rustc_driver_impl/src/signal_handler.rs index 1bf5c5af866e7..5971562627cdc 100644 --- a/compiler/rustc_driver_impl/src/signal_handler.rs +++ b/compiler/rustc_driver_impl/src/signal_handler.rs @@ -2,43 +2,108 @@ //! Primarily used to extract a backtrace from stack overflow use std::alloc::{alloc, Layout}; -use std::{mem, ptr}; +use std::{fmt, mem, ptr}; extern "C" { fn backtrace_symbols_fd(buffer: *const *mut libc::c_void, size: libc::c_int, fd: libc::c_int); } +fn backtrace_stderr(buffer: &[*mut libc::c_void]) { + let size = buffer.len().try_into().unwrap_or_default(); + unsafe { backtrace_symbols_fd(buffer.as_ptr(), size, libc::STDERR_FILENO) }; +} + +/// Unbuffered, unsynchronized writer to stderr. +/// +/// Only acceptable because everything will end soon anyways. +struct RawStderr(()); + +impl fmt::Write for RawStderr { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + let ret = unsafe { libc::write(libc::STDERR_FILENO, s.as_ptr().cast(), s.len()) }; + if ret == -1 { Err(fmt::Error) } else { Ok(()) } + } +} + +/// We don't really care how many bytes we actually get out. SIGSEGV comes for our head. +/// Splash stderr with letters of our own blood to warn our friends about the monster. +macro raw_errln($tokens:tt) { + let _ = ::core::fmt::Write::write_fmt(&mut RawStderr(()), format_args!($tokens)); + let _ = ::core::fmt::Write::write_char(&mut RawStderr(()), '\n'); +} + /// Signal handler installed for SIGSEGV extern "C" fn print_stack_trace(_: libc::c_int) { const MAX_FRAMES: usize = 256; // Reserve data segment so we don't have to malloc in a signal handler, which might fail // in incredibly undesirable and unexpected ways due to e.g. the allocator deadlocking static mut STACK_TRACE: [*mut libc::c_void; MAX_FRAMES] = [ptr::null_mut(); MAX_FRAMES]; - unsafe { + let stack = unsafe { // Collect return addresses let depth = libc::backtrace(STACK_TRACE.as_mut_ptr(), MAX_FRAMES as i32); if depth == 0 { return; } - // Just a stack trace is cryptic. Explain what we're doing. - write_raw_err("error: rustc interrupted by SIGSEGV, printing stack trace:\n\n"); - // Elaborate return addrs into symbols and write them directly to stderr - backtrace_symbols_fd(STACK_TRACE.as_ptr(), depth, libc::STDERR_FILENO); - if depth > 22 { - // We probably just scrolled that "we got SIGSEGV" message off the terminal - write_raw_err("\nerror: stack trace dumped due to SIGSEGV, possible stack overflow"); + &STACK_TRACE.as_slice()[0..(depth as _)] + }; + + // Just a stack trace is cryptic. Explain what we're doing. + raw_errln!("error: rustc interrupted by SIGSEGV, printing backtrace\n"); + let mut written = 1; + let mut consumed = 0; + // Begin elaborating return addrs into symbols and writing them directly to stderr + // Most backtraces are stack overflow, most stack overflows are from recursion + // Check for cycles before writing 250 lines of the same ~5 symbols + let cycled = |(runner, walker)| runner == walker; + let mut cyclic = false; + if let Some(period) = stack.iter().skip(1).step_by(2).zip(stack).position(cycled) { + let period = period.saturating_add(1); // avoid "what if wrapped?" branches + let Some(offset) = stack.iter().skip(period).zip(stack).position(cycled) else { + // impossible. + return; + }; + + // Count matching trace slices, else we could miscount "biphasic cycles" + // with the same period + loop entry but a different inner loop + let next_cycle = stack[offset..].chunks_exact(period).skip(1); + let cycles = 1 + next_cycle + .zip(stack[offset..].chunks_exact(period)) + .filter(|(next, prev)| next == prev) + .count(); + backtrace_stderr(&stack[..offset]); + written += offset; + consumed += offset; + if cycles > 1 { + raw_errln!("\n### cycle encountered after {offset} frames with period {period}"); + backtrace_stderr(&stack[consumed..consumed + period]); + raw_errln!("### recursed {cycles} times\n"); + written += period + 4; + consumed += period * cycles; + cyclic = true; }; - write_raw_err("\nerror: please report a bug to https://github.com/rust-lang/rust\n"); } -} + let rem = &stack[consumed..]; + backtrace_stderr(rem); + raw_errln!(""); + written += rem.len() + 1; -/// Write without locking stderr. -/// -/// Only acceptable because everything will end soon anyways. -fn write_raw_err(input: &str) { - // We do not care how many bytes we actually get out. SIGSEGV comes for our head. - // Splash stderr with letters of our own blood to warn our friends about the monster. - let _ = unsafe { libc::write(libc::STDERR_FILENO, input.as_ptr().cast(), input.len()) }; + if cyclic || stack.len() == 256 { + // technically speculation, but assert it with confidence anyway. + // rustc only arrived in this signal handler because bad things happened + // and this message is for explaining it's not the programmer's fault + raw_errln!("note: rustc unexpectedly overflowed its stack! this is a bug"); + written += 1; + } + if stack.len() == 256 { + raw_errln!("note: maximum backtrace depth reached, frames may have been lost"); + written += 1; + } + raw_errln!("note: we would appreciate a report at https://github.com/rust-lang/rust"); + written += 1; + if written > 24 { + // We probably just scrolled the earlier "we got SIGSEGV" message off the terminal + raw_errln!("note: backtrace dumped due to SIGSEGV! resuming signal"); + }; } /// When SIGSEGV is delivered to the process, print a stack trace and then exit. From 3dee9775a8c94e701a08f7b2df2c444f353d8699 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Sat, 15 Jul 2023 01:07:14 -0700 Subject: [PATCH 3/3] Clarify arbitrary constants First, we reuse the `MAX_FRAMES` constant. Co-authored-by: erikdesjardins Then we choose an arbitrary recursion depth for the other case. In this case, I used 2d20. Honest. --- compiler/rustc_driver_impl/src/signal_handler.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_driver_impl/src/signal_handler.rs b/compiler/rustc_driver_impl/src/signal_handler.rs index 5971562627cdc..deca108222150 100644 --- a/compiler/rustc_driver_impl/src/signal_handler.rs +++ b/compiler/rustc_driver_impl/src/signal_handler.rs @@ -87,14 +87,15 @@ extern "C" fn print_stack_trace(_: libc::c_int) { raw_errln!(""); written += rem.len() + 1; - if cyclic || stack.len() == 256 { + let random_depth = || 8 * 16; // chosen by random diceroll (2d20) + if cyclic || stack.len() > random_depth() { // technically speculation, but assert it with confidence anyway. // rustc only arrived in this signal handler because bad things happened // and this message is for explaining it's not the programmer's fault raw_errln!("note: rustc unexpectedly overflowed its stack! this is a bug"); written += 1; } - if stack.len() == 256 { + if stack.len() == MAX_FRAMES { raw_errln!("note: maximum backtrace depth reached, frames may have been lost"); written += 1; }