Skip to content

Commit 182fe22

Browse files
authored
fix: Improve behavior for OOM-killed tasks (#11477)
## Summary

**Behavioral change:** Previously, `ChildExit::KilledExternal`, `ChildExit::Finished(None)`, and `ChildExit::Failed` returned internal errors (`InternalError::ExternalKill`, `InternalError::UnknownChildExit`). Now they are handled as normal task failures with proper exit codes, error logging, and respect for `--continue` flags.

Changes:

- `KilledExternal` → task failure with exit code 137 (SIGKILL convention)
- `Finished(None)` / `Failed` → task failure with exit code 1
- Add OOM detection hints for Windows NT status codes and Unix exit code 137
- Remove `InternalError::ExternalKill` variant

This means externally killed tasks now behave consistently with tasks that exit non-zero, rather than causing internal errors.
1 parent 642a487 commit 182fe22

1 file changed

Lines changed: 125 additions & 5 deletions

File tree

  • crates/turborepo-task-executor/src

crates/turborepo-task-executor/src/exec.rs

Lines changed: 125 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,59 @@ use turborepo_ui::{ColorConfig, OutputWriter};
2828

2929
use crate::{TaskAccessProvider, TaskCacheOutput, TaskOutput};
3030

31+
/// Windows NT status codes that indicate out-of-memory conditions.
///
/// Each constant is the signed `i32` reinterpretation of the corresponding
/// unsigned NT status value, so it can be compared directly against an `i32`
/// exit code.
#[cfg(windows)]
mod windows_oom {
    /// STATUS_NO_MEMORY (0xC0000017) - insufficient memory to complete
    /// operation
    pub const STATUS_NO_MEMORY: i32 = 0xC0000017_u32 as i32;
    /// STATUS_STACK_OVERFLOW (0xC00000FD) - stack overflow, often
    /// memory-related
    pub const STATUS_STACK_OVERFLOW: i32 = 0xC00000FD_u32 as i32;
    /// STATUS_COMMITMENT_LIMIT (0xC000012D) - system committed memory limit
    /// reached
    pub const STATUS_COMMITMENT_LIMIT: i32 = 0xC000012D_u32 as i32;

    /// Every recognised status paired with its human-readable description.
    ///
    /// Both lookups below read from this one table, so the set of recognised
    /// codes and the set of described codes cannot drift apart.
    static KNOWN_STATUSES: [(i32, &str); 3] = [
        (STATUS_NO_MEMORY, "STATUS_NO_MEMORY: insufficient memory"),
        (STATUS_STACK_OVERFLOW, "STATUS_STACK_OVERFLOW: stack overflow"),
        (
            STATUS_COMMITMENT_LIMIT,
            "STATUS_COMMITMENT_LIMIT: system memory limit reached",
        ),
    ];

    /// Check if an exit code indicates an out-of-memory condition on Windows.
    ///
    /// Returns `true` only when `code` equals one of the status constants
    /// defined in this module.
    pub fn is_oom_exit_code(code: i32) -> bool {
        KNOWN_STATUSES.iter().any(|&(status, _)| status == code)
    }

    /// Get a human-readable description of the Windows OOM exit code.
    ///
    /// Codes that are not one of the status constants in this module yield
    /// the generic fallback text.
    pub fn oom_description(code: i32) -> &'static str {
        KNOWN_STATUSES
            .iter()
            .find(|&&(status, _)| status == code)
            .map_or("unknown memory error", |&(_, text)| text)
    }
}
63+
64+
/// Map a process exit code to an out-of-memory hint, when one applies.
///
/// * On Windows the code is checked against the NT status codes in
///   `windows_oom`, and the matching description is returned.
/// * On every other platform only exit code 137 is recognised.
///
/// Returns `None` for any code that is not recognised on the current
/// platform.
fn oom_description(code: i32) -> Option<&'static str> {
    #[cfg(windows)]
    {
        // The description lookup runs only for codes the predicate accepts.
        windows_oom::is_oom_exit_code(code).then(|| windows_oom::oom_description(code))
    }
    #[cfg(not(windows))]
    {
        match code {
            137 => Some("SIGKILL (signal 9): likely killed by OOM killer"),
            _ => None,
        }
    }
}
83+
3184
// =============================================================================
3285
// Result Types
3386
// =============================================================================
@@ -67,8 +120,6 @@ pub enum InternalError {
67120
UnknownChildExit,
68121
#[error("unable to find package manager binary: {0}")]
69122
Which(#[from] which::Error),
70-
#[error("external process killed a task")]
71-
ExternalKill,
72123
#[error("error with cache: {0}")]
73124
Cache(#[from] turborepo_run_cache::Error),
74125
}
@@ -428,7 +479,17 @@ where
428479
if let Err(e) = self.task_cache.on_error(&mut prefixed_ui) {
429480
error!("error reading logs: {e}");
430481
}
431-
let message = format!("command {} exited ({})", process.label(), code);
482+
// Check if this looks like an OOM-related exit code
483+
let message = if let Some(oom_desc) = oom_description(code) {
484+
format!(
485+
"command {} was killed (exit code {}): {}, likely ran out of memory",
486+
process.label(),
487+
code,
488+
oom_desc
489+
)
490+
} else {
491+
format!("command {} exited ({})", process.label(), code)
492+
};
432493
match self.continue_on_error {
433494
ContinueMode::Never => {
434495
prefixed_ui.error(&format!("command finished with error: {}", message))
@@ -447,8 +508,67 @@ where
447508
message,
448509
})
449510
}
450-
ChildExit::Finished(None) | ChildExit::Failed => Err(InternalError::UnknownChildExit),
451-
ChildExit::KilledExternal => Err(InternalError::ExternalKill),
511+
ChildExit::Finished(None) | ChildExit::Failed => {
512+
// Process exited without a code (e.g., killed by signal) or we failed to get
513+
// status. Treat as a task failure with exit code 1.
514+
if let Err(e) = stdout_writer.flush() {
515+
error!("error flushing logs: {e}");
516+
}
517+
if let Err(e) = self.task_cache.on_error(&mut prefixed_ui) {
518+
error!("error reading logs: {e}");
519+
}
520+
let message = format!("command {} exited unexpectedly", process.label());
521+
match self.continue_on_error {
522+
ContinueMode::Never => {
523+
prefixed_ui.error(&format!("command finished with error: {}", message))
524+
}
525+
ContinueMode::Always | ContinueMode::DependenciesSuccessful => {
526+
prefixed_ui.warn("command finished with error, but continuing...")
527+
}
528+
}
529+
self.errors.push_execution_error(
530+
self.task_id_for_display.clone(),
531+
process.label().to_string(),
532+
1,
533+
);
534+
Ok(ExecOutcome::Task {
535+
exit_code: Some(1),
536+
message,
537+
})
538+
}
539+
ChildExit::KilledExternal => {
540+
// Process was killed by an external signal (e.g., OOM killer sending SIGKILL).
541+
// Use exit code 137 (128 + 9) which is the conventional code for SIGKILL.
542+
const SIGKILL_EXIT_CODE: i32 = 137;
543+
if let Err(e) = stdout_writer.flush() {
544+
error!("error flushing logs: {e}");
545+
}
546+
if let Err(e) = self.task_cache.on_error(&mut prefixed_ui) {
547+
error!("error reading logs: {e}");
548+
}
549+
let message = format!(
550+
"command {} was killed (exit code {}), likely due to running out of memory",
551+
process.label(),
552+
SIGKILL_EXIT_CODE
553+
);
554+
match self.continue_on_error {
555+
ContinueMode::Never => {
556+
prefixed_ui.error(&format!("command finished with error: {}", message))
557+
}
558+
ContinueMode::Always | ContinueMode::DependenciesSuccessful => {
559+
prefixed_ui.warn("command finished with error, but continuing...")
560+
}
561+
}
562+
self.errors.push_execution_error(
563+
self.task_id_for_display.clone(),
564+
process.label().to_string(),
565+
SIGKILL_EXIT_CODE,
566+
);
567+
Ok(ExecOutcome::Task {
568+
exit_code: Some(SIGKILL_EXIT_CODE),
569+
message,
570+
})
571+
}
452572
ChildExit::Killed | ChildExit::Interrupted => {
453573
if process.is_closing() {
454574
Ok(ExecOutcome::Shutdown)

0 commit comments

Comments
 (0)