matter-labs · ericker-cyfrin · Jun 15, 2026 · Jun 15, 2026
diff --git a/gpu_prover/README.md b/gpu_prover/README.md
@@ -27,3 +27,15 @@
       `generate_witness_values_delegation` function
     - add handling of the new variant in the `get_delegation_circuit_precomputations` function in
       `src/execution/precomputations.rs`
+
+## Configuration
+
+### Environment variables
+
+- `PROVER_GPU_MEMORY_FRACTION` — optional float in the range `(0.0, 1.0]`. Caps the prover's
+  device allocation to that fraction of **total** GPU memory (it allocates `min(free, total * fraction)`).
+  Useful for co-locating another GPU process (e.g. a SNARK prover) on the same device. When unset —
+  or set to a malformed / out-of-range value — the prover keeps its default behavior of allocating all
+  free GPU memory. Read each time `ProverContextConfig::default()` is called (the value is not cached).
+
+  Example: `PROVER_GPU_MEMORY_FRACTION=0.6` caps the prover at ~60% of the GPU.
diff --git a/gpu_prover/src/prover/context.rs b/gpu_prover/src/prover/context.rs
@@ -40,15 +40,23 @@ pub struct ProverContextConfig {
     pub allocation_block_log_size: u32,
     pub device_slack_blocks_count: usize,
     pub host_allocator_blocks_count: usize,
+    // When set, caps device allocation to this fraction of total GPU memory.
+    // Read from PROVER_GPU_MEMORY_FRACTION env var (0.0 < value <= 1.0).
+    pub max_memory_fraction: Option<f64>,
 }
 
 impl Default for ProverContextConfig {
     fn default() -> Self {
+        let max_memory_fraction = std::env::var("PROVER_GPU_MEMORY_FRACTION")
+            .ok() // unset or not valid Unicode -> None
+            .and_then(|v| v.parse::<f64>().ok()) // not parseable as f64 -> None
+            .filter(|&f| f > 0.0 && f <= 1.0); // outside (0.0, 1.0], NaN included -> None
         Self {
             powers_of_w_coarse_log_count: 12,
             allocation_block_log_size: 22,    // 4 MB blocks
             device_slack_blocks_count: 64,    // 256 MB slack
             host_allocator_blocks_count: 128, // 512 MB host allocator pool
+            max_memory_fraction, // None leaves the device allocation uncapped
         }
     }
 }
@@ -114,8 +122,12 @@ impl ProverContext {
         let exec_stream = CudaStream::create()?;
         let aux_stream = CudaStream::create()?;
         let h2d_stream = CudaStream::create()?;
-        let (free, _) = memory_get_info()?;
-        let mut device_blocks_count = free >> config.allocation_block_log_size;
+        let (free, total) = memory_get_info()?;
+        let cap = config
+            .max_memory_fraction
+            .map(|f| (total as f64 * f) as usize)
+            .unwrap_or(free);
+        let mut device_blocks_count = cap.min(free) >> config.allocation_block_log_size;
         let device_allocation = loop {
             let result = era_cudart::memory::DeviceAllocation::<u8>::alloc(
                 device_blocks_count << config.allocation_block_log_size,