Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions gpu_prover/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@
`generate_witness_values_delegation` function
- add handling of the new variant in the `get_delegation_circuit_precomputations` function in
`src/execution/precomputations.rs`

## Configuration

### Environment variables

- `PROVER_GPU_MEMORY_FRACTION` — optional float in the range `(0.0, 1.0]`. Caps the prover's
device allocation to that fraction of **total** GPU memory (it allocates `min(free, total * fraction)`).
Useful for co-locating another GPU process (e.g. a SNARK prover) on the same device. When unset —
or set to a malformed / out-of-range value — the prover keeps its default behavior of allocating all
free GPU memory. Read each time `ProverContextConfig::default()` is called (the value is not cached).

Example: `PROVER_GPU_MEMORY_FRACTION=0.6` caps the prover at ~60% of the GPU.
16 changes: 14 additions & 2 deletions gpu_prover/src/prover/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,23 @@ pub struct ProverContextConfig {
pub allocation_block_log_size: u32,
pub device_slack_blocks_count: usize,
pub host_allocator_blocks_count: usize,
// When set, caps device allocation to this fraction of total GPU memory.
// Read from PROVER_GPU_MEMORY_FRACTION env var (0.0 < value <= 1.0).
pub max_memory_fraction: Option<f64>,
}

impl Default for ProverContextConfig {
    /// Builds the default prover configuration.
    ///
    /// `max_memory_fraction` is taken from the `PROVER_GPU_MEMORY_FRACTION`
    /// environment variable. It is `Some(f)` only when the variable is set,
    /// parses as an `f64`, and satisfies `0.0 < f <= 1.0`; in every other
    /// case (unset, unparsable, out of range, NaN) it is `None`.
    fn default() -> Self {
        let max_memory_fraction = match std::env::var("PROVER_GPU_MEMORY_FRACTION") {
            Ok(raw) => match raw.parse::<f64>() {
                // NaN fails both comparisons, so it is rejected by the guard as well.
                Ok(fraction) if fraction > 0.0 && fraction <= 1.0 => Some(fraction),
                _ => None,
            },
            Err(_) => None,
        };

        Self {
            powers_of_w_coarse_log_count: 12,
            allocation_block_log_size: 22,    // 4 MB blocks
            device_slack_blocks_count: 64,    // 256 MB slack
            host_allocator_blocks_count: 128, // 512 MB host allocator pool
            max_memory_fraction,
        }
    }
}
Expand Down Expand Up @@ -114,8 +122,12 @@ impl ProverContext {
let exec_stream = CudaStream::create()?;
let aux_stream = CudaStream::create()?;
let h2d_stream = CudaStream::create()?;
let (free, _) = memory_get_info()?;
let mut device_blocks_count = free >> config.allocation_block_log_size;
let (free, total) = memory_get_info()?;
let cap = config
.max_memory_fraction
.map(|f| (total as f64 * f) as usize)
.unwrap_or(free);
let mut device_blocks_count = cap.min(free) >> config.allocation_block_log_size;
let device_allocation = loop {
let result = era_cudart::memory::DeviceAllocation::<u8>::alloc(
device_blocks_count << config.allocation_block_log_size,
Expand Down