Commit ba5966a

nvme_driver: allocate different DMA memory sizes if not bounce buffering (#1306)
A previous change increased the DMA memory allocated per queue to 128 pages, to provide the additional pages needed for bounce buffering on CVMs. Make this configurable, so that the allocation drops back to the previous value of 64 pages when the VM is not isolated.
1 parent 3e8fbd8 commit ba5966a
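
To make the size change concrete, here is a minimal sketch (not part of the commit) of the per-queue sizing arithmetic, assuming 4 KiB pages, which the SQE/CQE constants in queue_pair.rs imply; `per_queue_dma_size` is a hypothetical helper name used only for illustration:

const PAGE_SIZE: usize = 4096; // assumed 4 KiB pages
const SQ_SIZE: usize = PAGE_SIZE; // one page for the submission queue
const CQ_SIZE: usize = PAGE_SIZE; // one page for the completion queue

// Hypothetical helper mirroring the logic in QueuePair::new below:
// 128 data pages when bounce buffering (isolated/CVM case), 64 otherwise.
fn per_queue_dma_size(bounce_buffer: bool) -> usize {
    let data_pages = if bounce_buffer { 128 } else { 64 };
    SQ_SIZE + CQ_SIZE + data_pages * PAGE_SIZE
}

fn main() {
    assert_eq!(per_queue_dma_size(true), 130 * 4096); // 520 KiB per queue
    assert_eq!(per_queue_dma_size(false), 66 * 4096); // 264 KiB per queue
}

So a non-isolated VM saves 64 pages (256 KiB) of DMA memory per queue pair.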

File tree: 5 files changed (+118, -35 lines)

openhcl/underhill_core/src/nvme_manager.rs

Lines changed: 19 additions & 8 deletions
@@ -314,14 +314,21 @@ impl NvmeManagerWorker
             .await
             .map_err(InnerError::Vfio)?;
 
-        let driver =
-            nvme_driver::NvmeDriver::new(&self.driver_source, self.vp_count, device)
-                .instrument(tracing::info_span!(
-                    "nvme_driver_init",
-                    pci_id = entry.key()
-                ))
-                .await
-                .map_err(InnerError::DeviceInitFailed)?;
+        // TODO: For now, any isolation means use bounce buffering. This
+        // needs to change when we have nvme devices that support DMA to
+        // confidential memory.
+        let driver = nvme_driver::NvmeDriver::new(
+            &self.driver_source,
+            self.vp_count,
+            device,
+            self.is_isolated,
+        )
+        .instrument(tracing::info_span!(
+            "nvme_driver_init",
+            pci_id = entry.key()
+        ))
+        .await
+        .map_err(InnerError::DeviceInitFailed)?;
 
         entry.insert(driver)
     }
@@ -385,11 +392,15 @@ impl NvmeManagerWorker
             .instrument(tracing::info_span!("vfio_device_restore", pci_id))
             .await?;
 
+        // TODO: For now, any isolation means use bounce buffering. This
+        // needs to change when we have nvme devices that support DMA to
+        // confidential memory.
         let nvme_driver = nvme_driver::NvmeDriver::restore(
             &self.driver_source,
             saved_state.cpu_count,
             vfio_device,
             &disk.driver_state,
+            self.is_isolated,
         )
         .instrument(tracing::info_span!("nvme_driver_restore"))
         .await?;
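
The TODO above encodes a simple policy: any isolation implies bounce buffering until NVMe devices can DMA directly to confidential memory. A self-contained sketch of that decision, with an illustrative `IsolationType` enum (the variants are assumptions, not necessarily the codebase's exact type):

// Illustrative only: the enum variants here are assumptions.
#[derive(Clone, Copy, PartialEq, Eq)]
enum IsolationType {
    None,
    Vbs,
    Snp,
    Tdx,
}

// Mirrors `self.is_isolated` being passed as the driver's `bounce_buffer` flag.
fn needs_bounce_buffering(isolation: IsolationType) -> bool {
    !matches!(isolation, IsolationType::None)
}

fn main() {
    assert!(!needs_bounce_buffering(IsolationType::None));
    assert!(needs_bounce_buffering(IsolationType::Snp)); // any CVM flavor bounces
}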

vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_nvme_driver.rs

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ impl FuzzNvmeDriver
             .unwrap();
 
         let device = FuzzEmulatedDevice::new(nvme, msi_set, mem.dma_client());
-        let nvme_driver = NvmeDriver::new(&driver_source, cpu_count, device).await?; // TODO: [use-arbitrary-input]
+        let nvme_driver = NvmeDriver::new(&driver_source, cpu_count, device, false).await?; // TODO: [use-arbitrary-input]
         let namespace = nvme_driver.namespace(1).await?; // TODO: [use-arbitrary-input]
 
         Ok(Self {

vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs

Lines changed: 38 additions & 7 deletions
@@ -70,6 +70,7 @@ pub struct NvmeDriver<T: DeviceBacking> {
     namespaces: Vec<Arc<Namespace>>,
     /// Keeps the controller connected (CC.EN==1) while servicing.
     nvme_keepalive: bool,
+    bounce_buffer: bool,
 }
 
 #[derive(Inspect)]
@@ -84,6 +85,7 @@ struct DriverWorkerTask<T: DeviceBacking> {
     io_issuers: Arc<IoIssuers>,
     #[inspect(skip)]
     recv: mesh::Receiver<NvmeWorkerRequest>,
+    bounce_buffer: bool,
 }
 
 #[derive(Inspect)]
@@ -123,14 +125,21 @@ impl IoQueue {
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
         mem_block: MemoryBlock,
         saved_state: &IoQueueSavedState,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         let IoQueueSavedState {
             cpu,
             iv,
             queue_data,
         } = saved_state;
-        let queue =
-            QueuePair::restore(spawner, interrupt, registers.clone(), mem_block, queue_data)?;
+        let queue = QueuePair::restore(
+            spawner,
+            interrupt,
+            registers.clone(),
+            mem_block,
+            queue_data,
+            bounce_buffer,
+        )?;
 
         Ok(Self {
             queue,
@@ -169,9 +178,10 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         driver_source: &VmTaskDriverSource,
         cpu_count: u32,
         device: T,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         let pci_id = device.id().to_owned();
-        let mut this = Self::new_disabled(driver_source, cpu_count, device)
+        let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer)
             .instrument(tracing::info_span!("nvme_new_disabled", pci_id))
             .await?;
         match this
@@ -197,6 +207,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         driver_source: &VmTaskDriverSource,
         cpu_count: u32,
         mut device: T,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         let driver = driver_source.simple();
         let bar0 = Bar0(
@@ -245,6 +256,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                 io: Vec::new(),
                 io_issuers: io_issuers.clone(),
                 recv,
+                bounce_buffer,
             })),
             admin: None,
             identify: None,
@@ -253,6 +265,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
             rescan_event: Default::default(),
             namespaces: vec![],
             nvme_keepalive: false,
+            bounce_buffer,
         })
     }
 
@@ -285,6 +298,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
             admin_cqes,
             interrupt0,
             worker.registers.clone(),
+            self.bounce_buffer,
         )
         .context("failed to create admin queue pair")?;
 
@@ -541,6 +555,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         cpu_count: u32,
         mut device: T,
         saved_state: &NvmeDriverSavedState,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         let driver = driver_source.simple();
         let bar0_mapping = device
@@ -571,6 +586,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                 io: Vec::new(),
                 io_issuers: io_issuers.clone(),
                 recv,
+                bounce_buffer,
             })),
             admin: None, // Updated below.
             identify: Some(Arc::new(
@@ -582,6 +598,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
             rescan_event: Default::default(),
             namespaces: vec![],
             nvme_keepalive: true,
+            bounce_buffer,
         };
 
         let task = &mut this.task.as_mut().unwrap();
@@ -610,8 +627,15 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                     .find(|mem| mem.len() == a.mem_len && a.base_pfn == mem.pfns()[0])
                     .expect("unable to find restored mem block")
                     .to_owned();
-                QueuePair::restore(driver.clone(), interrupt0, registers.clone(), mem_block, a)
-                    .unwrap()
+                QueuePair::restore(
+                    driver.clone(),
+                    interrupt0,
+                    registers.clone(),
+                    mem_block,
+                    a,
+                    bounce_buffer,
+                )
+                .unwrap()
             })
             .unwrap();
 
@@ -657,8 +681,14 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                 })
                 .expect("unable to find restored mem block")
                 .to_owned();
-            let q =
-                IoQueue::restore(driver.clone(), interrupt, registers.clone(), mem_block, q)?;
+            let q = IoQueue::restore(
+                driver.clone(),
+                interrupt,
+                registers.clone(),
+                mem_block,
+                q,
+                bounce_buffer,
+            )?;
             let issuer = IoIssuer {
                 issuer: q.queue.issuer().clone(),
                 cpu: q.cpu,
@@ -866,6 +896,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
                 state.qsize,
                 interrupt,
                 self.registers.clone(),
+                self.bounce_buffer,
             )
             .with_context(|| format!("failed to create io queue pair {qid}"))?;
vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs

Lines changed: 56 additions & 15 deletions
@@ -166,11 +166,18 @@ impl PendingCommands
 }
 
 impl QueuePair {
-    pub const MAX_SQ_ENTRIES: u16 = (PAGE_SIZE / 64) as u16; // Maximum SQ size in entries.
-    pub const MAX_CQ_ENTRIES: u16 = (PAGE_SIZE / 16) as u16; // Maximum CQ size in entries.
-    const SQ_SIZE: usize = PAGE_SIZE; // Submission Queue size in bytes.
-    const CQ_SIZE: usize = PAGE_SIZE; // Completion Queue size in bytes.
-    const PER_QUEUE_PAGES: usize = 128;
+    /// Maximum SQ size in entries.
+    pub const MAX_SQ_ENTRIES: u16 = (PAGE_SIZE / 64) as u16;
+    /// Maximum CQ size in entries.
+    pub const MAX_CQ_ENTRIES: u16 = (PAGE_SIZE / 16) as u16;
+    /// Submission Queue size in bytes.
+    const SQ_SIZE: usize = PAGE_SIZE;
+    /// Completion Queue size in bytes.
+    const CQ_SIZE: usize = PAGE_SIZE;
+    /// Number of pages per queue if bounce buffering.
+    const PER_QUEUE_PAGES_BOUNCE_BUFFER: usize = 128;
+    /// Number of pages per queue if not bounce buffering.
+    const PER_QUEUE_PAGES_NO_BOUNCE_BUFFER: usize = 64;
 
     pub fn new(
         spawner: impl SpawnDriver,
@@ -180,9 +187,15 @@ impl QueuePair
         cq_entries: u16, // Requested CQ size in entries.
         interrupt: DeviceInterrupt,
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
-        let total_size =
-            QueuePair::SQ_SIZE + QueuePair::CQ_SIZE + QueuePair::PER_QUEUE_PAGES * PAGE_SIZE;
+        let total_size = QueuePair::SQ_SIZE
+            + QueuePair::CQ_SIZE
+            + if bounce_buffer {
+                QueuePair::PER_QUEUE_PAGES_BOUNCE_BUFFER * PAGE_SIZE
+            } else {
+                QueuePair::PER_QUEUE_PAGES_NO_BOUNCE_BUFFER * PAGE_SIZE
+            };
         let dma_client = device.dma_client();
         let mem = dma_client
             .allocate_dma_buffer(total_size)
@@ -192,7 +205,15 @@ impl QueuePair
         assert!(cq_entries <= Self::MAX_CQ_ENTRIES);
 
         QueuePair::new_or_restore(
-            spawner, qid, sq_entries, cq_entries, interrupt, registers, mem, None,
+            spawner,
+            qid,
+            sq_entries,
+            cq_entries,
+            interrupt,
+            registers,
+            mem,
+            None,
+            bounce_buffer,
         )
     }
 
@@ -206,6 +227,7 @@ impl QueuePair
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
         mem: MemoryBlock,
         saved_state: Option<&QueueHandlerSavedState>,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         // MemoryBlock is either allocated or restored prior calling here.
         let sq_mem_block = mem.subblock(0, QueuePair::SQ_SIZE);
@@ -239,13 +261,30 @@ impl QueuePair
             }
         });
 
-        // Page allocator uses remaining part of the buffer for dynamic allocation.
-        const _: () = assert!(
-            QueuePair::PER_QUEUE_PAGES * PAGE_SIZE >= 128 * 1024 + PAGE_SIZE,
-            "not enough room for an ATAPI IO plus a PRP list"
-        );
-        let alloc: PageAllocator =
-            PageAllocator::new(mem.subblock(data_offset, QueuePair::PER_QUEUE_PAGES * PAGE_SIZE));
+        // Convert the queue pages to bytes, and assert that queue size is large
+        // enough.
+        const fn pages_to_size_bytes(pages: usize) -> usize {
+            let size = pages * PAGE_SIZE;
+            assert!(
+                size >= 128 * 1024 + PAGE_SIZE,
+                "not enough room for an ATAPI IO plus a PRP list"
+            );
+            size
+        }
+
+        // Page allocator uses remaining part of the buffer for dynamic
+        // allocation. The length of the page allocator depends on if bounce
+        // buffering / double buffering is needed.
+        //
+        // NOTE: Do not remove the `const` blocks below. This is to force
+        // compile time evaluation of the assertion described above.
+        let alloc_len = if bounce_buffer {
+            const { pages_to_size_bytes(QueuePair::PER_QUEUE_PAGES_BOUNCE_BUFFER) }
+        } else {
+            const { pages_to_size_bytes(QueuePair::PER_QUEUE_PAGES_NO_BOUNCE_BUFFER) }
+        };
+
+        let alloc = PageAllocator::new(mem.subblock(data_offset, alloc_len));
 
         Ok(Self {
             task,
@@ -302,6 +341,7 @@ impl QueuePair
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
         mem: MemoryBlock,
         saved_state: &QueuePairSavedState,
+        bounce_buffer: bool,
     ) -> anyhow::Result<Self> {
         let QueuePairSavedState {
             mem_len: _, // Used to restore DMA buffer before calling this.
@@ -321,6 +361,7 @@ impl QueuePair
             registers,
             mem,
             Some(handler_data),
+            bounce_buffer,
         )
     }
 }
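
The `const { .. }` blocks in the hunk above deserve a note: because `pages_to_size_bytes` is a `const fn` called inside inline const blocks, the `assert!` is evaluated at compile time, so an undersized page count fails the build instead of panicking at runtime. A minimal standalone sketch of the same technique (inline const blocks are stable since Rust 1.79; PAGE_SIZE of 4 KiB is an assumption):

const PAGE_SIZE: usize = 4096; // assumed 4 KiB pages, as in the driver

const fn pages_to_size_bytes(pages: usize) -> usize {
    let size = pages * PAGE_SIZE;
    assert!(
        size >= 128 * 1024 + PAGE_SIZE,
        "not enough room for an ATAPI IO plus a PRP list"
    );
    size
}

fn main() {
    // Forced compile-time evaluation: 64 pages (256 KiB) satisfies the bound.
    let len = const { pages_to_size_bytes(64) };
    assert_eq!(len, 262144);
    // `const { pages_to_size_bytes(16) }` would be a compile error, not a panic.
}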

vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs

Lines changed: 4 additions & 4 deletions
@@ -78,7 +78,7 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) {
     let cap: Cap = Cap::new().with_mqes_z(max_u16);
     device.set_mock_response_u64(Some((0, cap.into())));
 
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device).await;
+    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
     assert!(driver.is_ok());
 }
 
@@ -113,7 +113,7 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) {
     // Setup mock response at offset 0
     let cap: Cap = Cap::new().with_mqes_z(0);
     device.set_mock_response_u64(Some((0, cap.into())));
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device).await;
+    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
 
     assert!(driver.is_err());
 }
@@ -150,7 +150,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
         .await
         .unwrap();
     let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device)
+    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
         .await
         .unwrap();
     let namespace = driver.namespace(1).await.unwrap();
@@ -266,7 +266,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
         .unwrap();
 
     let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone());
-    let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device)
+    let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
        .await
        .unwrap();
     let _ns1 = nvme_driver.namespace(1).await.unwrap();
