Skip to content

Commit c7a4fad

Browse files
authored
Merge pull request #2357 from iced-rs/wgpu/use-staging-belt
Use a `StagingBelt` in `iced_wgpu` for regular buffer uploads
2 parents 5071e3d + 4c74beb commit c7a4fad

15 files changed

Lines changed: 156 additions & 53 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ cosmic-text = "0.10"
129129
dark-light = "1.0"
130130
futures = "0.3"
131131
glam = "0.25"
132-
glyphon = "0.5"
132+
glyphon = { git = "https://github.com/hecrj/glyphon.git", rev = "ceed55403ce53e120ce9d1fae17dcfe388726118" }
133133
guillotiere = "0.6"
134134
half = "2.2"
135135
image = "0.24"
@@ -155,7 +155,6 @@ thiserror = "1.0"
155155
tiny-skia = "0.11"
156156
tokio = "1.0"
157157
tracing = "0.1"
158-
xxhash-rust = { version = "0.8", features = ["xxh3"] }
159158
unicode-segmentation = "1.0"
160159
wasm-bindgen-futures = "0.4"
161160
wasm-timer = "0.2"

core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,10 @@ log.workspace = true
2121
num-traits.workspace = true
2222
once_cell.workspace = true
2323
palette.workspace = true
24+
rustc-hash.workspace = true
2425
smol_str.workspace = true
2526
thiserror.workspace = true
2627
web-time.workspace = true
27-
xxhash-rust.workspace = true
2828

2929
dark-light.workspace = true
3030
dark-light.optional = true

core/src/hasher.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
/// The hasher used to compare layouts.
22
#[allow(missing_debug_implementations)] // Doesn't really make sense to have debug on the hasher state anyways.
33
#[derive(Default)]
4-
pub struct Hasher(xxhash_rust::xxh3::Xxh3);
4+
pub struct Hasher(rustc_hash::FxHasher);
55

66
impl core::hash::Hasher for Hasher {
77
fn write(&mut self, bytes: &[u8]) {

graphics/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ raw-window-handle.workspace = true
3434
rustc-hash.workspace = true
3535
thiserror.workspace = true
3636
unicode-segmentation.workspace = true
37-
xxhash-rust.workspace = true
3837

3938
image.workspace = true
4039
image.optional = true

graphics/src/text/cache.rs

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22
use crate::core::{Font, Size};
33
use crate::text;
44

5-
use rustc_hash::{FxHashMap, FxHashSet};
5+
use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
66
use std::collections::hash_map;
7-
use std::hash::{BuildHasher, Hash, Hasher};
7+
use std::hash::{Hash, Hasher};
88

99
/// A store of recently used sections of text.
1010
#[allow(missing_debug_implementations)]
@@ -13,11 +13,8 @@ pub struct Cache {
1313
entries: FxHashMap<KeyHash, Entry>,
1414
aliases: FxHashMap<KeyHash, KeyHash>,
1515
recently_used: FxHashSet<KeyHash>,
16-
hasher: HashBuilder,
1716
}
1817

19-
type HashBuilder = xxhash_rust::xxh3::Xxh3Builder;
20-
2118
impl Cache {
2219
/// Creates a new empty [`Cache`].
2320
pub fn new() -> Self {
@@ -35,7 +32,7 @@ impl Cache {
3532
font_system: &mut cosmic_text::FontSystem,
3633
key: Key<'_>,
3734
) -> (KeyHash, &mut Entry) {
38-
let hash = key.hash(self.hasher.build_hasher());
35+
let hash = key.hash(FxHasher::default());
3936

4037
if let Some(hash) = self.aliases.get(&hash) {
4138
let _ = self.recently_used.insert(*hash);
@@ -77,7 +74,7 @@ impl Cache {
7774
] {
7875
if key.bounds != bounds {
7976
let _ = self.aliases.insert(
80-
Key { bounds, ..key }.hash(self.hasher.build_hasher()),
77+
Key { bounds, ..key }.hash(FxHasher::default()),
8178
hash,
8279
);
8380
}

tiny_skia/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ log.workspace = true
2525
rustc-hash.workspace = true
2626
softbuffer.workspace = true
2727
tiny-skia.workspace = true
28-
xxhash-rust.workspace = true
2928

3029
resvg.workspace = true
3130
resvg.optional = true

wgpu/src/backend.rs

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
use crate::buffer;
12
use crate::core::{Color, Size, Transformation};
23
use crate::graphics::backend;
34
use crate::graphics::color;
@@ -30,6 +31,7 @@ pub struct Backend {
3031
pipeline_storage: pipeline::Storage,
3132
#[cfg(any(feature = "image", feature = "svg"))]
3233
image_pipeline: image::Pipeline,
34+
staging_belt: wgpu::util::StagingBelt,
3335
}
3436

3537
impl Backend {
@@ -61,6 +63,13 @@ impl Backend {
6163

6264
#[cfg(any(feature = "image", feature = "svg"))]
6365
image_pipeline,
66+
67+
// TODO: Resize belt smartly (?)
68+
// It would be great if the `StagingBelt` API exposed methods
69+
// for introspection to detect when a resize may be worth it.
70+
staging_belt: wgpu::util::StagingBelt::new(
71+
buffer::MAX_WRITE_SIZE as u64,
72+
),
6473
}
6574
}
6675

@@ -105,6 +114,8 @@ impl Backend {
105114
&layers,
106115
);
107116

117+
self.staging_belt.finish();
118+
108119
self.render(
109120
device,
110121
encoder,
@@ -123,12 +134,20 @@ impl Backend {
123134
self.image_pipeline.end_frame();
124135
}
125136

137+
/// Recalls staging memory for future uploads.
138+
///
139+
/// This method should be called after the command encoder
140+
/// has been submitted.
141+
pub fn recall(&mut self) {
142+
self.staging_belt.recall();
143+
}
144+
126145
fn prepare(
127146
&mut self,
128147
device: &wgpu::Device,
129148
queue: &wgpu::Queue,
130149
format: wgpu::TextureFormat,
131-
_encoder: &mut wgpu::CommandEncoder,
150+
encoder: &mut wgpu::CommandEncoder,
132151
scale_factor: f32,
133152
target_size: Size<u32>,
134153
transformation: Transformation,
@@ -144,7 +163,8 @@ impl Backend {
144163
if !layer.quads.is_empty() {
145164
self.quad_pipeline.prepare(
146165
device,
147-
queue,
166+
encoder,
167+
&mut self.staging_belt,
148168
&layer.quads,
149169
transformation,
150170
scale_factor,
@@ -157,7 +177,8 @@ impl Backend {
157177

158178
self.triangle_pipeline.prepare(
159179
device,
160-
queue,
180+
encoder,
181+
&mut self.staging_belt,
161182
&layer.meshes,
162183
scaled,
163184
);
@@ -171,8 +192,8 @@ impl Backend {
171192

172193
self.image_pipeline.prepare(
173194
device,
174-
queue,
175-
_encoder,
195+
encoder,
196+
&mut self.staging_belt,
176197
&layer.images,
177198
scaled,
178199
scale_factor,
@@ -184,6 +205,7 @@ impl Backend {
184205
self.text_pipeline.prepare(
185206
device,
186207
queue,
208+
encoder,
187209
&layer.text,
188210
layer.bounds,
189211
scale_factor,

wgpu/src/buffer.rs

Lines changed: 43 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
use std::marker::PhantomData;
2+
use std::num::NonZeroU64;
23
use std::ops::RangeBounds;
34

5+
pub const MAX_WRITE_SIZE: usize = 100 * 1024;
6+
7+
#[allow(unsafe_code)]
8+
const MAX_WRITE_SIZE_U64: NonZeroU64 =
9+
unsafe { NonZeroU64::new_unchecked(MAX_WRITE_SIZE as u64) };
10+
411
#[derive(Debug)]
512
pub struct Buffer<T> {
613
label: &'static str,
@@ -61,12 +68,46 @@ impl<T: bytemuck::Pod> Buffer<T> {
6168
/// Returns the size of the written bytes.
6269
pub fn write(
6370
&mut self,
64-
queue: &wgpu::Queue,
71+
device: &wgpu::Device,
72+
encoder: &mut wgpu::CommandEncoder,
73+
belt: &mut wgpu::util::StagingBelt,
6574
offset: usize,
6675
contents: &[T],
6776
) -> usize {
6877
let bytes: &[u8] = bytemuck::cast_slice(contents);
69-
queue.write_buffer(&self.raw, offset as u64, bytes);
78+
let mut bytes_written = 0;
79+
80+
// Split write into multiple chunks if necessary
81+
while bytes_written + MAX_WRITE_SIZE < bytes.len() {
82+
belt.write_buffer(
83+
encoder,
84+
&self.raw,
85+
(offset + bytes_written) as u64,
86+
MAX_WRITE_SIZE_U64,
87+
device,
88+
)
89+
.copy_from_slice(
90+
&bytes[bytes_written..bytes_written + MAX_WRITE_SIZE],
91+
);
92+
93+
bytes_written += MAX_WRITE_SIZE;
94+
}
95+
96+
// There will always be some bytes left, since the previous
97+
// loop guarantees `bytes_written < bytes.len()`
98+
let bytes_left = ((bytes.len() - bytes_written) as u64)
99+
.try_into()
100+
.expect("non-empty write");
101+
102+
// Write them
103+
belt.write_buffer(
104+
encoder,
105+
&self.raw,
106+
(offset + bytes_written) as u64,
107+
bytes_left,
108+
device,
109+
)
110+
.copy_from_slice(&bytes[bytes_written..]);
70111

71112
self.offsets.push(offset as u64);
72113

wgpu/src/image.rs

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -83,21 +83,31 @@ impl Layer {
8383
fn prepare(
8484
&mut self,
8585
device: &wgpu::Device,
86-
queue: &wgpu::Queue,
86+
encoder: &mut wgpu::CommandEncoder,
87+
belt: &mut wgpu::util::StagingBelt,
8788
nearest_instances: &[Instance],
8889
linear_instances: &[Instance],
8990
transformation: Transformation,
9091
) {
91-
queue.write_buffer(
92+
let uniforms = Uniforms {
93+
transform: transformation.into(),
94+
};
95+
96+
let bytes = bytemuck::bytes_of(&uniforms);
97+
98+
belt.write_buffer(
99+
encoder,
92100
&self.uniforms,
93101
0,
94-
bytemuck::bytes_of(&Uniforms {
95-
transform: transformation.into(),
96-
}),
97-
);
102+
(bytes.len() as u64).try_into().expect("Sized uniforms"),
103+
device,
104+
)
105+
.copy_from_slice(bytes);
106+
107+
self.nearest
108+
.upload(device, encoder, belt, nearest_instances);
98109

99-
self.nearest.upload(device, queue, nearest_instances);
100-
self.linear.upload(device, queue, linear_instances);
110+
self.linear.upload(device, encoder, belt, linear_instances);
101111
}
102112

103113
fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) {
@@ -158,7 +168,8 @@ impl Data {
158168
fn upload(
159169
&mut self,
160170
device: &wgpu::Device,
161-
queue: &wgpu::Queue,
171+
encoder: &mut wgpu::CommandEncoder,
172+
belt: &mut wgpu::util::StagingBelt,
162173
instances: &[Instance],
163174
) {
164175
self.instance_count = instances.len();
@@ -168,7 +179,7 @@ impl Data {
168179
}
169180

170181
let _ = self.instances.resize(device, instances.len());
171-
let _ = self.instances.write(queue, 0, instances);
182+
let _ = self.instances.write(device, encoder, belt, 0, instances);
172183
}
173184

174185
fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) {
@@ -383,8 +394,8 @@ impl Pipeline {
383394
pub fn prepare(
384395
&mut self,
385396
device: &wgpu::Device,
386-
queue: &wgpu::Queue,
387397
encoder: &mut wgpu::CommandEncoder,
398+
belt: &mut wgpu::util::StagingBelt,
388399
images: &[layer::Image],
389400
transformation: Transformation,
390401
_scale: f32,
@@ -501,7 +512,8 @@ impl Pipeline {
501512

502513
layer.prepare(
503514
device,
504-
queue,
515+
encoder,
516+
belt,
505517
nearest_instances,
506518
linear_instances,
507519
transformation,

wgpu/src/quad.rs

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,8 @@ impl Pipeline {
5757
pub fn prepare(
5858
&mut self,
5959
device: &wgpu::Device,
60-
queue: &wgpu::Queue,
60+
encoder: &mut wgpu::CommandEncoder,
61+
belt: &mut wgpu::util::StagingBelt,
6162
quads: &Batch,
6263
transformation: Transformation,
6364
scale: f32,
@@ -67,7 +68,7 @@ impl Pipeline {
6768
}
6869

6970
let layer = &mut self.layers[self.prepare_layer];
70-
layer.prepare(device, queue, quads, transformation, scale);
71+
layer.prepare(device, encoder, belt, quads, transformation, scale);
7172

7273
self.prepare_layer += 1;
7374
}
@@ -162,7 +163,8 @@ impl Layer {
162163
pub fn prepare(
163164
&mut self,
164165
device: &wgpu::Device,
165-
queue: &wgpu::Queue,
166+
encoder: &mut wgpu::CommandEncoder,
167+
belt: &mut wgpu::util::StagingBelt,
166168
quads: &Batch,
167169
transformation: Transformation,
168170
scale: f32,
@@ -171,15 +173,25 @@ impl Layer {
171173
let _ = info_span!("Wgpu::Quad", "PREPARE").entered();
172174

173175
let uniforms = Uniforms::new(transformation, scale);
176+
let bytes = bytemuck::bytes_of(&uniforms);
174177

175-
queue.write_buffer(
178+
belt.write_buffer(
179+
encoder,
176180
&self.constants_buffer,
177181
0,
178-
bytemuck::bytes_of(&uniforms),
179-
);
182+
(bytes.len() as u64).try_into().expect("Sized uniforms"),
183+
device,
184+
)
185+
.copy_from_slice(bytes);
180186

181-
self.solid.prepare(device, queue, &quads.solids);
182-
self.gradient.prepare(device, queue, &quads.gradients);
187+
if !quads.solids.is_empty() {
188+
self.solid.prepare(device, encoder, belt, &quads.solids);
189+
}
190+
191+
if !quads.gradients.is_empty() {
192+
self.gradient
193+
.prepare(device, encoder, belt, &quads.gradients);
194+
}
183195
}
184196
}
185197

0 commit comments

Comments
 (0)