Skip to content

Commit 8de011b

Browse files
authored
Bump rust to 1.76 and disable sleeping between kernels on Win (#210)
* Bump rust to 1.76 and optimize sleeping between kernels on Win
* Disable sleeping between OCL kernels on Windows
1 parent d429260 commit 8de011b

File tree

2 files changed

+41
-16
lines changed

2 files changed

+41
-16
lines changed

.github/workflows/ci.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
- uses: actions/checkout@v3
2020
with:
2121
submodules: true
22-
- uses: dtolnay/rust-toolchain@1.74.1
22+
- uses: dtolnay/rust-toolchain@1.76
2323
- uses: Swatinem/rust-cache@v2
2424
- run: cargo check --workspace --all-features
2525

@@ -49,7 +49,7 @@ jobs:
4949
- uses: actions/checkout@v3
5050
with:
5151
submodules: true
52-
- uses: dtolnay/rust-toolchain@1.74.1
52+
- uses: dtolnay/rust-toolchain@1.76
5353
- if: matrix.runner == 'self-hosted-linux-intel'
5454
run: sudo apt-get update && sudo apt-get install -y libpocl2 pocl-opencl-icd ocl-icd-opencl-dev
5555
name: Install dependencies for testing openCL on Linux
@@ -79,7 +79,7 @@ jobs:
7979
- uses: actions/checkout@v3
8080
with:
8181
submodules: true
82-
- uses: dtolnay/rust-toolchain@1.74.1
82+
- uses: dtolnay/rust-toolchain@1.76
8383
with:
8484
components: rustfmt
8585
- uses: Swatinem/rust-cache@v2
@@ -117,7 +117,7 @@ jobs:
117117
with:
118118
submodules: true
119119
- name: Install stable
120-
uses: dtolnay/rust-toolchain@1.74.1
120+
uses: dtolnay/rust-toolchain@1.76
121121
with:
122122
components: llvm-tools-preview
123123
- name: cargo install cargo-llvm-cov
@@ -177,7 +177,7 @@ jobs:
177177
- uses: actions/checkout@v3
178178
with:
179179
submodules: true
180-
- uses: dtolnay/rust-toolchain@1.74.1
180+
- uses: dtolnay/rust-toolchain@1.76
181181
- uses: Swatinem/rust-cache@v2
182182
with:
183183
key: ${{ join( matrix.os, '-' ) }}

scrypt-ocl/src/lib.rs

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@ use ocl::{
55
SpatialDims,
66
};
77
use post::initialize::{Initialize, VrfNonce, ENTIRE_LABEL_SIZE, LABEL_SIZE};
8-
use std::{cmp::min, fmt::Display, io::Write, ops::Range};
8+
use std::{
9+
cmp::min,
10+
fmt::Display,
11+
io::Write,
12+
ops::Range,
13+
time::{Duration, Instant},
14+
};
915
use thiserror::Error;
1016

1117
pub use ocl;
@@ -267,8 +273,8 @@ impl Scrypter {
267273
let mut best_nonce = None;
268274
let labels_end = labels.end;
269275

270-
let mut total_kernel_duration = std::time::Duration::ZERO;
271-
let mut last_kernel_duration = std::time::Duration::ZERO;
276+
let mut total_kernel_duration = Duration::ZERO;
277+
let mut last_kernel_duration = Duration::ZERO;
272278

273279
for (iter, index) in labels.step_by(self.global_work_size).enumerate() {
274280
self.kernel.set_arg(1, index)?;
@@ -291,15 +297,32 @@ impl Scrypter {
291297
self.kernel.cmd().enew(&mut kernel_event).enq()?;
292298
}
293299

294-
let read_start = std::time::Instant::now();
300+
let read_start = Instant::now();
295301
// On some platforms (e.g. Nvidia), the read command will spin the CPU at 100% until the kernel finishes.
296302
// Hence we wait a bit before reading the buffer.
297303
// The wait time is based on the average kernel duration, with some margin.
298-
if iter > 0 {
299-
let average = total_kernel_duration.div_f32(iter as f32);
300-
let wait = (last_kernel_duration + average).div_f32(2.0).mul_f32(0.9);
301-
log::trace!("waiting for kernel to finish for {wait:?}");
302-
std::thread::sleep(wait);
304+
// It is weighted 50% by the last kernel duration and 50% by the average kernel duration
305+
// to speed up convergence to the optimal wait time.
306+
//
307+
// We skip a few 'warmup iterations', as the average kernel duration is not yet reliable.
308+
let warmup_iters = 10;
309+
if iter > warmup_iters {
310+
let average = total_kernel_duration.div_f32((iter - warmup_iters) as f32);
311+
log::trace!("last execution time: {last_kernel_duration:?}, average: {average:?})");
312+
313+
#[cfg(not(target_os = "windows"))]
314+
{
315+
let wait = (last_kernel_duration + average).div_f32(2.0).mul_f32(0.9);
316+
// Don't wait longer than `average - 5ms` to give the scheduler time to switch back to this thread.
317+
let wait = min(
318+
average
319+
.checked_sub(Duration::from_millis(5))
320+
.unwrap_or_default(),
321+
wait,
322+
);
323+
log::trace!("waiting for kernel to finish for {wait:?}");
324+
std::thread::sleep(wait);
325+
}
303326
}
304327

305328
let labels_buffer =
@@ -310,8 +333,10 @@ impl Scrypter {
310333
.read(labels_buffer.as_mut())
311334
.enq()?;
312335

313-
last_kernel_duration = read_start.elapsed();
314-
total_kernel_duration += last_kernel_duration;
336+
if iter >= warmup_iters {
337+
last_kernel_duration = read_start.elapsed();
338+
total_kernel_duration += last_kernel_duration;
339+
}
315340

316341
// Look for VRF nonce if enabled
317342
// TODO: run in background / in parallel to GPU

0 commit comments

Comments
 (0)