@@ -5,7 +5,13 @@ use ocl::{
5
5
SpatialDims ,
6
6
} ;
7
7
use post:: initialize:: { Initialize , VrfNonce , ENTIRE_LABEL_SIZE , LABEL_SIZE } ;
8
- use std:: { cmp:: min, fmt:: Display , io:: Write , ops:: Range } ;
8
+ use std:: {
9
+ cmp:: min,
10
+ fmt:: Display ,
11
+ io:: Write ,
12
+ ops:: Range ,
13
+ time:: { Duration , Instant } ,
14
+ } ;
9
15
use thiserror:: Error ;
10
16
11
17
pub use ocl;
@@ -267,8 +273,8 @@ impl Scrypter {
267
273
let mut best_nonce = None ;
268
274
let labels_end = labels. end ;
269
275
270
- let mut total_kernel_duration = std :: time :: Duration :: ZERO ;
271
- let mut last_kernel_duration = std :: time :: Duration :: ZERO ;
276
+ let mut total_kernel_duration = Duration :: ZERO ;
277
+ let mut last_kernel_duration = Duration :: ZERO ;
272
278
273
279
for ( iter, index) in labels. step_by ( self . global_work_size ) . enumerate ( ) {
274
280
self . kernel . set_arg ( 1 , index) ?;
@@ -291,15 +297,32 @@ impl Scrypter {
291
297
self . kernel . cmd ( ) . enew ( & mut kernel_event) . enq ( ) ?;
292
298
}
293
299
294
- let read_start = std :: time :: Instant :: now ( ) ;
300
+ let read_start = Instant :: now ( ) ;
295
301
// On some platforms (eg. Nvidia), the read command will spin CPU 100% until the kernel finishes.
296
302
// Hence we wait a bit before reading the buffer.
297
303
// The wait time is based on the average kernel duration, with some margin.
298
- if iter > 0 {
299
- let average = total_kernel_duration. div_f32 ( iter as f32 ) ;
300
- let wait = ( last_kernel_duration + average) . div_f32 ( 2.0 ) . mul_f32 ( 0.9 ) ;
301
- log:: trace!( "waiting for kernel to finish for {wait:?}" ) ;
302
- std:: thread:: sleep ( wait) ;
304
+ // The wait is weighted 50% by the last kernel duration and 50% by the average kernel duration
305
+ // to speed up convergence to the optimal wait time.
306
+ //
307
+ // We skip a few 'warmup iterations', as the average kernel duration is not yet reliable.
308
+ let warmup_iters = 10 ;
309
+ if iter > warmup_iters {
310
+ let average = total_kernel_duration. div_f32 ( ( iter - warmup_iters) as f32 ) ;
311
+ log:: trace!( "last execution time: {last_kernel_duration:?}, average: {average:?})" ) ;
312
+
313
+ #[ cfg( not( target_os = "windows" ) ) ]
314
+ {
315
+ let wait = ( last_kernel_duration + average) . div_f32 ( 2.0 ) . mul_f32 ( 0.9 ) ;
316
+ // Don't wait longer than `average - 5ms` to give the scheduler time to switch back to this thread.
317
+ let wait = min (
318
+ average
319
+ . checked_sub ( Duration :: from_millis ( 5 ) )
320
+ . unwrap_or_default ( ) ,
321
+ wait,
322
+ ) ;
323
+ log:: trace!( "waiting for kernel to finish for {wait:?}" ) ;
324
+ std:: thread:: sleep ( wait) ;
325
+ }
303
326
}
304
327
305
328
let labels_buffer =
@@ -310,8 +333,10 @@ impl Scrypter {
310
333
. read ( labels_buffer. as_mut ( ) )
311
334
. enq ( ) ?;
312
335
313
- last_kernel_duration = read_start. elapsed ( ) ;
314
- total_kernel_duration += last_kernel_duration;
336
+ if iter >= warmup_iters {
337
+ last_kernel_duration = read_start. elapsed ( ) ;
338
+ total_kernel_duration += last_kernel_duration;
339
+ }
315
340
316
341
// Look for VRF nonce if enabled
317
342
// TODO: run in background / in parallel to GPU
0 commit comments