@@ -168,23 +168,8 @@ opal_progress_finalize(void)
     return OPAL_SUCCESS;
 }
 
-
-/*
- * Progress the event library and any functions that have registered to
- * be called.  We don't propogate errors from the progress functions,
- * so no action is taken if they return failures.  The functions are
- * expected to return the number of events progressed, to determine
- * whether or not we should call sched_yield() during MPI progress.
- * This is only losely tracked, as an error return can cause the number
- * of progressed events to appear lower than it actually is.  We don't
- * care, as the cost of that happening is far outweighed by the cost
- * of the if checks (they were resulting in bad pipe stalling behavior)
- */
-void
-opal_progress(void)
+static int opal_progress_events()
 {
-    static volatile uint32_t num_calls = 0;
-    size_t i;
     int events = 0;
 
     if (opal_progress_event_flag != 0) {
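For context, the comment removed in the hunk above describes how the registered progress callbacks behave: each returns a count of events it progressed, and error returns are simply summed in rather than propagated. A minimal standalone sketch of that callback-table pattern follows; it is not Open MPI code, and the names (progress_cb_t, cb_table, cb_count, poll_events, progress_once) are hypothetical stand-ins for the real callbacks[] machinery and opal_progress_events().

/* Minimal sketch of the callback-table pattern described above (hypothetical
 * names, not Open MPI code).  Each callback reports how many events it
 * progressed; returns are summed blindly, so a negative error return only
 * makes the total look lower, which the original comment deems acceptable. */
#include <stddef.h>
#include <stdio.h>

typedef int (*progress_cb_t)(void);

static int poll_events(void)                 /* plays the role of opal_progress_events() */
{
    return 0;                                /* nothing pending in this sketch */
}

static progress_cb_t cb_table[] = { poll_events };
static const size_t cb_count = sizeof(cb_table) / sizeof(cb_table[0]);

static int progress_once(void)
{
    int events = 0;
    for (size_t i = 0; i < cb_count; ++i) {
        events += cb_table[i]();             /* errors are not propagated, just summed */
    }
    return events;
}

int main(void)
{
    printf("events progressed: %d\n", progress_once());
    return 0;
}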
@@ -217,16 +202,46 @@ opal_progress(void)
 #endif /* OPAL_HAVE_WORKING_EVENTOPS */
     }
 
+    return events;
+}
+
+/*
+ * Progress the event library and any functions that have registered to
+ * be called.  We don't propogate errors from the progress functions,
+ * so no action is taken if they return failures.  The functions are
+ * expected to return the number of events progressed, to determine
+ * whether or not we should call sched_yield() during MPI progress.
+ * This is only losely tracked, as an error return can cause the number
+ * of progressed events to appear lower than it actually is.  We don't
+ * care, as the cost of that happening is far outweighed by the cost
+ * of the if checks (they were resulting in bad pipe stalling behavior)
+ */
+void
+opal_progress(void)
+{
+    static uint32_t num_calls = 0;
+    size_t i;
+    int events = 0;
+
     /* progress all registered callbacks */
     for (i = 0 ; i < callbacks_len ; ++i) {
         events += (callbacks[i])();
     }
 
-    if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &num_calls, 1) & 0x7) == 0) {
-        /* run low priority callbacks once every 8 calls to opal_progress() */
+    /* Run low priority callbacks and events once every 8 calls to opal_progress().
+     * Even though "num_calls" can be modified by multiple threads, we do not use
+     * atomic operations here, for performance reasons. In case of a race, the
+     * number of calls may be inaccurate, but since it will eventually be incremented,
+     * it's not a problem.
+     */
+    if (((num_calls++) & 0x7) == 0) {
         for (i = 0 ; i < callbacks_lp_len ; ++i) {
             events += (callbacks_lp[i])();
         }
+
+        opal_progress_events();
+    } else if (num_event_users > 0) {
+        opal_progress_events();
     }
 
 #if OPAL_HAVE_SCHED_YIELD
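The added branch above gates the low-priority callbacks and the event-library poll behind ((num_calls++) & 0x7) == 0, i.e. roughly every 8th call, and deliberately skips the atomic increment used before. A standalone sketch of that cheap rate-limiting idea is below; the names (slow_path, fast_progress) are hypothetical and not part of Open MPI. A racy, non-atomic increment can only perturb the cadence, never correctness, which is the trade-off the new comment describes.

/* Standalone sketch (hypothetical names, not Open MPI code): a plain uint32_t
 * counter is incremented without atomics; masking with 0x7 runs the slow path
 * on roughly every 8th call.  Under a race the cadence may drift slightly,
 * but correctness does not depend on the exact count. */
#include <stdint.h>
#include <stdio.h>

static uint32_t num_calls = 0;          /* deliberately non-atomic, as in the diff */

static int slow_path(void)              /* stands in for the low-priority callbacks */
{
    return 1;                           /* pretend one event was progressed */
}

static int fast_progress(void)
{
    int events = 0;

    /* high-frequency work would go here */

    if (((num_calls++) & 0x7) == 0) {   /* every 8th call, on average */
        events += slow_path();
    }
    return events;
}

int main(void)
{
    int events = 0;
    for (int i = 0; i < 32; ++i) {
        events += fast_progress();
    }
    printf("slow-path events: %d\n", events);   /* 4 slow-path runs over 32 calls */
    return 0;
}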