@@ -186,7 +186,6 @@ cost lap(const lap_row dim,
186186 }
187187
188188 // AUGMENTING ROW REDUCTION
189- auto & col_list = scratch.col_list ; // List of columns to be scanned in various ways.
190189 int loopcnt = 0 ; // do-loop to be done twice.
191190
192191 do {
@@ -239,13 +238,16 @@ cost lap(const lap_row dim,
239238 } while (loopcnt < 2 ); // Repeat once.
240239
241240 // AUGMENT SOLUTION for each free row.
242- auto & d = scratch.d ; // 'Cost-distance' in augmenting path calculation.
243- auto & predecessor = scratch.predecessor ; // Row-predecessor of column in augmenting/alternating path.
241+ // Restrict-qualified local pointers enable the compiler to avoid
242+ // reloads after stores in the Dijkstra inner loop.
243+ cost* __restrict__ d_ptr = scratch.d .data ();
244+ lap_row* __restrict__ pred_ptr = scratch.predecessor .data ();
245+ lap_col* __restrict__ cl_ptr = scratch.col_list .data ();
244246
245247 for (lap_row f = 0 ; f < num_free; ++f) {
246248 bool unassignedfound = false ;
247249 lap_row free_row = freeunassigned[f]; // Start row of augmenting path.
248- const cost* free_row_cost = input_cost.row (free_row);
250+ const cost* __restrict__ free_row_cost = input_cost.row (free_row);
249251 lap_col endofpath = 0 ;
250252 lap_col last = 0 ;
251253 lap_row i;
@@ -254,9 +256,9 @@ cost lap(const lap_row dim,
254256 // Dijkstra shortest path algorithm.
255257 // Runs until unassigned column added to shortest path tree.
256258 for (lap_col j = 0 ; j < dim; ++j) {
257- d [j] = free_row_cost[j] - v_ptr[j];
258- predecessor [j] = free_row;
259- col_list [j] = j; // Init column list.
259+ d_ptr [j] = free_row_cost[j] - v_ptr[j];
260+ pred_ptr [j] = free_row;
261+ cl_ptr [j] = j; // Init column list.
260262 }
261263
262264 cost min = 0 ;
@@ -271,26 +273,26 @@ cost lap(const lap_row dim,
271273
272274 // Scan columns for up..dim-1 to find all indices for which new minimum occurs.
273275 // Store these indices between low..up-1 (increasing up).
274- min = d[col_list [up++]];
276+ min = d_ptr[cl_ptr [up++]];
275277
276278 for (lap_dim k = up; k < dim; ++k) {
277- const lap_col j = col_list [k];
278- const cost h = d [j];
279+ const lap_col j = cl_ptr [k];
280+ const cost h = d_ptr [j];
279281 if (h <= min) {
280282 if (h < min) { // New minimum.
281283 up = low; // Restart list at index low.
282284 min = h;
283285 }
284286 // New index with same minimum, put on index up, and extend list.
285- col_list [k] = col_list [up];
286- col_list [up++] = j;
287+ cl_ptr [k] = cl_ptr [up];
288+ cl_ptr [up++] = j;
287289 }
288290 }
289291 // Check if any of the minimum columns happens to be unassigned.
290292 // If so, we have an augmenting path right away.
291293 for (lap_dim k = low; k < up; ++k) {
292- if (colsol[col_list [k]] < 0 ) {
293- endofpath = col_list [k];
294+ if (colsol[cl_ptr [k]] < 0 ) {
295+ endofpath = cl_ptr [k];
294296 unassignedfound = true ;
295297 break ;
296298 }
@@ -300,16 +302,16 @@ cost lap(const lap_row dim,
300302 if (!unassignedfound) {
301303 // Update 'distances' between free_row and all unscanned columns,
302304 // via next scanned column.
303- j1 = col_list [low++];
305+ j1 = cl_ptr [low++];
304306 i = colsol[j1];
305- const cost* row_i = input_cost.row (i);
307+ const cost* __restrict__ row_i = input_cost.row (i);
306308 const cost h = row_i[j1] - v_ptr[j1] - min;
307309
308310 for (lap_dim k = up; k < dim; ++k) {
309- const lap_col j = col_list [k];
311+ const lap_col j = cl_ptr [k];
310312 cost v2 = row_i[j] - v_ptr[j] - h;
311- if (v2 < d [j]) {
312- predecessor [j] = i;
313+ if (v2 < d_ptr [j]) {
314+ pred_ptr [j] = i;
313315 if (v2 == min) { // New column found at same minimum value
314316 if (colsol[j] < 0 ) {
315317 // If unassigned, shortest augmenting path is complete.
@@ -318,25 +320,25 @@ cost lap(const lap_row dim,
318320 break ;
319321 } else {
320322 // Else add to list to be scanned right away.
321- col_list [k] = col_list [up];
322- col_list [up++] = j;
323+ cl_ptr [k] = cl_ptr [up];
324+ cl_ptr [up++] = j;
323325 }
324326 }
325- d [j] = v2; // <MS: Unintended>
327+ d_ptr [j] = v2;
326328 }
327329 }
328330 }
329331 } while (!unassignedfound);
330332
331333 // Update column prices.
332334 for (lap_dim k = 0 ; k <= last; ++k) {
333- j1 = col_list [k];
334- v[j1] += d [j1] - min;
335+ j1 = cl_ptr [k];
336+ v[j1] += d_ptr [j1] - min;
335337 }
336338
337339 // Reset row and column assignments along the alternating path.
338340 do {
339- i = predecessor [endofpath];
341+ i = pred_ptr [endofpath];
340342 colsol[endofpath] = i;
341343 j1 = endofpath;
342344 endofpath = rowsol[i];
0 commit comments