Description
In blas_memory_alloc, LOCK_COMMAND is invoked too many times. I checked the code:
2570 do {
2571 if (!memory[position].used && (memory[position].pos == mypos)) {
2572 LOCK_COMMAND(&alloc_lock);
2573 /* blas_lock(&memory[position].lock);*/
2574
2575 if (!memory[position].used) goto allocation;
2576
2577 UNLOCK_COMMAND(&alloc_lock);
2578 /* blas_unlock(&memory[position].lock);*/
2579 }
2580
2581 position ++;
2582
2583 } while (position < NUM_BUFFERS);
2584
2588 position = 0;
2589
2590 do {
2591 /* if (!memory[position].used) { */
2592 LOCK_COMMAND(&alloc_lock);
2593 /* blas_lock(&memory[position].lock);*/
2594
2595 if (!memory[position].used) goto allocation;
2596
2597 UNLOCK_COMMAND(&alloc_lock);
2598 /* blas_unlock(&memory[position].lock);*/
2599 /* } */
2600
2601 position ++;
2602
2603 } while (position < NUM_BUFFERS);
Each single check of if (!memory[position].used) takes its own LOCK/UNLOCK pair. Why not move the LOCK/UNLOCK outside of the loop?
One allocation therefore requires many LOCK/UNLOCK operations, which makes memory allocation very inefficient.
The modification below passes a test with 5 threads.
LOCK_COMMAND(&alloc_lock);
2570 do {
2571 if (!memory[position].used && (memory[position].pos == mypos)) {
2572
2573 /* blas_lock(&memory[position].lock);*/
2574
2575 if (!memory[position].used) goto allocation;
2576
2577
2578 /* blas_unlock(&memory[position].lock);*/
2579 }
2580
2581 position ++;
2582
2583 } while (position < NUM_BUFFERS);
2584 UNLOCK_COMMAND(&alloc_lock);
2588 position = 0;
2589 LOCK_COMMAND(&alloc_lock);
2590 do {
2591 /* if (!memory[position].used) { */
2592
2593 /* blas_lock(&memory[position].lock);*/
2594
2595 if (!memory[position].used) goto allocation;
2596
2597 UNLOCK_COMMAND(&alloc_lock);
2598 /* blas_unlock(&memory[position].lock);*/
2599 /* } */
2600
2601 position ++;
2602
2603 } while (position < NUM_BUFFERS);
UNLOCK_COMMAND(&alloc_lock);