Skip to content

Commit 3c2f6d5

Browse files
committed
Protect openib_btl->device data with explicit opal_mutex locks.
1 parent e06bffe commit 3c2f6d5

File tree

1 file changed

+19
-16
lines changed

1 file changed

+19
-16
lines changed

opal/mca/btl/openib/btl_openib.c

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
210210
return OPAL_ERROR;
211211
}
212212

213-
OPAL_THREAD_LOCK(&device->device_lock);
214213
if (!device->progress) {
215214
int rc;
216215
device->progress = true;
@@ -219,7 +218,6 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
219218
return rc;
220219
}
221220
}
222-
OPAL_THREAD_UNLOCK(&device->device_lock);
223221
#endif
224222
}
225223
#ifdef HAVE_IBV_RESIZE_CQ
@@ -406,7 +404,7 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
406404
return OPAL_SUCCESS;
407405
}
408406

409-
static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
407+
static int mca_btl_openib_size_queues_nolock(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
410408
{
411409
uint32_t send_cqes, recv_cqes;
412410
int rc = OPAL_SUCCESS, qp;
@@ -603,7 +601,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
603601
return OPAL_SUCCESS;
604602
}
605603

606-
static int prepare_device_for_use (mca_btl_openib_device_t *device)
604+
static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device)
607605
{
608606
mca_btl_openib_frag_init_data_t *init_data;
609607
int rc, length;
@@ -920,7 +918,12 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open
920918
return OPAL_ERROR;
921919
}
922920

921+
/* protect device because several endpoints for different ib_proc's
922+
* may be simultaneously initialized */
923+
opal_mutex_lock(&openib_btl->device->device_lock);
923924
endpoint->index = opal_pointer_array_add(openib_btl->device->endpoints, (void*)endpoint);
925+
opal_mutex_unlock(&openib_btl->device->device_lock);
926+
924927
if( 0 > endpoint->index ) {
925928
OBJ_RELEASE(endpoint);
926929
return OPAL_ERROR;
@@ -981,21 +984,21 @@ int mca_btl_openib_add_procs(
981984
#endif
982985

983986
/* protect the device */
984-
opal_mutex_lock(&mca_btl_openib_component.ib_lock);
985-
rc = prepare_device_for_use (openib_btl->device);
987+
opal_mutex_lock(&openib_btl->device->device_lock);
988+
rc = prepare_device_for_use_nolock (openib_btl->device);
986989
if (OPAL_SUCCESS != rc) {
987990
BTL_ERROR(("could not prepare openib device for use"));
988-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
991+
opal_mutex_unlock(&openib_btl->device->device_lock);
989992
return rc;
990993
}
991994

992-
rc = mca_btl_openib_size_queues(openib_btl, nprocs);
995+
rc = mca_btl_openib_size_queues_nolock(openib_btl, nprocs);
993996
if (OPAL_SUCCESS != rc) {
994997
BTL_ERROR(("error creating cqs"));
995-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
998+
opal_mutex_unlock(&openib_btl->device->device_lock);
996999
return rc;
9971000
}
998-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
1001+
opal_mutex_unlock(&openib_btl->device->device_lock);
9991002

10001003
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
10011004
struct opal_proc_t* proc = procs[i];
@@ -1075,21 +1078,21 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
10751078

10761079
// TODO: shift to the separate function
10771080
/* protect the device */
1078-
opal_mutex_lock(&mca_btl_openib_component.ib_lock);
1079-
rc = prepare_device_for_use (openib_btl->device);
1081+
opal_mutex_lock(&openib_btl->device->device_lock);
1082+
rc = prepare_device_for_use_nolock (openib_btl->device);
10801083
if (OPAL_SUCCESS != rc) {
10811084
BTL_ERROR(("could not prepare openib device for use"));
1082-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
1085+
opal_mutex_unlock(&openib_btl->device->device_lock);
10831086
return NULL;
10841087
}
10851088

1086-
rc = mca_btl_openib_size_queues(openib_btl, 1);
1089+
rc = mca_btl_openib_size_queues_nolock(openib_btl, 1);
10871090
if (OPAL_SUCCESS != rc) {
10881091
BTL_ERROR(("error creating cqs"));
1089-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
1092+
opal_mutex_unlock(&openib_btl->device->device_lock);
10901093
return NULL;
10911094
}
1092-
opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
1095+
opal_mutex_unlock(&openib_btl->device->device_lock);
10931096

10941097

10951098
if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc, &is_new))) {

0 commit comments

Comments
 (0)