13
13
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
14
14
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
15
15
* reserved.
16
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
16
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved
17
17
* Copyright (c) 2017 Research Organization for Information Science
18
18
* and Technology (RIST). All rights reserved.
19
19
* $COPYRIGHT$
28
28
#include "opal/mca/event/event.h"
29
29
#include "opal/util/output.h"
30
30
#include "opal/util/show_help.h"
31
+ #include "opal/util/opal_environ.h"
31
32
#include "ompi/proc/proc.h"
32
33
33
34
#include "mtl_psm2.h"
@@ -45,6 +46,10 @@ static int param_priority;
45
46
/* MPI_THREAD_MULTIPLE_SUPPORT */
46
47
opal_mutex_t mtl_psm2_mq_mutex = OPAL_MUTEX_STATIC_INIT ;
47
48
49
+ #if OPAL_CUDA_SUPPORT
50
+ static bool cuda_envvar_set = false;
51
+ #endif
52
+
48
53
static int ompi_mtl_psm2_component_open (void );
49
54
static int ompi_mtl_psm2_component_close (void );
50
55
static int ompi_mtl_psm2_component_query (mca_base_module_t * * module , int * priority );
@@ -201,9 +206,6 @@ static int
201
206
ompi_mtl_psm2_component_register (void )
202
207
{
203
208
int num_local_procs , num_total_procs ;
204
- #if OPAL_CUDA_SUPPORT
205
- char * cuda_env ;
206
- #endif
207
209
208
210
ompi_mtl_psm2 .connect_timeout = 180 ;
209
211
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
@@ -228,30 +230,6 @@ ompi_mtl_psm2_component_register(void)
228
230
param_priority = 40 ;
229
231
}
230
232
231
- #if OPAL_CUDA_SUPPORT
232
- /*
233
- * If using CUDA enabled OpenMPI, the user likely intends to
234
- * run with CUDA buffers. So, force-set the envvar here if user failed
235
- * to set it.
236
- */
237
- cuda_env = getenv ("PSM2_CUDA" );
238
- if (!cuda_env ) {
239
- opal_show_help ("help-mtl-psm2.txt" ,
240
- "no psm2 cuda env" , true,
241
- "not set" ,
242
- "Host buffers,\nthere will be a performance penalty"
243
- " due to OMPI force setting this variable now.\n"
244
- "Set environment variable to 0 if using Host buffers" );
245
- setenv ("PSM2_CUDA" , "1" , 0 );
246
- } else if (strcmp (cuda_env , "0" ) == 0 ) {
247
- opal_show_help ("help-mtl-psm2.txt" ,
248
- "no psm2 cuda env" , true,
249
- "set to 0" ,
250
- "CUDA buffers,\nthe execution will SEGFAULT."
251
- " Set environment variable to 1 if using CUDA buffers" );
252
- }
253
- #endif
254
-
255
233
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
256
234
"priority" , "Priority of the PSM2 MTL component" ,
257
235
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
@@ -272,17 +250,16 @@ static int
272
250
ompi_mtl_psm2_component_open (void )
273
251
{
274
252
int res ;
275
- glob_t globbuf ;
276
- globbuf .gl_offs = 0 ;
253
+ glob_t globbuf = {0 };
277
254
278
255
/* Component available only if Omni-Path hardware is present */
279
256
res = glob ("/dev/hfi1_[0-9]" , GLOB_DOOFFS , NULL , & globbuf );
280
- if (0 == res || GLOB_NOMATCH == res ) {
257
+ if (globbuf . gl_pathc > 0 ) {
281
258
globfree (& globbuf );
282
259
}
283
260
if (0 != res ) {
284
261
res = glob ("/dev/hfi1_[0-9][0-9]" , GLOB_APPEND , NULL , & globbuf );
285
- if (0 == res || GLOB_NOMATCH == res ) {
262
+ if (globbuf . gl_pathc > 0 ) {
286
263
globfree (& globbuf );
287
264
}
288
265
if (0 != res ) {
@@ -336,6 +313,11 @@ ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
336
313
static int
337
314
ompi_mtl_psm2_component_close (void )
338
315
{
316
+ #if OPAL_CUDA_SUPPORT
317
+ if (cuda_envvar_set ) {
318
+ opal_unsetenv ("PSM2_CUDA" , & environ );
319
+ }
320
+ #endif
339
321
return OMPI_SUCCESS ;
340
322
}
341
323
@@ -362,6 +344,11 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
362
344
int verno_major = PSM2_VERNO_MAJOR ;
363
345
int verno_minor = PSM2_VERNO_MINOR ;
364
346
int local_rank = -1 , num_local_procs = 0 ;
347
+ #if OPAL_CUDA_SUPPORT
348
+ int ret ;
349
+ char * cuda_env ;
350
+ glob_t globbuf = {0 };
351
+ #endif
365
352
366
353
/* Compute the total number of processes on this host and our local rank
367
354
* on that node. We need to provide PSM2 with these values so it can
@@ -389,6 +376,27 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
389
376
ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i );
390
377
}
391
378
379
+ #if OPAL_CUDA_SUPPORT
380
+ /*
381
+ * If using CUDA enabled Open MPI, the user likely intends to
382
+ * run with CUDA buffers. So, force-set the envvar here if user failed
383
+ * to set it.
384
+ */
385
+ ret = glob ("/sys/module/nvidia" , GLOB_DOOFFS , NULL , & globbuf );
386
+ if (globbuf .gl_pathc > 0 ) {
387
+ globfree (& globbuf );
388
+ }
389
+
390
+ cuda_env = getenv ("PSM2_CUDA" );
391
+ if (!cuda_env && (0 == ret )) {
392
+ opal_show_help ("help-mtl-psm2.txt" ,
393
+ "no psm2 cuda env" , true,
394
+ ompi_process_info .nodename );
395
+ opal_setenv ("PSM2_CUDA" , "1" , false, & environ );
396
+ cuda_envvar_set = true;
397
+ }
398
+ #endif
399
+
392
400
err = psm2_init (& verno_major , & verno_minor );
393
401
if (err ) {
394
402
opal_show_help ("help-mtl-psm2.txt" ,
0 commit comments