11
11
* Copyright (c) 2004-2005 The Regents of the University of California.
12
12
* All rights reserved.
13
13
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
14
- * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
15
- * All rights reserved.
16
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
14
+ * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
15
+ * reserved.
16
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved
17
17
* Copyright (c) 2017 Research Organization for Information Science
18
18
* and Technology (RIST). All rights reserved.
19
19
* $COPYRIGHT$
28
28
#include "opal/mca/event/event.h"
29
29
#include "opal/util/output.h"
30
30
#include "opal/util/show_help.h"
31
+ #include "opal/util/opal_environ.h"
31
32
#include "ompi/proc/proc.h"
32
33
33
34
#include "mtl_psm2.h"
43
44
44
45
static int param_priority ;
45
46
47
/*
 * cuda_envvar_set records whether this component itself exported
 * PSM2_CUDA: ompi_mtl_psm2_component_init sets it to true right after
 * calling opal_setenv("PSM2_CUDA", "1", ...), and
 * ompi_mtl_psm2_component_close checks it before removing the variable
 * from the environment again.  Only present in OPAL_CUDA_SUPPORT builds.
 */
+ #if OPAL_CUDA_SUPPORT
48
+ static bool cuda_envvar_set = false;
49
+ #endif
50
+
46
51
static int ompi_mtl_psm2_component_open (void );
47
52
static int ompi_mtl_psm2_component_close (void );
48
53
static int ompi_mtl_psm2_component_query (mca_base_module_t * * module , int * priority );
@@ -80,10 +85,6 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = {
80
85
static int
81
86
ompi_mtl_psm2_component_register (void )
82
87
{
83
- #if OPAL_CUDA_SUPPORT
84
- char * cuda_env ;
85
- #endif
86
-
87
88
ompi_mtl_psm2 .connect_timeout = 180 ;
88
89
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
89
90
"connect_timeout" ,
@@ -95,29 +96,6 @@ ompi_mtl_psm2_component_register(void)
95
96
96
97
/* set priority high enough to beat ob1's default (also set higher than psm) */
97
98
param_priority = 40 ;
98
- #if OPAL_CUDA_SUPPORT
99
- /*
100
- * If using CUDA enabled OpenMPI, the user likely intends to
101
- * run with CUDA buffers. So, force-set the envvar here if user failed
102
- * to set it.
103
- */
104
- cuda_env = getenv ("PSM2_CUDA" );
105
- if (!cuda_env ) {
106
- opal_show_help ("help-mtl-psm2.txt" ,
107
- "no psm2 cuda env" , true,
108
- "not set" ,
109
- "Host buffers,\nthere will be a performance penalty"
110
- " due to OMPI force setting this variable now.\n"
111
- "Set environment variable to 0 if using Host buffers" );
112
- setenv ("PSM2_CUDA" , "1" , 0 );
113
- } else if (strcmp (cuda_env , "0" ) == 0 ) {
114
- opal_show_help ("help-mtl-psm2.txt" ,
115
- "no psm2 cuda env" , true,
116
- "set to 0" ,
117
- "CUDA buffers,\nthe execution will SEGFAULT."
118
- " Set environment variable to 1 if using CUDA buffers" );
119
- }
120
- #endif
121
99
122
100
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
123
101
"priority" , "Priority of the PSM2 MTL component" ,
@@ -133,17 +111,16 @@ static int
133
111
ompi_mtl_psm2_component_open (void )
134
112
{
135
113
int res ;
136
- glob_t globbuf ;
137
- globbuf .gl_offs = 0 ;
114
+ glob_t globbuf = {0 };
138
115
139
116
/* Component available only if Omni-Path hardware is present */
140
117
res = glob ("/dev/hfi1_[0-9]" , GLOB_DOOFFS , NULL , & globbuf );
141
- if (0 == res || GLOB_NOMATCH == res ) {
118
+ if (globbuf . gl_pathc > 0 ) {
142
119
globfree (& globbuf );
143
120
}
144
121
if (0 != res ) {
145
122
res = glob ("/dev/hfi1_[0-9][0-9]" , GLOB_APPEND , NULL , & globbuf );
146
- if (0 == res || GLOB_NOMATCH == res ) {
123
+ if (globbuf . gl_pathc > 0 ) {
147
124
globfree (& globbuf );
148
125
}
149
126
if (0 != res ) {
@@ -197,6 +174,11 @@ ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
197
174
/*
 * Component close hook.
 *
 * In OPAL_CUDA_SUPPORT builds, if cuda_envvar_set is true (i.e. this
 * component set PSM2_CUDA itself during init), remove PSM2_CUDA from
 * the process environment via opal_unsetenv so the forced setting does
 * not outlive the component.  The return value of opal_unsetenv is not
 * checked.
 *
 * Always returns OMPI_SUCCESS.
 */
static int
198
175
ompi_mtl_psm2_component_close (void )
199
176
{
177
+ #if OPAL_CUDA_SUPPORT
178
+ if (cuda_envvar_set ) {
179
+ opal_unsetenv ("PSM2_CUDA" , & environ );
180
+ }
181
+ #endif
200
182
return OMPI_SUCCESS ;
201
183
}
202
184
@@ -240,6 +222,11 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
240
222
int verno_minor = PSM2_VERNO_MINOR ;
241
223
int local_rank = -1 , num_local_procs = 0 ;
242
224
int num_total_procs = 0 ;
225
+ #if OPAL_CUDA_SUPPORT
226
+ int ret ;
227
+ char * cuda_env ;
228
+ glob_t globbuf = {0 };
229
+ #endif
243
230
244
231
/* Compute the total number of processes on this host and our local rank
245
232
* on that node. We need to provide PSM2 with these values so it can
@@ -272,6 +259,27 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
272
259
setenv ("PSM2_DEVICES" , "self,shm" , 0 );
273
260
}
274
261
262
+ #if OPAL_CUDA_SUPPORT
263
+ /*
264
+ * If using CUDA enabled Open MPI, the user likely intends to
265
+ * run with CUDA buffers. So, force-set the envvar here if user failed
266
+ * to set it.
267
+ */
268
+ ret = glob ("/sys/module/nvidia" , GLOB_DOOFFS , NULL , & globbuf );
269
+ if (globbuf .gl_pathc > 0 ) {
270
+ globfree (& globbuf );
271
+ }
272
+
273
+ cuda_env = getenv ("PSM2_CUDA" );
274
+ if (!cuda_env && (0 == ret )) {
275
+ opal_show_help ("help-mtl-psm2.txt" ,
276
+ "no psm2 cuda env" , true,
277
+ ompi_process_info .nodename );
278
+ opal_setenv ("PSM2_CUDA" , "1" , false, & environ );
279
+ cuda_envvar_set = true;
280
+ }
281
+ #endif
282
+
275
283
err = psm2_init (& verno_major , & verno_minor );
276
284
if (err ) {
277
285
opal_show_help ("help-mtl-psm2.txt" ,
0 commit comments