11
11
* All rights reserved.
12
12
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
13
13
* Copyright (c) 2014 Intel, Inc. All rights reserved
14
+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
14
15
* $COPYRIGHT$
15
16
*
16
17
* Additional copyrights may follow
38
39
#include "orte/util/show_help.h"
39
40
40
41
#include "orte/mca/ras/base/ras_private.h"
42
+ #include "orte/mca/ras/base/base.h"
41
43
#include "ras_lsf.h"
42
44
43
45
@@ -98,6 +100,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
98
100
if (NULL != node && 0 == strcmp (nodelist [i ], node -> name )) {
99
101
/* it is a repeat - just bump the slot count */
100
102
++ node -> slots ;
103
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
104
+ "ras/lsf: +++ Node (%s) [slots=%d]" , node -> name , node -> slots );
101
105
continue ;
102
106
}
103
107
@@ -109,6 +113,9 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
109
113
node -> slots = 1 ;
110
114
node -> state = ORTE_NODE_STATE_UP ;
111
115
opal_list_append (nodes , & node -> super );
116
+
117
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
118
+ "ras/lsf: New Node (%s) [slots=%d]" , node -> name , node -> slots );
112
119
}
113
120
114
121
/* release the nodelist from lsf */
@@ -142,14 +149,20 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
142
149
if (!OPAL_BINDING_POLICY_IS_SET (opal_hwloc_binding_policy )) {
143
150
OPAL_SET_BINDING_POLICY (opal_hwloc_binding_policy , OPAL_BIND_TO_HWTHREAD );
144
151
}
145
- /* get the apps and set the hostfile attribute in each to point to
146
- * the hostfile */
147
- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
148
- if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
149
- continue ;
150
- }
151
- orte_set_attribute (& app -> attributes , ORTE_APP_HOSTFILE , true, (void * )affinity_file , OPAL_STRING );
152
+ /*
153
+ * Do not set the hostfile attribute on each app_context since that
154
+ * would confuse the sequential mapper when it tries to assign bindings
155
+ * when running an MPMD job.
156
+ * Instead, just overwrite orte_default_hostfile so that it applies
157
+ * to all of the app_contexts.
158
+ */
159
+ if ( NULL != orte_default_hostfile ) {
160
+ free (orte_default_hostfile );
161
+ orte_default_hostfile = NULL ;
152
162
}
163
+ orte_default_hostfile = strdup (affinity_file );
164
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
165
+ "ras/lsf: Set default_hostfile to %s" ,orte_default_hostfile );
153
166
154
167
return ORTE_SUCCESS ;
155
168
}
0 commit comments