11
11
* All rights reserved.
12
12
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
13
13
* Copyright (c) 2014 Intel, Inc. All rights reserved
14
+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
14
15
* $COPYRIGHT$
15
16
*
16
17
* Additional copyrights may follow
38
39
#include "orte/util/show_help.h"
39
40
40
41
#include "orte/mca/ras/base/ras_private.h"
42
+ #include "orte/mca/ras/base/base.h"
41
43
#include "ras_lsf.h"
42
44
43
45
@@ -98,6 +100,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
98
100
if (NULL != node && 0 == strcmp (nodelist [i ], node -> name )) {
99
101
/* it is a repeat - just bump the slot count */
100
102
++ node -> slots ;
103
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
104
+ "ras/lsf: +++ Node (%s) [slots=%d]" , node -> name , node -> slots );
101
105
continue ;
102
106
}
103
107
@@ -107,7 +111,11 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
107
111
node -> slots_inuse = 0 ;
108
112
node -> slots_max = 0 ;
109
113
node -> slots = 1 ;
114
+ node -> state = ORTE_NODE_STATE_UP ;
110
115
opal_list_append (nodes , & node -> super );
116
+
117
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
118
+ "ras/lsf: New Node (%s) [slots=%d]" , node -> name , node -> slots );
111
119
}
112
120
113
121
/* release the nodelist from lsf */
@@ -141,14 +149,20 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
141
149
if (!OPAL_BINDING_POLICY_IS_SET (opal_hwloc_binding_policy )) {
142
150
OPAL_SET_BINDING_POLICY (opal_hwloc_binding_policy , OPAL_BIND_TO_HWTHREAD );
143
151
}
144
- /* get the apps and set the hostfile attribute in each to point to
145
- * the hostfile */
146
- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
147
- if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
148
- continue ;
149
- }
150
- orte_set_attribute (& app -> attributes , ORTE_APP_HOSTFILE , true, (void * )affinity_file , OPAL_STRING );
152
+ /*
153
+ * Do not set the hostfile attribute on each app_context since that
154
+ * would confuse the sequential mapper when it tries to assign bindings
155
+ * when running an MPMD job.
156
+ * Instead just overwrite the orte_default_hostfile so it will be
157
+ * general for all of the app_contexts.
158
+ */
159
+ if ( NULL != orte_default_hostfile ) {
160
+ free (orte_default_hostfile );
161
+ orte_default_hostfile = NULL ;
151
162
}
163
+ orte_default_hostfile = strdup (affinity_file );
164
+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
165
+ "ras/lsf: Set default_hostfile to %s" ,orte_default_hostfile );
152
166
153
167
return ORTE_SUCCESS ;
154
168
}
0 commit comments