Skip to content

Commit 2c348b1

Browse files
committed
ofi: follow user specified include/exclude list to select providers
This PR addresses #12233 Since 5.0.x, we introduced an optional FI_HMEM capability in ofi provider selection logic(both mtl and btl) in order to support accelerator memory. As described in the issue, this introduced a bug that can cause the wrong ofi provider to be selected, even if the user explicitly includes/excludes the provider name. This change refactors the selection logic to correctly handle the include/exclude list, and therefore fixes the bug. Signed-off-by: Wenduo Wang <[email protected]>
1 parent 5fa32f7 commit 2c348b1

File tree

4 files changed

+119
-22
lines changed

4 files changed

+119
-22
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,12 +764,19 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
764764
"%s:%d: fi_getinfo(): %s\n",
765765
__FILE__, __LINE__, fi_strerror(-ret));
766766

767-
if (FI_ENODATA == -ret) {
767+
if ((FI_ENODATA == -ret)
768+
|| (include_list && 0 == opal_common_ofi_count_providers_in_list(providers, include_list))
769+
|| (!include_list && exclude_list
770+
&& opal_common_ofi_providers_subset_of_list(providers, exclude_list))) {
768771
#if defined(FI_HMEM)
769772
/* Attempt selecting a provider without FI_HMEM hints */
770773
if (hints->caps & FI_HMEM) {
771774
hints->caps &= ~FI_HMEM;
772775
hints->domain_attr->mr_mode &= ~FI_MR_HMEM;
776+
if (providers) {
777+
(void) fi_freeinfo(providers);
778+
providers = NULL;
779+
}
773780
goto no_hmem;
774781
}
775782
#endif

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
260260
int rc;
261261
uint64_t progress_mode;
262262
unsigned resource_count = 0;
263-
struct mca_btl_base_module_t **base_modules;
263+
struct mca_btl_base_module_t **base_modules = NULL;
264264
char **include_list = NULL, **exclude_list = NULL;
265265

266266
BTL_VERBOSE(("initializing ofi btl"));
@@ -275,7 +275,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
275275
return NULL;
276276
}
277277

278-
struct fi_info *info, *info_list, *selected_info;
278+
struct fi_info *info, *info_list = NULL, *selected_info = NULL;
279279
struct fi_info hints = {0};
280280
struct fi_ep_attr ep_attr = {0};
281281
struct fi_rx_attr rx_attr = {0};
@@ -366,22 +366,30 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
366366
* The earliest version the explictly allow provider to call CUDA API is 1.18 */
367367
rc = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, &hints, &info_list);
368368
if (FI_ENOSYS == -rc) {
369-
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
369+
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
370370
}
371-
if (0 != rc) {
371+
372+
if ((FI_ENODATA == -rc)
373+
|| (include_list && 0 == opal_common_ofi_count_providers_in_list(info_list, include_list))
374+
|| (!include_list && exclude_list
375+
&& opal_common_ofi_providers_subset_of_list(info_list, exclude_list))) {
372376
#if defined(FI_HMEM)
377+
/* Attempt selecting a provider without FI_HMEM hints */
373378
if (hints.caps & FI_HMEM) {
374-
/* Try again without FI_HMEM hints */
375379
hints.caps &= ~FI_HMEM;
376380
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
381+
if (info_list) {
382+
(void) fi_freeinfo(info_list);
383+
info_list = NULL;
384+
}
377385
goto no_hmem;
378386
}
379387
#endif
388+
/* It is not an error if no information is returned. */
389+
goto out;
390+
} else if (0 != rc) {
380391
BTL_VERBOSE(("fi_getinfo failed with code %d: %s", rc, fi_strerror(-rc)));
381-
if (NULL != include_list) {
382-
opal_argv_free(include_list);
383-
}
384-
return NULL;
392+
goto out;
385393
}
386394

387395
#if defined(FI_HMEM)
@@ -441,16 +449,15 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
441449
info = info->next;
442450
}
443451

444-
/* We are done with the returned info. */
445-
fi_freeinfo(info_list);
446-
if (NULL != include_list) {
447-
opal_argv_free(include_list);
452+
if (NULL == info) {
453+
BTL_VERBOSE(("No provider is selected"));
454+
goto out;
448455
}
449456

450457
/* pass module array back to caller */
451458
base_modules = calloc(mca_btl_ofi_component.module_count, sizeof(*base_modules));
452459
if (NULL == base_modules) {
453-
return NULL;
460+
goto out;
454461
}
455462

456463
memcpy(base_modules, mca_btl_ofi_component.modules,
@@ -461,6 +468,17 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
461468

462469
*num_btl_modules = mca_btl_ofi_component.module_count;
463470

471+
out:
472+
if (include_list) {
473+
opal_argv_free(include_list);
474+
}
475+
if (exclude_list) {
476+
opal_argv_free(exclude_list);
477+
}
478+
if (info_list) {
479+
(void) fi_freeinfo(info_list);
480+
}
481+
464482
return base_modules;
465483
}
466484

opal/mca/common/ofi/common_ofi.c

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ static int opal_common_ofi_init_ref_cnt = 0;
4949
static bool opal_common_ofi_installed_memory_monitor = false;
5050
#endif
5151

52+
/* Count providers returns the number of providers present in an fi_info list
53+
* @param (IN) provider_list struct fi_info* list of providers available
54+
*
55+
* @param (OUT) int number of providers present in the list
56+
*
57+
* returns 0 if the list is NULL
58+
*/
59+
static int count_providers(struct fi_info *provider_list);
60+
5261
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
5362

5463
/*
@@ -272,6 +281,44 @@ int opal_common_ofi_is_in_list(char **list, char *item)
272281
return 0;
273282
}
274283

284+
int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list, char **list)
285+
{
286+
int count = 0, matched = 0;
287+
struct fi_info *prov = provider_list, *prev_prov = NULL;
288+
char *name;
289+
290+
while (prov) {
291+
name = prov->fabric_attr->prov_name;
292+
if (prev_prov && !strncasecmp(prev_prov->fabric_attr->prov_name, name, strlen(name))) {
293+
/**
294+
* Providers are usually sorted by name. We can reuse the previous matching result and
295+
* avoid the potentially expensive list traversal.
296+
*/
297+
count += matched;
298+
} else if (opal_common_ofi_is_in_list(list, prov->fabric_attr->prov_name)) {
299+
matched = 1;
300+
++count;
301+
} else {
302+
matched = 0;
303+
}
304+
prev_prov = prov;
305+
prov = prov->next;
306+
}
307+
308+
return count;
309+
}
310+
311+
int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list, char **list)
312+
{
313+
int num_prov = count_providers(provider_list);
314+
315+
if (!num_prov) {
316+
return 1;
317+
}
318+
319+
return num_prov == opal_common_ofi_count_providers_in_list(provider_list, list);
320+
}
321+
275322
int opal_common_ofi_mca_register(const mca_base_component_t *component)
276323
{
277324
static int include_index = -1;
@@ -737,13 +784,6 @@ static struct fi_info *select_provider_round_robin(struct fi_info *provider_list
737784
return current_provider;
738785
}
739786

740-
/* Count providers returns the number of providers present in an fi_info list
741-
* @param (IN) provider_list struct fi_info* list of providers available
742-
*
743-
* @param (OUT) int number of providers present in the list
744-
*
745-
* returns 0 if the list is NULL
746-
*/
747787
static int count_providers(struct fi_info *provider_list)
748788
{
749789
struct fi_info *dev = provider_list;

opal/mca/common/ofi/common_ofi.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,38 @@ OPAL_DECLSPEC int opal_common_ofi_export_memory_monitor(void);
100100
*/
101101
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
102102

103+
/**
104+
* Get the number of providers whose names are included in a list
105+
*
106+
* This function takes a list of providers and a list of name strings
107+
* as inputs, and return the number of providers whose names are included
108+
* in the name strings.
109+
*
110+
* @param provider_list (IN) List of providers
111+
* @param list (IN) List of name string
112+
*
113+
* @return Number of matched providers
114+
*
115+
*/
116+
OPAL_DECLSPEC int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list,
117+
char **list);
118+
119+
/**
120+
* Determine whether all providers are included in a list
121+
*
122+
* This function takes a list of providers and a list of name strings
123+
* as inputs, and return whether all provider names are included in the name strings.
124+
*
125+
* @param provider_list (IN) List of providers
126+
* @param list (IN) List of name string
127+
*
128+
* @return 0 At least one provider's name is not included in the name strings.
129+
* @return 1 All provider names are included in the name strings.
130+
*
131+
*/
132+
OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list,
133+
char **list);
134+
103135
/**
104136
* Selects NIC (provider) based on hardware locality
105137
*

0 commit comments

Comments
 (0)