Skip to content

Commit 75e726c

Browse files
committed
ofi: follow user specified include/exclude list to select providers
This PR addresses #12233 Since 5.0.x, we introduced an optional FI_HMEM capability in ofi provider selection logic(both mtl and btl) in order to support accelerator memory. As described in the issue, this introduced a bug that can cause the wrong ofi provider to be selected, even if the user explicitly includes/excludes the provider name. This change refactors the selection logic to correctly handle the include/exclude list, and therefore fixes the bug. Signed-off-by: Wenduo Wang <[email protected]> (cherry picked from commit 29efcef)
1 parent a2dee36 commit 75e726c

File tree

4 files changed

+121
-22
lines changed

4 files changed

+121
-22
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,12 +764,20 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
764764
"%s:%d: fi_getinfo(): %s\n",
765765
__FILE__, __LINE__, fi_strerror(-ret));
766766

767-
if (FI_ENODATA == -ret) {
767+
if ((FI_ENODATA == -ret)
768+
|| (0 == ret && include_list
769+
&& 0 == opal_common_ofi_count_providers_in_list(providers, include_list))
770+
|| (0 == ret && !include_list && exclude_list
771+
&& opal_common_ofi_providers_subset_of_list(providers, exclude_list))) {
768772
#if defined(FI_HMEM)
769773
/* Attempt selecting a provider without FI_HMEM hints */
770774
if (hints->caps & FI_HMEM) {
771775
hints->caps &= ~FI_HMEM;
772776
hints->domain_attr->mr_mode &= ~FI_MR_HMEM;
777+
if (providers) {
778+
(void) fi_freeinfo(providers);
779+
providers = NULL;
780+
}
773781
goto no_hmem;
774782
}
775783
#endif

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
260260
int rc;
261261
uint64_t progress_mode;
262262
unsigned resource_count = 0;
263-
struct mca_btl_base_module_t **base_modules;
263+
struct mca_btl_base_module_t **base_modules = NULL;
264264
char **include_list = NULL, **exclude_list = NULL;
265265

266266
BTL_VERBOSE(("initializing ofi btl"));
@@ -275,7 +275,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
275275
return NULL;
276276
}
277277

278-
struct fi_info *info, *info_list, *selected_info;
278+
struct fi_info *info, *info_list = NULL, *selected_info = NULL;
279279
struct fi_info hints = {0};
280280
struct fi_ep_attr ep_attr = {0};
281281
struct fi_rx_attr rx_attr = {0};
@@ -366,22 +366,31 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
366366
* The earliest version the explictly allow provider to call CUDA API is 1.18 */
367367
rc = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, &hints, &info_list);
368368
if (FI_ENOSYS == -rc) {
369-
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
369+
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
370370
}
371-
if (0 != rc) {
371+
372+
if ((FI_ENODATA == -rc)
373+
|| (0 == rc && include_list
374+
&& 0 == opal_common_ofi_count_providers_in_list(info_list, include_list))
375+
|| (0 == rc && !include_list && exclude_list
376+
&& opal_common_ofi_providers_subset_of_list(info_list, exclude_list))) {
372377
#if defined(FI_HMEM)
378+
/* Attempt selecting a provider without FI_HMEM hints */
373379
if (hints.caps & FI_HMEM) {
374-
/* Try again without FI_HMEM hints */
375380
hints.caps &= ~FI_HMEM;
376381
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
382+
if (info_list) {
383+
(void) fi_freeinfo(info_list);
384+
info_list = NULL;
385+
}
377386
goto no_hmem;
378387
}
379388
#endif
389+
/* It is not an error if no information is returned. */
390+
goto out;
391+
} else if (0 != rc) {
380392
BTL_VERBOSE(("fi_getinfo failed with code %d: %s", rc, fi_strerror(-rc)));
381-
if (NULL != include_list) {
382-
opal_argv_free(include_list);
383-
}
384-
return NULL;
393+
goto out;
385394
}
386395

387396
#if defined(FI_HMEM)
@@ -441,16 +450,15 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
441450
info = info->next;
442451
}
443452

444-
/* We are done with the returned info. */
445-
fi_freeinfo(info_list);
446-
if (NULL != include_list) {
447-
opal_argv_free(include_list);
453+
if (NULL == info) {
454+
BTL_VERBOSE(("No provider is selected"));
455+
goto out;
448456
}
449457

450458
/* pass module array back to caller */
451459
base_modules = calloc(mca_btl_ofi_component.module_count, sizeof(*base_modules));
452460
if (NULL == base_modules) {
453-
return NULL;
461+
goto out;
454462
}
455463

456464
memcpy(base_modules, mca_btl_ofi_component.modules,
@@ -461,6 +469,17 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
461469

462470
*num_btl_modules = mca_btl_ofi_component.module_count;
463471

472+
out:
473+
if (include_list) {
474+
opal_argv_free(include_list);
475+
}
476+
if (exclude_list) {
477+
opal_argv_free(exclude_list);
478+
}
479+
if (info_list) {
480+
(void) fi_freeinfo(info_list);
481+
}
482+
464483
return base_modules;
465484
}
466485

opal/mca/common/ofi/common_ofi.c

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ static int opal_common_ofi_init_ref_cnt = 0;
4949
static bool opal_common_ofi_installed_memory_monitor = false;
5050
#endif
5151

52+
/* Count providers returns the number of providers present in an fi_info list
53+
* @param (IN) provider_list struct fi_info* list of providers available
54+
*
55+
* @param (OUT) int number of providers present in the list
56+
*
57+
* returns 0 if the list is NULL
58+
*/
59+
static int count_providers(struct fi_info *provider_list);
60+
5261
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
5362

5463
/*
@@ -272,6 +281,44 @@ int opal_common_ofi_is_in_list(char **list, char *item)
272281
return 0;
273282
}
274283

284+
int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list, char **list)
285+
{
286+
int count = 0, matched = 0;
287+
struct fi_info *prov = provider_list, *prev_prov = NULL;
288+
char *name;
289+
290+
while (prov) {
291+
name = prov->fabric_attr->prov_name;
292+
if (prev_prov && !strncasecmp(prev_prov->fabric_attr->prov_name, name, strlen(name))) {
293+
/**
294+
* Providers are usually sorted by name. We can reuse the previous matching result and
295+
* avoid the potentially expensive list traversal.
296+
*/
297+
count += matched;
298+
} else if (opal_common_ofi_is_in_list(list, prov->fabric_attr->prov_name)) {
299+
matched = 1;
300+
++count;
301+
} else {
302+
matched = 0;
303+
}
304+
prev_prov = prov;
305+
prov = prov->next;
306+
}
307+
308+
return count;
309+
}
310+
311+
int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list, char **list)
312+
{
313+
int num_prov = count_providers(provider_list);
314+
315+
if (!num_prov) {
316+
return 1;
317+
}
318+
319+
return num_prov == opal_common_ofi_count_providers_in_list(provider_list, list);
320+
}
321+
275322
int opal_common_ofi_mca_register(const mca_base_component_t *component)
276323
{
277324
static int include_index = -1;
@@ -682,13 +729,6 @@ static bool is_near(pmix_device_distance_t *distances,
682729
#endif
683730
#endif // OPAL_OFI_PCI_DATA_AVAILABLE
684731

685-
/* Count providers returns the number of providers present in an fi_info list
686-
* @param (IN) provider_list struct fi_info* list of providers available
687-
*
688-
* @param (OUT) int number of providers present in the list
689-
*
690-
* returns 0 if the list is NULL
691-
*/
692732
static int count_providers(struct fi_info *provider_list)
693733
{
694734
struct fi_info *dev = provider_list;

opal/mca/common/ofi/common_ofi.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,38 @@ OPAL_DECLSPEC int opal_common_ofi_export_memory_monitor(void);
100100
*/
101101
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
102102

103+
/**
104+
* Get the number of providers whose names are included in a list
105+
*
106+
* This function takes a list of providers and a list of name strings
107+
* as inputs, and return the number of providers whose names are included
108+
* in the name strings.
109+
*
110+
* @param provider_list (IN) List of providers
111+
* @param list (IN) List of name string
112+
*
113+
* @return Number of matched providers
114+
*
115+
*/
116+
OPAL_DECLSPEC int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list,
117+
char **list);
118+
119+
/**
120+
* Determine whether all providers are included in a list
121+
*
122+
* This function takes a list of providers and a list of name strings
123+
* as inputs, and return whether all provider names are included in the name strings.
124+
*
125+
* @param provider_list (IN) List of providers
126+
* @param list (IN) List of name string
127+
*
128+
* @return 0 At least one provider's name is not included in the name strings.
129+
* @return 1 All provider names are included in the name strings.
130+
*
131+
*/
132+
OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list,
133+
char **list);
134+
103135
/**
104136
* Selects NIC (provider) based on hardware locality
105137
*

0 commit comments

Comments
 (0)