Skip to content

ofi: follow user specified include/exclude list to select providers #12234

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ompi/mca/mtl/ofi/mtl_ofi_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -764,12 +764,20 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
"%s:%d: fi_getinfo(): %s\n",
__FILE__, __LINE__, fi_strerror(-ret));

if (FI_ENODATA == -ret) {
if ((FI_ENODATA == -ret)
|| (0 == ret && include_list
&& 0 == opal_common_ofi_count_providers_in_list(providers, include_list))
|| (0 == ret && !include_list && exclude_list
&& opal_common_ofi_providers_subset_of_list(providers, exclude_list))) {
#if defined(FI_HMEM)
/* Attempt selecting a provider without FI_HMEM hints */
if (hints->caps & FI_HMEM) {
hints->caps &= ~FI_HMEM;
hints->domain_attr->mr_mode &= ~FI_MR_HMEM;
if (providers) {
(void) fi_freeinfo(providers);
providers = NULL;
}
goto no_hmem;
}
#endif
Expand Down
51 changes: 35 additions & 16 deletions opal/mca/btl/ofi/btl_ofi_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
#include "btl_ofi_frag.h"
#include "btl_ofi_rdma.h"

#define MCA_BTL_OFI_ONE_SIDED_REQUIRED_CAPS (FI_RMA | FI_ATOMIC)
#define MCA_BTL_OFI_TWO_SIDED_REQUIRED_CAPS (FI_MSG)
#define MCA_BTL_OFI_ONE_SIDED_REQUIRED_CAPS (FI_RMA | FI_ATOMIC | FI_REMOTE_COMM)
#define MCA_BTL_OFI_TWO_SIDED_REQUIRED_CAPS (FI_MSG | FI_REMOTE_COMM)

#define MCA_BTL_OFI_REQUESTED_MR_MODE (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR | FI_MR_ENDPOINT)

Expand Down Expand Up @@ -260,7 +260,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
int rc;
uint64_t progress_mode;
unsigned resource_count = 0;
struct mca_btl_base_module_t **base_modules;
struct mca_btl_base_module_t **base_modules = NULL;
char **include_list = NULL, **exclude_list = NULL;

BTL_VERBOSE(("initializing ofi btl"));
Expand All @@ -275,7 +275,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
return NULL;
}

struct fi_info *info, *info_list, *selected_info;
struct fi_info *info, *info_list = NULL, *selected_info = NULL;
struct fi_info hints = {0};
struct fi_ep_attr ep_attr = {0};
struct fi_rx_attr rx_attr = {0};
Expand Down Expand Up @@ -366,22 +366,31 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
* The earliest version the explictly allow provider to call CUDA API is 1.18 */
rc = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, &hints, &info_list);
if (FI_ENOSYS == -rc) {
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
}
if (0 != rc) {

if ((FI_ENODATA == -rc)
|| (0 == rc && include_list
&& 0 == opal_common_ofi_count_providers_in_list(info_list, include_list))
|| (0 == rc && !include_list && exclude_list
&& opal_common_ofi_providers_subset_of_list(info_list, exclude_list))) {
#if defined(FI_HMEM)
/* Attempt selecting a provider without FI_HMEM hints */
if (hints.caps & FI_HMEM) {
/* Try again without FI_HMEM hints */
hints.caps &= ~FI_HMEM;
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
if (info_list) {
(void) fi_freeinfo(info_list);
info_list = NULL;
}
goto no_hmem;
}
#endif
/* It is not an error if no information is returned. */
goto out;
} else if (0 != rc) {
BTL_VERBOSE(("fi_getinfo failed with code %d: %s", rc, fi_strerror(-rc)));
if (NULL != include_list) {
opal_argv_free(include_list);
}
return NULL;
goto out;
}

#if defined(FI_HMEM)
Expand Down Expand Up @@ -441,16 +450,15 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
info = info->next;
}

/* We are done with the returned info. */
fi_freeinfo(info_list);
if (NULL != include_list) {
opal_argv_free(include_list);
if (NULL == info) {
BTL_VERBOSE(("No provider is selected"));
goto out;
}

/* pass module array back to caller */
base_modules = calloc(mca_btl_ofi_component.module_count, sizeof(*base_modules));
if (NULL == base_modules) {
return NULL;
goto out;
}

memcpy(base_modules, mca_btl_ofi_component.modules,
Expand All @@ -461,6 +469,17 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,

*num_btl_modules = mca_btl_ofi_component.module_count;

out:
if (include_list) {
opal_argv_free(include_list);
}
if (exclude_list) {
opal_argv_free(exclude_list);
}
if (info_list) {
(void) fi_freeinfo(info_list);
}

return base_modules;
}

Expand Down
54 changes: 47 additions & 7 deletions opal/mca/common/ofi/common_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ static int opal_common_ofi_init_ref_cnt = 0;
static bool opal_common_ofi_installed_memory_monitor = false;
#endif

/* Count providers returns the number of providers present in an fi_info list
* @param (IN) provider_list struct fi_info* list of providers available
*
* @param (OUT) int number of providers present in the list
*
* returns 0 if the list is NULL
*/
static int count_providers(struct fi_info *provider_list);

#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR

/*
Expand Down Expand Up @@ -272,6 +281,44 @@ int opal_common_ofi_is_in_list(char **list, char *item)
return 0;
}

int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list, char **list)
{
int count = 0, matched = 0;
struct fi_info *prov = provider_list, *prev_prov = NULL;
char *name;

while (prov) {
name = prov->fabric_attr->prov_name;
if (prev_prov && !strncasecmp(prev_prov->fabric_attr->prov_name, name, strlen(name))) {
/**
* Providers are usually sorted by name. We can reuse the previous matching result and
* avoid the potentially expensive list traversal.
*/
count += matched;
} else if (opal_common_ofi_is_in_list(list, prov->fabric_attr->prov_name)) {
matched = 1;
++count;
} else {
matched = 0;
}
prev_prov = prov;
prov = prov->next;
}

return count;
}

int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list, char **list)
{
int num_prov = count_providers(provider_list);

if (!num_prov) {
return 1;
}

return num_prov == opal_common_ofi_count_providers_in_list(provider_list, list);
}

int opal_common_ofi_mca_register(const mca_base_component_t *component)
{
static int include_index = -1;
Expand Down Expand Up @@ -737,13 +784,6 @@ static struct fi_info *select_provider_round_robin(struct fi_info *provider_list
return current_provider;
}

/* Count providers returns the number of providers present in an fi_info list
* @param (IN) provider_list struct fi_info* list of providers available
*
* @param (OUT) int number of providers present in the list
*
* returns 0 if the list is NULL
*/
static int count_providers(struct fi_info *provider_list)
{
struct fi_info *dev = provider_list;
Expand Down
32 changes: 32 additions & 0 deletions opal/mca/common/ofi/common_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,38 @@ OPAL_DECLSPEC int opal_common_ofi_export_memory_monitor(void);
*/
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);

/**
* Get the number of providers whose names are included in a list
*
* This function takes a list of providers and a list of name strings
* as inputs, and return the number of providers whose names are included
* in the name strings.
*
* @param provider_list (IN) List of providers
* @param list (IN) List of name string
*
* @return Number of matched providers
*
*/
OPAL_DECLSPEC int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list,
char **list);

/**
* Determine whether all providers are included in a list
*
* This function takes a list of providers and a list of name strings
* as inputs, and return whether all provider names are included in the name strings.
*
* @param provider_list (IN) List of providers
* @param list (IN) List of name string
*
* @return 0 At least one provider's name is not included in the name strings.
* @return 1 All provider names are included in the name strings.
*
*/
OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list,
char **list);

/**
* Selects NIC (provider) based on hardware locality
*
Expand Down