Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -442,34 +442,64 @@ GidNetworkState RdmaContext::findBestGidIndex(const std::string &device_name,
gid_index = -1;
int i;
struct ibv_gid_entry gid_entry;
bool fallback_found = false;
int fallback_ipv4_gid_without_network = -1;
int fallback_ipv6_gid_with_network = -1;
int fallback_ipv6_gid_without_network = -1;
GidNetworkState state = GidNetworkState::GID_NOT_FOUND;

for (i = 0; i < port_attr.gid_tbl_len; i++) {
if (ibv_query_gid_ex(context, port, i, &gid_entry, 0)) {
PLOG(ERROR) << "Failed to query GID " << i << " on " << device_name
<< "/" << port;
continue; // if gid is invalid ibv_query_gid_ex() will return !0
// Reached end of valid GID indices
break;
}

if (gid_entry.gid_type != IBV_GID_TYPE_ROCE_V2 &&
gid_entry.gid_type != IBV_GID_TYPE_IB) {
continue;
}

if ((ipv6_addr_v4mapped((struct in6_addr *)gid_entry.gid.raw) &&
gid_entry.gid_type == IBV_GID_TYPE_ROCE_V2) ||
gid_entry.gid_type == IBV_GID_TYPE_IB) {
// Check if this GID has an associated network device
if (hasNetworkDevice(device_name, port, i)) {
// Found a GID with network device, this is the best choice
const bool is_ipv4_gid =
gid_entry.gid_type == IBV_GID_TYPE_ROCE_V2 &&
ipv6_addr_v4mapped((struct in6_addr *)gid_entry.gid.raw);
const bool has_network_device = hasNetworkDevice(device_name, port, i);

if (is_ipv4_gid) {
if (has_network_device) {
gid_index = i;
state = GidNetworkState::GID_WITH_NETWORK;
break;
return GidNetworkState::GID_WITH_NETWORK;
}
// No network device, keep the first one as fallback candidate
if (!fallback_found) {
if (fallback_ipv4_gid_without_network < 0) {
gid_index = i;
fallback_found = true;
fallback_ipv4_gid_without_network = i;
state = GidNetworkState::GID_WITHOUT_NETWORK;
}
continue;
}

if (has_network_device && fallback_ipv6_gid_with_network < 0) {
fallback_ipv6_gid_with_network = i;
}

if (!has_network_device && fallback_ipv6_gid_without_network < 0) {
fallback_ipv6_gid_without_network = i;
}
}

if (fallback_ipv4_gid_without_network >= 0) {
gid_index = fallback_ipv4_gid_without_network;
return GidNetworkState::GID_WITHOUT_NETWORK;
}

if (fallback_ipv6_gid_with_network >= 0) {
gid_index = fallback_ipv6_gid_with_network;
return GidNetworkState::GID_WITH_NETWORK;
}

if (fallback_ipv6_gid_without_network >= 0) {
gid_index = fallback_ipv6_gid_without_network;
return GidNetworkState::GID_WITHOUT_NETWORK;
}

return state;
}

Expand Down Expand Up @@ -584,16 +614,16 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
}
} else {
// Also check network state for user-specified GID
if (!hasNetworkDevice(device_name, port, gid_index)) {
bool has_ndev = hasNetworkDevice(device_name, port, gid_index);
if (!has_ndev) {
LOG(WARNING) << "User-specified GID index " << gid_index
<< " on " << device_name << "/" << port
<< " has no associated network device, "
<< "may not be optimal for RDMA operations";
goto cleanup_context_and_devices;
}
LOG(INFO) << "Using user-specified GID index: " << gid_index
<< " on " << device_name << "/" << port
<< " (with network device)";
<< " on " << device_name << "/" << port << " ("
<< (has_ndev ? "with" : "without") << " network device)";
}

// Continue with GID validation
Expand Down
Loading