Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# EasyBuild easyconfig for UCC-CUDA 1.1.0: builds the CUDA-specific UCC
# components (see buildopts/installopts below) as a separate module that sits
# on top of an existing UCC installation (see dependencies).
easyblock = 'ConfigureMake'

name = 'UCC-CUDA'
version = '1.1.0'
# %(cudaver)s is an EasyBuild template; it is expected to resolve to the
# version of the CUDA dependency listed below.
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://www.openucx.org/'
description = """UCC (Unified Collective Communication) is a collective
communication operations API and library that is flexible, complete, and
feature-rich for current and emerging programming models and runtimes.

This module adds the UCC CUDA support.
"""

toolchain = {'name': 'GCCcore', 'version': '12.2.0'}
toolchainopts = {'pic': True}

# Same upstream source tarball as the plain UCC easyconfig; only parts of it
# are built here.
source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
sources = ['v%(version)s.tar.gz']
# NOTE: the first patch name is pinned to 1.0.0 (it is reused unchanged from
# the 1.0.0 easyconfig), while the second one follows %(version)s.
patches = [
'%(name)s-1.0.0_link_against_existing_UCC_libs.patch',
'%(name)s-%(version)s_cuda_12_mem_ops.patch',
]
# One {filename: checksum} entry per source/patch, in the same order as above.
checksums = [
{'v1.1.0.tar.gz': '74c8ba75037b5bd88cb703e8c8ae55639af3fecfd4428912a433c010c97b4df7'},
{'UCC-CUDA-1.0.0_link_against_existing_UCC_libs.patch':
'9fa11cf6779174f4e9048df5812096e4261e1769d465cc7f34a6354398876856'},
{'UCC-CUDA-1.1.0_cuda_12_mem_ops.patch': 'fc3ea1487d29dc626db2363ef5a79e7f0906f6a7507a363fa6167a812b143eb6'},
]

# Autotools is needed because the build starts from ./autogen.sh
# (see preconfigopts).
builddependencies = [
('binutils', '2.39'),
('Autotools', '20220317'),
]

# UCC itself is a runtime dependency with the same version as this module.
# CUDA is taken from the SYSTEM toolchain; the CUDA-enabled UCX and NCCL
# builds carry the matching -CUDA-<version> suffix.
dependencies = [
('UCC', '1.1.0'),
('CUDA', '12.0.0', '', SYSTEM),
('UCX-CUDA', '1.13.1', '-CUDA-%(cudaver)s'),
('NCCL', '2.16.2', '-CUDA-%(cudaver)s'),
]

# Prefix for the configure command: generate the configure script first.
preconfigopts = "./autogen.sh && "

# Only two component directories are built and installed. The string is
# appended to the easyblock's make command, so the embedded "&& make ..."
# chains a second make invocation for the NCCL component.
# NOTE(review): this relies on ConfigureMake passing buildopts/installopts
# through a shell verbatim - confirm if the easyblock changes.
buildopts = '-C src/components/mc/cuda V=1 && make -C src/components/tl/nccl V=1'
installopts = '-C src/components/mc/cuda && make -C src/components/tl/nccl install'

# The two shared libraries produced by the make invocations above.
sanity_check_paths = {
'files': ['lib/ucc/libucc_mc_cuda.%s' % SHLIB_EXT, 'lib/ucc/libucc_tl_nccl.%s' % SHLIB_EXT],
'dirs': ['lib']
}

# NOTE(review): ucc_info is not built by this easyconfig; presumably it comes
# from the UCC dependency - confirm.
sanity_check_commands = ["ucc_info -c"]

# Adds this installation's lib/ucc directory to EB_UCC_EXTRA_COMPONENT_PATH in
# the generated module. That variable is read via getenv() by a UCC build
# patched with UCC-1.1.0-multiple_component_paths.patch.
modextrapaths = {'EB_UCC_EXTRA_COMPONENT_PATH': 'lib/ucc'}

moduleclass = 'lib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
Backported fix for CUDA 12 https://github.com/openucx/ucc/pull/700
Essentially just compiles out the deprecated checks for CUDA MEM OPS when building with CUDA 12 or newer, where MEM OPS are treated as always available
author: [email protected]
--- src/components/ec/cuda/ec_cuda.c.orig 2023-02-02 18:44:36.085221084 +0000
+++ src/components/ec/cuda/ec_cuda.c 2023-02-02 18:47:23.726819030 +0000
@@ -205,11 +205,10 @@
{
ucc_ec_cuda_config_t *cfg = EC_CUDA_CONFIG;
ucc_status_t status;
- int device, num_devices, attr;
+ int device, num_devices;
CUdevice cu_dev;
CUresult cu_st;
cudaError_t cuda_st;
- const char *cu_err_st_str;

ucc_ec_cuda.stream = NULL;
ucc_ec_cuda.stream_initialized = 0;
@@ -272,9 +271,14 @@
} else {
ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_MEM_OPS;
ucc_ec_cuda.post_strm_task = ucc_ec_cuda_post_driver_stream_task;
+#if CUDA_VERSION < 12000
+ CUresult cu_st;
+ CUdevice cu_dev;
+ int attr;

cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st != CUDA_SUCCESS){
+ const char *cu_err_st_str;
cuGetErrorString(cu_st, &cu_err_st_str);
ec_debug(&ucc_ec_cuda.super, "cuCtxGetDevice() failed: %s",
cu_err_st_str);
@@ -297,6 +301,7 @@
"CUDA MEM OPS are not supported or disabled");
return UCC_ERR_NOT_SUPPORTED;
}
+#endif
}
ucc_ec_cuda.task_strm_type = cfg->task_strm_type;
ucc_spinlock_init(&ucc_ec_cuda.init_spinlock, 0);
--- src/components/tl/nccl/tl_nccl_context.c.orig 2023-02-03 15:17:09.358881676 +0000
+++ src/components/tl/nccl/tl_nccl_context.c 2023-02-03 17:04:31.680185749 +0000
@@ -101,13 +101,14 @@
ucc_derived_of(config, ucc_tl_nccl_context_config_t);
int mem_ops_attr = 0;
ucc_status_t status;
- CUresult cu_st;
- CUdevice cu_dev;

UCC_CLASS_CALL_SUPER_INIT(ucc_tl_context_t, &tl_nccl_config->super,
params->context);
memcpy(&self->cfg, tl_nccl_config, sizeof(*tl_nccl_config));
if (self->cfg.sync_type != UCC_TL_NCCL_COMPLETION_SYNC_TYPE_EVENT) {
+#if CUDA_VERSION < 12000
+ CUresult cu_st;
+ CUdevice cu_dev;
cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st == CUDA_SUCCESS) {
cu_st = cuDeviceGetAttribute(&mem_ops_attr,
@@ -116,6 +117,9 @@
} else {
tl_info(self->super.super.lib, "failed to get cuda device");
}
+#else
+ mem_ops_attr = 1;
+#endif
if (mem_ops_attr == 0) {
if (self->cfg.sync_type == UCC_TL_NCCL_COMPLETION_SYNC_TYPE_MEMOPS) {
tl_error(self->super.super.lib, "memops not supported");
--- config/m4/cuda.m4.orig 2023-02-03 17:04:44.367155175 +0000
+++ config/m4/cuda.m4 2023-02-03 17:06:26.110909987 +0000
@@ -15,6 +15,11 @@
ARCH10="-gencode=arch=compute_75,code=sm_75"
ARCH11="-gencode=arch=compute_80,code=sm_80 \
-gencode=arch=compute_80,code=compute_80"
+ARCH111="-gencode=arch=compute_86,code=sm_86 \
+-gencode=arch=compute_86,code=compute_86"
+ARCH120="-gencode=arch=compute_90,code=sm_90 \
+-gencode=arch=compute_90,code=compute_90"
+

AC_DEFUN([CHECK_CUDA],[
AS_IF([test "x$cuda_checked" != "xyes"],
@@ -104,8 +109,12 @@
[NVCC_CFLAGS="$NVCC_CFLAGS -O3 -g -DNDEBUG"])
AS_IF([test "x$cuda_happy" = "xyes"],
[AS_IF([test "x$with_nvcc_gencode" = "xdefault"],
- [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
- [NVCC_ARCH="${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11}"])],
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 12],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111} ${ARCH120}"],
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
+ [AS_IF([test $CUDA_MINOR_VERSION -lt 1],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])])],
[NVCC_ARCH="$with_nvcc_gencode"])
AC_SUBST([NVCC_ARCH], ["$NVCC_ARCH"])])
LDFLAGS="$save_LDFLAGS"
6 changes: 5 additions & 1 deletion easybuild/easyconfigs/u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ toolchainopts = {'pic': True}

source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
sources = ['v%(version)s.tar.gz']
checksums = ['74c8ba75037b5bd88cb703e8c8ae55639af3fecfd4428912a433c010c97b4df7']
patches = ['UCC-%(version)s-multiple_component_paths.patch']
checksums = [
{'v1.1.0.tar.gz': '74c8ba75037b5bd88cb703e8c8ae55639af3fecfd4428912a433c010c97b4df7'},
{'UCC-1.1.0-multiple_component_paths.patch': '3081d0f694331daa4a88a0fa3fb54b9a918015248ae5eb7b3157b924abd31bee'},
]

builddependencies = [
('binutils', '2.39'),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
Adds support for multiple extra component paths to UCC.
This was necessary because upstream broke the logic we relied on for UCC_COMPONENT_PATH in 1.1.0,
and then removed it completely in 1.2.0.
This patch also removes the need to copy over all the other plugins, thus supporting multiple external components.

author: [email protected]

diff -ru ucc-1.1.0.orig/src/utils/ucc_component.c ucc-1.1.0.fixed/src/utils/ucc_component.c
--- ucc-1.1.0.orig/src/utils/ucc_component.c 2023-04-06 15:16:18.341729973 +0200
+++ ucc-1.1.0.fixed/src/utils/ucc_component.c 2023-04-08 13:42:28.448319486 +0200
@@ -128,6 +128,27 @@
return UCC_ERR_INVALID_PARAM;
}

+ char *extra_component_path = getenv("EB_UCC_EXTRA_COMPONENT_PATH");
+ if (extra_component_path) {
+ // Add extra room for extra paths, braces and comma (+3):
+ pattern_size =
+ strlen(ucc_global_config.component_path) + strlen(framework_name) + 16 + strlen(extra_component_path) + 3;
+ full_pattern = (char *)ucc_malloc(pattern_size, "full_pattern");
+ if (!full_pattern) {
+ ucc_error("failed to allocate %zd bytes for full_pattern",
+ pattern_size);
+ return UCC_ERR_NO_MEMORY;
+ }
+ ucc_snprintf_safe(full_pattern, pattern_size, "{%s,%s}/libucc_%s_*.so",
+ extra_component_path, ucc_global_config.component_path, framework_name);
+ // Replace ; typically used in PATHs variables with , for glob
+ char *current_pos = strchr(full_pattern, ';');
+ while (current_pos) {
+ *current_pos = ',';
+ current_pos = strchr(current_pos, ';');
+ }
+ glob(full_pattern, GLOB_BRACE, NULL, &globbuf);
+ } else {
pattern_size =
strlen(ucc_global_config.component_path) + strlen(framework_name) + 16;
full_pattern = (char *)ucc_malloc(pattern_size, "full_pattern");
@@ -139,6 +160,7 @@
ucc_snprintf_safe(full_pattern, pattern_size, "%s/libucc_%s_*.so",
ucc_global_config.component_path, framework_name);
glob(full_pattern, 0, NULL, &globbuf);
+ }
ucc_free(full_pattern);
n_loaded = 0;