diff --git a/.gitignore b/.gitignore index a399e1d70d9..00433ecefaf 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ *.exe *.log *.trs +*.sapp *~ *\\# @@ -300,10 +301,13 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h opal/mca/installdirs/config/install_dirs.h -opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h -opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h -opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h -opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h.in +opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h +opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h +opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h.in +opal/mca/pmix/pmix120/pmix/include/pmix/autogen/config.h +opal/mca/pmix/pmix120/pmix/include/private/autogen/config.h +opal/mca/pmix/pmix120/pmix/include/private/autogen/config.h.in + opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 @@ -364,6 +368,7 @@ orte/test/mpi/parallel_w64 orte/test/mpi/pubsub orte/test/mpi/read_write orte/test/mpi/reduce-hang +orte/test/mpi/ring orte/test/mpi/segv orte/test/mpi/simple_spawn orte/test/mpi/slave @@ -590,6 +595,8 @@ test/event/signal-test test/event/event-test test/event/time-test +test/monitoring/monitoring_test + test/mpi/environment/chello test/runtime/parse_context diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000000..3a19ac9dc5b --- /dev/null +++ b/.travis.yml @@ -0,0 +1,81 @@ +# Use "required" for sudo, because we want to use the "trusty" Debian +# distro, which is (currently) only available in the legacy Travis +# infrastructure (i.e., if we put "sudo: false" to use the new container- +# based Travis infrastructure, then "trusty" is not available). We +# need the "trusty" distro because it has more recent versions of the +# GNU Autotools (i.e., autogen.pl will fail if you use the regular +# distro because the GNU Autotools are too old). 
+sudo: required +dist: trusty +language: c + +# Iterate over 2 different compilers +compiler: + - gcc + - clang + +# Iterate over 2 different OSs +os: + - linux + - osx + +addons: + # For Linux, make sure we have some extra packages that we like to + # build with + apt: + packages: + - autoconf + - automake + - libtool + - libnl-3-200 + - libnl-3-dev + - libnl-route-3-200 + - libnl-route-3-dev + - libibverbs-dev + - librdmacm-dev + sources: + - ubuntu-toolchain-r-test + +env: + global: + - AM_MAKEFLAGS="-j4" + - CPPFLAGS="-I$HOME/bogus/include" + - LDFLAGS="-L$HOME/bogus/lib" + - LD_LIBRARY_PATH="$HOME/bogus/lib" + matrix: + - GCC_VERSION=default + - GCC_VERSION=5 + +# Install dependencies for the verbs and usnic providers. Open MPI is +# not currently using the verbs provider in Libfabric, so we might as +# well not build it. +before_install: + - if [[ "$GCC_VERSION" == "5" ]]; then COMPILERS="CC=gcc-5 CXX=g++-5 FC=gfortran-5"; fi + - export CONFIGURE_ARGS="--prefix=$HOME/bogus $COMPILERS" DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" + - export DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then git clone https://github.com/ofiwg/libfabric.git ; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$GCC_VERSION" == "5" ]] ; then sudo apt-get --assume-yes install gcc-5 g++-5 gfortran-5; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then cd libfabric && ./autogen.sh && ./configure --prefix=$HOME/bogus --enable-usnic --disable-verbs $COMPILERS && make install && cd .. ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$GCC_VERSION" == "5" ]] ; then brew update; brew unlink gcc ; brew install gcc; fi + +# Note that we use "make -k" to do the entire build, even if there was a +# build error in there somewhere. This prevents us from needing to submit +# to Travis, see the first error, fix that first error, submit again, ...etc. 
+install: + - m4 --version + - autoconf --version + - automake --version + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then libtool --version; else glibtool --version; fi + - ./autogen.pl + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then ./configure $CONFIGURE_ARGS --with-libfabric=$HOME/bogus --with-usnic --with-verbs; else ./configure $CONFIGURE_ARGS; fi + - make -k + +# We only need to distcheck on one OS / compiler combination (this is just +# a minor optimization to make the overall set of builds faster). +script: + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then make distcheck; else make check; fi + +matrix: + exclude: + - env: GCC_VERSION=5 + compiler: clang diff --git a/AUTHORS b/AUTHORS index b2ef110ec9b..309526e7d93 100644 --- a/AUTHORS +++ b/AUTHORS @@ -40,7 +40,7 @@ ethan.mallove@oracle.com Ethan Mallove Sun, Oracle eugene.loh@oracle.com Eugene Loh Sun, Oracle gabriel@cs.uh.edu Edgar Gabriel HLRS, UH, UTK gef@icl.utk.edu Graham Fagg UTK -gilles.gouaillardet@iferc.org Gilles Gouaillardet RIST +gilles@rist.or.jp Gilles Gouaillardet RIST gingery@lanl.gov Ginger Young LANL gleb@voltaire.com Gleb Natapov Voltaire gshipman@lanl.gov Galen Shipman LANL diff --git a/NEWS b/NEWS index 28ba5005b94..ec800b4c4ec 100644 --- a/NEWS +++ b/NEWS @@ -67,6 +67,161 @@ Master (not on release branches yet) Please consider Score-P (score-p.org) as an external replacement. +2.0.0 +------ + + ********************************************************************** + * Open MPI is now fully MPI-3.1 compliant + ********************************************************************** + + +- Enhancements to reduce the memory footprint for jobs at scale. + A new MCA parameter - mpi_add_procs_cutoff is available to set + the threshold for using this feature. +- Enhancements to MPI RMA. Open MPI now maps MPI RMA operations on + to native RMA operations for those networks which support this + capability. 
+- Add support for PMIx - Process Management Interface for Exascale. + Version 1.1.2 of PMIx is included internally in this release. +- Add support for PLFS file systems in Open MPI I/O. +- Add support for UCX transport. +- Improved support for MPI_THREAD_MULTIPLE. +- Update Java interfaces to MPI-3.1 +- Simplify build process for Cray XC systems. Add support for + using native SLURM. +- Update the internal copy of ROMIO to that which shipped in MPICH + 3.1.4. +- Update internal copy of libevent to v2.0.22. +- Update internal copy of hwloc to v1.11.1. +- Removed checkpoint/restart code due to loss of maintainer. :-( +- Add a new MCA parameter - opal_common_verbs_want_fork_support. This + replaces the btl_openib_want_fork_support parameter. +- Add a -tune command line option to simplify setting many environment + variables and mca parameters. +- Removed --enable-mpi-profiling configure option. +- Add --with-platform-patches-dir configure option. +- Add a new mca parameter default-dash-to-host to offer a mirror of + the default_hostfile. +- Add a --with-pmi-libdir configure option for environments that install + PMI libs in a non-default location. +- Add the ability to specify the number of desired slots in the --host + option. +- Remove support for OS X Leopard. +- Remove support for Cray XT systems. +- Remove VampirTrace. +- Remove support for Myrinet/MX. +- Remove legacy collective module:ML. +- Remove support for Alpha processors. +- Numerous fixes/improvements to orte-dvm. Special thanks to Mark + Santcroos for his help. +- Silence a compiler warning in PSM MTL. Thanks to Adrian Reber for + reporting this. +- Add missing Fortran bindings for MPI_Win_allocate. Thanks to Christoph + Niethammer for reporting and fixing. +- Replace use of alloca with malloc for certain datatype creation + functions. Thanks to Bogdan Sataric for reporting this. +- Fix use of MPI_LB and MPI_UB in creation of certain MPI datatypes. + Thanks to Gus Correa for helping to fix this. 
+- Implement a workaround for a libtool problem. Thanks to Eric + Schnetter for reporting and fixing. +- Improve hcoll library detection in configure. Thanks to David + Shrader and Ake Sandgren for reporting this. +- Fix problems with XRC detection in OFED 3.12 and older releases. + Thanks to Paul Hargrove for his analysis of this problem. +- Fix an issue with MCA parameters for Java bindings. Thanks to + Takahiro Kawashima and Siegmar Gross for reporting this issue. +- Fix a performance problem for large messages for Cray XC systems. + Thanks to Jerome Vienne for reporting this. +- Fix an issue with MPI_Win_lock_all. Thanks to Thomas Jahns for + reporting. +- Fix an issue with passing a parameter to configure multiple times. + Thanks to QuesarVII for reporting and supplying a fix. +- Add support for ALPS resource allocation system on Cray CLE 5.2 and + later. Thanks to Mark Santcroos. +- Correction to HACKING file. Thanks to Maximilien Levesque. +- Fix an issue with user supplied reduction operator functions. + Thanks to Rupert Nash for reporting this. +- Fix an issue with an internal list management function. Thanks to + Adrian Reber for reporting this. +- Fix a problem with MPI-RMA PSCW epochs. Thanks to Berk Hess for + reporting this. +- Fix a problem in neighbor collectives. Thanks to Lisandro Dalcin + for reporting. +- Fix a problem when using persistent requests in the Java bindings. + Thanks to Nate Chambers for reporting. +- Add additional flavors to MPI_Comm_split_type. Thanks to Nick + Andersen for supplying this enhancement. +- Improve closing of file descriptors during job launch phase. Thanks + to Piotr Lesnicki for reporting and providing this enhancement. +- Fix a problem in MPI_Get_accumulate and MPI_Rget_accumulate when + using Portals4. Thanks to Nicolas Chevalier for reporting. +- Use correct include file for lstat prototype in ROMIO. Thanks to + William Throwe for finding and providing a fix. 
+- Fortran related fixes to handle Intel 2016 compiler. Thanks to + Fabrice Roy for reporting this. +- Fix an issue with use of DL-related macros in opal library. Thanks to + Scott Atchley for finding this. +- Fix a Fortran linkage issue. Thanks to Marco Atzeri for finding and + suggesting a fix. +- Fix an issue with parsing mpirun command line options which contain + colons. Thanks to Lev Given for reporting. +- Fix a problem with Open MPI's package config files. Thanks to + Christoph Junghans for reporting. +- Fix a typo in the MPI_Intercomm_merge man page. Thanks to Harald + Servat for reporting and correcting. +- Update man pages for non-blocking sends per MPI 3.1 standard. + Thanks to Alexander Pozdneev for reporting. + + +1.10.2 +------ + + ********************************************************************** + * OSHMEM is now 1.2 compliant + ********************************************************************** + +- Fix NBC_Copy for legitimate zero-size messages +- Fix multiple bugs in OSHMEM +- Correctly handle mpirun --host @ +- Centralize two MCA params to avoid duplication between OMPI and + OSHMEM layers: opal_abort_delay and opal_abort_print_stack +- Add support for Fujitsu compilers +- Add UCX support for OMPI and OSHMEM +- Correctly handle oversubscription when not given directives + to permit it. Thanks to @ammore1 for reporting it +- Fix rpm spec file to not include the /usr directory +- Add Intel HFI1 default parameters for the openib BTL +- Resolve symbol conflicts in the PSM2 library +- Add ability to empty the rgpusm cache when full if requested +- Fix another libtool bug when -L requires a space between it + and the path. Thanks to Eric Schnetter for the patch. +- Add support for OSHMEM v1.2 APIs +- Improve efficiency of oshmem_preconnect_all algorithm +- Fix bug in buffered sends support +- Fix double free in edge case of mpirun. 
Thanks to @jsharpe for + the patch +- Multiple one-sided support fixes +- Fix integer overflow in the tuned "reduce" collective when + using buffers larger than INT_MAX in size +- Fix parse of user environment variables in mpirun. Thanks to + Stefano Garzarella for the patch +- Performance improvements in PSM2 support +- Fix NBS iBarrier for inter-communicators +- Fix bug in vader BTL during finalize +- Improved configure support for Fortran compilers +- Fix rank_file mapper to support default --slot-set. Thanks + to Matt Thompson for reporting it +- Update MPI_Testsome man page. Thanks to Eric Schnetter for + the suggestion +- Fix missing resize of the returned type for subarray and + darray types. Thanks to Keith Bennett and Dan Garmann for + reporting it +- Fix Java support on OSX 10.11. Thanks to Alexander Daryin + for reporting the problem +- Fix some compilation issues on Solaris 11.2. Thanks to + Paul Hargrove for his continued help in such areas + + 1.10.1 ------ @@ -118,6 +273,12 @@ Master (not on release branches yet) to our attention. - Fix linking issues on some platforms (e.g., SLES 12). - Fix hang on some corner cases when MPI applications abort. +- Add missing options to mpirun man page. Thanks to Daniel Letai + for bringing this to our attention. +- Add new --with-platform-patches-dir configure option +- Adjust relative selection priorities to ensure that MTL + support is favored over BTL support when both are available +- Use CUDA IPC for all sized messages for performance 1.10.0 @@ -126,7 +287,11 @@ Master (not on release branches yet) ** version numbering scheme. The v1.10.x release series is based on ** the v1.8.x series, but with a few new features. v2.x will be the ** next series after the v1.10.x series, and complete the transition -** to the new version numbering scheme. See README for more details. +** to the new version numbering scheme. See README for more details +** on the new versioning scheme. 
+** +** NOTE: In accordance with OMPI version numbering, the v1.10 is *not* +** API compatible with the v1.8 release series. - Added libfabric support (see README for more details): - usNIC BTL updated to use libfabric. diff --git a/README b/README index 6883d1f6e6b..5b0bd929efb 100644 --- a/README +++ b/README @@ -8,7 +8,7 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2007 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2007 Myricom, Inc. All rights reserved. @@ -952,6 +952,17 @@ NETWORKING SUPPORT / OPTIONS which covers most cases. This option is only needed for special configurations. +--with-verbs-usnic + This option will activate support in Open MPI for disabling a + dire-sounding warning message from libibverbs that Cisco usNIC + devices are not supported (because Cisco usNIC devices are supported + through libfabric, not libibverbs). This libibverbs warning can + also be suppressed by installing the "no op" libusnic_verbs plugin + for libibverbs (see https://github.com/cisco/libusnic_verbs, or + download binaries from cisco.com). This option is disabled by + default because it causes libopen-pal.so to depend on libibverbs.so, + which is undesirable to many downstream packagers. + --with-usnic Abort configure if Cisco usNIC support cannot be built. 
@@ -1561,7 +1572,6 @@ Here's how we apply those rules specifically to Open MPI: * libmpi_cxx * libmpi_java * liboshmem - * liboshmem_java =========================================================================== diff --git a/VERSION b/VERSION index 4b7b0735d2c..d99e1439457 100644 --- a/VERSION +++ b/VERSION @@ -91,7 +91,6 @@ libopen_rte_so_version=0:0:0 libopen_pal_so_version=0:0:0 libmpi_java_so_version=0:0:0 liboshmem_so_version=0:0:0 -liboshmem_java_so_version=0:0:0 # "Common" components install standalone libraries that are run-time # linked by one or more components. So they need to be versioned as @@ -99,14 +98,13 @@ liboshmem_java_so_version=0:0:0 # components-don't-affect-the-build-system abstraction. # OMPI layer -libmca_common_cuda_so_version=0:0:0 -libmca_common_ofacm_so_version=0:0:0 -libmca_common_sm_so_version=0:0:0 -libmca_common_ugni_so_version=0:0:0 -libmca_common_verbs_so_version=0:0:0 + +# ORTE layer +libmca_orte_common_alps_so_version=0:0:0 # OPAL layer +libmca_opal_common_cuda_so_version=0:0:0 libmca_opal_common_libfabric_so_version=0:0:0 - -# ORTE layer -libmca_common_alps_so_version=0:0:0 +libmca_opal_common_sm_so_version=0:0:0 +libmca_opal_common_ugni_so_version=0:0:0 +libmca_opal_common_verbs_so_version=0:0:0 diff --git a/autogen.pl b/autogen.pl index ffe034fb7a1..0cedf66e013 100755 --- a/autogen.pl +++ b/autogen.pl @@ -54,6 +54,7 @@ my $platform_arg = 0; my $include_arg = 0; my $exclude_arg = 0; +my $force_arg = 0; # Include/exclude lists my $include_list; @@ -1020,6 +1021,24 @@ sub patch_autotools_output { unlink("configure.patched"); } +sub in_tarball { + my $tarball = 0; + open(IN, "VERSION") || my_die "Can't open VERSION"; + # If repo_rev is not an empty string, we are in a tarball + while (<IN>) { + my $line = $_; + my @fields = split(/=/,$line); + if ($fields[0] eq "repo_rev") { + if ($fields[1] ne "\n") { + $tarball = 1; + last; + } + } + } + close(IN); + return $tarball; +} + 
############################################################################## ############################################################################## ## main - do the real work... @@ -1037,6 +1056,7 @@ sub patch_autotools_output { "platform=s" => \$platform_arg, "include=s" => \$include_arg, "exclude=s" => \$exclude_arg, + "force|f" => \$force_arg, ); if (!$ok || $help_arg) { @@ -1056,7 +1076,9 @@ sub patch_autotools_output { will be ignored and only those specified will be marked to build --exclude | -e Comma-separated list of framework or framework-component - to be excluded from the build\n"; + to be excluded from the build + --force | -f Run even if invoked from the source tree of an expanded + distribution tarball\n"; my_exit($ok ? 0 : 1); } @@ -1120,6 +1142,11 @@ sub patch_autotools_output { my_die "Not at the root directory of an OMPI source tree" if (! -f "config/opal_try_assemble.m4"); +my_die "autogen.pl has been invoked in the source tree of an Open MPI distribution tarball; aborting... +You likely do not need to invoke \"autogen.pl\" -- you can probably run \"configure\" directly. +If you really know what you are doing, and really need to run autogen.pl, use the \"--force\" flag." + if (!$force_arg && in_tarball()); + # Now that we've verified that we're in the top-level OMPI directory, # set the sentinel file to remove if we abort. $sentinel = Cwd::cwd() . "/configure"; diff --git a/config/find_common_syms b/config/find_common_syms index 0841a4dfe29..0a67ffc4bd2 100755 --- a/config/find_common_syms +++ b/config/find_common_syms @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -69,13 +69,14 @@ if (0 != system("command -v nm >/dev/null 2>&1")) { # subdirectories. 
That way a whitelisted symbol in one component doesn't # "shadow" a symbol that should not be whitelisted in another component. If we # find this is actually a problem in practice then we can write a v2 update. -my @wl_files = `find '${top_srcdir}' -name 'common_sym_whitelist'`; +my @wl_files = `find '${top_srcdir}' -name 'common_sym_whitelist.txt'`; foreach my $wl_file (@wl_files) { chomp $wl_file; my @lines = `cat $wl_file`; foreach my $line (@lines) { chomp $line; - next if ($line =~ m/^\s*#/); # skip comments + next if ($line =~ /^\s*#/); # skip comments + next if ($line =~ /^\s*$/); # skip blank lines push @sym_whitelist, $line; } } @@ -127,7 +128,7 @@ sub is_whitelisted { my $line = shift; foreach my $wl_sym (@sym_whitelist) { - if ($line =~ m/\b\Q$wl_sym\E\b/) { + if ($line =~ m/\b_?\Q$wl_sym\E\b/) { return 1; } } @@ -135,7 +136,7 @@ sub is_whitelisted { # Look for symbol names ending in one or more underscores and assume they # are "Fortran-shaped". This won't match the hex output from most nm's and # shouldn't match the single characters that indicate symbol type. 
- if ($line =~ m/\b[A-Za-z_]+[A-Za-z0-9_]*_+\b/) { + if ($line =~ m/\b_?[A-Za-z_]+[A-Za-z0-9_]*_+\b/) { return 1; } diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index b22a5c4d0ac..d27fe3bf390 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -61,6 +61,30 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ [$ompi_check_lustre_dir], [$ompi_check_lustre_libdir], [ompi_check_lustre_happy="yes"], [ompi_check_lustre_happy="no"]) + AC_MSG_CHECKING([for required lustre data structures]) + cat > conftest.c < - #include ]], - [[PVFS_util_resolve(NULL,NULL,NULL,0);]])], - [AC_MSG_RESULT([yes]) - ompi_check_pvfs2_happy="yes"], - [AC_MSG_RESULT([no]) - ompi_check_pvfs2_happy="no"])], - [ompi_check_pvfs2_happy="no"]) - ]) - - - LDFLAGS="$check_pvfs2_save_LDFLAGS" - CPPFLAGS="$check_pvfs2_save_CPPFLAGS" - LIBS="$check_pvfs2_save_LIBS" AS_IF([test "$ompi_check_pvfs2_happy" = "yes"], [$2], [AS_IF([test ! -z "$with_pvfs2" && test "$with_pvfs2" != "no"], [echo PVFS2 support not found]) $3]) - with_pvfs2="$temp_with_pvfs2" - with_pvfs2_libs="$temp_with_pvfs2_libs" - ]) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 3a7b8035cdc..86db9de3a78 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -1,6 +1,8 @@ # -*- shell-script -*- # -# Copyright (C) Mellanox Technologies Ltd. 2015. ALL RIGHTS RESERVED. +# Copyright (C) 2015 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -28,12 +30,12 @@ AC_DEFUN([OMPI_CHECK_UCX],[ ompi_check_ucx_$1_save_LIBS="$LIBS" AS_IF([test "$with_ucx" != "no"], - [AS_IF([test ! -z "$with_ucx" -a "$with_ucx" != "yes"], + [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "yes"], [ ompi_check_ucx_dir="$with_ucx" ompi_check_ucx_libdir="$with_ucx/lib" ]) - AS_IF([test ! 
-z "$with_ucx_libdir" -a "$with_ucx_libdir" != "yes"], + AS_IF([test ! -z "$with_ucx_libdir" && test "$with_ucx_libdir" != "yes"], [ompi_check_ucx_libdir="$with_ucx_libdir"]) ompi_check_ucx_extra_libs="-L$ompi_check_ucx_libdir" @@ -73,7 +75,7 @@ AC_DEFUN([OMPI_CHECK_UCX],[ AS_IF([test "$ompi_check_ucx_happy" = "yes"], [$2], - [AS_IF([test ! -z "$with_ucx" -a "$with_ucx" != "no"], + [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"], [AC_MSG_ERROR([UCX support requested but not found. Aborting])]) $3]) ]) diff --git a/config/ompi_fortran_check_real16_c_equiv.m4 b/config/ompi_fortran_check_real16_c_equiv.m4 index df32dbdabb4..447d2ce1fe7 100644 --- a/config/ompi_fortran_check_real16_c_equiv.m4 +++ b/config/ompi_fortran_check_real16_c_equiv.m4 @@ -57,7 +57,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ [CFLAGS="$CFLAGS_save" AC_MSG_RESULT([does not work])]) ]) - AS_IF([test "$opal_cv_c_compiler_vendor" = "gnu" -a "$ac_cv_type___float128" = "yes"], + AS_IF([test "$opal_cv_c_compiler_vendor" = "gnu" && test "$ac_cv_type___float128" = "yes"], [AC_MSG_CHECKING([if gnu compiler __float128 == REAL*16]) OPAL_UNIQ([CFLAGS]) OMPI_FORTRAN_CHECK_REAL16_EQUIV_TYPE([__float128], [q]) diff --git a/config/ompi_fortran_check_use_only.m4 b/config/ompi_fortran_check_use_only.m4 index 3a9d419b360..c147167391d 100644 --- a/config/ompi_fortran_check_use_only.m4 +++ b/config/ompi_fortran_check_use_only.m4 @@ -11,6 +11,8 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -32,30 +34,42 @@ dnl [action if not supported]) dnl ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_USE_ONLY],[ AS_VAR_PUSHDEF([use_only_var], [ompi_cv_fortran_use_only]) + OPAL_VAR_SCOPE_PUSH([FCFLAGS_save]) + FCFLAGS_save=$FCFLAGS + FCFLAGS="-I. $FCFLAGS" AC_CACHE_CHECK([if Fortran compiler supports USE...ONLY], use_only_var, [AC_LANG_PUSH([Fortran]) - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[MODULE aaa -INTEGER :: aaa_unique -COMMON/common_to_both/COMMON_NAME_TO_BOTH + cat > aaa.f90 << EOF +MODULE aaa +INTEGER :: CMON(1) +COMMON/CMMON/CMON +INTEGER :: global_aaa END MODULE aaa - +EOF + cat > bbb.f90 << EOF MODULE bbb -INTEGER :: bbb_unique -INTEGER, BIND(C, name="common_to_both_") :: COMMON_NAME_TO_BOTH +integer, bind(C, name="cmmon_") :: CMON +INTEGER :: global_bbb END MODULE bbb - -PROGRAM test_proc - USE :: aaa, ONLY: aaa_unique - USE :: bbb, ONLY: bbb_unique +EOF + OPAL_LOG_COMMAND([$FC $FCFLAGS -c aaa.f90], + [OPAL_LOG_COMMAND([$FC $FCFLAGS -c bbb.f90], + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([[PROGRAM test +USE aaa, ONLY : global_aaa +USE bbb, ONLY : global_bbb +implicit none END PROGRAM]])], - [AS_VAR_SET(use_only_var, yes)], - [AS_VAR_SET(use_only_var, no)]) - touch conftest_foo.mod - rm -rf *.mod 2>/dev/null + [AS_VAR_SET(use_only_var, yes)], + [AS_VAR_SET(use_only_var, no)])], + [AS_VAR_SET(use_only_var, no)])], + [AS_VAR_SET(use_only_var, no)]) + rm -rf aaa.f90 aaa.o bbb.f90 bbb.o *.mod 2>/dev/null AC_LANG_POP([Fortran]) ]) AS_VAR_IF(use_only_var, [yes], [$1], [$2]) + FCFLAGS=$FCFLAGS_save + OPAL_VAR_SCOPE_POP AS_VAR_POPDEF([use_only_var])dnl ]) diff --git a/config/ompi_setup_java.m4 b/config/ompi_setup_java.m4 index bf707a89b17..aeacf40907b 100644 --- a/config/ompi_setup_java.m4 +++ b/config/ompi_setup_java.m4 @@ -74,6 +74,9 @@ AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS],[ # header file needs this file, so we need to check for # it/include it in our sources when compiling 
on Mac). AC_CHECK_HEADERS([TargetConditionals.h]) + + # dladdr and Dl_info are required to build the full path to libmpi on OS X 10.11 aka El Capitan + AC_CHECK_TYPES([Dl_info], [], [], [[#include <dlfcn.h>]]) else AC_MSG_RESULT([no]) WANT_MPI_JAVA_SUPPORT=0 diff --git a/config/opal_check_cray_xpmem.m4 b/config/opal_check_cray_xpmem.m4 index 4dca2f62a98..f489cab3417 100644 --- a/config/opal_check_cray_xpmem.m4 +++ b/config/opal_check_cray_xpmem.m4 @@ -14,7 +14,7 @@ # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science +# Copyright (c) 2014-2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ # @@ -38,7 +38,7 @@ AC_DEFUN([OPAL_CHECK_CRAY_XPMEM],[ AS_IF([test "$with_cray_xpmem" = "no"], [AC_MSG_RESULT([no]) $3], - [AS_IF([test "$with_cray_xpmem" = "auto" -o "$with_cray_xpmem" = "yes"], + [AS_IF([test "$with_cray_xpmem" = "auto" || test "$with_cray_xpmem" = "yes"], [PKG_CHECK_MODULES_STATIC([CRAY_XPMEM], [cray-xpmem], [opal_check_cray_xpmem_happy="yes"], [opal_check_cray_xpmem_happy="no"] @@ -49,7 +49,7 @@ AC_DEFUN([OPAL_CHECK_CRAY_XPMEM],[ []) ]) - AS_IF([test "$opal_check_cray_xpmem_happy" = "yes" -a "$enable_static" = "yes"], + AS_IF([test "$opal_check_cray_xpmem_happy" = "yes" && test "$enable_static" = "yes"], [CRAY_XPMEM_LIBS = $CRAY_XPMEM_STATIC_LIBS],[]) AS_IF([test "$opal_check_cray_xpmem_happy" = "yes"], diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4 index 7040f5c515b..11456ef905e 100644 --- a/config/opal_check_cuda.m4 +++ b/config/opal_check_cuda.m4 @@ -16,7 +16,7 @@ dnl Copyright (c) 2009 IBM Corporation. All rights reserved. dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2011-2014 NVIDIA Corporation. 
All rights reserved. +dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl @@ -79,10 +79,13 @@ dnl common framework, and likely configured first). So we have to dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4 dnl macro, below). :-( -# If we have CUDA support, check to see if we have CUDA 4.1 support -AS_IF([test "$opal_check_cuda_happy"="yes"], - AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0], - [#include <$opal_cuda_incdir/cuda.h>]), +# We require CUDA IPC support which started in CUDA 4.1. Error +# out if the support is not there. +AS_IF([test "$opal_check_cuda_happy" = "yes"], + [AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], + [], + [AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])], + [#include <$opal_cuda_incdir/cuda.h>])], []) # If we have CUDA support, check to see if we have support for SYNC_MEMOPS @@ -125,10 +128,6 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"]) AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT, [Whether we want cuda device pointer support]) -AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"]) -AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41, - [Whether we have CUDA 4.1 support available]) - AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"]) AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS, [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available]) diff --git a/config/opal_check_openfabrics.m4 b/config/opal_check_openfabrics.m4 index dafb9ca0a04..3145f6f3214 100644 --- a/config/opal_check_openfabrics.m4 +++ b/config/opal_check_openfabrics.m4 @@ -387,6 +387,23 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[ fi ])dnl +AC_DEFUN([OPAL_CHECK_EXP_VERBS],[ + OPAL_VAR_SCOPE_PUSH([have_struct_ibv_exp_send_wr]) 
+ + AC_MSG_CHECKING([whether expanded verbs are available]) + AC_TRY_COMPILE([#include <infiniband/verbs_exp.h>], [struct ibv_exp_send_wr;], + [have_struct_ibv_exp_send_wr=1 + AC_MSG_RESULT([yes])], + [have_struct_ibv_exp_send_wr=0 + AC_MSG_RESULT([no])]) + + AC_DEFINE_UNQUOTED([HAVE_EXP_VERBS], [$have_struct_ibv_exp_send_wr], [Experimental verbs]) + AC_CHECK_DECLS([IBV_EXP_ATOMIC_HCA_REPLY_BE, IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp, ibv_exp_query_device, IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG], + [], [], [#include <infiniband/verbs_exp.h>]) + AS_IF([test "$have_struct_ibv_exp_send_wr" = 1], [$1], [$2]) + OPAL_VAR_SCOPE_POP +])dnl + AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[ $1_have_mverbs=0 $1_have_mqe=0 diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index bf010b3def8..b0fd15a3ef4 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -224,3 +224,52 @@ AC_DEFUN([OPAL_CHECK_PMI],[ OPAL_VAR_SCOPE_POP ]) +AC_DEFUN([OPAL_CHECK_PMIX],[ + + opal_pmix_ext_CPPFLAGS= + opal_pmix_ext_LDFLAGS= + opal_pmix_ext_LIBS= + + OPAL_VAR_SCOPE_PUSH([pmix_ext_install_dir]) + + AC_ARG_WITH([pmix], + [AC_HELP_STRING([--with-pmix(=DIR)], + [Build PMIx support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of PMIx. "external" forces Open MPI to use an external installation of PMIx. Supplying a valid directory name also forces Open MPI to use an external installation of PMIx, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI does not support --without-pmix.])]) + + AS_IF([test "$with_pmix" = "no"], + [AC_MSG_WARN([Open MPI requires PMIx support. 
It can be built]) + AC_MSG_WARN([with either its own internal copy of PMIx, or with]) + AC_MSG_WARN([an external copy that you supply.]) + AC_MSG_ERROR([Cannot continue])]) + + AC_MSG_CHECKING([if user requested PMIx support($with_pmix)]) + AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], + [AC_MSG_RESULT([no]) + opal_external_pmix_happy="no"], + [AC_MSG_RESULT([yes]) + # check for external pmix lib */ + AS_IF([test "$with_pmix" = "external"], + [pmix_ext_install_dir=/usr], + [pmix_ext_install_dir=$with_pmix]) + + # cannot use check_package because there are + # external dependencies to make the headers + # build, so just check for presence of header + # and library files - these checks will error + # out if the files aren't found, which is okay + # as we are only executing here if the user + # specified external pmix + OPAL_CHECK_WITHDIR([external-pmix], [$pmix_ext_install_dir/include], [pmix.h]) + OPAL_CHECK_WITHDIR([external-libpmix], [$pmix_ext_install_dir/lib], [libpmix.*]) + + opal_pmix_ext_CPPFLAGS="-I$pmix_ext_install_dir -I$pmix_ext_install_dir/include -I$pmix_ext_install_dir/include/pmix -I$pmix_ext_install_dir/include/pmix/include" + opal_pmix_ext_LDFLAGS="-L$pmix_ext_install_dir/lib" + opal_pmix_ext_LIBS="-lpmix" + opal_external_pmix_happy="yes" + ]) + AC_SUBST(opal_pmix_ext_CPPFLAGS) + AC_SUBST(opal_pmix_ext_LDFLAGS) + AC_SUBST(opal_pmix_ext_LIBS) + + OPAL_VAR_SCOPE_POP +]) diff --git a/config/opal_check_singularity.m4 b/config/opal_check_singularity.m4 new file mode 100644 index 00000000000..1b0c38bd457 --- /dev/null +++ b/config/opal_check_singularity.m4 @@ -0,0 +1,60 @@ +# -*- shell-script ; indent-tabs-mode:nil -*- +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OPAL_CHECK_SINGULARITY(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +AC_DEFUN([OPAL_CHECK_SINGULARITY],[ + OPAL_VAR_SCOPE_PUSH([spath have_singularity]) + + AC_ARG_WITH([singularity], + [AC_HELP_STRING([--with-singularity(=DIR)], + [Build support for the Singularity container, optionally adding DIR to the search path])]) + spath= + AC_MSG_CHECKING([if Singularity support is to be built]) + AS_IF([test "$with_singularity" = "no"], + [AC_MSG_RESULT([no]) + have_singularity=no], + [AC_MSG_RESULT([yes]) + AS_IF([test -z "$with_singularity" || test "$with_singularity" = "yes"], + [ # look for the singularity command in the default path + AC_CHECK_PROG([SINGULARITY], [singularity], [singularity]) + AS_IF([test "$SINGULARITY" != ""], + [have_singularity=yes], + [AS_IF([test "$with_singularity" = "yes"], + [AC_MSG_WARN([Singularity support requested, but required executable]) + AC_MSG_WARN(["singularity" not found in default locations]) + AC_MSG_ERROR([Cannot continue])]) + have_singularity=no])], + [ AC_MSG_CHECKING([for existence of $with_singularity/bin]) + # look for the singularity command in the bin subdirectory + AS_IF([test ! 
-d "$with_singularity/bin"], + [AC_MSG_RESULT([not found]) + AC_MSG_WARN([Directory $with_singularity/bin not found]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([found])]) + save_path=$PATH + PATH=$with_singularity/bin:$PATH + AC_CHECK_PROG([SINGULARITY], [singularity], [singularity]) + AS_IF([test "$SINGULARITY" != ""], + [have_singularity=yes + spath=$with_singularity/bin], + [have_singularity=no + AC_MSG_WARN([Singularity support requested, but required executable]) + AC_MSG_WARN(["singularity" not found in either default or specified path]) + AC_MSG_ERROR([Cannot continue])]) + PATH=$save_path + ] + )]) + + AC_DEFINE_UNQUOTED(OPAL_SINGULARITY_PATH, "$spath", [Path to Singularity binaries]) + AS_IF([test "$have_singularity" = "yes"], + [$2], [$3]) + OPAL_VAR_SCOPE_POP +]) diff --git a/config/opal_check_withdir.m4 b/config/opal_check_withdir.m4 index 17212b063fa..7c0ffa84ffd 100644 --- a/config/opal_check_withdir.m4 +++ b/config/opal_check_withdir.m4 @@ -6,6 +6,8 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -17,7 +19,7 @@ dnl # ---------------------------------------------------- AC_DEFUN([OPAL_CHECK_WITHDIR],[ AC_MSG_CHECKING([--with-$1 value]) - AS_IF([test "$2" = "yes" -o "$2" = "no" -o "x$2" = "x"], + AS_IF([test "$2" = "yes" || test "$2" = "no" || test "x$2" = "x"], [AC_MSG_RESULT([simple ok (unspecified)])], [AS_IF([test ! 
-d "$2"], [AC_MSG_RESULT([not found]) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 6d6a94c30bf..47fa9fa3f5f 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -872,11 +872,11 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ [], [enable_osx_builtin_atomics="yes"]) opal_cv_asm_builtin="BUILTIN_NO" - if test "$opal_cv_asm_builtin" = "BUILTIN_NO" -a "$enable_builtin_atomics" = "yes" ; then + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], []) OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 fi - if test "$opal_cv_asm_builtin" = "BUILTIN_NO" -a "$enable_osx_builtin_atomics" = "yes" ; then + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_osx_builtin_atomics" = "yes" ; then AC_CHECK_HEADER([libkern/OSAtomic.h], [opal_cv_asm_builtin="BUILTIN_OSX"]) else diff --git a/config/opal_config_pthreads.m4 b/config/opal_config_pthreads.m4 index 3cdee6d3809..b2d9c7aaece 100644 --- a/config/opal_config_pthreads.m4 +++ b/config/opal_config_pthreads.m4 @@ -11,7 +11,7 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2014-2015 Research Organization for Information Science +dnl Copyright (c) 2014-2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl @@ -119,7 +119,7 @@ void pthreadtest_f(void) pthread_attr_init(&attr); pthread_cleanup_push(cleanup_routine, 0); pthread_create(&newthread, &attr, thread_main, 0); - pthread_join(&newthread, 0); + pthread_join(newthread, 0); pthread_cleanup_pop(0); } diff --git a/config/opal_config_subdir_args.m4 b/config/opal_config_subdir_args.m4 index 1d9bed399a3..3b7a35580f9 100644 --- a/config/opal_config_subdir_args.m4 +++ b/config/opal_config_subdir_args.m4 @@ -10,8 +10,8 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -30,7 +30,9 @@ OPAL_VAR_SCOPE_PUSH([subdirs_str subdirs_skip subdirs_args subdirs_arg]) # # Make a list of command line args --eliminate the --srcdir and # --cache-file args, because we need to replace them with our own -# values when invoking the sub-configure script. +# values when invoking the sub-configure script. 
Also eliminate +# the --with-platform as this will confuse any subdir with +# similar options # subdirs_args= @@ -56,6 +58,8 @@ do ;; -srcdir=* | --srcdir=*) ;; + -with-platform=* | --with-platform=*) + ;; *) case $subdir_arg in *\'*) subdir_arg=`echo "$subdir_arg" | sed "s/'/'\\\\\\\\''/g"` ;; diff --git a/config/opal_setup_java.m4 b/config/opal_setup_java.m4 index 7de58260e79..699ae780241 100644 --- a/config/opal_setup_java.m4 +++ b/config/opal_setup_java.m4 @@ -95,7 +95,9 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ [ # OS X Snow Leopard and Lion (10.6 and 10.7 -- did not # check prior versions) opal_java_found=0 - opal_java_dir=/System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers + AS_IF([test -x /usr/libexec/java_home], + [opal_java_dir=`/usr/libexec/java_home`/include], + [opal_java_dir=/System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers]) AC_MSG_CHECKING([OSX locations]) AS_IF([test -d $opal_java_dir], [AC_MSG_RESULT([found ($opal_java_dir)]) @@ -186,6 +188,11 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ # too. Ugh. AS_IF([test -d "$with_jdk_headers/solaris"], [OPAL_JDK_CPPFLAGS="$OPAL_JDK_CPPFLAGS -I$with_jdk_headers/solaris"]) + # Darwin JDK also require -I/darwin. + # See if that's there, and if so, add a -I for that, + # too. Ugh. 
+ AS_IF([test -d "$with_jdk_headers/darwin"], + [OPAL_JDK_CPPFLAGS="$OPAL_JDK_CPPFLAGS -I$with_jdk_headers/darwin"]) CPPFLAGS="$CPPFLAGS $OPAL_JDK_CPPFLAGS"]) AC_CHECK_HEADER([jni.h], [], diff --git a/config/orte_check_alps.m4 b/config/orte_check_alps.m4 index 21202501de0..b0555822ba5 100644 --- a/config/orte_check_alps.m4 +++ b/config/orte_check_alps.m4 @@ -129,7 +129,18 @@ AC_DEFUN([ORTE_CHECK_ALPS],[ AC_MSG_WARN([on the configure line using --with-alps option.]) AC_MSG_ERROR([Aborting])],[])] ) - + PKG_CHECK_MODULES_STATIC([CRAY_WLM_DETECT], [cray-wlm_detect], + [orte_check_cray_alps_happy="yes" + AC_DEFINE_UNQUOTED([CRAY_WLM_DETECT],[1], + [defined to 1 if cray wlm available, 0 otherwise]) + ], + [orte_check_cray_alps_happy="no"] + [AS_IF([test "$with_alps" = "yes"], + [AC_MSG_WARN([ALPS support requested but pkg-config failed.]) + AC_MSG_WARN([Need to explicitly indicate ALPS directory]) + AC_MSG_WARN([on the configure line using --with-alps option.]) + AC_MSG_ERROR([Aborting])],[])] + ) ], [AC_MSG_WARN([See ./configure --help for how to control Open MPI]) AC_MSG_WARN([configuration for ALPS on CLE 5 and higher systems]) @@ -146,10 +157,10 @@ AC_DEFUN([ORTE_CHECK_ALPS],[ AS_IF([test "$orte_check_cray_alps_happy" = "yes"], [$1_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS" - $1_CPPFLAGS="$CRAY_ALPSLLI_CFLAGS $CRAY_ALPSUTIL_CFLAGS $CRAY_ALPS_CFLAGS" - $1_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS" - $1_WRAPPER_EXTRA_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS" - $1_WRAPPER_EXTRA_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"], + $1_CPPFLAGS="$CRAY_ALPSLLI_CFLAGS $CRAY_ALPSUTIL_CFLAGS $CRAY_ALPS_CFLAGS $CRAY_WLM_DETECT_CFLAGS" + $1_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS $CRAY_WLM_DETECT_LIBS" + $1_WRAPPER_EXTRA_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS $CRAY_WLM_DETECT_LIBS" + $1_WRAPPER_EXTRA_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS $CRAY_WLM_DETECT_LIBS"], []) fi diff --git a/config/orte_setup_java.m4 b/config/orte_setup_java.m4 deleted 
file mode 100644 index e8c9789afdb..00000000000 --- a/config/orte_setup_java.m4 +++ /dev/null @@ -1,211 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2006 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright (c) 2004-2006 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. -dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. -dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -# This macro is necessary to get the title to be displayed first. :-) -AC_DEFUN([ORTE_SETUP_JAVA_BANNER],[ - opal_show_subtitle "Java compiler" -]) - -# ORTE_SETUP_JAVA() -# ---------------- -# Do everything required to setup the Java compiler. Safe to AC_REQUIRE -# this macro. -AC_DEFUN([ORTE_SETUP_JAVA],[ - OPAL_VAR_SCOPE_PUSH([orte_java_happy bad found dir jnih PATH_save CPPFLAGS_save]) - AC_REQUIRE([ORTE_SETUP_JAVA_BANNER]) - - AC_ARG_ENABLE(java, - AC_HELP_STRING([--enable-java], - [Enable Java-based support in the system - use this option to disable all Java-based compiler tests (default: enabled)])) - - AC_ARG_WITH(jdk-dir, - AC_HELP_STRING([--with-jdk-dir(=DIR)], - [Location of the JDK header directory. 
If you use this option, do not specify --with-jdk-bindir or --with-jdk-headers.])) - AC_ARG_WITH(jdk-bindir, - AC_HELP_STRING([--with-jdk-bindir(=DIR)], - [Location of the JDK bin directory. If you use this option, you must also use --with-jdk-headers (and you must NOT use --with-jdk-dir)])) - AC_ARG_WITH(jdk-headers, - AC_HELP_STRING([--with-jdk-headers(=DIR)], - [Location of the JDK header directory. If you use this option, you must also use --with-jdk-bindir (and you must NOT use --with-jdk-dir)])) - - if test "$enable_java" = "no"; then - HAVE_JAVA_SUPPORT=0 - orte_java_happy=no - else - # Check for bozo case: ensure a directory was specified - AS_IF([test "$with_jdk_dir" = "yes" || test "$with_jdk_dir" = "no"], - [AC_MSG_WARN([Must specify a directory name for --with-jdk-dir]) - AC_MSG_ERROR([Cannot continue])]) - AS_IF([test "$with_jdk_bindir" = "yes" || test "$with_jdk_bindir" = "no"], - [AC_MSG_WARN([Must specify a directory name for --with-jdk-bindir]) - AC_MSG_ERROR([Cannot continue])]) - AS_IF([test "$with_jdk_headers" = "yes" || test "$with_jdk_headers" = "no"], - [AC_MSG_WARN([Must specify a directory name for --with-jdk-headers]) - AC_MSG_ERROR([Cannot continue])]) - - # Check for bozo case: either specify --with-jdk-dir or - # (--with-jdk-bindir, --with-jdk-headers) -- not both. 
- bad=0 - AS_IF([test -n "$with_jdk_dir" && \ - (test -n "$with_jdk_bindir" || test -n "$with_jdk_headers"]), - [bad=1]) - AS_IF([(test -z "$with_jdk_bindir" && test -n "$with_jdk_headers") || \ - (test -n "$with_jdk_bindir" && test -z "$with_jdk_headers")], - [bad=1]) - AS_IF([test "$bad" = "1"], - [AC_MSG_WARN([Either specify --with-jdk-dir or both of (--with-jdk_bindir, --with-jdk-headers) -- not both.]) - AC_MSG_ERROR([Cannot continue])]) - - AS_IF([test -n "$with_jdk_dir"], - [with_jdk_bindir=$with_jdk_dir/bin - with_jdk_headers=$with_jdk_dir/include]) - - ################################################################## - # with_jdk_dir can now be ignored; with_jdk_bindir and - # with_jdk_headers will be either empty or have valid values. - ################################################################## - - # Some java installations are in obscure places. So let's - # hard-code a few of the common ones so that users don't have to - # specify --with-java-=LONG_ANNOYING_DIRECTORY. 
- AS_IF([test -z "$with_jdk_bindir"], - [ # OS X Snow Leopard and Lion (10.6 and 10.7 -- did not - # check prior versions) - found=0 - dir=/System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers - AC_MSG_CHECKING([OSX locations]) - AS_IF([test -d $dir], - [AC_MSG_RESULT([found]) - found=1 - with_jdk_headers=$dir - with_jdk_bindir=/usr/bin], - [AC_MSG_RESULT([not found])]) - - if test "$found" = "0"; then - # Various Linux - if test -z "$JAVA_HOME"; then - dir='/usr/lib/jvm/java-*-openjdk-*/include/' - else - dir=$JAVA_HOME/include - fi - jnih=`ls $dir/jni.h 2>/dev/null | head -n 1` - AC_MSG_CHECKING([Linux locations]) - AS_IF([test -r "$jnih"], - [with_jdk_headers=`dirname $jnih` - OPAL_WHICH([javac], [with_jdk_bindir]) - AS_IF([test -n "$with_jdk_bindir"], - [AC_MSG_RESULT([found]) - found=1 - with_jdk_bindir=`dirname $with_jdk_bindir`], - [with_jdk_headers=])], - [dir='/usr/lib/jvm/default-java/include/' - jnih=`ls $dir/jni.h 2>/dev/null | head -n 1` - AS_IF([test -r "$jnih"], - [with_jdk_headers=`dirname $jnih` - OPAL_WHICH([javac], [with_jdk_bindir]) - AS_IF([test -n "$with_jdk_bindir"], - [AC_MSG_RESULT([found]) - found=1 - with_jdk_bindir=`dirname $with_jdk_bindir`], - [with_jdk_headers=])], - [AC_MSG_RESULT([not found])])]) - fi - - if test "$found" = "0"; then - # Solaris - dir=/usr/java - AC_MSG_CHECKING([Solaris locations]) - AS_IF([test -d $dir && test -r "$dir/include/jni.h"], - [AC_MSG_RESULT([found]) - with_jdk_headers=$dir/include - with_jdk_bindir=$dir/bin - found=1], - [AC_MSG_RESULT([not found])]) - fi - ], - [found=1]) - - if test "$found" = "1"; then - OPAL_CHECK_WITHDIR([jdk-bindir], [$with_jdk_bindir], [javac]) - OPAL_CHECK_WITHDIR([jdk-headers], [$with_jdk_headers], [jni.h]) - - # Look for various Java-related programs - orte_java_happy=no - PATH_save=$PATH - AS_IF([test -n "$with_jdk_bindir" && test "$with_jdk_bindir" != "yes" && test "$with_jdk_bindir" != "no"], - [PATH="$with_jdk_bindir:$PATH"]) - AC_PATH_PROG(JAVAC, javac) - 
AC_PATH_PROG(JAVAH, javah) - AC_PATH_PROG(JAR, jar) - PATH=$PATH_save - - # Check to see if we have all 3 programs. - AS_IF([test -z "$JAVAC" || test -z "$JAVAH" || test -z "$JAR"], - [orte_java_happy=no - HAVE_JAVA_SUPPORT=0], - [orte_java_happy=yes - HAVE_JAVA_SUPPORT=1]) - - # Look for jni.h - AS_IF([test "$orte_java_happy" = "yes"], - [CPPFLAGS_save=$CPPFLAGS - # silence a stupid Mac warning - CPPFLAGS="$CPPFLAGS -DTARGET_RT_MAC_CFM=0" - AS_IF([test -n "$with_jdk_headers" && test "$with_jdk_headers" != "yes" && test "$with_jdk_headers" != "no"], - [ORTE_JDK_CPPFLAGS="-I$with_jdk_headers" - # Some flavors of JDK also require -I/linux. - # See if that's there, and if so, add a -I for that, - # too. Ugh. - AS_IF([test -d "$with_jdk_headers/linux"], - [ORTE_JDK_CPPFLAGS="$ORTE_JDK_CPPFLAGS -I$with_jdk_headers/linux"]) - # Solaris JDK also require -I/solaris. - # See if that's there, and if so, add a -I for that, - # too. Ugh. - AS_IF([test -d "$with_jdk_headers/solaris"], - [ORTE_JDK_CPPFLAGS="$ORTE_JDK_CPPFLAGS -I$with_jdk_headers/solaris"]) - - CPPFLAGS="$CPPFLAGS $ORTE_JDK_CPPFLAGS"]) - AC_CHECK_HEADER([jni.h], [], - [orte_java_happy=no]) - CPPFLAGS=$CPPFLAGS_save - ]) - else - orte_java_happy=no; - HAVE_JAVA_SUPPORT=no; - fi - AC_SUBST(ORTE_JDK_CPPFLAGS) - fi - - # Are we happy? - AC_MSG_CHECKING([Java support available]) - AS_IF([test "$orte_java_happy" = "no"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - - AC_DEFINE_UNQUOTED([ORTE_HAVE_JAVA_SUPPOR]T, [$HAVE_JAVA_SUPPORT], [do we have Java support]) - AM_CONDITIONAL(ORTE_HAVE_JAVA_SUPPORT, test "$orte_java_happy" = "yes") - - OPAL_VAR_SCOPE_POP -]) diff --git a/config/oshmem_setup_java.m4 b/config/oshmem_setup_java.m4 deleted file mode 100644 index b8796134299..00000000000 --- a/config/oshmem_setup_java.m4 +++ /dev/null @@ -1,101 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. 
All rights reserved. -dnl Copyright (c) 2004-2006 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright (c) 2004-2006 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. -dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. -dnl Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -# This macro is necessary to get the title to be displayed first. :-) -AC_DEFUN([OSHMEM_SETUP_JAVA_BINDINGS_BANNER],[ - opal_show_subtitle "Java OSHMEM bindings" -]) - -# OSHMEM_SETUP_JAVA_BINDINGS() -# ---------------- -# Do everything required to setup the Java OSHMEM bindings. Safe to AC_REQUIRE -# this macro. 
-AC_DEFUN([OSHMEM_SETUP_JAVA_BINDINGS],[ - # must have Java setup - AC_REQUIRE([OPAL_SETUP_JAVA]) - - AC_REQUIRE([OSHMEM_SETUP_JAVA_BINDINGS_BANNER]) - - AC_MSG_CHECKING([if want Java bindings]) - AC_ARG_ENABLE(oshmem-java, - AC_HELP_STRING([--enable-oshmem-java], - [enable Java OSHMEM bindings (default: disabled)])) - - # check for required support - if test "$opal_java_happy" = "no" && test "$enable_oshmem_java" = "yes"; then - AC_MSG_RESULT([yes]) - AC_MSG_WARN([Java bindings requested but no Java support found]) - AC_MSG_ERROR([cannot continue]) - fi - - # Only build the Java bindings if requested - if test "$opal_java_happy" = "yes" && test "$enable_oshmem_java" = "yes"; then - AC_MSG_RESULT([yes]) - WANT_OSHMEM_JAVA_SUPPORT=1 - AC_MSG_CHECKING([if shared libraries are enabled]) - AS_IF([test "$enable_shared" != "yes"], - [AC_MSG_RESULT([no]) - AC_MSG_WARN([Java bindings cannot be built without shared libraries]) - AC_MSG_WARN([Please reconfigure with --enable-shared]) - AC_MSG_ERROR([Cannot continue])], - [AC_MSG_RESULT([yes])]) - # must have Java support - AC_MSG_CHECKING([if Java support was found]) - AS_IF([test "$opal_java_happy" = "yes"], - [AC_MSG_RESULT([yes])], - [AC_MSG_WARN([Java OSHMEM bindings requested, but Java support was not found]) - AC_MSG_WARN([Please reconfigure the --with-jdk options to where Java]) - AC_MSG_WARN([support can be found]) - AC_MSG_ERROR([Cannot continue])]) - - # Mac Java requires this file (i.e., some other Java-related - # header file needs this file, so we need to check for - # it/include it in our sources when compiling on Mac). - AC_CHECK_HEADERS([TargetConditionals.h]) - else - AC_MSG_RESULT([no]) - WANT_OSHMEM_JAVA_SUPPORT=0 - fi - AC_DEFINE_UNQUOTED([OSHMEM_WANT_JAVA_BINDINGS], [$WANT_OSHMEM_JAVA_SUPPORT], - [do we want java oshmem bindings]) - AM_CONDITIONAL(OSHMEM_WANT_JAVA_BINDINGS, test "$WANT_OSHMEM_JAVA_SUPPORT" = "1") - - # Are we happy? 
- AS_IF([test "$WANT_OSHMEM_JAVA_SUPPORT" = "1"], - [AC_MSG_WARN([******************************************************]) - AC_MSG_WARN([*** Java OSHMEM bindings are provided on a provisional]) - AC_MSG_WARN([*** basis. They are NOT part of the current or]) - AC_MSG_WARN([*** proposed OSHMEM standard. Continued inclusion of]) - AC_MSG_WARN([*** the Java OSHMEM bindings OSHMEM is contingent]) - AC_MSG_WARN([*** upon user interest and developer support.]) - AC_MSG_WARN([******************************************************]) - ]) - - AC_CONFIG_FILES([ - oshmem/shmem/java/Makefile - oshmem/shmem/java/java/Makefile - oshmem/shmem/java/c/Makefile - ]) -]) diff --git a/configure.ac b/configure.ac index 1d754232d3c..7bb7cbe8eb7 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2014 The University of Tennessee and The University +# Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -20,7 +20,7 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2013-2014 Intel, Inc. All rights reserved. -# Copyright (c) 2014-2015 Research Organization for Information Science +# Copyright (c) 2014-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ # @@ -136,8 +136,7 @@ m4_ifdef([project_ompi], m4_ifdef([project_orte], [AC_SUBST(libopen_rte_so_version)]) m4_ifdef([project_oshmem], - [AC_SUBST(liboshmem_so_version) - AC_SUBST(liboshmem_java_so_version)]) + [AC_SUBST(liboshmem_so_version)]) AC_SUBST(libopen_pal_so_version) # It's icky that we have to hard-code the names of the # common components here. 
:-( This could probably be done @@ -145,12 +144,11 @@ AC_SUBST(libopen_pal_so_version) # and/or opal_mca.m4, but I don't have the cycles to do this # right now. AC_SUBST(libmca_opal_common_libfabric_so_version) -AC_SUBST(libmca_common_cuda_so_version) -AC_SUBST(libmca_common_ofacm_so_version) -AC_SUBST(libmca_common_sm_so_version) -AC_SUBST(libmca_common_ugni_so_version) -AC_SUBST(libmca_common_verbs_so_version) -AC_SUBST(libmca_common_alps_so_version) +AC_SUBST(libmca_opal_common_cuda_so_version) +AC_SUBST(libmca_opal_common_sm_so_version) +AC_SUBST(libmca_opal_common_ugni_so_version) +AC_SUBST(libmca_opal_common_verbs_so_version) +AC_SUBST(libmca_orte_common_alps_so_version) # # Get the versions of the autotools that were used to bootstrap us @@ -502,7 +500,6 @@ OPAL_CHECK_OFFSETOF # sets up the C++ MPI bindings, etc.). Perhaps it was moved to OPAL # just on the rationale that all compiler setup should be done in # OPAL...? Shrug. -WANT_MPI_CXX_SUPPORT=0 m4_ifdef([project_ompi], [OPAL_SETUP_CXX OMPI_SETUP_CXX]) # Used in Makefile.ompi-rules @@ -525,7 +522,6 @@ OPAL_CHECK_COMPILER_VERSION_ID ################################## # Only needed for OMPI m4_ifdef([project_ompi], [OMPI_SETUP_JAVA_BINDINGS]) -m4_ifdef([project_oshmem], [OSHMEM_SETUP_JAVA_BINDINGS]) ################################## @@ -1376,11 +1372,14 @@ AC_CONFIG_FILES([ test/event/Makefile test/asm/Makefile test/datatype/Makefile + test/dss/Makefile test/class/Makefile test/support/Makefile test/threads/Makefile test/util/Makefile ]) +m4_ifdef([project_ompi], [AC_CONFIG_FILES([test/monitoring/Makefile])]) + AC_CONFIG_FILES([contrib/dist/mofed/debian/rules], [chmod +x contrib/dist/mofed/debian/rules]) AC_CONFIG_FILES([contrib/dist/mofed/compile_debian_mlnx_example], diff --git a/contrib/dist/linux/openmpi.spec b/contrib/dist/linux/openmpi.spec index aacbbfc64a5..cddf6c5b926 100644 --- a/contrib/dist/linux/openmpi.spec +++ b/contrib/dist/linux/openmpi.spec @@ -12,6 +12,8 @@ # Copyright (c) 2006-2014 
Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -670,7 +672,14 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %files %defattr(-, root, root, -) +%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) +%{_bindir}/* +%{_includedir}/* +%{_libdir}/* +%{_datadir} +%else %{_prefix} +%endif # If the sysconfdir is not under the prefix, then list it explicitly. %if !%{sysconfdir_in_prefix} %{_sysconfdir} @@ -706,7 +715,13 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %files runtime -f runtime.files %defattr(-, root, root, -) -%dir %{_prefix} +%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) +%{_bindir}/* +%{_libdir}/* +%{_datadir} +%else +%{_prefix} +%endif # If the sysconfdir is not under the prefix, then list it explicitly. %if !%{sysconfdir_in_prefix} %{_sysconfdir} @@ -729,9 +744,6 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %{shell_scripts_path}/%{shell_scripts_basename}.sh %{shell_scripts_path}/%{shell_scripts_basename}.csh %endif -%dir %{_bindir} -%dir %{_libdir} -%dir %{_libdir}/openmpi %doc README INSTALL LICENSE %{_pkgdatadir} @@ -756,6 +768,9 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT # ############################################################################# %changelog +* Thu Nov 12 2015 Gilles Gouaillardet +- Revamp packaging when prefix is /usr + * Tue Jan 20 2015 Bert Wesarg - Remove VampirTrace wrapper from package. diff --git a/contrib/dist/make_dist_tarball b/contrib/dist/make_dist_tarball index d8ede2ff263..d5a6bc69a0e 100755 --- a/contrib/dist/make_dist_tarball +++ b/contrib/dist/make_dist_tarball @@ -255,9 +255,9 @@ make_tarball() { echo "*** Running autogen $autogen_args..." 
rm -f success if test "$want_ompi" = "1" ; then - (./autogen.pl $autogen_args 2>&1 && touch success) | tee auto.out + (./autogen.pl --force $autogen_args 2>&1 && touch success) | tee auto.out else - (./autogen.pl --no-ompi $autogen_args 2>&1 && touch success) | tee auto.out + (./autogen.pl --force --no-ompi $autogen_args 2>&1 && touch success) | tee auto.out fi if test ! -f success; then echo "Autogen failed. Aborting" diff --git a/contrib/nightly/create_tarball.sh b/contrib/nightly/create_tarball.sh old mode 100755 new mode 100644 index 31f8186a706..b87bc8dcb2d --- a/contrib/nightly/create_tarball.sh +++ b/contrib/nightly/create_tarball.sh @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -70,13 +70,13 @@ send_error_mail() { rm -f "$outfile" touch "$outfile" for file in `/bin/ls $logdir/* | sort`; do - len="`wc -l $file | awk '{ print $1}'`" - if test "`expr $len \> $max_log_len`" = "1"; then - echo "[... previous lines snipped ...]" >> "$outfile" - tail -n $max_log_len "$file" >> "$outfile" - else - cat "$file" >> "$outfile" - fi + len="`wc -l $file | awk '{ print $1}'`" + if test "`expr $len \> $max_log_len`" = "1"; then + echo "[... previous lines snipped ...]" >> "$outfile" + tail -n $max_log_len "$file" >> "$outfile" + else + cat "$file" >> "$outfile" + fi done Mail -s "=== CREATE FAILURE ($version) ===" "$email" < "$outfile" rm -f "$outfile" @@ -101,16 +101,16 @@ do_command() { logfile="$logdir/20-command.txt" rm -f "$logfile" if test -n "$debug"; then - echo "*** Running command: $cmd" - eval $cmd > "$logfile" 2>&1 - st=$? - echo "*** Command complete: exit status: $st" + echo "*** Running command: $cmd" + eval $cmd > "$logfile" 2>&1 + st=$? 
+ echo "*** Command complete: exit status: $st" else - eval $cmd > "$logfile" 2>&1 - st=$? + eval $cmd > "$logfile" 2>&1 + st=$? fi if test "$st" != "0"; then - cat > "$logdir/15-error.txt" < "$logdir/15-error.txt" < "$logdir/25-error.txt" < "$logdir/25-error.txt" < latest_snapshot.txt for ext in gz bz2; do count="`ls openmpi*.tar.$ext | wc -l | awk '{ print $1 }'`" if test "`expr $count \> $max_snapshots`" = "1"; then - num_old="`expr $count - $max_snapshots`" - old="`ls -rt openmpi*.tar.$ext | head -n $num_old`" - rm -f $old + num_old="`expr $count - $max_snapshots`" + old="`ls -rt openmpi*.tar.$ext | head -n $num_old`" + rm -f $old fi done diff --git a/contrib/ompi-time.sh b/contrib/ompi-time.sh new file mode 100755 index 00000000000..857a55452b5 --- /dev/null +++ b/contrib/ompi-time.sh @@ -0,0 +1,1244 @@ +#!/bin/bash +# +# Copyright (c) 2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This script is used to measure PMIx performance. +# +# --exec: scenario to run as sync or "test1 test2" +# --mpidir: path to mpi installation (/usr default) +# --parse: path to collected results +# HOWTO: +# 1. Set test matrix using variables $node_list, ppn_list, test_list +# 2. Allocate nodes: +# $salloc --nodelist=node[1-4] +# or +# $salloc -N4 +# 3. 
Launch script: +# $./opmi-time.sh --exec="test1 test2" --mpidir= +# $./opmi-time.sh --mpidir= +# +# Output location is test name folder +# Output file formats +# ()_base.log +# timestamp (usec) hostnode label +# 1441715028369350 mir14 start +# 1441715030540656 mir14 end +# +# ()_out.log +# timestamp (usec) rank node +# 1441715030460727 0 mir9 +# 1441715030460628 1 mir10 +# +# ()_result.log +# time rank node +# 2.089 3 mir12 +# 2.093 2 mir11 +# +# report.log +# nodes ppn mintime maxtime +# 4 1 2.089 2.093 + + +# Settings +############################################################################### + +node_list=(2 4) +ppn_list=(1 2) +test_list="test1 test2 test3 test4 test5 test6 test7 test8 test9 test10 test11 test12 test13" + +# Declarations +############################################################################### + +prefix=pmix +module=${BASH_SOURCE[0]} + +# Command line parsing +############################################################################### + +opt="" +while [ "$#" -gt 0 ]; do + case "$1" in + + --parse=*) parse="${1#*=}"; shift 1;; + --exec=*) exec="${1#*=}"; shift 1;; + --mpidir=*) mpidir="${1#*=}"; shift 1;; + --parse|--exec|--mpidir) echo "$1 requires an argument" >&2; exit 1;; + + -*) echo "unknown option: $1" >&2; exit 1;; + *) shift 1;; + esac +done + +# The scenario of measurement +if [ -n "$exec" ]; then + test_list="$exec" +fi + +# The mpi path +mpidir=${mpidir:=/usr} + +# Functions +############################################################################### + +# format text +function do_format() { + local is_format=true + if [[ $is_format == true ]] ; then + res="" + for ((i=2; i<=$#; i++)) ; do + case "${!i}" in + "bold" ) res="$res\e[1m" ;; + "underline" ) res="$res\e[4m" ;; + "reverse" ) res="$res\e[7m" ;; + "red" ) res="$res\e[91m" ;; + "green" ) res="$res\e[92m" ;; + "yellow" ) res="$res\e[93m" ;; + esac + done + echo -e "$res$1\e[0m" + else + echo "$1" + fi +} + +# print message +function do_msg() { + echo -e "$*" 
2>&1 | tee -a $logfile +} + +# print error message and exit script +function do_err() { + echo -e $(do_format "$module failed. aborting. $*" "red" "bold") 2>&1 | tee -a $logfile + exit 1 +} + +# print the seconds and current microseconds. +function do_timestamp() { + do_msg "$(($(date +%s%N)/1000))\t$(hostname -s)" "$1" +} + +# swap two files +function do_fswap() { + if (( $# == 2 )); then + mv "$1" /tmp/ + mv "$2" "`dirname $1`" + mv "/tmp/`basename $1`" "`dirname $2`" + else + echo "Usage: swap " + return 1 + fi +} + +function do_cmd() { + cmd="$*" + do_msg "Doing:" + do_msg "==================================================" + do_msg "$*" + eval $cmd >> $logfile 2>&1 + local status=$? + if test "$status" != "0"; then + echo "$module failed. Log:" + tail -20 $logfile + cat $logfile + exit $status + fi + do_msg "DONE" + do_msg "" +} + +function do_export() { + do_msg "Exporting PATHs:" + do_msg "==================================================" + do_msg "$1" + export PATH="$1/bin:${PATH}" + export LD_LIBRARY_PATH="$1/lib:${LD_LIBRARY_PATH}" + export MANPATH="$1/share/man:${MANPATH}" + do_msg "DONE" + do_msg "" +} + +function do_nodeinfo() { + do_msg "Node information:" + do_msg "==================================================" + do_msg $(hostname) + do_msg $(cat /etc/issue | grep We) + do_msg $(cat /proc/cpuinfo | grep 'model name' | sort -u | awk '{print $4, $5, $6, $7, $9}') + do_msg $(cat /proc/cpuinfo | grep proce | wc | awk '{print $1}') + do_msg $(uname -a | awk '{print $12}') + do_msg $(cat /proc/meminfo | grep [M,m]em) + do_msg $(uname -a | awk '{print $3}') + do_msg $(ibstat | grep -e "CA type" -e "Firmware version") + do_msg $(ibstatus | grep -e rate -e state | grep -v 'phys state') + do_msg $(ofed_info | head -6 | grep OFED) + do_msg "DONE" + do_msg "" +} + +function do_validate() { + command -v mpiexec >/dev/null 2>&1 || { do_err "mpiexec is not found."; } + command -v srun >/dev/null 2>&1 || { do_err "srun is not found."; } + command -v salloc 
>/dev/null 2>&1 || { do_err "salloc is not found."; } +} + +function do_check_pmix() { + eval "srun --mpi=list 2>&1 | grep pmix" +} + +function do_checksync_mpisync() { + local status + local tooldir=${tempdir}/mpisync + local verbose=$1 + local option=$* + + do_msg "Checking synchronization using mpisync:" + + if [ ! -e ${tooldir} ]; then + mkdir -p ${tooldir} + cd ${tooldir} + wget --no-check-certificate https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync/mpigclock.c >> $logfile 2>&1 + wget --no-check-certificate https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync/mpigclock.h >> $logfile 2>&1 + wget --no-check-certificate https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync/hpctimer.c >> $logfile 2>&1 + wget --no-check-certificate https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync/hpctimer.h >> $logfile 2>&1 + wget --no-check-certificate https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync/sync.c >> $logfile 2>&1 + mpicc hpctimer.c mpigclock.c sync.c -o mpisync >> $logfile 2>&1 + fi + if [ ! -e "$tooldir" ] || [ ! 
-f "$tooldir/mpisync" ]; then + do_err "can not find $tooldir/mpisync" + fi + mpiexec -n $(($nodes)) -npernode 1 $mpioptions $tooldir/mpisync -o ${syncfile} ${option} 2>&1 + do_msg "Analysing ${syncfile}" + cat ${syncfile} >> $logfile 2>&1 + diff=$(grep -v '^#' ${syncfile} | cut -f3 -d' ' | sort -n | awk 'BEGIN {min=1000000; max=0;}; { if($1max && $1 != "") max = $1; } END { printf("%0.06f %0.06f %0.06f", min, max, max-min) }') >> $logfile 2>&1 + do_msg "sync drift is equal: $diff" + diff=`echo $diff | cut -f3 -d' '` + status=$(if (( `bc <<< "$diff >= 0.001"` == 1 )); then echo "value $diff >= 0.001"; fi) + if [ -n "$status" ] && [ -n $verbose -a "$verbose" == "on" ]; then + do_err "mpisync reports issue with synchronization as $status" + else + do_msg "Warning: mpiperf reports issue with synchronization as $status" + fi + + do_msg "DONE" + do_msg "" +} + +function do_checksync_mpiperf() { + local status + local tooldir=${tempdir}/mpiperf-0.0.3 + local verbose=$1 + + do_msg "Checking synchronization using mpiperf:" + + if [ ! -f ${tempdir}/mpiperf-0.0.3.tar.gz ]; then + wget http://mpiperf.cpct.sibsutis.ru/uploads/Main/mpiperf-0.0.3.tar.gz >> $logfile 2>&1 + tar zxvf mpiperf-0.0.3.tar.gz >> $logfile 2>&1 + cd $tooldir + make >> $logfile 2>&1 + fi + if [ ! -e "$tooldir" ] || [ ! 
-f "$tooldir/src/mpiperf" ]; then + do_err "can not find $tooldir/src/mpiperf" + fi + mpiexec -n 1 $mpioptions $tooldir/src/mpiperf -T >> $logfile 2>&1 + if [ -z "$(mpiexec -n 1 $mpioptions $tooldir/src/mpiperf -j -t gettimeofday 2>&1 | tee -a $logfile | sed -n '/PASSED/p')" ]; then + do_err "mpiperf does not support gettimeofday" + fi + mpiexec -n $(($nodes)) -npernode 1 $mpioptions $tooldir/src/mpiperf -t gettimeofday WaitPatternNull >> ${syncfile} 2>&1 + do_msg "Analysing ${syncfile}" + cat ${syncfile} >> $logfile 2>&1 + status=$(grep -v '^#' ${syncfile} | awk -F ' ' '{ print $6 }' | while read i; do if (( `bc <<< "$i >= 1"` == 1 )); then echo "value $i >= 1.00"; break; fi; done) + if [ -n "$status" ] && [ -n $verbose -a "$verbose" == "on" ]; then + do_err "mpiperf reports issue with synchronization as $status" + else + do_msg "Warning: mpiperf reports issue with synchronization as $status" + fi + + do_msg "DONE" + do_msg "" +} + +# $1 - sync filename +# $2 - verbose mode: on - exit in case synchronization values exceed a treshold and off - silent mode (default: off) +# $3+ - application additional options +function do_checksync() { + if [ -z "$1" ]; then + syncfile=${tempdir}/mpisync.log + else + syncfile=$1 + fi + + do_checksync_mpisync $2 "-a 0" +# do_checksync_mpisync $2 "-a 1" +# do_checksync_mpiperf + do_msg "syncfile: $syncfile" +} + +function do_analysis() { + local testdir=$1 + local basefile=$2 + local outfile=$3 + local outfile1="${3}.1" + local resultfile=${testdir}/${nodes}x${ppn}_result.log + + if [ ! -e $tesdir ]; then + do_err "can not find testdir: $testdir" + fi + if [ -z $basefile -o ! -f $basefile ]; then + do_err "can not find basefile: $basefile" + fi + if [ -z $outfile -o ! -f $outfile ]; then + do_err "can not find outfile: $outfile" + fi + if [ "$(cat $outfile | wc -l)" != "$(($nodes * $ppn))" ]; then + do_msg "Warning: number of lines in $outfile ($(cat $outfile | wc -l)) is not equal ($nodes * $ppn)." 
+ fi + start_t=`awk -F $'\t' '{ if (NR == 1) print $1 }' $basefile` + + # Add sync value in output file + while read line; do + if [[ ! $line =~ ^[0-9] ]]; then + do_msg "Warning: ignoring line: $line." + continue + fi + local n=$(echo $line | cut -f3 -d' ') + local v1=$(echo $line | cut -f1 -d' ') + local v2=0 + + if [ ! -z $syncfile -o -f $syncfile ]; then + v2=$(echo "scale=2; ($(grep $n $syncfile | cut -f3 -d' ') * 1000000)" | bc -l) + # Round float value to int + v2=$(echo ${v2%%.*}) + v2=${v2:=0} + fi + echo -e "$(($v1 + $v2))\t${v2}\t${line}" >> $outfile1 + done < $outfile + + # Find maximum and minimum lines + min_line=`sort -n $outfile1 | head -n1` + max_line=`sort -n $outfile1 | tail -n1` + if [ -z "$min_line" -o -z "$max_line" ]; then + do_err "can not find max/min lines in : $outfile1" + fi + min_t=$( echo "$min_line" | cut -f1 -d$'\t') + max_t=$( echo "$max_line" | cut -f1 -d$'\t') + echo -e "`bc -l <<< "scale=3; (($min_t - $start_t) / 1000000)"`\t`echo "$min_line" | cut -f4 -d$'\t'`\t`echo "$min_line" | cut -f5 -d$'\t'`" >> $resultfile 2>&1 + echo -e "`bc -l <<< "scale=3; (($max_t - $start_t) / 1000000)"`\t`echo "$max_line" | cut -f4 -d$'\t'`\t`echo "$max_line" | cut -f5 -d$'\t'`" >> $resultfile 2>&1 + + echo -e "\n# Used synchronization file: $syncfile" >> $outfile1 + + do_report $testdir $resultfile +} + +function do_report() { + local testdir=$1 + local resultfile=$2 + local reportfile=${testdir}/report.log + + if [ -z $resultfile -o ! -f $resultfile ]; then + do_err "can not find resultfile: $resultfile" + fi + min_t=`awk -F $'\t' '{ if (NR == 1) print $1 }' $resultfile` + max_t=`awk -F $'\t' '{ if (NR == 2) print $1 }' $resultfile` + echo -e "${nodes}\t${ppn}\t${min_t}\t${max_t}" >> $reportfile 2>&1 +} + +function do_postresult() { + cd $tempdir/.. 
+ tar -zcvf $PWD/pmix.$$.tar.gz $tempdir > /dev/null 2>&1 +} + +include_timestamp_func=$(cat < +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void timestamp(FILE *file) +{ + struct timeval tv; + char name[256]; + char *host_name = NULL; + char *domain = NULL; + int procid = -1; + char *str = NULL; + + gettimeofday(&tv, NULL); + + if (gethostname(name, sizeof(name)) != 0) + strcpy(name, "localhost"); + host_name = strdup(name); + domain = strchr(host_name, '.'); + if (domain) + *domain = '\0'; + + str = getenv("SLURM_PROCID"); + procid = ( str ? strtol(str, NULL, 10) : -1); + fprintf(file, "%lld\t%d\t%s\n", tv.tv_sec * 1000000LL + tv.tv_usec, procid, host_name); + fflush(file); +} + +END_MSG +) + +function do_exec() { + +# The number of nodes (see SLURM_NNODES) +nodes=${SLURM_NNODES} +nodes=${nodes:=2} + +# The number of tasks per node (see SLURM_NTASKS_PER_NODE or SLURM_TASKS_PER_NODE) +ppn=${SLURM_NTASKS_PER_NODE} +ppn=${ppn:=1} + +mpioptions=' -novm -mca btl_openib_warn_default_gid_prefix 0 -mca mpi_add_procs_cutoff 100000 ' +slurmoptions=' OMPI_MCA_btl_openib_warn_default_gid_prefix=0 OMPI_MCA_mpi_add_procs_cutoff=100000 ' + +if [ -z "$(env | grep SLURM)" ]; then + do_err "Do not see allocated nodes by SLURM. Probably salloc -N option is not set" +fi + +#if [ "${SLURM_NPROCS}" != "$(($nodes * $ppn))" ]; then +# do_err "SLURM_NPROCS=${SLURM_NPROCS} is not equal ($nodes * $ppn). 
Probably salloc -N option is not set" +#fi + +do_msg "" +do_msg "Configuration:" +do_msg "==================================================" +do_msg "tempdir: $tempdir" +do_msg "logfile: $logfile" +do_msg "mpi: $mpidir" +do_msg "exec: $exec" +do_msg "nodes: $nodes" +do_msg "ppn: $ppn" +do_msg "mpioptions: $mpioptions" +do_msg "slurmoptions: $slurmoptions" +do_msg "node list: $node_list" +do_msg "ppn list: $ppn_list" +do_msg "test list: $test_list" +do_msg "" + +do_export $mpidir +do_nodeinfo +do_validate + +if [ -f "${tempdir}/mpisync.log" ]; then + syncfile=${tempdir}/mpisync.log + do_msg "found sync data at ${syncfile}" +elif [ -f "${tempdir}/mpiperf.log" ]; then + syncfile=${tempdir}/mpiperf.log + do_msg "found sync data at ${syncfile}" +else + do_msg "sync data is not found" +fi + +# Launch scenario +node_list_len=${#node_list[*]} +ppn_list_len=${#ppn_list[*]} + +for ((i=0; $i < $node_list_len; i=$((i=$i+1)))); do + for ((j=0; $j < $ppn_list_len; j=$((j=$j+1)))); do + for test in $test_list; do + nodes=${node_list[$i]} + ppn=${ppn_list[$j]} + if [ "$test" = "sync" ]; then + do_checksync "${tempdir}/${nodes}x${ppn}_mpisync00.log" "off" + else + do_checksync "${tempdir}/${nodes}x${ppn}_mpisync_before.log" "off" + eval "do_${test}" + do_checksync "${tempdir}/${nodes}x${ppn}_mpisyn_after.log" "off" + fi + done + done +done + +do_postresult +} + +# $1 - result location +function do_parse() { + local parsedir=$1 + local result_list + local test_list + local parsefile + + for result in `ls -1 $workdir`; do + if [ ! -d "${parsedir}/${result}" ]; then + continue + fi + for test in `ls -1 "${parsedir}/${result}" | grep -v mpisync`; do + if [ ! 
-d "${parsedir}/${result}/${test}" ]; then + continue + fi + result_list="${result_list} ${result}" + test_list="${test_list} ${test}" + done + done + + result_list=`echo $result_list | tr " " "\n" | sort | uniq | tr "\n" " "` + test_list=`echo $test_list | tr " " "\n" | sort | uniq | tr "\n" " "` + + do_msg "results: $result_list" + do_msg "tests: $test_list" + + for test in $test_list; do + parsefile="${parsedir}/parse_${test}.log" + for result in $result_list; do + echo -e "\n${result}:" >> $parsefile 2>&1 + echo -e "nodes\tppn\tmin\tmax" >> $parsefile 2>&1 + cat "${parsedir}/${result}/${test}/report.log" >> $parsefile 2>&1 + done + done +} + +# Pure application srun launch +##################################################### +function do_test1 +{ + local status + local scenario=test1 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + srun -n$(($nodes * $ppn)) -N$nodes --ntasks-per-node=$ppn ./$scenario.out >> $outfile 2>&1 + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "srun pure overhead" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# Pure application mpiexec launch +##################################################### +function do_test2 +{ + local status + local scenario=test2 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + mpiexec -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out >> $outfile 2>&1 + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "mpiexec pure overhead" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# Pure application oshrun launch +##################################################### +function do_test3 +{ + local status + local scenario=test3 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + oshrun -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out >> $outfile 2>&1 + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "osrun pure overhead" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# MPI_init application srun/pmi2 launch +##################################################### +function do_test4 +{ + local status + local scenario=test4 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + env $slurmoptions srun -n$(($nodes * $ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmi2 ./$scenario.out >> $outfile 2>&1 + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "srun --mpi=pmi2:MPI_Init" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# start_pes application srun/pmi2 launch +##################################################### +function do_test5 +{ + local status + local scenario=test5 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + env $slurmoptions srun -n$(($nodes * $ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmi2 ./$scenario.out >> $outfile 2>&1 + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "srun --mpi=pmi2:start_pes" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# MPI_Init application mpiexec launch +##################################################### +function do_test6 +{ + local status + local scenario=test6 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + mpiexec -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out >> $outfile 2>&1 + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "mpiexec:MPI_Init" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# start_pes application oshrun launch +##################################################### +function do_test7 +{ + local status + local scenario=test7 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + oshrun -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out >> $outfile 2>&1 + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "osrun:start_pes" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# pure application mpiexec:orte_daemon +##################################################### +function do_test8 +{ + local status + local scenario=test8 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c < lib_$scenario.c <> $logfile 2>&1 + gcc lib_$scenario.c -o $scenario.so -shared -fPIC -ldl >> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + LD_PRELOAD=$PWD/$scenario.so mpiexec -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + eval cat $outfile.* >> $outfile + rm $outfile.* + echo -e "mpiexec:orte_daemon" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# pure application oshrun:orte_daemon +##################################################### +function do_test9 +{ + local status + local scenario=test9 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c < lib_$scenario.c <> $logfile 2>&1 + gcc lib_$scenario.c -o $scenario.so -shared -fPIC -ldl >> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + LD_PRELOAD=$PWD/$scenario.so oshrun -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + eval cat $outfile.* >> $outfile + rm $outfile.* + echo -e "oshrun:orte_daemon" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# pure application mpiexec:orte_rml_base_update_contact_info +##################################################### +function do_test10 +{ + local status + local scenario=test10 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c < lib_$scenario.c <> $logfile 2>&1 + mpicc lib_$scenario.c -o $scenario.so -shared -fPIC -ldl >> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + LD_PRELOAD=$PWD/$scenario.so mpiexec -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + eval "cat $outfile.* >> $outfile" >> $logfile 2>&1 + rm $outfile.* >> $logfile 2>&1 + echo -e "mpiexec:orte_rml_base_update_contact_info" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# pure application oshrun:orte_rml_base_update_contact_info +##################################################### +function do_test11 +{ + local status + local scenario=test11 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c < lib_$scenario.c <> $logfile 2>&1 + oshcc lib_$scenario.c -o $scenario.so -shared -fPIC -ldl >> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + LD_PRELOAD=$PWD/$scenario.so oshrun -n $(($nodes * $ppn)) -npernode $ppn $mpioptions ./$scenario.out + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + eval "cat $outfile.* >> $outfile" >> $logfile 2>&1 + rm $outfile.* >> $logfile 2>&1 + echo -e "oshrun:orte_rml_base_update_contact_info" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# MPI_Init application mpiexec:srun/pmix +##################################################### +function do_test12 +{ + local status + local scenario=test12 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + do_check_pmix + if [ $? 
-eq 0 ]; then + do_msg "slurm has pmix plugin" + else + do_msg "skipping this test : slurm does not have pmix plugin" + return 1 + fi + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + env $slurmoptions srun -n$(($nodes * $ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmix ./$scenario.out >> $outfile 2>&1 + test $? -eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "srun --mpi=pmix:MPI_Init" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# start_pes application oshrun:srun/pmix +##################################################### +function do_test13 +{ + local status + local scenario=test13 + local testdir=${tempdir}/$scenario + local outfile=${testdir}/${nodes}x${ppn}_out.log + local basefile=${testdir}/${nodes}x${ppn}_base.log + + do_msg "Running $scenario ${nodes}x${ppn} :" + + do_check_pmix + if [ $? -eq 0 ]; then + do_msg "slurm has pmix plugin" + else + do_msg "skipping this test : slurm does not have pmix plugin" + return 1 + fi + + mkdir -p $testdir + cd $testdir + + cat > $scenario.c <> $logfile 2>&1 + + # Do test + do_timestamp "start" 2>&1 | tee -a $basefile + env $slurmoptions srun -n$(($nodes * $ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmix ./$scenario.out >> $outfile 2>&1 + test $? 
-eq 0 && status=OK || status=FAIL + do_timestamp "end" 2>&1 | tee -a $basefile + if [ "$status" == "FAIL" ]; then + do_err "can not launch a test" + fi + + echo -e "srun --mpi=pmix:start_pes" > ${testdir}/info.log 2>&1 + do_analysis $testdir $basefile $outfile + + do_msg "DONE" +} + +# Main +############################################################################### + + +# Check if --exec option is passed ($exec is defined) +if test ${exec+defined}; then + tempdir=$PWD/tmp/${prefix}.$$ + logfile=${tempdir}/${prefix}-time.log + + mkdir -p $tempdir + rm -f $logfile + cd $tempdir + + do_exec +fi + +# Check if --parse option is passed ($parse is defined) +if test ${parse+defined}; then + if [ -z "$parse" ]; then + tempdir=$PWD/tmp + else + tempdir=$parse + fi + logfile=${tempdir}/${prefix}-parse.log + + mkdir -p $tempdir + rm -f $logfile + cd $tempdir + + do_parse "$tempdir" +fi + +exit 0 diff --git a/contrib/scaling/Makefile b/contrib/scaling/Makefile index 172a9db100d..c0e871c608d 100644 --- a/contrib/scaling/Makefile +++ b/contrib/scaling/Makefile @@ -2,9 +2,13 @@ PROGS = orte_no_op mpi_no_op all: $(PROGS) -CC = mpicc CFLAGS = -O -CFLAGS_INTERNAL = -I../../.. -I../../../orte/include -I../../../opal/include + +orte_no_op: + ortecc -o orte_no_op orte_no_op.c + +mpi_no_op: + mpicc -o mpi_no_op mpi_no_op.c clean: rm -f $(PROGS) *~ diff --git a/contrib/scaling/scaling.pl b/contrib/scaling/scaling.pl old mode 100644 new mode 100755 index 531076211fd..0e7458ebd5c --- a/contrib/scaling/scaling.pl +++ b/contrib/scaling/scaling.pl @@ -2,117 +2,365 @@ # # Copyright (c) 2012 Los Alamos National Security, Inc. # All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. 
use strict; +use Getopt::Long; # globals -my $showme_arg = 0; -my $num_nodes = 0; +my $num_nodes = 2; my $my_arg; my $reps = 1; +my $usedvm = 0; +my $usesrun = 0; +my $usempirun = 0; +my $useaprun = 0; +my $useaprun = 0; +my $myapp; +my $runall = 0; +my $rawoutput = 0; +my $myresults; +my @csvrow; my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op); -my @options = ("", "", "", "-mca mpi_preconnect_mpi 1"); - -# Cannot use the usual GetOpts library as the user might -# be passing -options to us! So have to -# parse the options ourselves to look for help and showme -my $i = 0; -foreach $my_arg (@ARGV) { - if ($my_arg eq "-h" || - $my_arg eq "--h" || - $my_arg eq "-help" || - $my_arg eq "--help") { - print "Options: - --showme Show the actual commands without executing them - --nodes Number of nodes to run the test across - --reps Number of times to run each test (for statistics) - --help | -h This help list\n"; - exit; - } elsif ($my_arg eq "-showme" || - $my_arg eq "--showme") { - $showme_arg = 1; - } elsif ($my_arg eq "-nodes" || - $my_arg eq "--nodes") { - $num_nodes = @ARGV[$i+1]; - } elsif ($my_arg eq "--reps" || - $my_arg eq "-reps") { - $reps = @ARGV[$i+1]; - } - $i++; +my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1"); +my @starters = qw(mpirun orte-submit srun aprun orterun); +my @starteroptions = ("-npernode 1 --novm", + "--hnp file:dvm_uri -pernode", + "--distribution=cyclic", + "-N 1", + "-npernode 1 --novm"); + +# Set to true if the script should merely print the cmds +# it would run, but don't run them +my $SHOWME = 0; +# Set to true to suppress most informational messages. 
+my $QUIET = 0; +# Set to true if we just want to see the help message +my $HELP = 0; + +GetOptions( + "help" => \$HELP, + "quiet" => \$QUIET, + "showme" => \$SHOWME, + "reps=s" => \$reps, + "dvm" => \$usedvm, + "srun" => \$usesrun, + "aprun" => \$useaprun, + "mpirun" => \$usempirun, + "myapp=s" => \$myapp, + "all" => \$runall, + "results=s" => \$myresults, + "rawout" => \$rawoutput, +) or die "unable to parse options, stopped"; + +if ($HELP) { + print <$myresults" || die "file could not be opened"; +} + +# determine the number of nodes - doesn't +# matter which starter we use +$cmd = $starters[0] . " " . $starteroptions[0] . " hostname"; +print "CMD: $cmd\n"; +$output = `$cmd`; +print "$output\n"; +@lines = split(/\n/, $output); +$num_nodes = $#lines + 1; + +# collect the complete list of starters +my $mystarters; +$idx=1; +$mystarters = $starters[0]; +while ($idx < $#starters) { + $mystarters = $mystarters . "," . $starters[$idx]; + $idx = $idx + 1; +} + +# get the local date and time +my ($sec,$min,$hour,$day,$month,$yr19,@rest) = localtime(time); + +# start by printing out the resulting configuration +print "\n--------------------------------------------------\n"; +print "\nTest configuration:\n"; +print "\tDate:\t" . "$day-".++$month. "-".($yr19+1900) . " " . sprintf("%02d",$hour).":".sprintf("%02d",$min).":".sprintf("%02d",$sec) . "\n";; +print "\tNum nodes:\t" . $num_nodes . "\n"; +print "\tStarters:\t" . $mystarters . "\n"; print "\n--------------------------------------------------\n"; -foreach $test (@tests) { - $option = shift(@options); - if (-e $test) { - # pre-position the executable - $cmd = "mpirun -npernode 1 $test 2>&1"; - system($cmd); - $n = 1; - while ($n <= $num_nodes) { - $cmd = "time mpirun -npernode 1 -max-vm-size $n $option $test 2>&1"; - print $cmd . 
"\n"; - if (0 == $showme_arg) { - for (1..$reps) { - $toggle = 1; - $output = `$cmd`; - @lines = split(/\n/, $output); - foreach $line (@lines) { - if (0 <= index($line, "user") || - 0 <= index($line, "sys") || - 0 <= index($line, "real") || - 0 <= index($line, "elapsed")) { - $idx = 0; - @results = split(/\s+/,$line, 4); - foreach $res (@results) { - if ($idx < 3) { - print $res; - if (0 == $toggle) { - print " "; - $toggle = 1; - } else { - print " "; - $toggle = 0; - } + +# and tag the output file as well +if ($myresults) { + print FILE "Test configuration:\n"; + print FILE "Date:\t" . "$day-".++$month. "-".($yr19+1900) . " " . sprintf("%02d",$hour).":".sprintf("%02d",$min).":".sprintf("%02d",$sec) . "\n";; + print FILE "Num nodes:\t" . $num_nodes . "\n"; + print FILE "Starters:\t" . $mystarters . "\n"; +} + +my $index = 0; + +sub runcmd() +{ + for (1..$reps) { + $output = `$cmd`; + if ($myresults && $rawoutput) { + print FILE $n . " " . $output . "\n"; + } + @lines = split(/\n/, $output); + foreach $line (@lines) { + if (0 <= index($line, "real") || + 0 <= index($line, "elapsed")) { + # we know that at least one item of interest is + # in this line, so let's look for it - start + # by getting rid of any leading whitespace + $line =~ s/^\s+//; + @results = split (/ +/,$line); + $idx = 0; + foreach $res (@results) { + # we are only interested in the real or elapsed time + my $strloc = index($res, "real"); + if (0 <= $strloc) { + # some systems put the number in front of + # this word, and some append the word to + # the number - consider both cases + if (0 == $strloc) { + if (0 == $idx) { + # it must be in the next location + push @csvrow,$results[1]; + } else { + # it must be in the prior location + push @csvrow,$results[$idx-1]; + } + } else { + # take the portion of the string up to the tag + push @csvrow,substr($res, 0, $strloc); + } + } else { + $strloc = index($res, "elapsed"); + if (0 <= $strloc) { + # some systems put the number in front of + # this word, 
and some append the word to + # the number - consider both cases + if (0 == $strloc) { + if (0 == $idx) { + # it must be in the next location + push @csvrow,$results[1]; + } else { + # it must be in the prior location + push @csvrow,$results[$idx-1]; } - $idx = $idx + 1; + } else { + # take the portion of the string up to the tag + push @csvrow,substr($res, 0, $strloc); } - print "\n"; } } + $idx = $idx + 1; } - print "\n"; } - $n = 2 * $n; } - if ($n != (2 * $num_nodes)) { - $cmd = "time mpirun -npernode 1 $option $test 2>&1"; - print $cmd . "\n"; - if (0 == $showme_arg) { - for (1..$reps) { - $output = `$cmd`; - $output =~ s/(.+)\n.*/$1/; - @results = split(/\s+/,$output); - print $results[0] . " " . $results[1] . " " . $results[2] . "\n"; + } + # we have now completed all the reps, so log the results + if ($myresults) { + my $myout; + my $mycnt=0; + while ($mycnt <= $#csvrow) { + if (0 == $mycnt) { + $myout = $csvrow[$mycnt]; + } else { + $myout = $myout . "," . $csvrow[$mycnt]; + } + $mycnt = $mycnt + 1; + } + print FILE "$myout\n"; + # clear the output + @csvrow = (); + } + print "\n"; +} + +foreach $starter (@starters) { + # if we are going to use the dvm, then we + if ($starter eq "orte-submit") { + # need to start it + if (-e "dvm_uri") { + system("rm -f dvm_uri"); + } + $cmd = "orte-dvm --report-uri dvm_uri 2>&1 &"; + print $cmd . "\n"; + if (!$SHOWME) { + system($cmd); + # wait for the rendezvous file to appear + while (! -e "dvm_uri") { + sleep(1); + } + $havedvm = 1; + } + } + + if ($myresults) { + print FILE "\n\n$starter\n\n"; + } + my $testnum = 0; + foreach $test (@tests) { + $option = $options[$testnum]; + if (-e $test) { + if ($myresults) { + print FILE "#nodes,$test\n"; + } + if (!$SHOWME) { + # pre-position the executable + $cmd = $starter . $starteroptions[$index] . " $test 2>&1"; + system($cmd); + } + $n = 1; + while ($n <= $num_nodes) { + push @csvrow,$n; + $cmd = "time " . $starter . " " . $starteroptions[$index] . 
" -n $n $option $test 2>&1"; + print $cmd . "\n"; + if (!$SHOWME) { + runcmd(); + } + $n = 2 * $n; + } + if (0 != $num_nodes & $n) { + $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option $test 2>&1"; + print $cmd . "\n"; + if (!$SHOWME) { + runcmd(); } } + print "\n--------------------------------------------------\n"; + } else { + print "Test " . $test . " was not found - test skipped\n"; + print "\n--------------------------------------------------\n"; + } + $testnum = $testnum + 1; + } + if ($havedvm) { + if (!$SHOWME) { + $cmd = "orte-submit --hnp file:dvm_uri --terminate"; + system($cmd); + } + if (-e "dvm_uri") { + system("rm -f dvm_uri"); } - print "\n--------------------------------------------------\n"; - } else { - print "Test " . $test . " was not found - test skipped\n"; - print "\n--------------------------------------------------\n"; } + $index = $index + 1; } + +if ($myresults) { + close(FILE); +} + diff --git a/contrib/symbol-hiding.pl b/contrib/symbol-hiding.pl new file mode 100755 index 00000000000..ceafb3ba52c --- /dev/null +++ b/contrib/symbol-hiding.pl @@ -0,0 +1,154 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ + +use strict; +use Getopt::Long; + +# globals +my $myfile; +my $mylib; +my $myprefix; +my $mysuffix; +my $mycapprefix; + +# Set to true if the script should merely check for symbols in +# the library that are not in the provided output file - useful +# for determining if something has changed prior to doing an update +my $CHECK_ONLY = 0; +# Set to true to suppress most informational messages. Only missing +# symbols will be printed. 
+my $QUIET = 0; +# Set to true if we just want to see the help message +my $HELP = 0; +# Set to true if we want to reverse the hiding direction +my $REVERSE = 0; + + +GetOptions( + "help" => \$HELP, + "quiet" => \$QUIET, + "check-only" => \$CHECK_ONLY, + "prefix=s" => \$myprefix, + "suffix=s" => \$mysuffix, + "lib=s" => \$mylib, + "file=s" => \$myfile, + "reverse" => \$REVERSE, +) or die "unable to parse options, stopped"; + +if ($HELP) { + print <$myfile" || die "file could not be opened"; +} +sub checkCase { + if ($_[0] =~ /^[[:upper:]]/) { + return 1; + } + else { + return 0; + } +} + +foreach my $sym (@symbols) { + my $out; + if ($REVERSE) { + # if the first char is a cap, then use the cap prefix + if (checkCase($sym)) { + $out = "#define " . $mycapprefix . $sym . $mysuffix; + } else { + $out = "#define " . $myprefix . $sym . $mysuffix; + } + } else { + $out = "#define " . $sym; + } + my $diff = $len - length($sym); + for (my $i=0; $i < $diff; $i++) { + $out = $out . " "; + } + if ($REVERSE) { + $out = $out . $sym . "\n"; + } else { + # if the first char is a cap, then use the cap prefix + if (checkCase($sym)) { + $out = $out . $mycapprefix . $sym . $mysuffix . "\n"; + } else { + $out = $out . $myprefix . $sym . $mysuffix . 
"\n"; + } + } + if ($myfile ne "") { + print FILE $out; + } else { + print $out; + } +} +if ($myfile ne "") { + close FILE; +} + diff --git a/examples/Hello_oshmem.java b/examples/Hello_oshmem.java deleted file mode 100644 index dc6595858a8..00000000000 --- a/examples/Hello_oshmem.java +++ /dev/null @@ -1,12 +0,0 @@ -import shmem.*; - -public class Hello_oshmem -{ - public static void main(String[] args) - { - ShMem.startPEs(0); - int nproc = ShMem.getNumPEs(); - int proc = ShMem.getMyPE(); - System.out.println("Hello, world, I am "+ proc +" of "+ nproc); - } -} diff --git a/examples/Makefile b/examples/Makefile index 372ca32d19e..92b53ddda20 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -53,8 +53,7 @@ EXAMPLES = \ hello_oshmem \ hello_oshmemfh \ Hello.class \ - Hello_oshmem.class \ - ring_c \ + ring_c \ ring_cxx \ ring_mpifh \ ring_usempi \ @@ -62,17 +61,12 @@ EXAMPLES = \ ring_oshmem \ ring_oshmemfh \ Ring.class \ - Ring_oshmem.class \ connectivity_c \ oshmem_shmalloc \ oshmem_circular_shift \ - oshmem_circular_shift.class \ oshmem_max_reduction \ - oshmem_max_reduction.class \ oshmem_strided_puts \ - oshmem_strided_puts.class \ - oshmem_symmetric_data \ - oshmem_symmetric_data.class + oshmem_symmetric_data # Default target. Always build the C MPI examples. Only build the @@ -122,14 +116,6 @@ oshmem: $(MAKE) hello_oshmemfh; \ $(MAKE) ring_oshmemfh; \ fi - @ if oshmem_info --parsable | grep bindings:java:yes >/dev/null; then \ - $(MAKE) Hello_oshmem.class; \ - $(MAKE) Ring_oshmem.class; \ - $(MAKE) oshmem_circular_shift.class; \ - $(MAKE) oshmem_max_reduction.class; \ - $(MAKE) oshmem_strided_puts.class; \ - $(MAKE) oshmem_symmetric_data.class; \ - fi # The usual "clean" target @@ -162,35 +148,23 @@ hello_oshmem: hello_oshmem_c.c $(SHMEMCC) $(CFLAGS) $? -o $@ hello_oshmemfh: hello_oshmemfh.f90 $(SHMEMFC) $(FCFLAGS) $? -o $@ -Hello_oshmem.class: Hello_oshmem.java - $(JAVAC) Hello_oshmem.java ring_oshmem: ring_oshmem_c.c $(SHMEMCC) $(CFLAGS) $? 
-o $@ ring_oshmemfh: ring_oshmemfh.f90 $(SHMEMFC) $(FCFLAGS) $? -o $@ -Ring_oshmem.class: Ring_oshmem.java - $(JAVAC) Ring_oshmem.java oshmem_shmalloc: oshmem_shmalloc.c $(SHMEMCC) $(CCFLAGS) $? -o $@ oshmem_circular_shift: oshmem_circular_shift.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_circular_shift.class: oshmem_circular_shift.java - $(JAVAC) oshmem_circular_shift.java oshmem_max_reduction: oshmem_max_reduction.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_max_reduction.class: oshmem_max_reduction.java - $(JAVAC) oshmem_max_reduction.java oshmem_strided_puts: oshmem_strided_puts.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_strided_puts.class: oshmem_strided_puts.java - $(JAVAC) oshmem_strided_puts.java oshmem_symmetric_data: oshmem_symmetric_data.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_symmetric_data.class: oshmem_symmetric_data.java - $(JAVAC) oshmem_symmetric_data.java diff --git a/examples/Makefile.include b/examples/Makefile.include index 2ac1d873cba..7707521c943 100644 --- a/examples/Makefile.include +++ b/examples/Makefile.include @@ -54,10 +54,4 @@ EXTRA_DIST += \ examples/oshmem_strided_puts.c \ examples/oshmem_symmetric_data.c \ examples/Hello.java \ - examples/Ring.java \ - examples/Hello_oshmem.java \ - examples/Ring_oshmem.java \ - examples/oshmem_circular_shift.java \ - examples/oshmem_max_reduction.java \ - examples/oshmem_strided_puts.java \ - examples/oshmem_symmetric_data.java + examples/Ring.java diff --git a/examples/Ring_oshmem.java b/examples/Ring_oshmem.java deleted file mode 100644 index 2bc126f12da..00000000000 --- a/examples/Ring_oshmem.java +++ /dev/null @@ -1,54 +0,0 @@ -import shmem.*; - -public class Ring_oshmem -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - int nproc = ShMem.getNumPEs(); - int proc = ShMem.getMyPE(); - Addr rbuf = new Addr(4); // One integer value. - rbuf.putInt(-1); - int message = 10; - - // Calculate the PE number of the next process in the ring. 
Use the - // modulus operator so that the last process "wraps around" to PE 0. - int next = (proc + 1) % nproc; - - if(proc == 0) - { - System.out.println("Process 0 puts message "+ message +" to "+ - next +" ("+ nproc +" processes in ring)"); - - rbuf.putInt(message, next); - } - - // Pass the message around the ring. The exit mechanism works as - // follows: the message (a positive integer) is passed around the - // ring. Each time it passes PE 0, it is decremented. When each - // processes receives a message containing a 0 value, it passes the - // message on to the next process and then quits. By passing the 0 - // message first, every process gets the 0 message and can quit - // normally. - - while(message > 0) - { - rbuf.waitUntilInt(ShMem.CMP_EQ, message); - - if(proc == 0) - { - message--; - System.out.println("Process 0 decremented value: "+ message); - } - - rbuf.putInt(message, next); - - if(proc != 0) - message--; - } - - // All done - rbuf.free(); - System.out.println("Process "+ proc +" exiting"); - } -} diff --git a/examples/hello_oshmem_c.c b/examples/hello_oshmem_c.c index 079e5d49704..6bb0d08c062 100644 --- a/examples/hello_oshmem_c.c +++ b/examples/hello_oshmem_c.c @@ -12,16 +12,25 @@ #include #include "shmem.h" +#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200 +#error This application uses API 1.2 and up +#endif + int main(int argc, char* argv[]) { int proc, nproc; + char name[SHMEM_MAX_NAME_LEN]; + int major, minor; - start_pes(0); - nproc = _num_pes(); - proc = _my_pe(); + shmem_init(); + nproc = shmem_n_pes(); + proc = shmem_my_pe(); + shmem_info_get_name(name); + shmem_info_get_version(&major, &minor); - printf("Hello, world, I am %d of %d\n", - proc, nproc); + printf("Hello, world, I am %d of %d: %s (version: %d.%d)\n", + proc, nproc, name, major, minor); + shmem_finalize(); return 0; } diff --git a/examples/hello_oshmemfh.f90 b/examples/hello_oshmemfh.f90 index 283697bad30..09404c3bb8a 100644 --- 
a/examples/hello_oshmemfh.f90 +++ b/examples/hello_oshmemfh.f90 @@ -13,11 +13,17 @@ program hello_oshmem include 'shmem.fh' integer proc, nproc - integer my_pe, num_pes + integer shmem_my_pe, shmem_n_pes + integer major, minor, len + character(len=SHMEM_MAX_NAME_LEN) name - call START_PES(0) - proc = MY_PE() - nproc = NUM_PES() + call SHMEM_INIT() + proc = SHMEM_MY_PE() + nproc = SHMEM_N_PES() + call SHMEM_INFO_GET_VERSION(major, minor) + call SHMEM_INFO_GET_NAME(name) + + write(*, '("Hello, world, I am ", i2, " of ", i2, ": (version: ", i0, ".", i0, ")")') proc, nproc, major, minor + call SHMEM_FINALIZE() - write(*, '("Hello, world, I am ", i2, " of ", i2)') proc, nproc end program hello_oshmem diff --git a/examples/oshmem_circular_shift.c b/examples/oshmem_circular_shift.c index cfd6c0aa8b8..f3e409e492d 100644 --- a/examples/oshmem_circular_shift.c +++ b/examples/oshmem_circular_shift.c @@ -11,6 +11,8 @@ #include #include +#warning This application uses deprecated API see http://www.open-mpi.org/ + int main (void) { static int aaa, bbb; diff --git a/examples/oshmem_circular_shift.java b/examples/oshmem_circular_shift.java deleted file mode 100644 index 776d166d270..00000000000 --- a/examples/oshmem_circular_shift.java +++ /dev/null @@ -1,24 +0,0 @@ -import shmem.*; - -public class oshmem_circular_shift -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPEs = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(), - peer = (myPE + 1) % numPEs; - - int[] aaa = new int[1]; - Addr bbb = new Addr(4); - - System.out.println("Process "+ myPE +" gets message from "+ - peer +" ("+ numPEs +" processes in ring)"); - - bbb.getInt(aaa, peer); - ShMem.barrierAll(); - bbb.free(); - System.out.println("Process "+ myPE +" exiting"); - } -} diff --git a/examples/oshmem_max_reduction.c b/examples/oshmem_max_reduction.c index 2f0139313a3..1bf026292ea 100644 --- a/examples/oshmem_max_reduction.c +++ b/examples/oshmem_max_reduction.c @@ -15,6 
+15,8 @@ #include +#warning This application uses deprecated API see http://www.open-mpi.org/ + long pSync[_SHMEM_BCAST_SYNC_SIZE]; #define N 3 diff --git a/examples/oshmem_max_reduction.java b/examples/oshmem_max_reduction.java deleted file mode 100644 index 499427d656f..00000000000 --- a/examples/oshmem_max_reduction.java +++ /dev/null @@ -1,43 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_max_reduction -{ - private static final int N = 3; - - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPEs = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(); - - Addr src = new Addr(8 * N), // long is 8 bytes. - dst = new Addr(8 * N), - pWrk = new Addr(8 * ShMem.REDUCE_SYNC_SIZE); - PSync pSync = new PSync(ShMem.BCAST_SYNC_SIZE); - - LongBuffer srcBuf = src.asLongBuffer(), - dstBuf = dst.asLongBuffer(); - - for(int i = 0; i < N; i++) - srcBuf.put(i, myPE + i); - - ShMem.barrierAll(); - dst.maxToAllLong(src, N, 0, 0, numPEs, pWrk, pSync); - - StringBuilder sb = new StringBuilder(); - sb.append(myPE +"/"+ numPEs +" dst ="); - - for(int i = 0; i < N; i++) - sb.append(" "+ dstBuf.get(i)); - - sb.append("\n"); - System.out.print(sb); - - src.free(); - dst.free(); - pWrk.free(); - pSync.free(); - } -} diff --git a/examples/oshmem_shmalloc.c b/examples/oshmem_shmalloc.c index bc31d95dfaa..2c3f4cb6742 100644 --- a/examples/oshmem_shmalloc.c +++ b/examples/oshmem_shmalloc.c @@ -15,6 +15,8 @@ #include +#warning This application uses deprecated API see http://www.open-mpi.org/ + int main(void) { long *x; diff --git a/examples/oshmem_strided_puts.c b/examples/oshmem_strided_puts.c index 839417ff933..5ec24419779 100644 --- a/examples/oshmem_strided_puts.c +++ b/examples/oshmem_strided_puts.c @@ -27,6 +27,8 @@ #include #include +#warning This application uses deprecated API see http://www.open-mpi.org/ + int main(void) { short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; diff --git a/examples/oshmem_strided_puts.java 
b/examples/oshmem_strided_puts.java deleted file mode 100644 index 0213213e2ef..00000000000 --- a/examples/oshmem_strided_puts.java +++ /dev/null @@ -1,33 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_strided_puts -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - int me = ShMem.getMyPE(); - - short[] source = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - Addr target = new Addr(2 * 10); // short is 2 bytes. - - if(me == 0) - { - /* put 10 words into target on PE 1 */ - target.iPutShort(source, 1, 2, 5, 1); - } - - ShMem.barrierAll(); // sync sender and receiver - - if(me == 1) - { - ShortBuffer buf = target.asShortBuffer(); - System.out.printf("target on PE %d is %d %d %d %d %d\n", me, - buf.get(0), buf.get(1), buf.get(2), - buf.get(3), buf.get(4)); - } - - ShMem.barrierAll(); // sync before exiting - target.free(); - } -} diff --git a/examples/oshmem_symmetric_data.c b/examples/oshmem_symmetric_data.c index 2234e2e06ea..9aaaeabac1a 100644 --- a/examples/oshmem_symmetric_data.c +++ b/examples/oshmem_symmetric_data.c @@ -11,6 +11,8 @@ #include #include +#warning This application uses deprecated API see http://www.open-mpi.org/ + #define SIZE 16 int main(int argc, char* argv[]) diff --git a/examples/oshmem_symmetric_data.java b/examples/oshmem_symmetric_data.java deleted file mode 100644 index d21ef48955e..00000000000 --- a/examples/oshmem_symmetric_data.java +++ /dev/null @@ -1,49 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_symmetric_data -{ - private static final int SIZE = 16; - - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPE = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(); - - int[] source = new int[SIZE]; - Addr target = new Addr(4 * SIZE); // int is 4 bytes - - if(myPE == 0) - { - // initialize array - for(int i = 0; i < SIZE; i++) - source[i] = i; - - // local, not symmetric - // static makes it symmetric - // put "size" words into 
target on each PE - for(int i = 1; i < numPE; i++) - target.putInt(source, i); - } - - ShMem.barrierAll(); // sync sender and receiver - - if(myPE != 0) - { - StringBuilder sb = new StringBuilder(); - sb.append("Target on PE "+ myPE +" is \t"); - IntBuffer buf = target.asIntBuffer(); - - for(int i = 0; i < SIZE; i++) - sb.append(buf.get(i) +" \t"); - - sb.append('\n'); - System.out.print(sb); - } - - ShMem.barrierAll(); // sync before exiting - target.free(); - } -} diff --git a/examples/ring_oshmem_c.c b/examples/ring_oshmem_c.c index 0823b5e1fc7..5fe1c7f65dc 100644 --- a/examples/ring_oshmem_c.c +++ b/examples/ring_oshmem_c.c @@ -11,15 +11,19 @@ #include #include +#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200 +#error This application uses API 1.2 and up +#endif + int main (int argc, char * argv[]) { static int rbuf = -1; int proc, nproc, next; int message = 10; - start_pes(0); - proc = _my_pe(); - nproc = _num_pes(); + shmem_init(); + nproc = shmem_n_pes(); + proc = shmem_my_pe(); /* Calculate the PE number of the next process in the ring. Use the modulus operator so that the last process "wraps around" to PE 0. */ @@ -51,6 +55,7 @@ int main (int argc, char * argv[]) --message; } } + shmem_finalize(); /* All done */ diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 538f4058a99..1f34701e037 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -18,9 +18,10 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -148,7 +149,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, local_size = ompi_group_size (local_group); } - if (NULL != remote_group) { + if ( (NULL != remote_group) && (&ompi_mpi_group_null.group != remote_group) ) { remote_size = ompi_group_size (remote_group); } @@ -172,15 +173,14 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } else { newcomm->c_local_group = local_group; OBJ_RETAIN(newcomm->c_local_group); - ompi_group_increment_proc_count(newcomm->c_local_group); } newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank; /* Set remote group and duplicate the local comm, if applicable */ - if (0 < remote_size) { + if ( NULL != remote_group ) { ompi_communicator_t *old_localcomm; - if (NULL == remote_group || &ompi_mpi_group_null.group == remote_group) { + if (&ompi_mpi_group_null.group == remote_group) { ret = ompi_group_incl(oldcomm->c_remote_group, remote_size, remote_ranks, &newcomm->c_remote_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -189,7 +189,6 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } else { newcomm->c_remote_group = remote_group; OBJ_RETAIN(newcomm->c_remote_group); - ompi_group_increment_proc_count(newcomm->c_remote_group); } newcomm->c_flags |= OMPI_COMM_INTER; @@ -256,9 +255,6 @@ int ompi_comm_group ( ompi_communicator_t* comm, ompi_group_t **group ) /* increment reference counters for the group */ OBJ_RETAIN(comm->c_local_group); - /* increase also the reference counter for the procs */ - ompi_group_increment_proc_count(comm->c_local_group); - *group = comm->c_local_group; return OMPI_SUCCESS; } @@ -278,6 +274,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, int *allranks=NULL; int *rranks=NULL; int rc = OMPI_SUCCESS; + ompi_group_t *remote_group = NULL; /* silence clang warning. 
newcomm should never be NULL */ if (OPAL_UNLIKELY(NULL == newcomm)) { @@ -286,6 +283,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, if ( OMPI_COMM_IS_INTER(comm) ) { int tsize; + remote_group = &ompi_mpi_group_null.group; tsize = ompi_comm_remote_size(comm); allranks = (int *) malloc ( tsize * sizeof(int)); @@ -348,8 +346,8 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, comm->error_handler, /* error handler */ false, /* dont copy the topo */ group, /* local group */ - NULL /* remote group */ - ); + remote_group); /* remote group */ + if ( OMPI_SUCCESS != rc ) { goto exit; } @@ -432,7 +430,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, int rc=OMPI_SUCCESS; ompi_communicator_t *newcomp = NULL; int *lranks=NULL, *rranks=NULL; - ompi_group_t * local_group=NULL, * remote_group=NULL; + ompi_group_t * local_group=NULL, *remote_group=NULL; ompi_comm_allgatherfct *allgatherfct=NULL; @@ -570,8 +568,6 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, goto exit; } - ompi_group_increment_proc_count(local_group); - mode = OMPI_COMM_CID_INTER; } else { rranks = NULL; @@ -603,7 +599,6 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, } if ( inter ) { - ompi_group_decrement_proc_count (local_group); OBJ_RELEASE(local_group); if (NULL != newcomp->c_local_comm) { snprintf(newcomp->c_local_comm->c_name, MPI_MAX_OBJECT_NAME, @@ -704,7 +699,7 @@ static int ompi_comm_split_type_get_part (ompi_group_t *group, int *results, int int include = false; if (ompi_proc_is_sentinel (proc)) { - opal_process_name_t proc_name = ompi_proc_sentinel_to_name ((intptr_t) proc); + opal_process_name_t proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc); u16ptr = &locality; @@ -1988,27 +1983,21 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, count on the proc pointers This is just a quick fix, and will be looking for a better solution */ - OBJ_RELEASE( 
comm->c_local_group ); - /* silence clang warning about a NULL pointer dereference */ - assert (NULL != comm->c_local_group); - OBJ_RELEASE( comm->c_local_group ); - - /* allocate a group structure for the new communicator */ - comm->c_local_group = ompi_group_allocate(num_procs); + if (comm->c_local_group) { + OBJ_RELEASE( comm->c_local_group ); + } - /* free the malloced proc pointers */ - free(comm->c_local_group->grp_proc_pointers); + if (comm->c_remote_group) { + OBJ_RELEASE( comm->c_remote_group ); + } - /* set the group information */ - comm->c_local_group->grp_proc_pointers = proc_pointers; + /* allocate a group structure for the new communicator */ + comm->c_local_group = ompi_group_allocate_plist_w_procs (proc_pointers, num_procs); /* set the remote group to be the same as local group */ comm->c_remote_group = comm->c_local_group; OBJ_RETAIN( comm->c_remote_group ); - /* retain these proc pointers */ - ompi_group_increment_proc_count(comm->c_local_group); - /* set the rank information */ comm->c_local_group->grp_my_rank = my_rank; comm->c_my_rank = my_rank; diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index a7f302bbd4c..b2200bdb71e 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -425,7 +425,6 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) } if (NULL != comm->c_local_group) { - ompi_group_decrement_proc_count (comm->c_local_group); OBJ_RELEASE ( comm->c_local_group ); comm->c_local_group = NULL; if ( OMPI_COMM_IS_INTRA(comm) ) { @@ -438,7 +437,6 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) } if (NULL != comm->c_remote_group) { - ompi_group_decrement_proc_count (comm->c_remote_group); OBJ_RELEASE ( comm->c_remote_group ); comm->c_remote_group = NULL; } diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 1d189287626..5a62ac59096 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -11,9 
+11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -481,39 +481,63 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, { ompi_datatype_args_t* args = (ompi_datatype_args_t*)datatype->args; int next_index = OMPI_DATATYPE_MAX_PREDEFINED; + void *packed_description = datatype->packed_description; void* recursive_buffer; - if( NULL == datatype->packed_description ) { - if( ompi_datatype_is_predefined(datatype) ) { - datatype->packed_description = malloc(2 * sizeof(int)); - } else if( NULL == args ) { - return OMPI_ERROR; + if (NULL == packed_description) { + if (opal_atomic_cmpset (&datatype->packed_description, NULL, (void *) 1)) { + if( ompi_datatype_is_predefined(datatype) ) { + packed_description = malloc(2 * sizeof(int)); + } else if( NULL == args ) { + return OMPI_ERROR; + } else { + packed_description = malloc(args->total_pack_size); + } + recursive_buffer = packed_description; + __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); + + if (!ompi_datatype_is_predefined(datatype)) { + args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char *) packed_description); + } + + opal_atomic_wmb (); + datatype->packed_description = packed_description; } else { - datatype->packed_description = malloc(args->total_pack_size); + /* another thread beat us to it */ + packed_description = datatype->packed_description; } - recursive_buffer = datatype->packed_description; - __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); - 
if( !ompi_datatype_is_predefined(datatype) ) { - args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char*)datatype->packed_description); + } + + if ((void *) 1 == packed_description) { + struct timespec interval = {.tv_sec = 0, .tv_nsec = 1000}; + + /* wait until the packed description is updated */ + while ((void *) 1 == datatype->packed_description) { + nanosleep (&interval, NULL); } + + packed_description = datatype->packed_description; } - *packed_buffer = (const void*)datatype->packed_description; + *packed_buffer = (const void *) packed_description; return OMPI_SUCCESS; } size_t ompi_datatype_pack_description_length( ompi_datatype_t* datatype ) { + void *packed_description = datatype->packed_description; + if( ompi_datatype_is_predefined(datatype) ) { return 2 * sizeof(int); } - if( NULL == datatype->packed_description ) { + if( NULL == packed_description || (void *) 1 == packed_description) { const void* buf; int rc; rc = ompi_datatype_get_pack_description(datatype, &buf); - if( OMPI_SUCCESS != rc ) + if( OMPI_SUCCESS != rc ) { return 0; + } } assert( NULL != (ompi_datatype_args_t*)datatype->args ); assert( NULL != (ompi_datatype_args_t*)datatype->packed_description ); @@ -749,7 +773,8 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* break; /******************************************************************/ case MPI_COMBINER_RESIZED: - /*ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED );*/ + ompi_datatype_create_resized(d[0], a[0], a[1], &datatype); + ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED ); break; /******************************************************************/ case MPI_COMBINER_HINDEXED_BLOCK: diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index 846566a8eb1..56caa2a8b05 100644 --- a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -3,7 
+3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -247,36 +247,33 @@ int32_t ompi_datatype_create_darray(int size, } - /* set displacement and UB correctly. Please read the comment in subarray */ + /** + * We need to shift the content (useful data) of the datatype, so + * we need to force the displacement to be moved. Therefore, we + * cannot use resize as it will only set the soft lb and ub + * markers without moving the data. Instead, we have to create a + * new data, and insert the last_Type with the correct + * displacement. + */ { - ptrdiff_t displs[3], tmp_size; - ompi_datatype_t *types[3]; - int blength[3] = { 1, 1, 1}; + ptrdiff_t displs[2], tmp_size = 1; - displs[1] = st_offsets[start_loop]; - tmp_size = 1; - for (i = start_loop + step ; i != end_loop ; i += step) { + displs[0] = st_offsets[start_loop]; + displs[1] = orig_extent; + for (i = start_loop + step; i != end_loop; i += step) { tmp_size *= gsize_array[i - step]; - displs[1] += tmp_size * st_offsets[i]; + displs[0] += tmp_size * st_offsets[i]; + displs[1] *= gsize_array[i]; } + displs[0] *= orig_extent; - displs[0] = 0; - displs[1] *= orig_extent; - displs[2] = orig_extent; - for (i = 0 ; i < ndims ; i++) { - displs[2] *= gsize_array[i]; - } - if(oldtype->super.flags & (OPAL_DATATYPE_FLAG_USER_LB | OPAL_DATATYPE_FLAG_USER_UB) ) { - types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB; - - rc = ompi_datatype_create_struct(3, blength, displs, types, newtype); - } else { - ompi_datatype_create_resized(lastType, displs[1], displs[2], newtype); - } + *newtype = ompi_datatype_create(lastType->super.desc.used); + rc = 
ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]); ompi_datatype_destroy(&lastType); + opal_datatype_resize( &(*newtype)->super, 0, displs[1] ); /* need to destroy the old type even in error condition, so don't check return code from above until after cleanup. */ - if (MPI_SUCCESS != rc) goto cleanup; + if (MPI_SUCCESS != rc) newtype = NULL; } cleanup: diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 9a75bca7a0b..9311eac7972 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,9 +38,7 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE extent; if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } disp = pDisp[0]; @@ -113,12 +113,13 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - *newType = ompi_datatype_create(1); - if( 0 == count ) - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - else + if( 0 == count ) { + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); + } else { + *newType = ompi_datatype_create(1); ompi_datatype_add( *newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; + return OMPI_SUCCESS; + } } pdt = ompi_datatype_create( count * (2 + 
oldType->super.desc.used) ); disp = pDisp[0]; diff --git a/ompi/datatype/ompi_datatype_create_subarray.c b/ompi/datatype/ompi_datatype_create_subarray.c index 598b0ff3600..72412e559de 100644 --- a/ompi/datatype/ompi_datatype_create_subarray.c +++ b/ompi/datatype/ompi_datatype_create_subarray.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -13,7 +13,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -36,7 +36,7 @@ int32_t ompi_datatype_create_subarray(int ndims, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) { - MPI_Datatype last_type; + ompi_datatype_t *last_type; int32_t i, step, end_loop; MPI_Aint size, displ, extent; @@ -91,30 +91,18 @@ int32_t ompi_datatype_create_subarray(int ndims, } replace_subarray_type: - /* - * Resized will only set the soft lb and ub markers without moving the real - * data inside. Thus, in case the original data contains the hard markers - * (MPI_LB or MPI_UB) we must force the displacement of the data upward to - * the right position AND set the hard markers LB and UB. - * - * NTH: ompi_datatype_create_resized() does not do enough for the general - * pack/unpack functions to work correctly. Until this is fixed always use - * ompi_datatype_create_struct(). 
Once this is fixed remove 1 || below. To - * verify that the regression is fixed run the subarray test in the Open MPI - * ibm testsuite. - */ - if(1 || oldtype->super.flags & (OPAL_DATATYPE_FLAG_USER_LB | OPAL_DATATYPE_FLAG_USER_UB) ) { - MPI_Aint displs[3]; - MPI_Datatype types[3]; - int blength[3] = { 1, 1, 1 }; - - displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent; - types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB; - ompi_datatype_create_struct( 3, blength, displs, types, newtype ); - } else { - ompi_datatype_create_resized(last_type, displ * extent, size * extent, newtype); - } + /** + * We need to shift the content (useful data) of the datatype, so + * we need to force the displacement to be moved. Therefore, we + * cannot use resize as it will only set the soft lb and ub + * markers without moving the data. Instead, we have to create a + * new data, and insert the last_Type with the correct + * displacement. + */ + *newtype = ompi_datatype_create( last_type->super.desc.used ); + ompi_datatype_add( *newtype, last_type, 1, displ * extent, size * extent); ompi_datatype_destroy( &last_type ); + opal_datatype_resize( &(*newtype)->super, 0, size * extent ); return OMPI_SUCCESS; } diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index ab6c3d49229..7b97f902096 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -16,7 +16,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -167,7 +167,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, dense = false; } for (i=0; i < size; i++) { - rc = opal_convert_process_name_to_string(&nstring, &(proc_list[i]->super.proc_name)); + opal_process_name_t proc_name; + if (ompi_proc_is_sentinel (proc_list[i])) { + proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc_list[i]); + } else { + proc_name = proc_list[i]->super.proc_name; + } + rc = opal_convert_process_name_to_string(&nstring, &proc_name); if (OPAL_SUCCESS != rc) { if (!dense) { free(proc_list); @@ -177,7 +183,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, } opal_argv_append_nosize(&members, nstring); free(nstring); - if (NULL == (nstring = (char*)opal_pmix.get_nspace(proc_list[i]->super.proc_name.jobid))) { + if (NULL == (nstring = (char*)opal_pmix.get_nspace(proc_name.jobid))) { opal_argv_free(members); free (proc_list); return OMPI_ERR_NOT_SUPPORTED; @@ -372,7 +378,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, opal_list_append(&ilist, &cd->super); } /* either way, add to the remote list */ - cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); cd->p = proc; opal_list_append(&rlist, &cd->super); } @@ -397,24 +403,19 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, i = 0; OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { opal_value_t *kv; - new_proc_list[i] = cd->p; - /* set the locality */ - new_proc_list[i]->super.proc_flags = OPAL_PROC_NON_LOCAL; - /* have to save it for later */ + proc = cd->p; + new_proc_list[i] = proc ; + /* ompi_proc_complete_init_single() initializes and optionally retrieves + * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. 
since we can live without + * them, we are just fine */ + ompi_proc_complete_init_single(proc); + /* save the locality for later */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCALITY); kv->type = OPAL_UINT16; - kv->data.uint16 = OPAL_PROC_NON_LOCAL; - opal_pmix.store_local(&cd->p->super.proc_name, kv); + kv->data.uint16 = proc->super.proc_flags; + opal_pmix.store_local(&proc->super.proc_name, kv); OBJ_RELEASE(kv); // maintain accounting - /* we can retrieve the hostname at no cost because it - * was provided at connect */ - OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_HOSTNAME, &new_proc_list[i]->super.proc_name, - (char**)&(new_proc_list[i]->super.proc_hostname), OPAL_STRING); - if (OPAL_SUCCESS != rc) { - /* we can live without it */ - new_proc_list[i]->super.proc_hostname = NULL; - } ++i; } /* call add_procs on the new ones */ @@ -441,12 +442,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, i=0; OPAL_LIST_FOREACH(cd, &rlist, ompi_dpm_proct_caddy_t) { new_group_pointer->grp_proc_pointers[i++] = cd->p; + /* retain the proc */ + OBJ_RETAIN(cd->p); } OPAL_LIST_DESTRUCT(&rlist); - /* increment proc reference counters */ - ompi_group_increment_proc_count(new_group_pointer); - /* set up communicator structure */ rc = ompi_comm_set ( &newcomp, /* new comm */ comm, /* old comm */ @@ -465,7 +465,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, goto exit; } - ompi_group_decrement_proc_count (new_group_pointer); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; @@ -515,56 +514,42 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers) int i; opal_namelist_t *nm, *n2; ompi_proc_t *proct; + opal_process_name_t proc_name; - if (OMPI_GROUP_IS_DENSE(group)) { - for (i=0; i < group->grp_proc_count; i++) { - if (NULL == (proct = group->grp_proc_pointers[i])) { - OMPI_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return OMPI_ERR_NOT_FOUND; - } - /* add to the list of peers */ - nm = OBJ_NEW(opal_namelist_t); - 
nm->name = proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); - } + for (i=0; i < group->grp_proc_count; i++) { + if (OMPI_GROUP_IS_DENSE(group)) { + proct = group->grp_proc_pointers[i]; + } else { + proct = ompi_group_peer_lookup(group, i); } - } else { - for (i=0; i < group->grp_proc_count; i++) { - /* lookup this proc_t to get the process name */ - if (NULL == (proct = ompi_group_peer_lookup(group, i))) { - OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); - return OMPI_ERR_NOT_FOUND; - } - /* add to the list of peers */ - nm = OBJ_NEW(opal_namelist_t); - nm->name = proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); + if (NULL == proct) { + OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); + return OMPI_ERR_NOT_FOUND; + } + if (ompi_proc_is_sentinel (proct)) { + proc_name = ompi_proc_sentinel_to_name ((uintptr_t)proct); + } else { + proc_name = proct->super.proc_name; + } + + /* add to the list of peers */ + nm = OBJ_NEW(opal_namelist_t); + nm->name = proc_name; + /* need to maintain an ordered list to ensure the tracker signatures + * match across all procs */ + OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { + if (opal_compare_proc(nm->name, n2->name) < 0) { + opal_list_insert_pos(peers, &n2->super, &nm->super); + nm = NULL; + break; } } + if (NULL != nm) { + /* append to the end */ + opal_list_append(peers, 
&nm->super); + } } - return ORTE_SUCCESS; + return OMPI_SUCCESS; } int ompi_dpm_disconnect(ompi_communicator_t *comm) diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index d7fee7be54f..5c832cc597d 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/errhandler/errhandler_predefined.h" #include "opal/class/opal_pointer_array.h" +#include "opal/mca/pmix/pmix.h" /* @@ -37,6 +39,10 @@ */ opal_pointer_array_t ompi_errhandler_f_to_c_table = {{0}}; +/* + * default errhandler id + */ +static int default_errhandler_id = -1; /* * Class information @@ -157,6 +163,7 @@ int ompi_errhandler_finalize(void) /* JMS Add stuff here checking for unreleased errorhandlers, similar to communicators, info handles, etc. 
*/ + opal_pmix.deregister_errhandler(default_errhandler_id, NULL, NULL); /* Remove errhandler F2C table */ @@ -169,7 +176,7 @@ int ompi_errhandler_finalize(void) ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, - ompi_errhandler_generic_handler_fn_t *func, + ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t lang) { ompi_errhandler_t *new_errhandler; @@ -213,20 +220,33 @@ ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, return new_errhandler; } +/* registration callback */ +void ompi_errhandler_registration_callback(int status, + int errhandler_ref, + void *cbdata) +{ + ompi_errhandler_errtrk_t *errtrk = (ompi_errhandler_errtrk_t*)cbdata; + + default_errhandler_id = errhandler_ref; + errtrk->status = status; + errtrk->active = false; +} + /** - * Default runtime errhandler callback + * Default errhandler callback */ -int ompi_errhandler_runtime_callback(opal_pointer_array_t *errors) { - ompi_rte_error_report_t *err; - int errcode = 1; - - if (NULL != errors && - (NULL != (err = (ompi_rte_error_report_t*)opal_pointer_array_get_item(errors, 0)))) { - errcode = err->errcode; +void ompi_errhandler_callback(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, + void *cbdata) +{ + /* allow the caller to release its data */ + if (NULL != cbfunc) { + cbfunc(cbdata); } - - ompi_mpi_abort(MPI_COMM_WORLD, errcode); - return OMPI_SUCCESS; + /* our default action is to abort */ + ompi_mpi_abort(MPI_COMM_WORLD, status); } /************************************************************************** diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 647836cd67b..0063b6541be 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "opal/prefetch.h" #include "opal/class/opal_object.h" #include "opal/class/opal_pointer_array.h" +#include "opal/mca/pmix/pmix.h" #include "ompi/mca/rte/rte.h" #include "ompi/runtime/mpiruntime.h" @@ -364,29 +366,28 @@ struct ompi_request_t; ompi_errhandler_lang_t language); /** - * Callback function from runtime layer to alert the MPI layer of an error at - * the runtime layer. - * - * @param errors A pointer array containing structs of type - * ompi_rte_error_report_t that consists of at least - * { - * ompi_process_name_t proc; - * int errcode; - * } - * Each RTE is allowed to add additional information - * as required + * Callback function to alert the MPI layer of an error or notification + * from the internal RTE and/or the resource manager. * * This function is used to alert the MPI layer to a specific fault detected by the - * runtime layer. This could be a process failure, a lost connection, or the inability + * runtime layer or host RM. This could be a process failure, a lost connection, or the inability * to send an OOB message. The MPI layer has the option to perform whatever actions it * needs to stabilize itself and continue running, abort, etc. - * - * Upon completion, the error handler should return OMPI_SUCCESS if the error has - * been resolved and no further callbacks are to be executed. Return of any other - * value will cause the RTE to continue executing error callbacks. 
*/ -OMPI_DECLSPEC int ompi_errhandler_runtime_callback(opal_pointer_array_t *errors); - +typedef struct { + volatile bool active; + int status; +} ompi_errhandler_errtrk_t; + +OMPI_DECLSPEC void ompi_errhandler_callback(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, + void *cbdata); + +OMPI_DECLSPEC void ompi_errhandler_registration_callback(int status, + int errhandler_ref, + void *cbdata); /** * Check to see if an errhandler is intrinsic. * diff --git a/ompi/group/group.c b/ompi/group/group.c index e5e2540fd71..d88996590de 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -37,7 +37,6 @@ int ompi_group_free ( ompi_group_t **group ) ompi_group_t *l_group; l_group = (ompi_group_t *) *group; - ompi_group_decrement_proc_count (l_group); OBJ_RELEASE(l_group); *group = MPI_GROUP_NULL; diff --git a/ompi/group/group.h b/ompi/group/group.h index 2c83eaea188..c4ff03b6847 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -16,6 +16,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -153,6 +155,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_group_t *ompi_mpi_group_null_addr; * @return Pointer to new group structure */ OMPI_DECLSPEC ompi_group_t *ompi_group_allocate(int group_size); +ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_size); ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int group_size); @@ -351,7 +354,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, /* replace sentinel value with an actual ompi_proc_t */ ompi_proc_t *real_proc = - (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((intptr_t) proc)); + (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); if (opal_atomic_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { OBJ_RETAIN(real_proc); diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index 5352493c4f3..997aebb213b 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -55,6 +55,24 @@ ompi_predefined_group_t *ompi_mpi_group_null_addr = &ompi_mpi_group_null; * Allocate a new group structure */ ompi_group_t *ompi_group_allocate(int group_size) +{ + /* local variables */ + ompi_proc_t **procs = calloc (group_size, sizeof (ompi_proc_t *)); + ompi_group_t *new_group; + + if (NULL == procs) { + return NULL; + } + + new_group = ompi_group_allocate_plist_w_procs (procs, group_size); + if (NULL == new_group) { + free (procs); + } + + return new_group; +} + +ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_size) { /* local variables */ ompi_group_t * new_group = NULL; @@ -65,28 +83,19 @@ ompi_group_t *ompi_group_allocate(int group_size) new_group = OBJ_NEW(ompi_group_t); if (NULL == new_group) { - goto error_exit; + return NULL; } if (0 > new_group->grp_f_to_c_index) { 
OBJ_RELEASE (new_group); - new_group = NULL; - goto error_exit; + return NULL; } /* * Allocate array of (ompi_proc_t *)'s, one for each * process in the group. */ - new_group->grp_proc_pointers = (struct ompi_proc_t **) - malloc(sizeof(struct ompi_proc_t *) * group_size); - - if (NULL == new_group->grp_proc_pointers) { - /* grp_proc_pointers allocation failed */ - OBJ_RELEASE (new_group); - new_group = NULL; - goto error_exit; - } + new_group->grp_proc_pointers = procs; /* set the group size */ new_group->grp_proc_count = group_size; @@ -95,8 +104,8 @@ ompi_group_t *ompi_group_allocate(int group_size) new_group->grp_my_rank = MPI_UNDEFINED; OMPI_GROUP_SET_DENSE(new_group); - error_exit: - /* return */ + ompi_group_increment_proc_count (new_group); + return new_group; } @@ -266,6 +275,8 @@ static void ompi_group_destruct(ompi_group_t *group) the proc counts are not increased during the constructor, either). */ + ompi_group_decrement_proc_count (group); + /* release thegrp_proc_pointers memory */ if (NULL != group->grp_proc_pointers) { free(group->grp_proc_pointers); @@ -284,7 +295,6 @@ static void ompi_group_destruct(ompi_group_t *group) } if (NULL != group->grp_parent_group_ptr){ - ompi_group_decrement_proc_count(group->grp_parent_group_ptr); OBJ_RELEASE(group->grp_parent_group_ptr); } diff --git a/ompi/group/group_plist.c b/ompi/group/group_plist.c index bb3a271036a..62007154f3b 100644 --- a/ompi/group/group_plist.c +++ b/ompi/group/group_plist.c @@ -14,6 +14,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -61,7 +63,7 @@ static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, con { if (OPAL_UNLIKELY(ompi_proc_is_sentinel (group->grp_proc_pointers[peer_id]))) { ompi_proc_t *proc = - (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((intptr_t) group->grp_proc_pointers[peer_id])); + (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((uintptr_t) group->grp_proc_pointers[peer_id])); if (NULL != proc) { /* replace sentinel value with an actual ompi_proc_t */ group->grp_proc_pointers[peer_id] = proc; diff --git a/ompi/include/mpif-sentinels.h b/ompi/include/mpif-sentinels.h index 59de52e941c..082154cdbbc 100644 --- a/ompi/include/mpif-sentinels.h +++ b/ompi/include/mpif-sentinels.h @@ -26,8 +26,7 @@ ! ! - the "mpi" module bindings ! - the "mpi_f08" module bindings -! - ompi/mpi/fortran/base/constants.h -! - ompi/mpi/runtime/ompi_init.c +! - ompi/mpi/fortran/base/gen-mpi-mangling.pl ! ! MPI_BOTTOM is only used where choice buffers can be used (meaning @@ -41,7 +40,7 @@ ! don't need another interface for MPI_COMM_SPAWN. character MPI_ARGV_NULL(1) ! Ditto for MPI_ARGVS_NULL / MPI_COMM_SPAWN_MULTIPLE. - character MPI_ARGVS_NULL(1) + character MPI_ARGVS_NULL(1, 1) ! MPI_ERRCODES_IGNORE has similar rationale to MPI_ARGV_NULL. The ! F77 functions are all smart enough to check that the errcodes ! parameter is not ERRCODES_IGNORE before assigning values into it @@ -53,8 +52,10 @@ integer MPI_STATUS_IGNORE(MPI_STATUS_SIZE) ! Ditto for MPI_STATUSES_IGNORE integer MPI_STATUSES_IGNORE(MPI_STATUS_SIZE, 1) - integer MPI_UNWEIGHTED - integer MPI_WEIGHTS_EMPTY +! Ditto for MPI_UNWEIGHTED + integer MPI_UNWEIGHTED(1) +! 
Ditto for MPI_WEIGHTS_EMPTY + integer MPI_WEIGHTS_EMPTY(1) common/mpi_fortran_bottom/MPI_BOTTOM common/mpi_fortran_in_place/MPI_IN_PLACE diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 6ba22e6016e..ef59948137c 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -10,6 +10,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,6 +101,10 @@ static inline int memchecker_call (int (*f)(void *, size_t), const void * addr, return OMPI_SUCCESS; } + if ((0 == count) || (0 == datatype->super.size)) { + return OMPI_SUCCESS; + } + if( datatype->super.size == (size_t) (datatype->super.true_ub - datatype->super.true_lb) ) { /* We have a contiguous type. */ f( (void*)addr , datatype->super.size * count ); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c index 1a65b5cb015..86a2811b003 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c @@ -99,6 +99,7 @@ int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block, * associated with this bank have been freed. */ assert(generation == buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter); + (void)generation; // silence compiler warning /* * increment counter of completed buffers diff --git a/ompi/mca/bcol/iboffload/configure.m4 b/ompi/mca/bcol/iboffload/configure.m4 index cd8dad8f536..510e0117e05 100644 --- a/ompi/mca/bcol/iboffload/configure.m4 +++ b/ompi/mca/bcol/iboffload/configure.m4 @@ -2,6 +2,8 @@ # # Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. # Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -26,7 +28,7 @@ AC_DEFUN([MCA_ompi_bcol_iboffload_CONFIG],[ OPAL_CHECK_OPENFABRICS([bcol_iboffload], [bcol_ofa_happy="yes"]) OPAL_CHECK_MLNX_OPENFABRICS([bcol_iboffload], [bcol_mlnx_ofed_happy="yes"]) - AS_IF([test "$bcol_ofa_happy" = "yes" -a "$bcol_mlnx_ofed_happy" = "yes"], + AS_IF([test "$bcol_ofa_happy" = "yes" && test "$bcol_mlnx_ofed_happy" = "yes"], [$1], [$2]) diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c index 200e109f71e..14a4f769585 100644 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c @@ -255,6 +255,9 @@ int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args, buffer_size = ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX; assert(buffer_size >= count * dtype->super.size * ptpcoll_module->k_nomial_radix); + (void)buffer_size; // silence compiler warning + (void)dtype; + (void)count; return bcol_ptpcoll_allreduce_narraying_progress (input_args, const_args); } diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c index 96bc7b9ad4f..18fefe52549 100644 --- a/ompi/mca/bml/r2/bml_r2.c +++ b/ompi/mca/bml/r2/bml_r2.c @@ -222,7 +222,7 @@ static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_e size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send); bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1); - if (!bml_btl || bml_btl->btl->btl_exclusivity < btl->btl_exclusivity) { + if (!bml_btl || bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity) { /* this btl has higher exclusivity than an existing btl or none exists */ opal_output_verbose(1, opal_btl_base_framework.framework_output, diff --git a/ompi/mca/coll/base/README.memory_management b/ompi/mca/coll/base/README.memory_management new file mode 100644 index 00000000000..1e34f577c1e --- /dev/null +++ b/ompi/mca/coll/base/README.memory_management 
@@ -0,0 +1,124 @@ + /* This comment applies to all collectives (including the basic + * module) where we allocate a temporary buffer. For the next few + * lines of code, it's tremendously complicated how we decided that + * this was the Right Thing to do. Sit back and enjoy. And prepare + * to have your mind warped. :-) + * + * Recall some definitions (I always get these backwards, so I'm + * going to put them here): + * + * extent: the length from the lower bound to the upper bound -- may + * be considerably larger than the buffer required to hold the data + * (or smaller! But it's easiest to think about when it's larger). + * + * true extent: the exact number of bytes required to hold the data + * in the layout pattern in the datatype. + * + * For example, consider the following buffer (just talking about + * true_lb, extent, and true extent -- extrapolate for true_ub: + * + * A B C + * -------------------------------------------------------- + * | | | + * -------------------------------------------------------- + * + * There are multiple cases: + * + * 1. A is what we give to MPI_Send (and friends), and A is where + * the data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: 0 + * + * A C + * -------------------------------------------------------- + * | | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 2. A is what we give to MPI_Send (and friends), B is where the + * data starts, and C is where the data ends. 
In this case: + * + * - extent: C-A + * - true extent: C-B + * - true_lb: positive + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * 3. B is what we give to MPI_Send (and friends), A is where the + * data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: negative + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is + * where the data starts, and C is where the data ends. In this + * case: + * + * - extent: C-MPI_BOTTOM + * - true extent: C-B + * - true_lb: [potentially very large] positive + * + * MPI_BOTTOM B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * So in all cases, for a temporary buffer, all we need to malloc() + * is a buffer of size true_extent. We therefore need to know two + * pointer values: what value to give to MPI_Send (and friends) and + * what value to give to free(), because they might not be the same. + * + * Clearly, what we give to free() is exactly what was returned from + * malloc(). That part is easy. :-) + * + * What we give to MPI_Send (and friends) is a bit more complicated. + * Let's take the 4 cases from above: + * + * 1. 
If A is what we give to MPI_Send and A is where the data + * starts, then clearly we give to MPI_Send what we got back from + * malloc(). + * + * 2. If B is what we get back from malloc, but we give A to + * MPI_Send, then the buffer range [A,B) represents "dead space" + * -- no data will be put there. So it's safe to give B-true_lb to + * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is + * actually A. + * + * 3. If A is what we get back from malloc, and B is what we give to + * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal + * B. + * + * 4. Although this seems like the weirdest case, it's actually + * quite similar to case #2 -- the pointer we give to MPI_Send is + * smaller than the pointer we got back from malloc(). + * + * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. + * + * This works fine and dandy if we only have (count==1), which we + * rarely do. ;-) So we really need to allocate (true_extent + + * ((count - 1) * extent)) to get enough space for the rest. This may + * be more than is necessary, but it's ok. + * + * Simple, no? :-) + * + */ + + diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index 14ecc39067f..d789edd89a3 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -167,19 +167,16 @@ int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, - copy blocks from shift buffer starting at block [rank] in rbuf. 
*/ if (0 != rank) { - ptrdiff_t true_extent, true_lb; char *free_buf = NULL, *shift_buf = NULL; + ptrdiff_t span, gap; - err = ompi_datatype_get_true_extent(rdtype, &true_lb, &true_extent); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + span = opal_datatype_span(&rdtype->super, (int64_t)(size - rank) * rcount, &gap); - free_buf = (char*) calloc(((true_extent + - ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)), - sizeof(char)); + free_buf = (char*)calloc(span, sizeof(char)); if (NULL == free_buf) { line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } - shift_buf = free_buf - true_lb; + shift_buf = free_buf - gap; /* 1. copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer */ err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount), @@ -206,6 +203,7 @@ int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -342,6 +340,7 @@ ompi_coll_base_allgather_intra_recursivedoubling(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -429,6 +428,7 @@ int ompi_coll_base_allgather_intra_ring(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -602,6 +602,7 @@ ompi_coll_base_allgather_intra_neighborexchange(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -661,6 +662,7 @@ int 
ompi_coll_base_allgather_intra_two_procs(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index fc1df369d74..9b5cff2cbe0 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -211,6 +211,7 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -297,6 +298,7 @@ int ompi_coll_base_allgatherv_intra_ring(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -502,6 +504,7 @@ ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -563,6 +566,7 @@ int ompi_coll_base_allgatherv_intra_two_procs(const void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index 277ea6abc55..c21676af1e4 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -135,8 +135,8 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int ret, line, rank, size, adjsize, remote, 
distance; int newrank, newremote, extra_ranks; char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf = NULL; - ptrdiff_t true_lb, true_extent, lb, extent; ompi_request_t *reqs[2] = {NULL, NULL}; + OPAL_PTRDIFF_TYPE span, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -154,12 +154,8 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, } /* Allocate and initialize temporary send buffer */ - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - - inplacebuf = (char*) malloc(true_extent + (ptrdiff_t)(count - 1) * extent); + span = opal_datatype_span(&dtype->super, count, &gap); + inplacebuf = (char*) malloc(span); if (NULL == inplacebuf) { ret = -1; line = __LINE__; goto error_hndl; } if (MPI_IN_PLACE == sbuf) { @@ -273,6 +269,7 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); + (void)line; // silence compiler warning if (NULL != inplacebuf) free(inplacebuf); return ret; } @@ -532,6 +529,7 @@ ompi_coll_base_allreduce_intra_ring(const void *sbuf, void *rbuf, int count, error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); + (void)line; // silence compiler warning if (NULL != inbuf[0]) free(inbuf[0]); if (NULL != inbuf[1]) free(inbuf[1]); return ret; @@ -629,9 +627,9 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int int segcount, max_segcount, num_phases, phase, block_count, inbi; size_t typelng; char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL}; - ptrdiff_t true_lb, true_extent, lb, extent; ptrdiff_t block_offset, 
max_real_segsize; ompi_request_t *reqs[2] = {NULL, NULL}; + OPAL_PTRDIFF_TYPE lb, extent, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -649,10 +647,6 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int } /* Determine segment count based on the suggested segment size */ - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } ret = ompi_datatype_type_size( dtype, &typelng); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } segcount = count; @@ -685,7 +679,10 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int early_blockcount, late_blockcount ); COLL_BASE_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi, max_segcount, k); - max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent; + + ret = ompi_datatype_get_extent(dtype, &lb, &extent); + if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } + max_real_segsize = opal_datatype_span(&dtype->super, max_segcount, &gap); /* Allocate and initialize temporary buffers */ inbuf[0] = (char*)malloc(max_real_segsize); @@ -740,8 +737,8 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int block_count = ((rank < split_rank)? early_blockcount : late_blockcount); COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, early_phase_segcount, late_phase_segcount) - phase_count = ((phase < split_phase)? - (early_phase_segcount) : (late_phase_segcount)); + phase_count = ((phase < split_phase)? + (early_phase_segcount) : (late_phase_segcount)); phase_offset = ((phase < split_phase)? 
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : ((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase)); @@ -851,6 +848,7 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); + (void)line; // silence compiler warning if (NULL != inbuf[0]) free(inbuf[0]); if (NULL != inbuf[1]) free(inbuf[1]); return ret; diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 5b1f13160aa..c9a34b5fd04 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -43,10 +43,10 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, { mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err = MPI_SUCCESS, line; + OPAL_PTRDIFF_TYPE ext, gap; MPI_Request *preq; char *tmp_buffer; size_t max_size; - ptrdiff_t ext, true_lb, true_ext; /* Initialize. 
*/ @@ -60,14 +60,14 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); - ompi_datatype_get_true_extent ( rdtype, &true_lb, &true_ext); - max_size = true_ext + ext * (rcount-1); + max_size = opal_datatype_span(&rdtype->super, rcount, &gap); /* Allocate a temporary buffer */ tmp_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer -= gap; max_size = ext * rcount; /* in-place alltoall slow algorithm (but works) */ @@ -124,6 +124,7 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning ompi_coll_base_free_reqs(base_module->base_data->mcct_reqs, 2); } @@ -185,6 +186,7 @@ int ompi_coll_base_alltoall_intra_pairwise(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -199,7 +201,7 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, int i, k, line = -1, rank, size, err = 0; int sendto, recvfrom, distance, *displs = NULL, *blen = NULL; char *tmpbuf = NULL, *tmpbuf_free = NULL; - ptrdiff_t rlb, slb, tlb, sext, rext, tsext; + OPAL_PTRDIFF_TYPE sext, rext, span, gap; struct ompi_datatype_t *new_ddt; if (MPI_IN_PLACE == sbuf) { @@ -213,15 +215,13 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:alltoall_intra_bruck rank %d", rank)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); + err = ompi_datatype_type_extent (sdtype, &sext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_true_extent(sdtype, &tlb, &tsext); - if (err != 
MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); + err = ompi_datatype_type_extent (rdtype, &rext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + span = opal_datatype_span(&sdtype->super, (int64_t)size * scount, &gap); displs = (int *) malloc(size * sizeof(int)); if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; } @@ -229,9 +229,9 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; } /* tmp buffer allocation for message data */ - tmpbuf_free = (char *) malloc(tsext + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sext); + tmpbuf_free = (char *)malloc(span); if (tmpbuf_free == NULL) { line = __LINE__; err = -1; goto err_hndl; } - tmpbuf = tmpbuf_free - slb; + tmpbuf = tmpbuf_free - gap; /* Step 1 - local rotation - shift up by rank */ err = ompi_datatype_copy_content_same_ddt (sdtype, @@ -309,6 +309,7 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning if (tmpbuf != NULL) free(tmpbuf_free); if (displs != NULL) free(displs); if (blen != NULL) free(blen); @@ -470,6 +471,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error, rank)); + (void)line; // silence compiler warning ompi_coll_base_free_reqs(reqs, 2 * total_reqs); return error; } @@ -530,6 +532,7 @@ int ompi_coll_base_alltoall_intra_two_procs(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -652,6 +655,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const 
void *sbuf, int scount, if( MPI_SUCCESS != err ) { OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); + (void)line; // silence compiler warning } /* Free the reqs in all cases as they are persistent requests */ ompi_coll_base_free_reqs(req, nreqs); diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 7b395a551de..09da2593e09 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -38,16 +38,16 @@ int mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts, const int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err=MPI_SUCCESS; MPI_Request *preq; char *tmp_buffer; size_t max_size, rdtype_size; - ptrdiff_t ext; + OPAL_PTRDIFF_TYPE ext, gap = 0; /* Initialize. */ @@ -63,16 +63,17 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[i]; - + size_t size = opal_datatype_span(&rdtype->super, rcounts[i], &gap); max_size = size > max_size ? 
size : max_size; } + /* The gap will always be the same as we are working on the same datatype */ /* Allocate a temporary buffer */ tmp_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer += gap; /* in-place alltoallv slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { @@ -183,6 +184,7 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line, err, rank, step)); + (void)line; // silence compiler warning return err; } diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index 7aafd8d6a6f..3ce5102c099 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -86,6 +86,7 @@ ompi_coll_base_sendrecv_zero(int dest, int stag, */ OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", __FILE__, line, err)); + (void)line; // silence compiler warning return err; } @@ -169,6 +170,7 @@ int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -247,6 +249,7 @@ int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *c err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -282,6 +285,7 @@ int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm, err_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -369,6 +373,7 @@ int 
ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, err_hndl: OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); + (void)line; // silence compiler warning if( NULL != requests ) ompi_coll_base_free_reqs(requests, size-1); return err; diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 258832d1287..bb051861cab 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -41,7 +41,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer, uint32_t count_by_segment, ompi_coll_tree_t* tree ) { - int err = 0, line, i, rank, size, segindex, req_index; + int err = 0, line, i, rank, segindex, req_index; int num_segments; /* Number of segments */ int sendcount; /* number of elements sent in this segment */ size_t realsegsize, type_size; @@ -52,9 +52,12 @@ ompi_coll_base_bcast_intra_generic( void* buffer, ompi_request_t **send_reqs = NULL; #endif +#if OPAL_ENABLE_DEBUG + int size; size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); assert( size > 1 ); +#endif + rank = ompi_comm_rank(comm); ompi_datatype_get_extent (datatype, &lb, &extent); ompi_datatype_type_size( datatype, &type_size ); @@ -240,6 +243,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer, error_hndl: OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); + (void)line; // silence compiler warnings if( MPI_SUCCESS != err ) { ompi_coll_base_free_reqs( recv_reqs, 2); if( NULL != send_reqs ) { @@ -603,6 +607,7 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + (void)line; // silence compiler warning return (err); } diff --git a/ompi/mca/coll/base/coll_base_gather.c b/ompi/mca/coll/base/coll_base_gather.c index 
bd2004f80f3..794b82bd6a9 100644 --- a/ompi/mca/coll/base/coll_base_gather.c +++ b/ompi/mca/coll/base/coll_base_gather.c @@ -49,8 +49,8 @@ ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, char *ptmp = NULL, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; - MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; + MPI_Aint sextent, sgap, ssize; + MPI_Aint rextent, rgap, rsize; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; @@ -64,14 +64,14 @@ ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; - ompi_datatype_get_extent(sdtype, &slb, &sextent); - ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent); + ompi_datatype_type_extent(sdtype, &sextent); + ompi_datatype_type_extent(rdtype, &rextent); + ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap); + rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap); vrank = (rank - root + size) % size; if (rank == root) { - ompi_datatype_get_extent(rdtype, &rlb, &rextent); - ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent); if (0 == root){ /* root on 0, just use the recv buffer */ ptmp = (char *) rbuf; @@ -83,12 +83,12 @@ ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, } else { /* root is not on 0, allocate temp buffer for recv, * rotate data at the end */ - tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent); + tempbuf = (char *) malloc(rsize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - ptmp = tempbuf - rtrue_lb; + ptmp = tempbuf - rgap; if (sbuf != MPI_IN_PLACE) { /* copy from sbuf to temp buffer */ err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, @@ -106,12 +106,12 @@ 
ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, /* other non-leaf nodes, allocate temp buffer for data received from * children, the most we need is half of the total data elements due * to the property of binimoal tree */ - tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent); + tempbuf = (char *) malloc(ssize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - ptmp = tempbuf - strue_lb; + ptmp = tempbuf - sgap; /* local copy to tempbuf */ err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, ptmp, scount, sdtype); @@ -193,6 +193,7 @@ ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -330,6 +331,7 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount, OPAL_OUTPUT (( ompi_coll_base_framework.framework_output, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); + (void)line; // silence compiler warning return ret; } diff --git a/ompi/mca/coll/base/coll_base_reduce.c b/ompi/mca/coll/base/coll_base_reduce.c index b6ea005f3f8..f5b2449727e 100644 --- a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -55,19 +55,17 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi char *inbuf[2] = {NULL, NULL}, *inbuf_free[2] = {NULL, NULL}; char *accumbuf = NULL, *accumbuf_free = NULL; char *local_op_buffer = NULL, *sendtmpbuf = NULL; - ptrdiff_t extent, lower_bound, segment_increment; + ptrdiff_t extent, size, gap, segment_increment; ompi_request_t **sreq = NULL, *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; int num_segments, line, ret, segindex, i, rank; int recvcount, prevcount, inbi; - size_t typelng; /** * Determine number of segments and number of elements * sent per 
operation */ - ompi_datatype_get_extent( datatype, &lower_bound, &extent ); - ompi_datatype_type_size( datatype, &typelng ); - num_segments = (original_count + count_by_segment - 1) / count_by_segment; + ompi_datatype_type_extent( datatype, &extent ); + num_segments = (int)(((size_t)original_count + (size_t)count_by_segment - (size_t)1) / (size_t)count_by_segment); segment_increment = (ptrdiff_t)count_by_segment * extent; sendtmpbuf = (char*) sendbuf; @@ -84,21 +82,19 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi /* non-leaf nodes - wait for children to send me data & forward up (if needed) */ if( tree->tree_nextsize > 0 ) { - ptrdiff_t true_lower_bound, true_extent, real_segment_size; - ompi_datatype_get_true_extent( datatype, &true_lower_bound, - &true_extent ); + ptrdiff_t real_segment_size; /* handle non existant recv buffer (i.e. its NULL) and protect the recv buffer on non-root nodes */ accumbuf = (char*)recvbuf; if( (NULL == accumbuf) || (root != rank) ) { /* Allocate temporary accumulator buffer. 
*/ - accumbuf_free = (char*)malloc(true_extent + - (ptrdiff_t)(original_count - 1) * extent); + size = opal_datatype_span(&datatype->super, original_count, &gap); + accumbuf_free = (char*)malloc(size); if (accumbuf_free == NULL) { line = __LINE__; ret = -1; goto error_hndl; } - accumbuf = accumbuf_free - lower_bound; + accumbuf = accumbuf_free - gap; } /* If this is a non-commutative operation we must copy @@ -109,12 +105,12 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi (char*)sendtmpbuf); } /* Allocate two buffers for incoming segments */ - real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent; + real_segment_size = opal_datatype_span(&datatype->super, count_by_segment, &gap); inbuf_free[0] = (char*) malloc(real_segment_size); if( inbuf_free[0] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } - inbuf[0] = inbuf_free[0] - lower_bound; + inbuf[0] = inbuf_free[0] - gap; /* if there is chance to overlap communication - allocate second buffer */ if( (num_segments > 1) || (tree->tree_nextsize > 1) ) { @@ -122,7 +118,7 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi if( inbuf_free[1] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } - inbuf[1] = inbuf_free[1] - lower_bound; + inbuf[1] = inbuf_free[1] - gap; } /* reset input buffer index and receive count */ @@ -330,6 +326,7 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi OPAL_OUTPUT (( ompi_coll_base_framework.framework_output, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); + (void)line; // silence compiler warning if( inbuf_free[0] != NULL ) free(inbuf_free[0]); if( inbuf_free[1] != NULL ) free(inbuf_free[1]); if( accumbuf_free != NULL ) free(accumbuf); @@ -517,14 +514,13 @@ int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recv use_this_sendbuf = (void *)sendbuf; use_this_recvbuf = recvbuf; if (io_root != root) { 
- ptrdiff_t tlb, text, lb, ext; + ptrdiff_t dsize, gap; char *tmpbuf = NULL; - ompi_datatype_get_extent(datatype, &lb, &ext); - ompi_datatype_get_true_extent(datatype, &tlb, &text); + dsize = opal_datatype_span(&datatype->super, count, &gap); if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { - tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext); + tmpbuf = (char *) malloc(dsize); if (NULL == tmpbuf) { return MPI_ERR_INTERN; } @@ -533,7 +529,7 @@ int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recv (char*)recvbuf); use_this_sendbuf = tmpbuf; } else if (io_root == rank) { - tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext); + tmpbuf = (char *) malloc(dsize); if (NULL == tmpbuf) { return MPI_ERR_INTERN; } @@ -585,8 +581,6 @@ int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recv * GEF Oct05 after asking Jeff. */ -/* copied function (with appropriate renaming) starts here */ - /* * reduce_lin_intra * @@ -603,7 +597,7 @@ ompi_coll_base_reduce_intra_basic_linear(const void *sbuf, void *rbuf, int count mca_coll_base_module_t *module) { int i, rank, err, size; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t extent, dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; char *inplace_temp = NULL; @@ -623,151 +617,27 @@ ompi_coll_base_reduce_intra_basic_linear(const void *sbuf, void *rbuf, int count return err; } - /* Root receives and reduces messages. Allocate buffer to receive - * messages. This comment applies to all collectives in this basic - * module where we allocate a temporary buffer. For the next few - * lines of code, it's tremendously complicated how we decided that - * this was the Right Thing to do. Sit back and enjoy. And prepare - * to have your mind warped. 
:-) - * - * Recall some definitions (I always get these backwards, so I'm - * going to put them here): - * - * extent: the length from the lower bound to the upper bound -- may - * be considerably larger than the buffer required to hold the data - * (or smaller! But it's easiest to think about when it's larger). - * - * true extent: the exact number of bytes required to hold the data - * in the layout pattern in the datatype. - * - * For example, consider the following buffer (just talking about - * true_lb, extent, and true extent -- extrapolate for true_ub: - * - * A B C - * -------------------------------------------------------- - * | | | - * -------------------------------------------------------- - * - * There are multiple cases: - * - * 1. A is what we give to MPI_Send (and friends), and A is where - * the data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: 0 - * - * A C - * -------------------------------------------------------- - * | | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 2. A is what we give to MPI_Send (and friends), B is where the - * data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-B - * - true_lb: positive - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * 3. B is what we give to MPI_Send (and friends), A is where the - * data starts, and C is where the data ends. 
In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: negative - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is - * where the data starts, and C is where the data ends. In this - * case: - * - * - extent: C-MPI_BOTTOM - * - true extent: C-B - * - true_lb: [potentially very large] positive - * - * MPI_BOTTOM B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * So in all cases, for a temporary buffer, all we need to malloc() - * is a buffer of size true_extent. We therefore need to know two - * pointer values: what value to give to MPI_Send (and friends) and - * what value to give to free(), because they might not be the same. - * - * Clearly, what we give to free() is exactly what was returned from - * malloc(). That part is easy. :-) - * - * What we give to MPI_Send (and friends) is a bit more complicated. - * Let's take the 4 cases from above: - * - * 1. If A is what we give to MPI_Send and A is where the data - * starts, then clearly we give to MPI_Send what we got back from - * malloc(). - * - * 2. If B is what we get back from malloc, but we give A to - * MPI_Send, then the buffer range [A,B) represents "dead space" - * -- no data will be put there. So it's safe to give B-true_lb to - * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is - * actually A. - * - * 3. If A is what we get back from malloc, and B is what we give to - * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal - * B. - * - * 4. 
Although this seems like the weirdest case, it's actually - * quite similar to case #2 -- the pointer we give to MPI_Send is - * smaller than the pointer we got back from malloc(). - * - * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. - * - * This works fine and dandy if we only have (count==1), which we - * rarely do. ;-) So we really need to allocate (true_extent + - * ((count - 1) * extent)) to get enough space for the rest. This may - * be more than is necessary, but it's ok. - * - * Simple, no? :-) - * - */ - - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + ompi_datatype_type_extent(dtype, &extent); if (MPI_IN_PLACE == sbuf) { sbuf = rbuf; - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); + inplace_temp = (char*)malloc(dsize); if (NULL == inplace_temp) { return OMPI_ERR_OUT_OF_RESOURCE; } - rbuf = inplace_temp - true_lb; + rbuf = inplace_temp - gap; } if (size > 1) { - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { if (NULL != inplace_temp) { free(inplace_temp); } return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; } /* Initialize the receive buffer. 
*/ @@ -823,4 +693,3 @@ ompi_coll_base_reduce_intra_basic_linear(const void *sbuf, void *rbuf, int count return MPI_SUCCESS; } -/* copied function (with appropriate renaming) ends here */ diff --git a/ompi/mca/coll/base/coll_base_reduce_scatter.c b/ompi/mca/coll/base/coll_base_reduce_scatter.c index a9e674ca9cd..9bbd11455c4 100644 --- a/ompi/mca/coll/base/coll_base_reduce_scatter.c +++ b/ompi/mca/coll/base/coll_base_reduce_scatter.c @@ -76,13 +76,11 @@ int ompi_coll_base_reduce_scatter_intra_nonoverlapping(const void *sbuf, void *r if (root == rank) { /* We must allocate temporary receive buffer on root to ensure that rbuf is big enough */ - ptrdiff_t lb, extent, tlb, textent; + ptrdiff_t dsize, gap; + dsize = opal_datatype_span(&dtype->super, total_count, &gap); - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &tlb, &textent); - - tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent); - tmprbuf = tmprbuf_free - lb; + tmprbuf_free = (char*) malloc(dsize); + tmprbuf = tmprbuf_free - gap; } err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count, dtype, op, root, comm, comm->c_coll.coll_reduce_module); @@ -134,7 +132,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, { int i, rank, size, count, err = OMPI_SUCCESS; int tmp_size, remain = 0, tmp_rank, *disps = NULL; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t extent, buf_size, gap; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; @@ -161,9 +159,8 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (ptrdiff_t)(count - 1) * extent; + ompi_datatype_type_extent(dtype, &extent); + buf_size = opal_datatype_span(&dtype->super, count, &gap); /* Handle 
MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -172,7 +169,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, /* Allocate temporary receive buffer. */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; @@ -180,7 +177,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, /* allocate temporary buffer for results */ result_buf_free = (char*) malloc(buf_size); - result_buf = result_buf_free - true_lb; + result_buf = result_buf_free - gap; /* copy local buffer into the temporary results */ err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); @@ -459,9 +456,8 @@ ompi_coll_base_reduce_scatter_intra_ring( const void *sbuf, void *rbuf, const in int inbi, *displs = NULL; char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL; char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL}; - ptrdiff_t true_lb, true_extent, lb, extent, max_real_segsize; + ptrdiff_t extent, max_real_segsize, dsize, gap; ompi_request_t *reqs[2] = {NULL, NULL}; - size_t typelng; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -500,26 +496,23 @@ ompi_coll_base_reduce_scatter_intra_ring( const void *sbuf, void *rbuf, const in rbuf can be of rcounts[rank] size. - up to two temporary buffers used for communication/computation overlap. 
*/ - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_type_size( dtype, &typelng); + ret = ompi_datatype_type_extent(dtype, &extent); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent; + max_real_segsize = opal_datatype_span(&dtype->super, max_block_count, &gap); + dsize = opal_datatype_span(&dtype->super, total_count, &gap); - accumbuf_free = (char*)malloc(true_extent + (ptrdiff_t)(total_count - 1) * extent); + accumbuf_free = (char*)malloc(dsize); if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; } - accumbuf = accumbuf_free - lb; + accumbuf = accumbuf_free - gap; inbuf_free[0] = (char*)malloc(max_real_segsize); if (NULL == inbuf_free[0]) { ret = -1; line = __LINE__; goto error_hndl; } - inbuf[0] = inbuf_free[0] - lb; + inbuf[0] = inbuf_free[0] - gap; if (size > 2) { inbuf_free[1] = (char*)malloc(max_real_segsize); if (NULL == inbuf_free[1]) { ret = -1; line = __LINE__; goto error_hndl; } - inbuf[1] = inbuf_free[1] - lb; + inbuf[1] = inbuf_free[1] - gap; } /* Handle MPI_IN_PLACE for size > 1 */ @@ -614,6 +607,7 @@ ompi_coll_base_reduce_scatter_intra_ring( const void *sbuf, void *rbuf, const in error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); + (void)line; // silence compiler warning if (NULL != displs) free(displs); if (NULL != accumbuf_free) free(accumbuf_free); if (NULL != inbuf_free[0]) free(inbuf_free[0]); diff --git a/ompi/mca/coll/base/coll_base_scatter.c b/ompi/mca/coll/base/coll_base_scatter.c index b8b69aef9bf..44b09d28b9a 100644 --- a/ompi/mca/coll/base/coll_base_scatter.c +++ b/ompi/mca/coll/base/coll_base_scatter.c @@ -47,10 +47,10 @@ 
ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, char *ptmp, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; - MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; + ptrdiff_t sextent, rextent, ssize, rsize, sgap, rgap; + size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -62,10 +62,11 @@ ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; - ompi_datatype_get_extent(sdtype, &slb, &sextent); - ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent); - ompi_datatype_get_extent(rdtype, &rlb, &rextent); - ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent); + ompi_datatype_type_extent(sdtype, &sextent); + ompi_datatype_type_extent(rdtype, &rextent); + + ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap); + rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap); vrank = (rank - root + size) % size; ptmp = (char *) rbuf; /* by default suppose leaf nodes, just use rbuf */ @@ -82,12 +83,11 @@ ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, } } else { /* root is not on 0, allocate temp buffer for send */ - tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent); + tempbuf = (char *) malloc(ssize); if (NULL == tempbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - - ptmp = tempbuf - strue_lb; + ptmp = tempbuf - sgap; /* and rotate data so they will eventually in the right place */ err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root), @@ -110,12 +110,11 @@ ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, } else if (!(vrank % 2)) { /* non-root, 
non-leaf nodes, allocte temp buffer for recv * the most we need is rcount*size/2 */ - tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent); + tempbuf = (char *) malloc(rsize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - - ptmp = tempbuf - rtrue_lb; + ptmp = tempbuf - rgap; sdtype = rdtype; scount = rcount; @@ -171,6 +170,7 @@ ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning return err; } @@ -204,7 +204,7 @@ ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount, mca_coll_base_module_t *module) { int i, rank, size, err; - ptrdiff_t lb, incr; + ptrdiff_t incr; char *ptmp; /* Initialize */ @@ -223,7 +223,7 @@ ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount, /* I am the root, loop sending data. 
*/ - err = ompi_datatype_get_extent(sdtype, &lb, &incr); + err = ompi_datatype_type_extent(sdtype, &incr); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index e0e6a29a5e9..a4e8c57cb85 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -100,6 +100,7 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, */ OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", __FILE__, line, err)); + (void)line; // silence compiler warning if (MPI_STATUS_IGNORE != status) { status->MPI_ERROR = err; } diff --git a/ompi/mca/coll/basic/coll_basic_allgather.c b/ompi/mca/coll/basic/coll_basic_allgather.c index 71177355706..75870d2cfc2 100644 --- a/ompi/mca/coll/basic/coll_basic_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_allgather.c @@ -155,6 +155,7 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount, if( MPI_SUCCESS != err ) { OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); + (void)line; // silence compiler warning if( NULL != reqs ) ompi_coll_base_free_reqs(reqs, rsize+1); } if (NULL != tmpbuf) { diff --git a/ompi/mca/coll/basic/coll_basic_allreduce.c b/ompi/mca/coll/basic/coll_basic_allreduce.c index 68fe0ec4b98..e64094d02ba 100644 --- a/ompi/mca/coll/basic/coll_basic_allreduce.c +++ b/ompi/mca/coll/basic/coll_basic_allreduce.c @@ -81,8 +81,7 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int err, i, rank, root = 0, rsize, line; - ptrdiff_t lb, extent; - ptrdiff_t true_lb, true_extent; + ptrdiff_t extent, dsize, gap; char *tmpbuf = NULL, *pml_buffer = NULL; ompi_request_t *req[2]; ompi_request_t **reqs = NULL; @@ -100,18 +99,14 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, * simultaniously. 
*/ /*****************************************************************/ if (rank == root) { - err = ompi_datatype_get_extent(dtype, &lb, &extent); + err = ompi_datatype_type_extent(dtype, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } - err = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - - tmpbuf = (char *) malloc(true_extent + (count - 1) * extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + tmpbuf = (char *) malloc(dsize); if (NULL == tmpbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; } - pml_buffer = tmpbuf - true_lb; + pml_buffer = tmpbuf - gap; reqs = coll_base_comm_get_reqs(module->base_data, rsize - 1); if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; } @@ -200,6 +195,7 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, if( MPI_SUCCESS != err ) { OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); + (void)line; // silence compiler warning ompi_coll_base_free_reqs(reqs, rsize - 1); } if (NULL != tmpbuf) { diff --git a/ompi/mca/coll/basic/coll_basic_alltoallw.c b/ompi/mca/coll/basic/coll_basic_alltoallw.c index 4b123151e35..cd6e4f7313d 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallw.c @@ -41,10 +41,10 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, j, size, rank, err=MPI_SUCCESS, max_size; + int i, j, size, rank, err = MPI_SUCCESS, max_size; MPI_Request *preq, *reqs = NULL; - char *tmp_buffer; - ptrdiff_t ext; + char *tmp_buffer, *save_buffer = NULL; + ptrdiff_t ext, gap; /* Initialize. 
*/ @@ -58,17 +58,17 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con /* Find the largest receive amount */ for (i = 0, max_size = 0 ; i < size ; ++i) { - ompi_datatype_type_extent (rdtypes[i], &ext); - ext *= rcounts[i]; + ext = opal_datatype_span(&rdtypes[i]->super, rcounts[i], &gap); max_size = ext > max_size ? ext : max_size; } /* Allocate a temporary buffer */ - tmp_buffer = calloc (max_size, 1); + tmp_buffer = save_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer -= gap; reqs = coll_base_comm_get_reqs( module->base_data, 2); /* in-place alltoallw slow algorithm (but works) */ @@ -126,7 +126,7 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con error_hndl: /* Free the temporary buffer */ - free (tmp_buffer); + free (save_buffer); if( MPI_SUCCESS != err ) { /* Free the requests. */ if( NULL != reqs ) { ompi_coll_base_free_reqs(reqs, 2); diff --git a/ompi/mca/coll/basic/coll_basic_exscan.c b/ompi/mca/coll/basic/coll_basic_exscan.c index 34ec43fadc1..057bcfa48c5 100644 --- a/ompi/mca/coll/basic/coll_basic_exscan.c +++ b/ompi/mca/coll/basic/coll_basic_exscan.c @@ -49,7 +49,7 @@ mca_coll_basic_exscan_intra(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int size, rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *reduce_buffer = NULL; @@ -83,14 +83,13 @@ mca_coll_basic_exscan_intra(const void *sbuf, void *rbuf, int count, /* Get a temporary buffer to perform the reduction into. Rationale * for malloc'ing this size is provided in coll_basic_reduce.c. 
*/ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - reduce_buffer = free_buffer - true_lb; + reduce_buffer = free_buffer - gap; err = ompi_datatype_copy_content_same_ddt(dtype, count, reduce_buffer, (char*)sbuf); diff --git a/ompi/mca/coll/basic/coll_basic_reduce.c b/ompi/mca/coll/basic/coll_basic_reduce.c index 20650d0e2ea..ad2fd1e6f3e 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce.c +++ b/ompi/mca/coll/basic/coll_basic_reduce.c @@ -92,7 +92,7 @@ mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, { int i, size, rank, vrank; int err, peer, dim, mask; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t lb, extent, dsize, gap; char *free_buffer = NULL; char *free_rbuf = NULL; char *pml_buffer = NULL; @@ -120,14 +120,14 @@ mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, * rationale above. */ ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* read the comment about commutative operations (few lines down * the page) */ if (ompi_op_is_commute(op)) { @@ -138,12 +138,12 @@ mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, * rationale above. 
*/ if (MPI_IN_PLACE == sbuf) { - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); + inplace_temp = (char*)malloc(dsize); if (NULL == inplace_temp) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup_and_return; } - sbuf = inplace_temp - true_lb; + sbuf = inplace_temp - gap; err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, (char*)rbuf); } snd_buffer = (char*)sbuf; @@ -152,12 +152,12 @@ mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, /* root is the only one required to provide a valid rbuf. * Assume rbuf is invalid for all other ranks, so fix it up * here to be valid on all non-leaf ranks */ - free_rbuf = (char*)malloc(true_extent + (count - 1) * extent); + free_rbuf = (char*)malloc(dsize); if (NULL == free_rbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup_and_return; } - rbuf = free_rbuf - true_lb; + rbuf = free_rbuf - gap; } /* Loop over cube dimensions. High processes send to low ones in the @@ -288,7 +288,7 @@ mca_coll_basic_reduce_lin_inter(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int i, err, size; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; @@ -305,14 +305,13 @@ mca_coll_basic_reduce_lin_inter(const void *sbuf, void *rbuf, int count, MCA_PML_BASE_SEND_STANDARD, comm)); } else { /* Root receives and reduces messages */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* Initialize the receive buffer. 
*/ diff --git a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c index 43be9ba6ea1..d8e9cc8a0db 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c @@ -71,7 +71,7 @@ mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rco mca_coll_base_module_t *module) { int i, rank, size, count, err = OMPI_SUCCESS; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t extent, buf_size, gap; int *disps = NULL; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; @@ -96,9 +96,8 @@ mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rco } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (count - 1) * extent; + ompi_datatype_type_extent(dtype, &extent); + buf_size = opal_datatype_span(&dtype->super, count, &gap); /* Handle MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -111,7 +110,7 @@ mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rco /* temporary receive buffer. See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; @@ -119,7 +118,7 @@ mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rco /* allocate temporary buffer for results */ result_buf_free = (char*) malloc(buf_size); - result_buf = result_buf_free - true_lb; + result_buf = result_buf_free - gap; /* copy local buffer into the temporary results */ err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); @@ -323,7 +322,7 @@ mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rco /* temporary receive buffer. 
See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; diff --git a/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c b/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c index 9d2b3a4d5d9..fca39e5d51a 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c +++ b/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c @@ -58,7 +58,7 @@ mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf, int rcou mca_coll_base_module_t *module) { int rank, size, count, err = OMPI_SUCCESS; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t extent, buf_size, gap; char *recv_buf = NULL, *recv_buf_free = NULL; /* Initialize */ @@ -72,9 +72,8 @@ mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf, int rcou } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (count - 1) * extent; + ompi_datatype_type_extent(dtype, &extent); + buf_size = opal_datatype_span(&dtype->super, count, &gap); /* Handle MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -85,7 +84,7 @@ mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf, int rcou /* temporary receive buffer. 
See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; diff --git a/ompi/mca/coll/basic/coll_basic_scan.c b/ompi/mca/coll/basic/coll_basic_scan.c index e74cc3fac89..2ee07d0fd24 100644 --- a/ompi/mca/coll/basic/coll_basic_scan.c +++ b/ompi/mca/coll/basic/coll_basic_scan.c @@ -47,7 +47,7 @@ mca_coll_basic_scan_intra(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int size, rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; @@ -74,14 +74,12 @@ mca_coll_basic_scan_intra(const void *sbuf, void *rbuf, int count, * listed in coll_basic_reduce.c. Use this temporary buffer to * receive into, later. */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + free_buffer = malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* Copy the send buffer into the receive buffer. */ diff --git a/ompi/mca/coll/cuda/coll_cuda_allreduce.c b/ompi/mca/coll/cuda/coll_cuda_allreduce.c index 05e2c3910ac..1606bcdf928 100644 --- a/ompi/mca/coll/cuda/coll_cuda_allreduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_allreduce.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. 
@@ -34,15 +34,14 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -50,7 +49,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -61,7 +60,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_allreduce_module); if (NULL != sbuf1) { diff --git a/ompi/mca/coll/cuda/coll_cuda_exscan.c b/ompi/mca/coll/cuda/coll_cuda_exscan.c index 1f93722d62e..bc336341ac0 100644 --- a/ompi/mca/coll/cuda/coll_cuda_exscan.c +++ b/ompi/mca/coll/cuda/coll_cuda_exscan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. 
@@ -26,15 +26,14 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -42,7 +41,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -53,7 +52,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm, diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce.c b/ompi/mca/coll/cuda/coll_cuda_reduce.c index 1cd667f30bb..2bcce13c75c 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. 
@@ -34,15 +34,15 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -50,7 +50,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -61,7 +61,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_reduce((void *) sbuf, rbuf, count, dtype, op, root, comm, diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c index dab1d86ce4a..0dccbc580fe 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. 
@@ -38,16 +38,16 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t sbufsize, rbufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - sbufsize = (true_extent + (ptrdiff_t)(rcount - 1) * extent) * ompi_comm_size(comm); - rbufsize = true_extent + (ptrdiff_t)(rcount - 1) * extent; + rbufsize = opal_datatype_span(&dtype->super, rcount, &gap); + + sbufsize = rbufsize * ompi_comm_size(comm); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(sbufsize); if (NULL == sbuf1) { @@ -55,7 +55,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, } opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -66,7 +66,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, } opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm, s->c_coll.coll_reduce_scatter_block_module); diff --git a/ompi/mca/coll/cuda/coll_cuda_scan.c b/ompi/mca/coll/cuda/coll_cuda_scan.c index 6cfb06a3916..e9afde81075 100644 --- a/ompi/mca/coll/cuda/coll_cuda_scan.c +++ b/ompi/mca/coll/cuda/coll_cuda_scan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. 
All rights reserved. @@ -33,15 +33,14 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -49,7 +48,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -60,7 +59,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_scan_module); diff --git a/ompi/mca/coll/fca/coll_fca.h b/ompi/mca/coll/fca/coll_fca.h index cc3b7cb4f86..52bab4e3bd6 100644 --- a/ompi/mca/coll/fca/coll_fca.h +++ b/ompi/mca/coll/fca/coll_fca.h @@ -286,7 +286,7 @@ int mca_coll_fca_get_fca_lib(struct ompi_communicator_t *comm); /* Collective functions */ -int mca_coll_fca_allreduce(void *sbuf, void *rbuf, int count, +int mca_coll_fca_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -294,7 +294,7 @@ int mca_coll_fca_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, 
mca_coll_base_module_t *module); -int mca_coll_fca_reduce(void *sbuf, void* rbuf, int count, +int mca_coll_fca_reduce(const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -302,54 +302,54 @@ int mca_coll_fca_reduce(void *sbuf, void* rbuf, int count, int mca_coll_fca_barrier(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_allgather(void *sbuf, int scount, struct ompi_datatype_t *sdtype, +int mca_coll_fca_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_allgatherv(void *sbuf, int scount, +int mca_coll_fca_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_alltoall(void *sbuf, int scount, +int mca_coll_fca_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_alltoallv(void *sbuf, int *scounts, int *sdisps, +int mca_coll_fca_alltoallv(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_alltoallw(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +int mca_coll_fca_alltoallw(const void *sbuf, const int *scounts, const int *sdisps, 
+ struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_gather(void *sbuf, int scount, +int mca_coll_fca_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_gatherv(void *sbuf, int scount, +int mca_coll_fca_gatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_fca_reduce_scatter(void *sbuf, void *rbuf, int *rcounts, +int mca_coll_fca_reduce_scatter(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/fca/coll_fca_ops.c b/ompi/mca/coll/fca/coll_fca_ops.c index 7d2711f15a3..0dc1d11b20c 100644 --- a/ompi/mca/coll/fca/coll_fca_ops.c +++ b/ompi/mca/coll/fca/coll_fca_ops.c @@ -1,12 +1,18 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + #include "ompi_config.h" #include "ompi/constants.h" #include "coll_fca.h" @@ -253,7 +259,7 @@ int mca_coll_fca_bcast(void *buff, int count, struct ompi_datatype_t *datatype, * Accepts: - same as MPI_Reduce() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_fca_reduce(void *sbuf, void *rbuf, int count, +int mca_coll_fca_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -263,7 +269,7 @@ int mca_coll_fca_reduce(void *sbuf, void *rbuf, int count, int ret; mca_coll_fca_get_reduce_root(root, fca_module->rank, &spec); - spec.sbuf = sbuf; + spec.sbuf = (void *)sbuf; spec.rbuf = rbuf; if (mca_coll_fca_fill_reduce_spec(count, dtype, op, &spec, fca_module->fca_comm_caps.max_payload) @@ -295,7 +301,7 @@ int mca_coll_fca_reduce(void *sbuf, void *rbuf, int count, * Accepts: - same as MPI_Allreduce() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_fca_allreduce(void *sbuf, void *rbuf, int count, +int mca_coll_fca_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -304,7 +310,7 @@ int mca_coll_fca_allreduce(void *sbuf, void *rbuf, int count, fca_reduce_spec_t spec; int ret; - spec.sbuf = sbuf; + spec.sbuf = (void *)sbuf; spec.rbuf = rbuf; if (mca_coll_fca_fill_reduce_spec(count, dtype, op, &spec, fca_module->fca_comm_caps.max_payload) @@ -377,7 +383,7 @@ static size_t __setup_gather_sendbuf_inplace(void *inplace_sbuf, int rcount, * Accepts: - same as MPI_Allgather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_fca_allgather(void *sbuf, int scount, struct ompi_datatype_t *sdtype, +int mca_coll_fca_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, 
struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -399,7 +405,7 @@ int mca_coll_fca_allgather(void *sbuf, int scount, struct ompi_datatype_t *sdtyp (char *)rbuf + rcount * fca_module->rank * rdtype_extent, rcount, rdtype, &sconv, &spec.sbuf); } else { - spec.size = __setup_gather_sendbuf(sbuf, scount, sdtype, &sconv, &spec.sbuf); + spec.size = __setup_gather_sendbuf((void *)sbuf, scount, sdtype, &sconv, &spec.sbuf); } /* Setup recv buffer */ @@ -442,9 +448,9 @@ int mca_coll_fca_allgather(void *sbuf, int scount, struct ompi_datatype_t *sdtyp comm, fca_module->previous_allgather_module); } -int mca_coll_fca_allgatherv(void *sbuf, int scount, +int mca_coll_fca_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -471,7 +477,7 @@ int mca_coll_fca_allgatherv(void *sbuf, int scount, (char *)rbuf + disps[fca_module->rank] * rdtype_extent, rcounts[fca_module->rank], rdtype, &sconv, &spec.sbuf); } else { - spec.sendsize = __setup_gather_sendbuf(sbuf, scount, sdtype, &sconv, &spec.sbuf); + spec.sendsize = __setup_gather_sendbuf((void *)sbuf, scount, sdtype, &sconv, &spec.sbuf); } /* Allocate alternative recvsizes/displs on the stack, which will be in bytes */ @@ -548,7 +554,7 @@ int mca_coll_fca_allgatherv(void *sbuf, int scount, fca_module->previous_allgatherv_module); } -int mca_coll_fca_alltoall(void *sbuf, int scount, +int mca_coll_fca_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -561,9 +567,9 @@ int mca_coll_fca_alltoall(void *sbuf, int scount, comm, fca_module->previous_alltoall_module); } -int mca_coll_fca_alltoallv(void *sbuf, int *scounts, int *sdisps, +int mca_coll_fca_alltoallv(const void *sbuf, const int *scounts, const 
int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -574,10 +580,10 @@ int mca_coll_fca_alltoallv(void *sbuf, int *scounts, int *sdisps, comm, fca_module->previous_alltoallv_module); } -int mca_coll_fca_alltoallw(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +int mca_coll_fca_alltoallw(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -587,7 +593,7 @@ int mca_coll_fca_alltoallw(void *sbuf, int *scounts, int *sdisps, comm, fca_module->previous_alltoallw_module); } -int mca_coll_fca_gather(void *sbuf, int scount, +int mca_coll_fca_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -600,9 +606,9 @@ int mca_coll_fca_gather(void *sbuf, int scount, comm, fca_module->previous_gather_module); } -int mca_coll_fca_gatherv(void *sbuf, int scount, +int mca_coll_fca_gatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -613,7 +619,7 @@ int mca_coll_fca_gatherv(void *sbuf, int scount, comm, fca_module->previous_gatherv_module); } -int mca_coll_fca_reduce_scatter(void *sbuf, void *rbuf, int *rcounts, +int mca_coll_fca_reduce_scatter(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git 
a/ompi/mca/coll/hcoll/coll_hcoll.h b/ompi/mca/coll/hcoll/coll_hcoll.h index 25a60b39225..ed9b34703c2 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll.h +++ b/ompi/mca/coll/hcoll/coll_hcoll.h @@ -137,8 +137,12 @@ struct mca_coll_hcoll_module_t { mca_coll_base_module_t *previous_ibarrier_module; mca_coll_base_module_iallgather_fn_t previous_iallgather; mca_coll_base_module_t *previous_iallgather_module; + mca_coll_base_module_iallgatherv_fn_t previous_iallgatherv; + mca_coll_base_module_t *previous_iallgatherv_module; mca_coll_base_module_iallreduce_fn_t previous_iallreduce; mca_coll_base_module_t *previous_iallreduce_module; + mca_coll_base_module_ireduce_fn_t previous_ireduce; + mca_coll_base_module_t *previous_ireduce_module; mca_coll_base_module_igatherv_fn_t previous_igatherv; mca_coll_base_module_t *previous_igatherv_module; mca_coll_base_module_ialltoall_fn_t previous_ialltoall; @@ -168,14 +172,22 @@ int mca_coll_hcoll_bcast(void *buff, int count, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_allgather(void *sbuf, int scount, +int mca_coll_hcoll_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_gather(void *sbuf, int scount, +int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -183,31 +195,38 @@ int mca_coll_hcoll_gather(void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, +int mca_coll_hcoll_allreduce(const void 
*sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_alltoall(void *sbuf, int scount, +int mca_coll_hcoll_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, - int *sdisps, +int mca_coll_hcoll_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdisps, + void *rbuf, const int *rcounts, + const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_gatherv(void* sbuf, int scount, +int mca_coll_hcoll_gatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *displs, + void* rbuf, const int *rcounts, const int *displs, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -223,7 +242,7 @@ int mca_coll_hcoll_ibcast(void *buff, int count, ompi_request_t** request, mca_coll_base_module_t *module); -int mca_coll_hcoll_iallgather(void *sbuf, int scount, +int mca_coll_hcoll_iallgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -231,14 +250,31 @@ int mca_coll_hcoll_iallgather(void *sbuf, int scount, ompi_request_t** request, mca_coll_base_module_t *module); -int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, +int mca_coll_hcoll_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const 
int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t** request, mca_coll_base_module_t *module); -int mca_coll_hcoll_ialltoall(void *sbuf, int scount, +int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_ialltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -246,7 +282,7 @@ int mca_coll_hcoll_ialltoall(void *sbuf, int scount, ompi_request_t **req, mca_coll_base_module_t *module); -int mca_coll_hcoll_ialltoallv(void *sbuf, int *scounts, +int mca_coll_hcoll_ialltoallv(const void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, @@ -256,9 +292,9 @@ int mca_coll_hcoll_ialltoallv(void *sbuf, int *scounts, ompi_request_t **req, mca_coll_base_module_t *module); -int mca_coll_hcoll_igatherv(void* sbuf, int scount, +int mca_coll_hcoll_igatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *displs, + void* rbuf, const int *rcounts, const int *displs, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c b/ompi/mca/coll/hcoll/coll_hcoll_component.c index 9b457de8e3a..0bf4ec767a3 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_component.c @@ -92,40 +92,6 @@ enum { }; -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* 
param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - *storage = (char *) default_value; - index = mca_base_component_var_register( - &mca_coll_hcoll_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, - "ompi", "coll", "hcoll", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && - (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - - /* * Utility routine for integer parameter registration */ diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c index f1af6931b18..7814cc1d8e1 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_module.c @@ -41,12 +41,15 @@ static void mca_coll_hcoll_module_clear(mca_coll_hcoll_module_t *hcoll_module) hcoll_module->previous_alltoall = NULL; hcoll_module->previous_alltoallv = NULL; hcoll_module->previous_alltoallw = NULL; + hcoll_module->previous_reduce = NULL; hcoll_module->previous_reduce_scatter = NULL; hcoll_module->previous_ibarrier = NULL; hcoll_module->previous_ibcast = NULL; hcoll_module->previous_iallreduce = NULL; hcoll_module->previous_iallgather = NULL; + hcoll_module->previous_iallgatherv = NULL; hcoll_module->previous_igatherv = NULL; + hcoll_module->previous_ireduce = NULL; } static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_module) @@ -80,17 +83,21 @@ static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module OBJ_RELEASE(hcoll_module->previous_bcast_module); OBJ_RELEASE(hcoll_module->previous_allreduce_module); OBJ_RELEASE(hcoll_module->previous_allgather_module); + 
OBJ_RELEASE(hcoll_module->previous_allgatherv_module); OBJ_RELEASE(hcoll_module->previous_gatherv_module); OBJ_RELEASE(hcoll_module->previous_alltoall_module); OBJ_RELEASE(hcoll_module->previous_alltoallv_module); + OBJ_RELEASE(hcoll_module->previous_reduce_module); OBJ_RELEASE(hcoll_module->previous_ibarrier_module); OBJ_RELEASE(hcoll_module->previous_ibcast_module); OBJ_RELEASE(hcoll_module->previous_iallreduce_module); OBJ_RELEASE(hcoll_module->previous_iallgather_module); + OBJ_RELEASE(hcoll_module->previous_iallgatherv_module); OBJ_RELEASE(hcoll_module->previous_igatherv_module); OBJ_RELEASE(hcoll_module->previous_ialltoall_module); OBJ_RELEASE(hcoll_module->previous_ialltoallv_module); + OBJ_RELEASE(hcoll_module->previous_ireduce_module); /* OBJ_RELEASE(hcoll_module->previous_allgatherv_module); @@ -127,7 +134,9 @@ static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_modu HCOL_SAVE_PREV_COLL_API(barrier); HCOL_SAVE_PREV_COLL_API(bcast); HCOL_SAVE_PREV_COLL_API(allreduce); + HCOL_SAVE_PREV_COLL_API(reduce); HCOL_SAVE_PREV_COLL_API(allgather); + HCOL_SAVE_PREV_COLL_API(allgatherv); HCOL_SAVE_PREV_COLL_API(gatherv); HCOL_SAVE_PREV_COLL_API(alltoall); HCOL_SAVE_PREV_COLL_API(alltoallv); @@ -135,7 +144,9 @@ static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_modu HCOL_SAVE_PREV_COLL_API(ibarrier); HCOL_SAVE_PREV_COLL_API(ibcast); HCOL_SAVE_PREV_COLL_API(iallreduce); + HCOL_SAVE_PREV_COLL_API(ireduce); HCOL_SAVE_PREV_COLL_API(iallgather); + HCOL_SAVE_PREV_COLL_API(iallgatherv); HCOL_SAVE_PREV_COLL_API(igatherv); HCOL_SAVE_PREV_COLL_API(ialltoall); HCOL_SAVE_PREV_COLL_API(ialltoallv); @@ -157,7 +168,7 @@ static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_modu /* ** Communicator free callback */ -int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra) +static int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra) { mca_coll_hcoll_module_t 
*hcoll_module; @@ -312,14 +323,26 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL; hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL; hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL; + hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL; hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL; hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL; hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL; hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL; + hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL; hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL; hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL; hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL; +#if HCOLL_API >= HCOLL_VERSION(3,5) + hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL; +#else + hcoll_module->super.coll_iallgatherv = NULL; +#endif hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL; +#if HCOLL_API >= HCOLL_VERSION(3,5) + hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL; +#else + hcoll_module->super.coll_ireduce = NULL; +#endif hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? 
mca_coll_hcoll_gather :*/ NULL; hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL; hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL; diff --git a/ompi/mca/coll/hcoll/coll_hcoll_ops.c b/ompi/mca/coll/hcoll/coll_hcoll_ops.c index 32e7612d2f7..6d6756b1e72 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_ops.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_ops.c @@ -57,7 +57,7 @@ int mca_coll_hcoll_bcast(void *buff, int count, return rc; } -int mca_coll_hcoll_allgather(void *sbuf, int scount, +int mca_coll_hcoll_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -86,7 +86,7 @@ int mca_coll_hcoll_allgather(void *sbuf, int scount, hcoll_module->previous_allgather_module); return rc; } - rc = hcoll_collectives.coll_allgather(sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_allgather((void *)sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHER"); rc = hcoll_module->previous_allgather(sbuf,scount,sdtype, @@ -97,7 +97,52 @@ int mca_coll_hcoll_allgather(void *sbuf, int scount, return rc; } -int mca_coll_hcoll_gather(void *sbuf, int scount, +int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Allgatherv via hcoll; hands off to the saved previous_allgatherv handler when a datatype is unsupported (and hcoll_datatype_fallback is set) or when hcoll does not return HCOLL_SUCCESS */ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHERV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_dte_dtype(sdtype); + rtype = ompi_dtype_2_dte_dtype(rdtype); + if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) + || HCOL_DTE_IS_COMPLEX(stype) ||
HCOL_DTE_IS_COMPLEX(rtype))) + && mca_coll_hcoll_component.hcoll_datatype_fallback){ + /* If we are here then sdtype and/or rdtype is not a simple predefined datatype */ + /* In the future we need to add a more complex mapping to the dte_data_representation_t */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgatherv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + hcoll_module->previous_allgatherv_module); + return rc; + } + rc = hcoll_collectives.coll_allgatherv((void *)sbuf,scount,stype,rbuf,rcount,displs,rtype,hcoll_module->hcoll_context); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHERV"); + rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + hcoll_module->previous_allgatherv_module); + } + return rc; +} + +int mca_coll_hcoll_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -126,10 +171,10 @@ int mca_coll_hcoll_gather(void *sbuf, int scount, hcoll_module->previous_allgather_module); return rc; } - rc = hcoll_collectives.coll_gather(sbuf,scount,stype,rbuf,rcount,rtype,root,hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_gather((void *)sbuf,scount,stype,rbuf,rcount,rtype,root,hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK GATHER"); - rc = hcoll_module->previous_gather(sbuf,scount,sdtype, + rc = hcoll_module->previous_gather((void *)sbuf,scount,sdtype, /* NOTE(review): the module handed to previous_gather below is previous_allgather_module; looks like a copy/paste slip - confirm */ rbuf,rcount,rdtype,root, comm, hcoll_module->previous_allgather_module); @@ -138,7 +183,7 @@ int mca_coll_hcoll_gather(void *sbuf, int scount, } -int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, +int mca_coll_hcoll_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct
ompi_communicator_t *comm, @@ -176,7 +221,7 @@ int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, return rc; } - rc = hcoll_collectives.coll_allreduce(sbuf,rbuf,count,Dtype,Op,hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_allreduce((void *)sbuf,rbuf,count,Dtype,Op,hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK ALLREDUCE"); rc = hcoll_module->previous_allreduce(sbuf,rbuf, @@ -186,7 +231,59 @@ int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, return rc; } -int mca_coll_hcoll_alltoall(void *sbuf, int scount, +int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Reduce via hcoll; hands off to the saved previous_reduce handler when the datatype (with hcoll_datatype_fallback set) or the op is unsupported, or when hcoll does not return HCOLL_SUCCESS */ + dte_data_representation_t Dtype; + hcoll_dte_op_t *Op; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL REDUCE"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + Dtype = ompi_dtype_2_dte_dtype(dtype); + if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(Dtype) || HCOL_DTE_IS_COMPLEX(Dtype))) + && mca_coll_hcoll_component.hcoll_datatype_fallback){ + /* If we are here then the datatype is not a simple predefined datatype */ + /* In the future we need to add a more complex mapping to the dte_data_representation_t */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback reduce;", + dtype->super.name); + rc = hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + return rc; + } + + Op = ompi_op_2_hcolrte_op(op); + if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ + /* If we are here then the op was mapped to HCOL_DTE_OP_NULL, i.e. it is not supported */ + /* In the future we may need to add a more complex op mapping */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback reduce;", + op->o_name); + rc =
hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + return rc; + } + + rc = hcoll_collectives.coll_reduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK REDUCE"); + rc = hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + } + return rc; +} + +int mca_coll_hcoll_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -215,7 +312,7 @@ int mca_coll_hcoll_alltoall(void *sbuf, int scount, hcoll_module->previous_alltoall_module); return rc; } - rc = hcoll_collectives.coll_alltoall(sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_alltoall((void *)sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK ALLTOALL"); rc = hcoll_module->previous_alltoall(sbuf,scount,sdtype, @@ -226,9 +323,9 @@ int mca_coll_hcoll_alltoall(void *sbuf, int scount, return rc; } -int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, int *sdisps, +int mca_coll_hcoll_alltoallv(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -251,8 +348,8 @@ int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, int *sdisps, comm, hcoll_module->previous_alltoallv_module); return rc; } - rc = hcoll_collectives.coll_alltoallv(sbuf, scounts, sdisps, stype, - rbuf, rcounts, rdisps, rtype, + rc = hcoll_collectives.coll_alltoallv((void *)sbuf, (int *)scounts, (int *)sdisps, stype, + rbuf, (int *)rcounts, (int *)rdisps, rtype, hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){
HCOL_VERBOSE(20,"RUNNING FALLBACK ALLTOALLV"); @@ -263,9 +360,9 @@ int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, int *sdisps, return rc; } -int mca_coll_hcoll_gatherv(void* sbuf, int scount, +int mca_coll_hcoll_gatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *displs, + void* rbuf, const int *rcounts, const int *displs, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -292,7 +389,7 @@ int mca_coll_hcoll_gatherv(void* sbuf, int scount, comm, hcoll_module->previous_gatherv_module); return rc; } - rc = hcoll_collectives.coll_gatherv(sbuf,scount,stype,rbuf,rcounts,displs, rtype, root, hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_gatherv((void *)sbuf, scount, stype, rbuf, (int *)rcounts, (int *)displs, rtype, root, hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK GATHERV"); rc = hcoll_module->previous_gatherv(sbuf,scount,sdtype, @@ -352,7 +449,7 @@ int mca_coll_hcoll_ibcast(void *buff, int count, return rc; } -int mca_coll_hcoll_iallgather(void *sbuf, int scount, +int mca_coll_hcoll_iallgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -385,7 +482,7 @@ int mca_coll_hcoll_iallgather(void *sbuf, int scount, hcoll_module->previous_iallgather_module); return rc; } - rc = hcoll_collectives.coll_iallgather(sbuf, scount, stype, rbuf, rcount, rtype, hcoll_module->hcoll_context, rt_handle); + rc = hcoll_collectives.coll_iallgather((void *)sbuf, scount, stype, rbuf, rcount, rtype, hcoll_module->hcoll_context, rt_handle); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHER"); rc = hcoll_module->previous_iallgather(sbuf,scount,sdtype, @@ -396,8 +493,58 @@ int mca_coll_hcoll_iallgather(void *sbuf, int scount, } return rc; } - -int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, +#if HCOLL_API >= HCOLL_VERSION(3,5) 
+int mca_coll_hcoll_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Non-blocking allgatherv via hcoll; hands off to the saved previous_iallgatherv handler when a datatype is unsupported (and hcoll_datatype_fallback is set) or when hcoll does not return HCOLL_SUCCESS */ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHERV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_dte_dtype(sdtype); + rtype = ompi_dtype_2_dte_dtype(rdtype); + void **rt_handle = (void **) request; + if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) + || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) + && mca_coll_hcoll_component.hcoll_datatype_fallback){ + /* If we are here then sdtype and/or rdtype is not a simple predefined datatype */ + /* In the future we need to add a more complex mapping to the dte_data_representation_t */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback non-blocking allgatherv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + request, + hcoll_module->previous_iallgatherv_module); + return rc; + } + rc = hcoll_collectives.coll_iallgatherv((void *)sbuf,scount,stype,rbuf,rcount,displs,rtype, + hcoll_module->hcoll_context, rt_handle); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHERV"); + rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + request, + hcoll_module->previous_iallgatherv_module); + } + return rc; +} +#endif +int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -438,7 +585,7 @@ int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int
count, return rc; } - rc = hcoll_collectives.coll_iallreduce(sbuf, rbuf, count, Dtype, Op, hcoll_module->hcoll_context, rt_handle); + rc = hcoll_collectives.coll_iallreduce((void *)sbuf, rbuf, count, Dtype, Op, hcoll_module->hcoll_context, rt_handle); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLREDUCE"); rc = hcoll_module->previous_iallreduce(sbuf,rbuf, @@ -447,10 +594,67 @@ int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, } return rc; } +#if HCOLL_API >= HCOLL_VERSION(3,5) +int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Non-blocking reduce via hcoll; hands off to the saved previous_ireduce handler when the datatype (with hcoll_datatype_fallback set) or the op is unsupported, or when hcoll does not return HCOLL_SUCCESS */ + dte_data_representation_t Dtype; + hcoll_dte_op_t *Op; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING REDUCE"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + Dtype = ompi_dtype_2_dte_dtype(dtype); + void **rt_handle = (void**) request; + if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(Dtype) || HCOL_DTE_IS_COMPLEX(Dtype))) + && mca_coll_hcoll_component.hcoll_datatype_fallback){ + /* If we are here then the datatype is not a simple predefined datatype */ + /* In the future we need to add a more complex mapping to the dte_data_representation_t */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback non-blocking reduce;", + dtype->super.name); + rc = hcoll_module->previous_ireduce(sbuf,rbuf,count,dtype,op, + root, + comm, request, + hcoll_module->previous_ireduce_module); + return rc; + } -int mca_coll_hcoll_igatherv(void* sbuf, int scount, + Op = ompi_op_2_hcolrte_op(op); + if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ + /* If we are here then the op was mapped to HCOL_DTE_OP_NULL, i.e. it is not supported */ + /* In the future we may need to add a more complex op mapping */ + /* For now, use the fallback */ + HCOL_VERBOSE(20,"ompi_op_t is not supported: op
= %s; calling fallback non-blocking reduce;", + op->o_name); + rc = hcoll_module->previous_ireduce(sbuf,rbuf, + count,dtype,op, + root, + comm, request, + hcoll_module->previous_ireduce_module); + return rc; + } + + rc = hcoll_collectives.coll_ireduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context,rt_handle); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING REDUCE"); + rc = hcoll_module->previous_ireduce(sbuf,rbuf, + count,dtype,op, + root, + comm, + request, + hcoll_module->previous_ireduce_module); + } + return rc; +} +#endif +int mca_coll_hcoll_igatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *displs, + void* rbuf, const int *rcounts, const int *displs, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -481,7 +685,7 @@ int mca_coll_hcoll_igatherv(void* sbuf, int scount, hcoll_module->previous_igatherv_module); return rc; } - rc = hcoll_collectives.coll_igatherv(sbuf,scount,stype,rbuf,rcounts,displs, rtype, root, hcoll_module->hcoll_context, rt_handle); + rc = hcoll_collectives.coll_igatherv((void *)sbuf, scount, stype, rbuf, (int *)rcounts, (int *)displs, rtype, root, hcoll_module->hcoll_context, rt_handle); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK IGATHERV"); rc = hcoll_module->previous_igatherv(sbuf,scount,sdtype, diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c index e9fc4a6b71f..c5b4651888d 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_rte.c @@ -79,7 +79,9 @@ static int get_ec_handles( int num_ec , rte_grp_handle_t , rte_ec_handle_t * ec_handles ); +#if 0 /* This callback is not used */ static int get_my_ec(rte_grp_handle_t , rte_ec_handle_t *ec_handle); +#endif static int group_size ( rte_grp_handle_t group ); static int my_rank (rte_grp_handle_t grp_h); @@ -181,7 +183,6 @@ static int recv_nb(struct dte_data_representation_t data,
/*do inline nb recv*/ size_t size; ompi_request_t *ompi_req; - opal_free_list_item_t *item; if (!buffer && !HCOL_DTE_IS_ZERO(data)) { fprintf(stderr, "***Error in hcolrte_rml_recv_nb: buffer pointer is NULL" @@ -339,6 +340,7 @@ static int get_ec_handles( int num_ec , return HCOLL_SUCCESS; } +#if 0 /* This callback is not used */ static int get_my_ec ( rte_grp_handle_t grp_h, rte_ec_handle_t *ec_handle) { ompi_communicator_t *comm = (ompi_communicator_t *)grp_h; @@ -348,7 +350,7 @@ static int get_my_ec ( rte_grp_handle_t grp_h, rte_ec_handle_t *ec_handle) ec_handle->rank = my_rank; return HCOLL_SUCCESS; } - +#endif static int group_size ( rte_grp_handle_t grp_h ) { @@ -404,6 +406,7 @@ static void* get_coll_handle(void) OMPI_REQUEST_INIT(ompi_req,false); ompi_req->req_complete_cb = NULL; ompi_req->req_status.MPI_ERROR = MPI_SUCCESS; + ompi_req->req_state = OMPI_REQUEST_ACTIVE; ompi_req->req_free = request_free; return (void *)ompi_req; } diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 81bb7275019..ef9ace0ec4a 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -294,7 +294,7 @@ static inline void NBC_Free (NBC_Handle* handle) { * to be called *only* from the progress thread !!! 
*/ int NBC_Progress(NBC_Handle *handle) { int flag, res, ret=NBC_CONTINUE; - unsigned long size; + unsigned long size = 0; char *delim; /* the handle is done if there is no schedule attached */ diff --git a/ompi/mca/coll/libnbc/nbc_iallgather.c b/ompi/mca/coll/libnbc/nbc_iallgather.c index 401d98b37bb..60180d14190 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgather.c +++ b/ompi/mca/coll/libnbc/nbc_iallgather.c @@ -58,7 +58,7 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { return res; } @@ -175,9 +175,9 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c index 7b32c7555f3..a8a77dc8935 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_iallgatherv.c @@ -45,9 +45,9 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent (recvtype, &rcvext); + res = ompi_datatype_type_extent (recvtype, &rcvext); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } @@ -124,9 +124,9 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D rsize = ompi_comm_remote_size (comm); - res = 
MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index b56a4aa4991..2e1b0dd00b6 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -70,13 +70,13 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M res = ompi_datatype_get_extent(datatype, &lb, &ext); if (OMPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_get_extent() (%i)", res); return res; } res = ompi_datatype_type_size (datatype, &size); if (OMPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Type_size() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_type_size() (%i)", res); return res; } @@ -193,7 +193,8 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, size, rsize; + int rank, res, rsize; + size_t size; MPI_Aint ext; NBC_Schedule *schedule; NBC_Handle *handle; @@ -202,15 +203,15 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co rank = ompi_comm_rank (comm); rsize = ompi_comm_remote_size (comm); - res = MPI_Type_extent(datatype, &ext); + res = ompi_datatype_type_extent(datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_size(datatype, &size); + res = ompi_datatype_type_size(datatype, &size); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + NBC_Error("MPI Error in
ompi_datatype_type_size() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index e9a73f54014..c18096e0509 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -51,7 +51,8 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, a2asize, sndsize, datasize; + int rank, p, res, datasize; + size_t a2asize, sndsize; NBC_Schedule *schedule; MPI_Aint rcvext, sndext; #ifdef NBC_CACHE_SCHEDULE @@ -67,21 +68,21 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent(sendtype, &sndext); + res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_size(sendtype, &sndsize); + res = ompi_datatype_type_size(sendtype, &sndsize); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); return res; } @@ -93,7 +94,7 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype * total communicated size is smaller than 1<<17 *and* if we don't * have eager messages (msgsize < 1<<13) */ alg = NBC_A2A_LINEAR; - } else if(a2asize < (1<<12)*p) { + } else if(a2asize < (1<<12)*(unsigned int)p) { /*alg = NBC_A2A_DISS;*/ alg = NBC_A2A_LINEAR; } else @@ -120,9 +121,9 @@ 
int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype if(NBC_Type_intrinsic(sendtype)) { datasize = sndext * sendcount; } else { - res = MPI_Pack_size (sendcount, sendtype, comm, &datasize); + res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Pack_size() (%i)", res); + NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); NBC_Return_handle (handle); return res; } @@ -156,20 +157,20 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype int pos=0; /* non-contiguous - pack */ - res = MPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, handle->tmpbuf, + res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, handle->tmpbuf, (p - rank) * datasize, &pos, comm); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error("MPI Error in MPI_Pack() (%i)", res); + NBC_Error("MPI Error in PMPI_Pack() (%i)", res); NBC_Return_handle (handle); return res; } if (rank != 0) { pos = 0; - res = MPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) handle->tmpbuf + datasize * (p - rank), + res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) handle->tmpbuf + datasize * (p - rank), rank * datasize, &pos, comm); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error("MPI Error in MPI_Pack() (%i)", res); + NBC_Error("MPI Error in PMPI_Pack() (%i)", res); NBC_Return_handle (handle); return res; } @@ -277,15 +278,15 @@ int ompi_coll_libnbc_ialltoall_inter (const void* sendbuf, int sendcount, MPI_Da rsize = ompi_comm_remote_size (comm); - res = MPI_Type_extent (sendtype, &sndext); + res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_extent (recvtype, &rcvext); + res = ompi_datatype_type_extent (recvtype, 
&rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } @@ -414,9 +415,9 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve if(NBC_Type_intrinsic(sendtype)) { datasize = sndext*sendcount; } else { - res = MPI_Pack_size(sendcount, sendtype, comm, &datasize); + res = PMPI_Pack_size(sendcount, sendtype, comm, &datasize); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Pack_size() (%i)", res); + NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); return res; } } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index 8961d860455..946b627ca77 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -37,15 +37,15 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent (sendtype, &sndext); + res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_extent (recvtype, &rcvext); + res = ompi_datatype_type_extent (recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } @@ -128,15 +128,15 @@ int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcount ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = MPI_Type_extent(sendtype, &sndext); + res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = 
MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index ce3a5631231..afa39287df0 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -9,6 +9,7 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Author(s): Torsten Hoefler * @@ -177,7 +178,7 @@ int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_reque } /* inform remote peers that all local peers have entered the barrier */ - for (int peer = 0 ; peer < rsize ; ++peer) { + for (int peer = 1; peer < rsize ; ++peer) { res = NBC_Sched_send (0, true, 1, MPI_BYTE, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { NBC_Return_handle (handle); diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index d82b3461b0b..5f0470a1337 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -1,11 +1,11 @@ /* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. All + * rights reserved. 
+ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -20,7 +20,7 @@ static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule * static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, - MPI_Datatype datatype, int fragsize, int size); + MPI_Datatype datatype, int fragsize, size_t size); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ @@ -44,7 +44,8 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, size, segsize; + int rank, p, res, segsize; + size_t size; NBC_Schedule *schedule; #ifdef NBC_CACHE_SCHEDULE NBC_Bcast_args *args, *found, search; @@ -56,9 +57,9 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_size(datatype, &size); + res = ompi_datatype_type_size(datatype, &size); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); return res; } @@ -251,7 +252,7 @@ static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *sc } /* simple chained MPI_Ibcast */ -static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size) { +static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, size_t size) { int res, vrank, rpeer, speer, 
numfrag, fragcount, thiscount; MPI_Aint ext; char *buf; @@ -259,9 +260,9 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch RANK2VRANK(rank, vrank, root); VRANK2RANK(rpeer, vrank-1, root); VRANK2RANK(speer, vrank+1, root); - res = MPI_Type_extent(datatype, &ext); + res = ompi_datatype_type_extent(datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ibcast_inter.c b/ompi/mca/coll/libnbc/nbc_ibcast_inter.c index 59ed17a3e41..8188ec65732 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast_inter.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast_inter.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -18,17 +18,11 @@ int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int res, size; + int res; NBC_Schedule *schedule; NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_size() (%i)", res); - return res; - } - schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { return OMPI_ERR_OUT_OF_RESOURCE; diff --git a/ompi/mca/coll/libnbc/nbc_iexscan.c b/ompi/mca/coll/libnbc/nbc_iexscan.c index 32a7d835daf..736f16c50be 100644 --- a/ompi/mca/coll/libnbc/nbc_iexscan.c +++ b/ompi/mca/coll/libnbc/nbc_iexscan.c @@ -59,9 +59,9 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent(datatype, &ext); + res = ompi_datatype_type_extent(datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_igather.c b/ompi/mca/coll/libnbc/nbc_igather.c index 057b3087a6a..76d972edf1d 100644 --- a/ompi/mca/coll/libnbc/nbc_igather.c +++ b/ompi/mca/coll/libnbc/nbc_igather.c @@ -56,9 +56,9 @@ int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, MPI_Datatype se p = ompi_comm_size (comm); if (rank == root) { - res = MPI_Type_extent (recvtype, &rcvext); + res = ompi_datatype_type_extent (recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } @@ -187,9 +187,9 @@ int ompi_coll_libnbc_igather_inter (const void* sendbuf, int sendcount, MPI_Data rsize = ompi_comm_remote_size (comm); if 
(root == MPI_ROOT) { - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c index 4cec8903654..afda8b4a653 100644 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ b/ompi/mca/coll/libnbc/nbc_igatherv.c @@ -12,6 +12,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Author(s): Torsten Hoefler * @@ -40,9 +41,9 @@ int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype s p = ompi_comm_size (comm); if (rank == root) { - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } @@ -118,12 +119,12 @@ int ompi_coll_libnbc_igatherv_inter (const void* sendbuf, int sendcount, MPI_Dat NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - rsize = ompi_comm_size (comm); + rsize = ompi_comm_remote_size (comm); if (MPI_ROOT == root) { - res = MPI_Type_extent(recvtype, &rcvext); + res = ompi_datatype_type_extent(recvtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c index 9eec9991e9b..e9ae7b388a9 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c @@ -48,9 +48,9 @@ int 
ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datat ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; - res = MPI_Type_extent (rtype, &rcvext); + res = ompi_datatype_type_extent (rtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c index ba9d5e8999d..50d85ee406a 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c @@ -49,9 +49,9 @@ int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Data ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; - res = MPI_Type_extent(rtype, &rcvext); + res = ompi_datatype_type_extent(rtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c index 20b50f4fe41..3d8b34a1918 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c @@ -45,15 +45,15 @@ int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Dataty ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; - res = MPI_Type_extent(stype, &sndext); + res = ompi_datatype_type_extent(stype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); + res = ompi_datatype_type_extent(rtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI 
Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c index 3db725db709..52983b1632b 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c @@ -49,15 +49,15 @@ int ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, c ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; - res = MPI_Type_extent (stype, &sndext); + res = ompi_datatype_type_extent (stype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_extent (rtype, &rcvext); + res = ompi_datatype_type_extent (rtype, &rcvext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index 20b16bdf10a..7b7e3210f9b 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -503,9 +503,9 @@ static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, memcpy(tgt, src, srccount*ext); } else { /* we have to pack and unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); + res = PMPI_Pack_size(srccount, srctype, comm, &size); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Pack_size() (%i:%i)", res, size); + NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size); return res; } @@ -519,19 +519,19 @@ static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, } pos=0; - res = MPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); + res = PMPI_Pack(src, srccount, srctype, packbuf, size, 
&pos, comm); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Pack() (%i)", res); + NBC_Error ("MPI Error in PMPI_Pack() (%i)", res); free (packbuf); return res; } pos=0; - res = MPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); + res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); free(packbuf); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Unpack() (%i)", res); + NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); return res; } } @@ -560,15 +560,15 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void } else { /* we have to unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); + res = PMPI_Pack_size(srccount, srctype, comm, &size); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Pack_size() (%i)", res); + NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res); return res; } pos = 0; - res = MPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); + res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Unpack() (%i)", res); + NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); return res; } } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index e1ed4776ce8..0045deb6a54 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -18,7 +18,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle); static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); static inline int red_sched_linear (int rank, int rsize, int root, const 
void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); @@ -47,7 +47,8 @@ int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, segsize, size; + int rank, p, res, segsize; + size_t size; MPI_Aint ext; NBC_Schedule *schedule; char *redbuf=NULL, inplace; @@ -60,15 +61,15 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent(datatype, &ext); + res = ompi_datatype_type_extent(datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } - res = MPI_Type_size(datatype, &size); + res = ompi_datatype_type_size(datatype, &size); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); return res; } @@ -207,9 +208,9 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count rank = ompi_comm_rank (comm); rsize = ompi_comm_remote_size (comm); - res = MPI_Type_extent (datatype, &ext); + res = ompi_datatype_type_extent (datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } @@ -359,7 +360,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen /* chain send ... 
*/ static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { int res, vrank, rpeer, speer, numfrag, fragcount, thiscount; long offset; diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index e890cea790b..cd1dad14e76 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -50,9 +50,9 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent (datatype, &ext); + res = ompi_datatype_type_extent (datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } @@ -208,9 +208,9 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, rank = ompi_comm_rank (comm); rsize = ompi_comm_remote_size (comm); - res = MPI_Type_extent (datatype, &ext); + res = ompi_datatype_type_extent (datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c index dd2b35730b5..c11a7ad4186 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -48,9 +48,9 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent(datatype, &ext); + res = ompi_datatype_type_extent(datatype, &ext); if (MPI_SUCCESS 
!= res || 0 == ext) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i:%i)", res, (int) ext); + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i:%i)", res, (int) ext); return (MPI_SUCCESS == res) ? MPI_ERR_SIZE : res; } @@ -208,9 +208,9 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sbuf, void *rbuf, i rank = ompi_comm_rank (comm); rsize = ompi_comm_remote_size (comm); - res = MPI_Type_extent (dtype, &ext); + res = ompi_datatype_type_extent (dtype, &ext); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c index 0d0b8c01bfc..a239d14ed10 100644 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ b/ompi/mca/coll/libnbc/nbc_iscan.c @@ -56,9 +56,9 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = MPI_Type_extent (datatype, &ext); + res = ompi_datatype_type_extent (datatype, &ext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c index d6686081596..e4b1d0dbda9 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ b/ompi/mca/coll/libnbc/nbc_iscatter.c @@ -57,9 +57,9 @@ int ompi_coll_libnbc_iscatter (const void* sendbuf, int sendcount, MPI_Datatype p = ompi_comm_size (comm); if (rank == root) { - res = MPI_Type_extent (sendtype, &sndext); + res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } @@ -183,9 +183,9 @@ int ompi_coll_libnbc_iscatter_inter (const void* sendbuf, int sendcount, MPI_Dat rsize = 
ompi_comm_remote_size (comm); if (MPI_ROOT == root) { - res = MPI_Type_extent(sendtype, &sndext); + res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); return res; } } diff --git a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c index 8b475cffc4e..3460afdfa52 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ b/ompi/mca/coll/libnbc/nbc_iscatterv.c @@ -46,9 +46,9 @@ int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const /* receive from root */ if (rank == root) { - res = MPI_Type_extent (sendtype, &sndext); + res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); OBJ_RELEASE(schedule); return res; } @@ -131,9 +131,9 @@ int ompi_coll_libnbc_iscatterv_inter (const void* sendbuf, const int *sendcounts return res; } } else if (MPI_ROOT == root) { - res = MPI_Type_extent(sendtype, &sndext); + res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); OBJ_RELEASE(schedule); return res; } diff --git a/ompi/mca/coll/ml/common_sym_whitelist.txt b/ompi/mca/coll/ml/common_sym_whitelist.txt new file mode 100644 index 00000000000..6a99e2b40c9 --- /dev/null +++ b/ompi/mca/coll/ml/common_sym_whitelist.txt @@ -0,0 +1,4 @@ +# Ignore symbols in this component that are auto-generated and we +# can't do anything about them (e.g., flex/bison symbols). 
+coll_ml_config_yyleng +coll_ml_config_yytext diff --git a/ompi/mca/coll/portals4/Makefile.am b/ompi/mca/coll/portals4/Makefile.am index 5d6a9faa150..c8668033564 100644 --- a/ompi/mca/coll/portals4/Makefile.am +++ b/ompi/mca/coll/portals4/Makefile.am @@ -16,6 +16,7 @@ local_sources = \ coll_portals4_bcast.c \ coll_portals4_reduce.c \ coll_portals4_gather.c \ + coll_portals4_scatter.c \ coll_portals4_request.h \ coll_portals4_request.c diff --git a/ompi/mca/coll/portals4/coll_portals4.h b/ompi/mca/coll/portals4/coll_portals4.h index 4eb631e222f..9d3386db4da 100644 --- a/ompi/mca/coll/portals4/coll_portals4.h +++ b/ompi/mca/coll/portals4/coll_portals4.h @@ -105,9 +105,6 @@ struct mca_coll_portals4_module_t { /* binomial tree */ ompi_coll_portals4_tree_t *cached_in_order_bmtree; int cached_in_order_bmtree_root; - - size_t barrier_count; - size_t gather_count; }; typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t; OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t); @@ -235,6 +232,19 @@ int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_d mca_coll_base_module_t *module); int ompi_coll_portals4_igather_intra_fini(struct ompi_coll_portals4_request_t *request); +int ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); +int ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **request, + mca_coll_base_module_t *module); +int ompi_coll_portals4_iscatter_intra_fini(struct ompi_coll_portals4_request_t *request); + static inline ptl_process_t ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank) diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c 
b/ompi/mca/coll/portals4/coll_portals4_component.c index 679d55ad322..4c3bb7be40f 100644 --- a/ompi/mca/coll/portals4/coll_portals4_component.c +++ b/ompi/mca/coll/portals4/coll_portals4_component.c @@ -573,6 +573,10 @@ portals4_comm_query(struct ompi_communicator_t *comm, return NULL; } + opal_output_verbose(50, ompi_coll_base_framework.framework_output, + "%s:%d: My nid,pid = (%x,%x)\n", + __FILE__, __LINE__, proc->phys.nid, proc->phys.pid); + /* check for logical addressing mode in the MTL */ if (0 == proc->phys.pid) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, @@ -595,6 +599,9 @@ portals4_comm_query(struct ompi_communicator_t *comm, portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra; portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra; + portals4_module->super.coll_scatter = ompi_coll_portals4_scatter_intra; + portals4_module->super.coll_iscatter = ompi_coll_portals4_iscatter_intra; + portals4_module->cached_in_order_bmtree=NULL; portals4_module->cached_in_order_bmtree_root=-1; @@ -607,9 +614,6 @@ portals4_comm_query(struct ompi_communicator_t *comm, portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra; portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra; - portals4_module->barrier_count = 0; - portals4_module->gather_count = 0; - return &(portals4_module->super); } @@ -706,8 +710,7 @@ portals4_progress(void) ompi_coll_portals4_iallreduce_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_SCATTER: - opal_output(ompi_coll_base_framework.framework_output, - "scatter is not supported yet\n"); + ompi_coll_portals4_iscatter_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_GATHER: ompi_coll_portals4_igather_intra_fini(ptl_request); diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index 9cde2d2bd98..c2aca74493e 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ 
b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -177,10 +177,6 @@ setup_gather_buffers_binomial(struct ompi_communicator_t *comm, /* Setup Gather Buffers */ /**********************************/ if (vrank == 0) { - request->u.gather.unpack_bytes= - request->u.gather.unpack_dst_true_extent + - ((ptrdiff_t)request->u.gather.unpack_dst_count * (ptrdiff_t)request->u.gather.size - 1) * request->u.gather.unpack_dst_extent; - request->u.gather.gather_bytes=request->u.gather.packed_size * (ptrdiff_t)request->u.gather.size; /* @@ -282,10 +278,6 @@ setup_gather_buffers_linear(struct ompi_communicator_t *comm, /* Setup Gather Buffers */ /**********************************/ if (i_am_root) { - request->u.gather.unpack_bytes= - request->u.gather.unpack_dst_true_extent + - ((ptrdiff_t)request->u.gather.unpack_dst_count * (ptrdiff_t)request->u.gather.size - 1) * request->u.gather.unpack_dst_extent; - request->u.gather.gather_bytes=request->u.gather.packed_size * (ptrdiff_t)request->u.gather.size; /* @@ -1005,8 +997,6 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct "completed CTWait(expected_ops=%d)\n", expected_ops); } - ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree)); - OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank)); @@ -1016,8 +1006,6 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct if (NULL != request->u.gather.gather_buf) free(request->u.gather.gather_buf); - ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree)); - opal_output(ompi_coll_base_framework.framework_output, "%s:%4d:%4d\tError occurred ret=%d, rank %2d", __FILE__, __LINE__, line, ret, request->u.gather.my_rank); diff --git a/ompi/mca/coll/portals4/coll_portals4_request.h b/ompi/mca/coll/portals4/coll_portals4_request.h index 341e6b6ae78..f78c2dee631 100644 --- a/ompi/mca/coll/portals4/coll_portals4_request.h 
+++ b/ompi/mca/coll/portals4/coll_portals4_request.h @@ -114,7 +114,6 @@ struct ompi_coll_portals4_request_t { MPI_Aint pack_src_lb; MPI_Aint pack_src_true_lb; MPI_Aint pack_src_offset; - uint64_t unpack_bytes; char *unpack_dst_buf; int unpack_dst_count; struct ompi_datatype_t *unpack_dst_dtype; @@ -123,6 +122,47 @@ struct ompi_coll_portals4_request_t { MPI_Aint unpack_dst_lb; MPI_Aint unpack_dst_true_lb; } gather; + + struct { + opal_convertor_t send_converter; + opal_convertor_t recv_converter; + size_t packed_size; + int8_t is_sync; + int8_t free_after; + size_t coll_count; + char *scatter_buf; + uint64_t scatter_bytes; + ptl_match_bits_t scatter_match_bits; + ptl_handle_md_t scatter_mdh; + ptl_handle_ct_t scatter_cth; + ptl_handle_md_t scatter_meh; + ptl_match_bits_t sync_match_bits; + ptl_handle_md_t sync_mdh; + ptl_handle_ct_t sync_cth; + ptl_handle_me_t sync_meh; + int my_rank; + int root_rank; + int size; + const void *sbuf; + void *rbuf; + uint64_t pack_bytes; + const char *pack_src_buf; + int pack_src_count; + struct ompi_datatype_t *pack_src_dtype; + MPI_Aint pack_src_extent; + MPI_Aint pack_src_true_extent; + MPI_Aint pack_src_lb; + MPI_Aint pack_src_true_lb; + uint64_t unpack_bytes; + char *unpack_dst_buf; + int unpack_dst_count; + struct ompi_datatype_t *unpack_dst_dtype; + MPI_Aint unpack_dst_extent; + MPI_Aint unpack_dst_true_extent; + MPI_Aint unpack_dst_lb; + MPI_Aint unpack_dst_true_lb; + MPI_Aint unpack_dst_offset; + } scatter; } u; }; typedef struct ompi_coll_portals4_request_t ompi_coll_portals4_request_t; diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c new file mode 100644 index 00000000000..c1ec41dd847 --- /dev/null +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -0,0 +1,774 @@ +/* + * Copyright (c) 2015 Sandia National Laboratories. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/util/bit_ops.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" + +#include "coll_portals4.h" +#include "coll_portals4_request.h" + + +#undef RTR_USES_TRIGGERED_PUT + + +#define VRANK(ra, ro, si) ((ra - ro + si) % si) + + +static int +setup_scatter_buffers_linear(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + int8_t i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); + + ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter, + request->u.scatter.pack_src_buf, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.pack_src_count, + request->u.scatter.pack_src_dtype); + opal_convertor_get_packed_size(&request->u.scatter.send_converter, &request->u.scatter.packed_size); + OBJ_DESTRUCT(&request->u.scatter.send_converter); + + /**********************************/ + /* Setup Scatter Buffers */ + /**********************************/ + if (i_am_root) { + + /* + * calculate the total size of the packed data + */ + request->u.scatter.scatter_bytes=request->u.scatter.packed_size * (ptrdiff_t)request->u.scatter.size; + + /* all transfers done using request->u.scatter.sdtype. 
+ * allocate temp buffer for recv, copy and/or rotate data at the end */ + request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes); + if (NULL == request->u.scatter.scatter_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.free_after = 1; + + for (int32_t i=0;iu.scatter.size;i++) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * i; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): offset(%lu)", + __FILE__, __LINE__, request->u.scatter.my_rank, + offset); + + ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter, + request->u.scatter.pack_src_buf + offset, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.pack_src_count, + request->u.scatter.pack_src_dtype); + + iov.iov_len = request->u.scatter.packed_size; + iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.scatter.scatter_buf + (request->u.scatter.packed_size*i)); + opal_convertor_pack(&request->u.scatter.send_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.scatter.send_converter); + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): root - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld) * size(%d)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes, + request->u.scatter.packed_size, request->u.scatter.size); + } else { + request->u.scatter.scatter_bytes=request->u.scatter.packed_size; + request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes); + if (NULL == request->u.scatter.scatter_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.free_after = 1; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + 
"%s:%d:rank(%d): leaf - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes, + request->u.scatter.packed_size); + } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +setup_scatter_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + ptl_me_t me; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Setup Scatter Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm), + 0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)", + request->u.scatter.my_rank, request->u.scatter.scatter_match_bits)); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.scatter.scatter_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.scatter.scatter_mdh = mca_coll_portals4_component.data_md_h; + + me.start = request->u.scatter.scatter_buf; + me.length = request->u.scatter.scatter_bytes; + me.ct_handle = request->u.scatter.scatter_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + PTL_ME_EVENT_CT_COMM; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.match_bits = 
request->u.scatter.scatter_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.scatter.scatter_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +setup_sync_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + ptl_me_t me; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Setup Sync Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm), + 0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)", + request->u.scatter.my_rank, request->u.scatter.sync_match_bits)); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.scatter.sync_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.scatter.sync_mdh = mca_coll_portals4_component.zero_md_h; + + me.start = NULL; + me.length = 0; + me.ct_handle = request->u.scatter.sync_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + 
PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.match_bits = request->u.scatter.sync_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.scatter.sync_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +cleanup_scatter_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Cleanup Scatter Handles */ + /**********************************/ + do { + ret = PtlMEUnlink(request->u.scatter.scatter_meh); + if (PTL_IN_USE == ret) { + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d: scatter_meh still in use (ret=%d, rank %2d)", + __FILE__, __LINE__, ret, request->u.scatter.my_rank); + continue; + } + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } while (ret == PTL_IN_USE); + + ret = PtlCTFree(request->u.scatter.scatter_cth); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, 
request->u.scatter.my_rank); + + return ret; +} + +static int +cleanup_sync_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + int ptl_ret; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Cleanup Sync Handles */ + /**********************************/ + do { + ret = PtlMEUnlink(request->u.scatter.sync_meh); + if (PTL_IN_USE == ret) { + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d: sync_meh still in use (ret=%d, rank %2d)", + __FILE__, __LINE__, ret, request->u.scatter.my_rank); + continue; + } + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } while (ret == PTL_IN_USE); + + ret = PtlCTFree(request->u.scatter.sync_cth); + if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred (ptl_ret=%d) ret=%d, rank %2d", + __FILE__, __LINE__, line, ptl_ret, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_base_module_t *module) +{ + mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; + int ret, line; + ptl_ct_event_t ct; + + ptl_ct_event_t sync_incr_event; + + int8_t i_am_root; + + int32_t expected_rtrs = 0; + int32_t expected_puts = 0; + int32_t expected_acks = 0; + int32_t expected_ops = 0; + + int32_t expected_chained_rtrs = 0; + int32_t expected_chained_acks = 0; + + + 
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank)); + + request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER; + request->u.scatter.scatter_buf = NULL; + request->u.scatter.scatter_mdh = PTL_INVALID_HANDLE; + request->u.scatter.scatter_cth = PTL_INVALID_HANDLE; + request->u.scatter.scatter_meh = PTL_INVALID_HANDLE; + request->u.scatter.sync_mdh = PTL_INVALID_HANDLE; + request->u.scatter.sync_cth = PTL_INVALID_HANDLE; + request->u.scatter.sync_meh = PTL_INVALID_HANDLE; + + request->u.scatter.my_rank = ompi_comm_rank(comm); + request->u.scatter.size = ompi_comm_size(comm); + request->u.scatter.root_rank = root; + request->u.scatter.sbuf = sbuf; + request->u.scatter.rbuf = rbuf; + + request->u.scatter.pack_src_buf = sbuf; + request->u.scatter.pack_src_count = scount; + request->u.scatter.pack_src_dtype = sdtype; + ompi_datatype_get_extent(request->u.scatter.pack_src_dtype, + &request->u.scatter.pack_src_lb, + &request->u.scatter.pack_src_extent); + ompi_datatype_get_true_extent(request->u.scatter.pack_src_dtype, + &request->u.scatter.pack_src_true_lb, + &request->u.scatter.pack_src_true_extent); + + if ((root == request->u.scatter.my_rank) && (rbuf == MPI_IN_PLACE)) { + request->u.scatter.unpack_dst_buf = NULL; + request->u.scatter.unpack_dst_count = 0; + request->u.scatter.unpack_dst_dtype = MPI_DATATYPE_NULL; + } else { + request->u.scatter.unpack_dst_buf = rbuf; + request->u.scatter.unpack_dst_count = rcount; + request->u.scatter.unpack_dst_dtype = rdtype; + request->u.scatter.unpack_dst_offset = 0; + ompi_datatype_get_extent(request->u.scatter.unpack_dst_dtype, + &request->u.scatter.unpack_dst_lb, + &request->u.scatter.unpack_dst_extent); + ompi_datatype_get_true_extent(request->u.scatter.unpack_dst_dtype, + &request->u.scatter.unpack_dst_true_lb, + &request->u.scatter.unpack_dst_true_extent); + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + 
"%s:%d:rank(%d): request->u.scatter.unpack_dst_offset(%lu)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.unpack_dst_offset); + + /**********************************/ + /* Setup Common Parameters */ + /**********************************/ + + i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); + + request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + + ret = setup_scatter_buffers_linear(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_scatter_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_sync_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + /**********************************/ + /* do the scatter */ + /**********************************/ + if (i_am_root) { + /* operations on the sync counter */ + expected_rtrs = request->u.scatter.size - 1; /* expect RTRs from non-root ranks */ + expected_acks = request->u.scatter.size - 1; /* expect Recv-ACKs from non-root ranks */ + + /* operations on the scatter counter */ + expected_puts = 0; + expected_chained_rtrs = 1; + expected_chained_acks = 1; + + /* Chain the RTR and Recv-ACK to the Scatter CT */ + sync_incr_event.success=1; + sync_incr_event.failure=0; + ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth, + sync_incr_event, + request->u.scatter.sync_cth, + expected_rtrs); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth, + sync_incr_event, + request->u.scatter.sync_cth, + expected_rtrs + expected_acks); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + /* root, so put packed bytes to other ranks */ + for (int32_t i=0;iu.scatter.size;i++) { + /* do not put to my scatter_buf. 
my data gets unpacked into my out buffer in linear_bottom(). */ + if (i == request->u.scatter.my_rank) { + continue; + } + + ptl_size_t offset = request->u.scatter.packed_size * i; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)", + __FILE__, __LINE__, request->u.scatter.my_rank, + offset, i, request->u.scatter.packed_size); + + ret = PtlTriggeredPut(request->u.scatter.scatter_mdh, + (ptl_size_t)request->u.scatter.scatter_buf + offset, + request->u.scatter.packed_size, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, i), + mca_coll_portals4_component.pt_idx, + request->u.scatter.scatter_match_bits, + 0, + NULL, + 0, + request->u.scatter.scatter_cth, + expected_chained_rtrs); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + } else { + /* non-root, so do nothing */ + + /* operations on the sync counter */ + expected_rtrs = 0; + expected_acks = 0; + + /* operations on the scatter counter */ + expected_puts = 1; /* scatter put from root */ + expected_chained_rtrs = 0; + expected_chained_acks = 0; + } + + expected_ops = expected_chained_rtrs + expected_puts; + + /**********************************************/ + /* only non-root ranks are PUT to, so only */ + /* non-root ranks must PUT a Recv-ACK to root */ + /**********************************************/ + if (!i_am_root) { + ret = PtlTriggeredPut(request->u.scatter.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank), + mca_coll_portals4_component.pt_idx, + request->u.scatter.sync_match_bits, + 0, + NULL, + 0, + request->u.scatter.scatter_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + expected_ops += expected_chained_acks; + + if (!request->u.scatter.is_sync) { + /******************************************/ + /* put to finish pt when all ops complete */ + 
/******************************************/ + ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.scatter.my_rank), + mca_coll_portals4_component.finish_pt_idx, + 0, + 0, + NULL, + (uintptr_t) request, + request->u.scatter.scatter_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + /**************************************/ + /* all non-root ranks put RTR to root */ + /**************************************/ + if (!i_am_root) { + ret = PtlPut(request->u.scatter.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank), + mca_coll_portals4_component.pt_idx, + request->u.scatter.sync_match_bits, + 0, + NULL, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + if (request->u.scatter.is_sync) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "calling CTWait(expected_ops=%d)\n", expected_ops); + + /********************************/ + /* Wait for all ops to complete */ + /********************************/ + ret = PtlCTWait(request->u.scatter.scatter_cth, expected_ops, &ct); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "completed CTWait(expected_ops=%d)\n", expected_ops); + } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + if (NULL != request->u.scatter.scatter_buf) + free(request->u.scatter.scatter_buf); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm, + 
ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank)); + + ret = cleanup_scatter_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = cleanup_sync_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + if (NULL != request->u.scatter.unpack_dst_buf) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + ompi_coll_portals4_create_recv_converter (&request->u.scatter.recv_converter, + request->u.scatter.unpack_dst_buf, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.unpack_dst_count, + request->u.scatter.unpack_dst_dtype); + + iov.iov_len = request->u.scatter.packed_size; + if (request->u.scatter.my_rank == request->u.scatter.root_rank) { + /* unpack my data from the location in scatter_buf where is was packed */ + uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * request->u.scatter.my_rank; + iov.iov_base = (IOVBASE_TYPE *)((char *)request->u.scatter.scatter_buf + offset); + } else { + iov.iov_base = (IOVBASE_TYPE *)request->u.scatter.scatter_buf; + } + opal_convertor_unpack(&request->u.scatter.recv_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.scatter.recv_converter); + } + + if (request->u.scatter.free_after) + free(request->u.scatter.scatter_buf); + + request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + + OPAL_THREAD_LOCK(&ompi_request_lock); + ompi_request_complete(&request->super, true); + OPAL_THREAD_UNLOCK(&ompi_request_lock); + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + request->super.req_status.MPI_ERROR = ret; + + if (request->u.scatter.free_after) + free(request->u.scatter.scatter_buf); 
+ + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +int +ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.is_sync = 1; + + /* + * initiate the scatter + * + * this request is marked synchronous (is_sync==1), so PtlCTWait() + * will be called to wait for completion. 
+ */ + ret = ompi_coll_portals4_scatter_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = ompi_coll_portals4_scatter_intra_linear_bottom(comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + /* + * return the portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_RETURN(request); + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + + +int +ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **ompi_request, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + *ompi_request = &request->super; + request->u.scatter.is_sync = 0; + + /* + * initiate the scatter + * + * this request is marked asynchronous (is_sync==0), so + * portals4_progress() will handle completion. 
+ */ + ret = ompi_coll_portals4_scatter_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + + +int +ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank)); + + /* + * cleanup the scatter + */ + ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} diff --git a/ompi/mca/coll/sm/coll_sm_reduce.c b/ompi/mca/coll/sm/coll_sm_reduce.c index d60f029b071..c731b87d2b8 100644 --- a/ompi/mca/coll/sm/coll_sm_reduce.c +++ b/ompi/mca/coll/sm/coll_sm_reduce.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -187,9 +187,9 @@ static int reduce_inorder(const void *sbuf, void* rbuf, int count, size_t total_size, max_data, bytes; mca_coll_sm_in_use_flag_t *flag; mca_coll_sm_data_index_t *index; - size_t ddt_size; + size_t ddt_size, segsize; size_t segment_ddt_count, segment_ddt_bytes, zero = 0; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t extent, gap; /* Setup some identities */ @@ -205,10 +205,7 @@ static int reduce_inorder(const void *sbuf, void* rbuf, int count, /* ddt_size is the packed size (e.g., MPI_SHORT_INT is 6) */ ompi_datatype_type_size(dtype, &ddt_size); /* extent is from lb to ub (e.g., MPI_SHORT_INT is 8) */ - ompi_datatype_get_extent(dtype, &lb, &extent); - /* true_extent is extent of actual type map, ignoring lb and ub - (e.g., MPI_SHORT_INT is 8) */ - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + ompi_datatype_type_extent(dtype, &extent); segment_ddt_count = mca_coll_sm_component.sm_fragment_size / ddt_size; iov.iov_len = segment_ddt_bytes = segment_ddt_count * ddt_size; total_size = ddt_size * count; @@ -266,12 +263,13 @@ static int reduce_inorder(const void *sbuf, void* rbuf, int count, "segment_ddt_count" instances (i.e., the number of instances that can be held in a single fragment) */ - free_buffer = (char*)malloc(true_extent + - (segment_ddt_count - 1) * extent); + segsize = opal_datatype_span(&dtype->super, segment_ddt_count, &gap); + + free_buffer = (char*)malloc(segsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - reduce_temp_buffer = free_buffer - true_lb; + reduce_temp_buffer = free_buffer - gap; /* Trickery here: we use a potentially smaller count than the user count -- use the largest count that is <= @@ -312,15 +310,16 @@ static int reduce_inorder(const void *sbuf, void* rbuf, int count, as the sbuf */ if (MPI_IN_PLACE == sbuf && (size - 1) != rank) { - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); 
+ segsize = opal_datatype_span(&dtype->super, count, &gap); + inplace_temp = (char*)malloc(segsize); if (NULL == inplace_temp) { if (NULL != free_buffer) { free(free_buffer); } return OMPI_ERR_OUT_OF_RESOURCE; } - sbuf = inplace_temp - true_lb; - ompi_datatype_copy_content_same_ddt(dtype, count, (char *) sbuf, (char *) rbuf); + sbuf = inplace_temp - gap; + ompi_datatype_copy_content_same_ddt(dtype, count, (char *)sbuf, (char *)rbuf); } else { inplace_temp = NULL; } diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 224de223fb2..092056839de 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -21,6 +21,7 @@ #include "ompi/mca/mca.h" #include "ompi/request/request.h" #include "ompi/mca/coll/base/coll_base_functions.h" +#include "opal/util/output.h" /* also need the dynamic rule structures */ #include "coll_tuned_dynamic_rules.h" diff --git a/ompi/mca/fbtl/base/fbtl_base_find_available.c b/ompi/mca/fbtl/base/fbtl_base_find_available.c index 21049443d0b..c6ecabc3c71 100644 --- a/ompi/mca/fbtl/base/fbtl_base_find_available.c +++ b/ompi/mca/fbtl/base/fbtl_base_find_available.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +27,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "opal/class/opal_list.h" +#include "opal/util/output.h" #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" #include "ompi/mca/fbtl/fbtl.h" diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c b/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c index 1eefad95bed..26e60065a5a 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c @@ -28,74 +28,19 @@ ssize_t mca_fbtl_plfs_preadv (mca_io_ompio_file_t *fh ) { - Plfs_fd *pfd = NULL; + Plfs_fd *pfd = fh->f_fs_ptr; plfs_error_t plfs_ret; - pfd = fh->f_fs_ptr; ssize_t total_bytes_read=0; - - int i, block=1; - struct iovec *iov = NULL; - int iov_count = 0; - OMPI_MPI_OFFSET_TYPE iov_offset = 0; + int i; + ssize_t bytes_read; if (NULL == fh->f_io_array) { return OMPI_ERROR; } - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i=0 ; if_num_of_io_entries ; i++) { - if (0 == iov_count) { - iov[iov_count].iov_base = fh->f_io_array[i].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i].length; - iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; - iov_count ++; - } - - if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { - block ++; - iov = (struct iovec *)realloc - (iov, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - - if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) { - iov[iov_count].iov_base = - fh->f_io_array[i+1].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i+1].length; - iov_count ++; - continue; - } - } - - // Find 
the total number of bytes to be read. - size_t bytes = 0; - for (int i = 0; i < iov_count; ++i) { - bytes += iov[i].iov_len; - } - - // Allocate a temporary buffer to hold the data - char *buffer; - buffer = (char *) malloc (bytes); - if (buffer == NULL) { - return OMPI_ERROR; - } - - // Read the data - ssize_t bytes_read; - plfs_ret = plfs_read( pfd, buffer, bytes, iov_offset, &bytes_read ); + plfs_ret = plfs_read( pfd, fh->f_io_array[i].memory_address, fh->f_io_array[i].length, + (off_t )fh->f_io_array[i].offset, &bytes_read ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fbtl_plfs_preadv: Error in plfs_read:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; @@ -104,27 +49,6 @@ ssize_t mca_fbtl_plfs_preadv (mca_io_ompio_file_t *fh ) if (bytes_read < 0) return OMPI_ERROR; total_bytes_read += bytes_read; - // Copy the data from BUFFER into the memory specified by IOV - bytes = bytes_read; - for (int i = 0; i < iov_count; ++i) { - size_t copy = MIN (iov[i].iov_len, bytes); - (void) memcpy ((void *) iov[i].iov_base, (void *) buffer, copy); - buffer += copy; - bytes -= copy; - if (bytes == 0) { - break; - } - } - iov_count = 0; - if ( NULL != buffer ) { - free (buffer); - buffer=NULL; - } - } - - if (NULL != iov) { - free (iov); - iov = NULL; } return total_bytes_read; diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c b/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c index 5c79971478e..cd63c9db5a2 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c @@ -28,102 +28,26 @@ ssize_t mca_fbtl_plfs_pwritev (mca_io_ompio_file_t *fh ) { - Plfs_fd *pfd = NULL; + Plfs_fd *pfd = fh->f_fs_ptr; plfs_error_t plfs_ret; - pfd = fh->f_fs_ptr; ssize_t total_bytes_written=0; - - int i, block = 1; - struct iovec *iov = NULL; - int iov_count = 0; - OMPI_MPI_OFFSET_TYPE iov_offset = 0; + ssize_t bytes_written; + int i; if (NULL == fh->f_io_array) { return OMPI_ERROR; } - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct 
iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i=0 ; if_num_of_io_entries ; i++) { - if (0 == iov_count) { - iov[iov_count].iov_base = fh->f_io_array[i].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i].length; - iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; - iov_count ++; - } - - if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { - block ++; - iov = (struct iovec *)realloc - (iov, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - - if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) { - iov[iov_count].iov_base = - fh->f_io_array[i+1].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i+1].length; - iov_count ++; - continue; - } - } - - // Find the total number of bytes to be written. - size_t bytes = 0; - for (int i = 0; i < iov_count; ++i) { - bytes += iov[i].iov_len; - } - - // Allocate a temporary buffer to hold the data - char *buffer=NULL; - buffer = (char *) malloc (bytes); - if (buffer == NULL) { - return OMPI_ERROR; - } - - // Copy the data into BUFFER. 
- size_t to_copy = bytes; - char *bp = buffer; - for (int i = 0; i < iov_count; ++i) { - size_t copy = MIN (iov[i].iov_len, to_copy); - bp = mempcpy ((void *) bp, (void *) iov[i].iov_base, copy); - to_copy -= copy; - if (to_copy == 0) { - break; - } - } - - // Write the data - ssize_t bytes_written; - - plfs_ret = plfs_write( pfd, buffer, bytes, iov_offset, 0, &bytes_written ); + plfs_ret = plfs_write( pfd, fh->f_io_array[i].memory_address, + fh->f_io_array[i].length, + (off_t) fh->f_io_array[i].offset, + fh->f_rank, &bytes_written ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fbtl_plfs_pwritev: Error in plfs_write:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; } total_bytes_written += bytes_written; - iov_count = 0; - if ( NULL != buffer ) { - free ( buffer ); - buffer=NULL; - } - } - - if (NULL != iov) { - free (iov); - iov = NULL; } return total_bytes_written; diff --git a/ompi/mca/fbtl/posix/configure.m4 b/ompi/mca/fbtl/posix/configure.m4 index 8a6d548ea58..a03bd1a6f83 100644 --- a/ompi/mca/fbtl/posix/configure.m4 +++ b/ompi/mca/fbtl/posix/configure.m4 @@ -32,6 +32,9 @@ AC_DEFUN([MCA_ompi_fbtl_posix_CONFIG],[ [aio_write], [rt], [fbtl_posix_happy="yes"])]) + AC_CHECK_FUNCS([pwritev],[],[]) + AC_CHECK_FUNCS([preadv],[],[]) + AS_IF([test "$fbtl_posix_happy" = "yes"], [$1], [$2]) diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index ceb8b1d984c..27dc589ee0a 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -80,6 +80,12 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) } } +#if defined(HAVE_PREADV) + ret_code = preadv (fh->fd, iov, iov_count, iov_offset); + if ( 0 < ret_code ) { + bytes_read+=ret_code; + } +#else if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { opal_output(1, "lseek:%s", strerror(errno)); free(iov); @@ -89,6 +95,7 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) if ( 0 < ret_code ) { bytes_read+=ret_code; } +#endif else 
if ( ret_code == -1 ) { opal_output(1, "readv:%s", strerror(errno)); free(iov); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index 5208716d111..fbf69489ff8 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -93,6 +93,13 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) } */ +#if defined (HAVE_PWRITEV) + ret_code = pwritev (fh->fd, iov, iov_count, iov_offset); + if ( 0 < ret_code ) { + bytes_written += ret_code; + } + +#else if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { opal_output(1, "lseek:%s", strerror(errno)); free(iov); @@ -102,6 +109,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) if ( 0 < ret_code ) { bytes_written += ret_code; } +#endif else if (-1 == ret_code ) { opal_output(1, "writev:%s", strerror(errno)); free (iov); diff --git a/ompi/mca/fcoll/base/fcoll_base_file_select.c b/ompi/mca/fcoll/base/fcoll_base_file_select.c index 0d8aa3ff009..3c260074f21 100644 --- a/ompi/mca/fcoll/base/fcoll_base_file_select.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -272,6 +272,11 @@ int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name) return 1; } } + if (!strcmp (name, "dynamic_gen2")) { + if ( LUSTRE == file->f_fstype ) { + return 1; + } + } if (!strcmp (name, "two_phase")) { if ((int)file->f_cc_size < file->f_bytes_per_agg && file->f_cc_size < file->f_stripe_size) { diff --git a/ompi/mca/fcoll/dynamic_gen2/Makefile.am b/ompi/mca/fcoll/dynamic_gen2/Makefile.am new file mode 100644 index 00000000000..f4910ac5e97 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2015 University of Houston. All rights reserved. +# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + fcoll_dynamic_gen2.h \ + fcoll_dynamic_gen2_module.c \ + fcoll_dynamic_gen2_component.c \ + fcoll_dynamic_gen2_file_read_all.c \ + fcoll_dynamic_gen2_file_write_all.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_ompi_fcoll_dynamic_gen2_DSO +component_noinst = +component_install = mca_fcoll_dynamic_gen2.la +else +component_noinst = libmca_fcoll_dynamic_gen2.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcoll_dynamic_gen2_la_SOURCES = $(sources) +mca_fcoll_dynamic_gen2_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcoll_dynamic_gen2_la_SOURCES =$(sources) +libmca_fcoll_dynamic_gen2_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h new file mode 100644 index 00000000000..dfd8d16e924 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FCOLL_DYNAMIC_EXPORT_H +#define MCA_FCOLL_DYNAMIC_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/mca/mca.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +BEGIN_C_DECLS + +/* Globally exported variables */ + +extern int mca_fcoll_dynamic_gen2_priority; +extern int mca_fcoll_dynamic_gen2_num_groups; +extern int mca_fcoll_dynamic_gen2_write_chunksize; + +OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component; + +/* API functions */ + +int mca_fcoll_dynamic_gen2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_gen2_component_file_query (mca_io_ompio_file_t *fh, int *priority); + +int mca_fcoll_dynamic_gen2_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_gen2_module_init (mca_io_ompio_file_t *file); +int mca_fcoll_dynamic_gen2_module_finalize (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + + +int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + + +END_C_DECLS + +#endif /* MCA_FCOLL_DYNAMIC_EXPORT_H */ diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c new file mode 100644 index 00000000000..055b6b244b2 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c @@ -0,0 +1,106 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * 
Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" +#include "mpi.h" + +/* + * Public string showing the fcoll ompi_dynamic_gen2 component version number + */ +const char *mca_fcoll_dynamic_gen2_component_version_string = + "Open MPI dynamic_gen2 collective MCA component version " OMPI_VERSION; + +/* + * Global variables + */ +int mca_fcoll_dynamic_gen2_priority = 10; +int mca_fcoll_dynamic_gen2_num_groups = 1; +int mca_fcoll_dynamic_gen2_write_chunksize = -1; + +/* + * Local function + */ +static int dynamic_gen2_register(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component = { + + /* First, the mca_component_t struct containing meta information + * about the component itself */ + + .fcollm_version = { + MCA_FCOLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "dynamic_gen2", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_register_component_params = dynamic_gen2_register, + }, + .fcollm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .fcollm_init_query = mca_fcoll_dynamic_gen2_component_init_query, + .fcollm_file_query = mca_fcoll_dynamic_gen2_component_file_query, + .fcollm_file_unquery = mca_fcoll_dynamic_gen2_component_file_unquery, +}; + + +static int +dynamic_gen2_register(void) +{ + mca_fcoll_dynamic_gen2_priority = 10; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "priority", "Priority of the dynamic_gen2 fcoll component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_priority); + + mca_fcoll_dynamic_gen2_num_groups = 1; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "num_groups", "Number of subgroups 
created by the dynamic_gen2 component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_num_groups); + + mca_fcoll_dynamic_gen2_write_chunksize = -1; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "write_chunksize", "Chunk size written at once. Default: stripe_size of the file system", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_write_chunksize); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c new file mode 100644 index 00000000000..f34858ed34b --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c @@ -0,0 +1,1074 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define DEBUG_ON 0 + +/*Used for loading file-offsets per aggregator*/ +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; + + +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted); + + + +int +mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + MPI_Aint position = 0; + MPI_Aint total_bytes = 0; /* total bytes to be read */ + MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ + int index = 0, ret=OMPI_SUCCESS; + int cycles = 0; + int i=0, j=0, l=0; + int n=0; /* current position in total_bytes_per_process array */ + MPI_Aint bytes_remaining = 0; /* how many bytes have been read from the current + value from total_bytes_per_process */ + int *sorted_file_offsets=NULL, entries_per_aggregator=0; + int bytes_received = 0; + int blocks = 0; + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + size_t current_position = 0; + struct iovec *local_iov_array=NULL, *global_iov_array=NULL; + char *receive_buf = NULL; + MPI_Aint *memory_displacements=NULL; + /* global iovec at the readers that contain the iovecs created from + file_set_view */ + uint32_t total_fview_count = 0; + int local_count = 0; + int *fview_count = NULL, *disp_index=NULL, *temp_disp_index=NULL; + int current_index=0, temp_index=0; 
+ int **blocklen_per_process=NULL; + MPI_Aint **displs_per_process=NULL; + char *global_buf = NULL; + MPI_Aint global_count = 0; + mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int dynamic_gen2_num_io_procs; + size_t max_data = 0; + MPI_Aint *total_bytes_per_process = NULL; + ompi_datatype_t **sendtype = NULL; + MPI_Request *send_req=NULL, recv_req=NULL; + int my_aggregator =-1; + bool recvbuf_is_contiguous=false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; + double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; + double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; + mca_io_ompio_print_entry nentry; +#endif + + /************************************************************************** + ** 1. In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); + + if ( (ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + recvbuf_is_contiguous = true; + } + + + if (! 
recvbuf_is_contiguous ) { + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + else { + max_data = count * datatype->super.size; + } + + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; + } + + fh->f_get_num_aggregators ( &dynamic_gen2_num_io_procs); + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, + dynamic_gen2_num_io_procs, + max_data); + if (OMPI_SUCCESS != ret){ + goto exit; + } + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + + /************************************************************************** + ** 2. Determine the total amount of data to be written + **************************************************************************/ + total_bytes_per_process = (MPI_Aint*)malloc(fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&max_data, + 1, + MPI_LONG, + total_bytes_per_process, + 1, + MPI_LONG, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + for (i=0 ; if_procs_per_group ; i++) { + total_bytes += total_bytes_per_process[i]; + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + /********************************************************************* + *** 3. 
Generate the File offsets/lengths corresponding to this write + ********************************************************************/ + ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *) fh, + max_data, + &local_iov_array, + &local_count); + + if (ret != OMPI_SUCCESS){ + goto exit; + } + + /************************************************************* + *** 4. Allgather the File View information at all processes + *************************************************************/ + + fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&local_count, + 1, + MPI_INT, + fview_count, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs[0] = 0; + total_fview_count = fview_count[0]; + for (i=1 ; if_procs_per_group ; i++) { + total_fview_count += fview_count[i]; + displs[i] = displs[i-1] + fview_count[i-1]; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + i, + fview_count[i], + displs[i]); +} +} +#endif + + /* allocate the global iovec */ + if (0 != total_fview_count) { + global_iov_array = (struct iovec*)malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == global_iov_array) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } +#if 
OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgatherv_array (local_iov_array, + local_count, + fh->f_iov_type, + global_iov_array, + fview_count, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. + *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, than + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ + if (0 != total_fview_count) { + sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_sort_iovec (global_iov_array, total_fview_count, sorted); + } + + if (NULL != local_iov_array) { + free (local_iov_array); + local_iov_array = NULL; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_rank, + global_iov_array[sorted[i]].iov_base, + global_iov_array[sorted[i]].iov_len); + } + } +#endif + + /************************************************************* + *** 6. 
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); + cycles = ceil((double)total_bytes/bytes_per_cycle); + + if ( my_aggregator == fh->f_rank) { + disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); + if (NULL == blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + if (NULL == displs_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for (i=0;if_procs_per_group;i++){ + blocklen_per_process[i] = NULL; + displs_per_process[i] = NULL; + } + + send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); + if (NULL == send_req){ + opal_output ( 1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == global_buf){ + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == sendtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for(l=0;lf_procs_per_group;l++){ + sendtype[l] = MPI_DATATYPE_NULL; + } + } + + + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rexch = MPI_Wtime(); +#endif + n = 0; + bytes_remaining = 0; + current_index = 0; + + for (index = 0; index < cycles; index++) { + /********************************************************************** + *** 7a. 
Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (my_aggregator == fh->f_rank) { + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + fh->f_num_of_io_entries = 0; + + if (NULL != sendtype){ + for (i =0; i< fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + sendtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + if (NULL == blocklen_per_process[l]) { + opal_output (1, "OUT OF MEMORY for blocklen\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == displs_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + } /* (my_aggregator == fh->f_rank */ + + /************************************************************************** + *** 7b. 
Determine the number of bytes to be actually read in this cycle + **************************************************************************/ + if (cycles-1 == index) { + bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %d**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } +#endif + + /***************************************************************** + *** 7c. Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + bytes_received = 0; + + while (bytes_to_read_in_cycle) { + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + + if (bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + if (bytes_remaining <= bytes_to_read_in_cycle) { + /* Data fits completely into the block */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); + + blocklen_per_process[n] = (int *) realloc + ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *) realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_remaining; + } + current_index ++; + bytes_to_read_in_cycle -= 
bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + /* the remaining data from the previous cycle is larger than the + bytes_to_write_in_cycle, so we have to segment again */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len + - bytes_remaining); + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining -= bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (bytes_to_read_in_cycle < + (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + } + + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining = global_iov_array[sorted[current_index]].iov_len - + bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + else { + /* Next data entry is less than bytes_to_write_in_cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = + global_iov_array[sorted[current_index]].iov_len; + displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + global_iov_array[sorted[current_index]].iov_base; + blocklen_per_process[n] = + (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *)realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + 
displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += + global_iov_array[sorted[current_index]].iov_len; + } + bytes_to_read_in_cycle -= + global_iov_array[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } /* end while (bytes_to_read_in_cycle) */ + + /************************************************************************* + *** 7d. Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (my_aggregator == fh->f_rank) { + entries_per_aggregator=0; + for (i=0;if_procs_per_group; i++){ + for (j=0;j 0) + entries_per_aggregator++ ; + } + } + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + /*Moving file offsets to an IO array!*/ + temp_index = 0; + global_count = 0; + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + global_count += blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + } + } + } + } + else{ + continue; + } + + /* Sort the displacements for each aggregator */ + read_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + memory_displacements[sorted_file_offsets[0]] = 0; + for 
(i=1; if_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + fh->f_num_of_io_entries = 0; + fh->f_io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[0].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[0].memory_address = + global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else{ + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_read_time = MPI_Wtime(); +#endif + + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { + opal_output (1, "READ FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_read_time = MPI_Wtime(); + read_time += end_read_time - start_read_time; +#endif + /********************************************************** + ******************** DONE READING ************************ + *********************************************************/ + + temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for (i=0; if_procs_per_group;i++){ + send_req[i] = MPI_REQUEST_NULL; + if ( 0 < disp_index[i] ) { + 
ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &sendtype[i]); + ompi_datatype_commit(&sendtype[i]); + ret = MCA_PML_CALL (isend(global_buf, + 1, + sendtype[i], + fh->f_procs_in_group[i], + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req[i])); + if(OMPI_SUCCESS != ret){ + goto exit; + } + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } + + /********************************************************** + *** 7f. Scatter the Data from the readers + *********************************************************/ + if ( recvbuf_is_contiguous ) { + receive_buf = &((char*)buf)[position]; + } + else if (bytes_received) { + /* allocate a receive buffer and copy the data that needs + to be received into it in case the data is non-contigous + in memory */ + receive_buf = malloc (bytes_received); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = MCA_PML_CALL(irecv(receive_buf, + bytes_received, + MPI_BYTE, + my_aggregator, + 123, + fh->f_comm, + &recv_req)); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + + if (my_aggregator == fh->f_rank){ + ret = ompi_request_wait_all (fh->f_procs_per_group, + send_req, + MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + + ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + position += bytes_received; + + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!recvbuf_is_contiguous ) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = bytes_received; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + 
current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } /* end for (index=0; index < cycles; index ++) */ + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rexch = MPI_Wtime(); + read_exch += end_rexch - start_rexch; + nentry.time[0] = read_time; + nentry.time[1] = rcomm_time; + nentry.time[2] = read_exch; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; + else + nentry.aggregator = 0; + nentry.nprocs_for_coll = dynamic_gen2_num_io_procs; + if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ + fh->f_register_print_entry(READ_PRINT_QUEUE, + nentry); + } +#endif + +exit: + if (!recvbuf_is_contiguous) { + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != global_iov_array) { + free (global_iov_array); + global_iov_array = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array=NULL; + } + + if (NULL != displs) { + free (displs); + displs = NULL; + } + if 
(my_aggregator == fh->f_rank) { + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements= NULL; + } + if (NULL != sendtype){ + for (i = 0; i < fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + } + } + free(sendtype); + sendtype=NULL; + } + + if (NULL != disp_index){ + free(disp_index); + disp_index = NULL; + } + + if ( NULL != blocklen_per_process){ + for(l=0;lf_procs_per_group;l++){ + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + } + + free(blocklen_per_process); + blocklen_per_process = NULL; + } + + if (NULL != displs_per_process){ + for (l=0; if_procs_per_group; l++){ + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + } + free(displs_per_process); + displs_per_process = NULL; + } + if ( NULL != send_req ) { + free ( send_req ); + send_req = NULL; + } + } + return ret; +} + +/* read_heap_sort: fill sorted[0..num_entries-1] with the indices of io_array ordered by ascending .offset, using an iterative heap sort over a scratch index array; io_array itself is only read. Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the scratch array cannot be allocated. */ +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted) +{ + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; /* index of the last element still inside the heap */ + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; /* scratch permutation of indices into io_array */ + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; /* written unconditionally, so num_entries is assumed to be >= 1 -- TODO confirm with callers */ + for (i = 1; i < num_entries; ++i) { /* start from the identity permutation */ + temp_arr[i] = i; + } + /* num_entries can be a large no.
so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { /* phase 1: build a max-heap on .offset by sifting down every internal node */ + done = 0; + j = i; + largest = j; + + while (!done) { /* iterative sift-down of node j */ + left = j*2+1; + right = j*2+2; + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { /* a child holds a larger offset: swap and continue from that child */ + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + } + + for (i = num_entries-1; i >=1; --i) { /* phase 2: move the current maximum into slot i, then restore the heap on [0, heap_size] */ + temp = temp_arr[0]; + temp_arr[0] = temp_arr[i]; + temp_arr[i] = temp; + heap_size--; /* slot i is final and leaves the heap */ + done = 0; + j = 0; + largest = j; + + while (!done) { /* iterative sift-down of the new root */ + left = j*2+1; + right = j*2+2; + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > + io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + sorted[i] = temp_arr[i]; /* publish the index that settled in slot i */ + } + sorted[0] = temp_arr[0]; /* the last element left in the heap has the smallest offset */ + + if (NULL != temp_arr) { /* always true here: the NULL case returned early above */ + free(temp_arr); + temp_arr = NULL; + } + return OMPI_SUCCESS; +} + + + diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c new file mode 100644 index 00000000000..409fdc4c006 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -0,0 +1,1695 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation.
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + + +#define DEBUG_ON 0 +#define FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG 123 + +/*Used for loading file-offsets per aggregator*/ +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; + +typedef struct mca_io_ompio_aggregator_data { + int *disp_index, *sorted, *fview_count, n; + int **blocklen_per_process; + MPI_Aint **displs_per_process, total_bytes, bytes_per_cycle, total_bytes_written; + MPI_Comm comm; + char *buf, *global_buf, *prev_global_buf; + ompi_datatype_t **recvtype, **prev_recvtype; + struct iovec *global_iov_array; + int current_index, current_position; + int bytes_to_write_in_cycle, bytes_remaining, procs_per_group; + int *procs_in_group, iov_index; + bool sendbuf_is_contiguous, prev_sendbuf_is_contiguous; + int bytes_sent, prev_bytes_sent; + struct iovec *decoded_iov; + int bytes_to_write, prev_bytes_to_write; + mca_io_ompio_io_array_t *io_array, *prev_io_array; + int num_io_entries, prev_num_io_entries; + char *send_buf, *prev_send_buf; +} mca_io_ompio_aggregator_data; + + +#define SWAP_REQUESTS(_r1,_r2) { \ + ompi_request_t **_t=_r1; \ + _r1=_r2; \ + _r2=_t;} + +#define 
SWAP_AGGR_POINTERS(_aggr,_num) { \ + int _i; \ + char *_t; \ + for (_i=0; _i<_num; _i++ ) { \ + _aggr[_i]->prev_io_array=_aggr[_i]->io_array; \ + _aggr[_i]->prev_num_io_entries=_aggr[_i]->num_io_entries; \ + _aggr[_i]->prev_send_buf=_aggr[_i]->send_buf; \ + _aggr[_i]->prev_bytes_sent=_aggr[_i]->bytes_sent; \ + _aggr[_i]->prev_sendbuf_is_contiguous=_aggr[_i]->sendbuf_is_contiguous; \ + _aggr[_i]->prev_bytes_to_write=_aggr[_i]->bytes_to_write; \ + _t=_aggr[_i]->prev_global_buf; \ + _aggr[_i]->prev_global_buf=_aggr[_i]->global_buf; \ + _aggr[_i]->global_buf=_t; \ + _t=(char *)_aggr[_i]->recvtype; \ + _aggr[_i]->recvtype=_aggr[_i]->prev_recvtype; \ + _aggr[_i]->prev_recvtype=(ompi_datatype_t **)_t; } \ +} + + + +static int shuffle_init ( int index, int cycles, int aggregator, int rank, + mca_io_ompio_aggregator_data *data, + ompi_request_t **reqs ); +static int write_init (mca_io_ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ); + +int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count, + struct iovec *local_iov_array, int local_count, + struct iovec ***broken_decoded_iovs, int **broken_iov_counts, + struct iovec ***broken_iov_arrays, int **broken_counts, + MPI_Aint **broken_total_lengths, + int stripe_count, int stripe_size); + + +int mca_fcoll_dynamic_gen2_get_configuration (mca_io_ompio_file_t *fh, int *dynamic_gen2_num_io_procs, + int **ret_aggregators); + + +static int local_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted); + +int mca_fcoll_dynamic_gen2_split_iov_array ( mca_io_ompio_file_t *fh, mca_io_ompio_io_array_t *work_array, + int num_entries, int *last_array_pos, int *last_pos_in_field, + int chunk_size ); + + +int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + int index = 0; + int cycles = 0; + int ret =0, l, i, j, 
bytes_per_cycle; + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + struct iovec *local_iov_array=NULL; + uint32_t total_fview_count = 0; + int local_count = 0; + ompi_request_t **reqs1=NULL,**reqs2=NULL; + ompi_request_t **curr_reqs=NULL,**prev_reqs=NULL; + mca_io_ompio_aggregator_data **aggr_data=NULL; + + int *displs = NULL; + int dynamic_gen2_num_io_procs; + size_t max_data = 0; + MPI_Aint *total_bytes_per_process = NULL; + + struct iovec **broken_iov_arrays=NULL; + struct iovec **broken_decoded_iovs=NULL; + int *broken_counts=NULL; + int *broken_iov_counts=NULL; + MPI_Aint *broken_total_lengths=NULL; + + int *aggregators=NULL; + int write_chunksize, *result_counts=NULL; + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; + double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; + double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; + mca_io_ompio_print_entry nentry; +#endif + + + /************************************************************************** + ** 1. In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + fh->f_get_bytes_per_agg ( (int *)&bytes_per_cycle ); + /* since we want to overlap 2 iterations, define the bytes_per_cycle to be half of what + the user requested */ + bytes_per_cycle =bytes_per_cycle/2; + + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *) fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret ){ + goto exit; + } + + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; + } + + /* difference to the first generation of this function: + ** dynamic_gen2_num_io_procs should be the number of io_procs per group + ** consequently.Initially, we will have only 1 group. 
+ */ + if ( fh->f_stripe_count > 1 ) { + dynamic_gen2_num_io_procs = fh->f_stripe_count; + } + else { + fh->f_get_num_aggregators ( &dynamic_gen2_num_io_procs ); + } + + + if ( fh->f_stripe_size == 0 ) { + // EDGAR: just a quick heck for testing + fh->f_stripe_size = 65536; + } + if ( -1 == mca_fcoll_dynamic_gen2_write_chunksize ) { + write_chunksize = fh->f_stripe_size; + } + else { + write_chunksize = mca_fcoll_dynamic_gen2_write_chunksize; + } + + + ret = mca_fcoll_dynamic_gen2_get_configuration (fh, &dynamic_gen2_num_io_procs, &aggregators); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + aggr_data = (mca_io_ompio_aggregator_data **) malloc ( dynamic_gen2_num_io_procs * + sizeof(mca_io_ompio_aggregator_data*)); + + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + // At this point we know the number of aggregators. If there is a correlation between + // number of aggregators and number of IO nodes, we know how many aggr_data arrays we need + // to allocate. + aggr_data[i] = (mca_io_ompio_aggregator_data *) calloc ( 1, sizeof(mca_io_ompio_aggregator_data)); + aggr_data[i]->procs_per_group = fh->f_procs_per_group; + aggr_data[i]->procs_in_group = fh->f_procs_in_group; + aggr_data[i]->comm = fh->f_comm; + aggr_data[i]->buf = (char *)buf; // should not be used in the new version. + aggr_data[i]->sendbuf_is_contiguous = false; //safe assumption for right now + aggr_data[i]->prev_sendbuf_is_contiguous = false; //safe assumption for right now + } + + /********************************************************************* + *** 2. Generate the local offsets/lengths array corresponding to + *** this write operation + ********************************************************************/ + ret = fh->f_generate_current_file_view( (struct mca_io_ompio_file_t *) fh, + max_data, + &local_iov_array, + &local_count); + if (ret != OMPI_SUCCESS){ + goto exit; + } + + /************************************************************************* + ** 2b. 
Separate the local_iov_array entries based on the number of aggregators + *************************************************************************/ + // broken_iov_arrays[0] contains broken_counts[0] entries to aggregator 0, + // broken_iov_arrays[1] contains broken_counts[1] entries to aggregator 1, etc. + ret = mca_fcoll_dynamic_gen2_break_file_view ( decoded_iov, iov_count, + local_iov_array, local_count, + &broken_decoded_iovs, &broken_iov_counts, + &broken_iov_arrays, &broken_counts, + &broken_total_lengths, + dynamic_gen2_num_io_procs, fh->f_stripe_size); + + + /************************************************************************** + ** 3. Determine the total amount of data to be written and no. of cycles + **************************************************************************/ + total_bytes_per_process = (MPI_Aint*)malloc + (dynamic_gen2_num_io_procs * fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgather (broken_total_lengths, + dynamic_gen2_num_io_procs, + MPI_LONG, + total_bytes_per_process, + dynamic_gen2_num_io_procs, + MPI_LONG, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + } + else { + ret = fh->f_allgather_array (broken_total_lengths, + dynamic_gen2_num_io_procs, + MPI_LONG, + total_bytes_per_process, + dynamic_gen2_num_io_procs, + MPI_LONG, + 0, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + + if( OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + cycles=0; + for ( i=0; if_procs_per_group ; j++) { + broken_total_lengths[i] += total_bytes_per_process[j*dynamic_gen2_num_io_procs + i]; + } +#if DEBUG_ON + 
printf("%d: Overall broken_total_lengths[%d] = %ld\n", fh->f_rank, i, broken_total_lengths[i]); +#endif + if ( ceil((double)broken_total_lengths[i]/bytes_per_cycle) > cycles ) { + cycles = ceil((double)broken_total_lengths[i]/bytes_per_cycle); + } + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + result_counts = (int *) malloc ( dynamic_gen2_num_io_procs * fh->f_procs_per_group * sizeof(int) ); + if ( NULL == result_counts ) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgather(broken_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + result_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + } + else { + ret = fh->f_allgather_array (broken_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + result_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + 0, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + if( OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + /************************************************************* + *** 4. 
Allgather the offset/lengths array from all processes + *************************************************************/ + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + aggr_data[i]->total_bytes = broken_total_lengths[i]; + aggr_data[i]->decoded_iov = broken_decoded_iovs[i]; + aggr_data[i]->fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == aggr_data[i]->fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for ( j=0; j f_procs_per_group; j++ ) { + aggr_data[i]->fview_count[j] = result_counts[dynamic_gen2_num_io_procs*j+i]; + } + displs = (int*) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs[0] = 0; + total_fview_count = aggr_data[i]->fview_count[0]; + for (j=1 ; jf_procs_per_group ; j++) { + total_fview_count += aggr_data[i]->fview_count[j]; + displs[j] = displs[j-1] + aggr_data[i]->fview_count[j-1]; + } + +#if DEBUG_ON + printf("total_fview_count : %d\n", total_fview_count); + if (aggregators[i] == fh->f_rank) { + for (j=0 ; jf_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + j, + aggr_data[i]->fview_count[j], + displs[j]); + } + } +#endif + + /* allocate the global iovec */ + if (0 != total_fview_count) { + aggr_data[i]->global_iov_array = (struct iovec*) malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == aggr_data[i]->global_iov_array){ + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgatherv (broken_iov_arrays[i], + broken_counts[i], + fh->f_iov_type, + aggr_data[i]->global_iov_array, + aggr_data[i]->fview_count, + displs, + fh->f_iov_type, + fh->f_comm, + 
fh->f_comm->c_coll.coll_allgatherv_module ); + } + else { + ret = fh->f_allgatherv_array (broken_iov_arrays[i], + broken_counts[i], + fh->f_iov_type, + aggr_data[i]->global_iov_array, + aggr_data[i]->fview_count, + displs, + fh->f_iov_type, + aggregators[i], + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. + *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, than + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ + if (0 != total_fview_count) { + aggr_data[i]->sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == aggr_data[i]->sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); + } + + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array = NULL; + } + + if (NULL != displs){ + free(displs); + displs=NULL; + } + + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + uint32_t tv=0; + for (tv=0 ; tvf_rank, + aggr_data[i]->global_iov_array[aggr_data[i]->sorted[tv]].iov_base, + aggr_data[i]->global_iov_array[aggr_data[i]->sorted[tv]].iov_len); + } + } +#endif + /************************************************************* + *** 6. 
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + + aggr_data[i]->bytes_per_cycle = bytes_per_cycle; + + if (aggregators[i] == fh->f_rank) { + aggr_data[i]->disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == aggr_data[i]->disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->blocklen_per_process = (int **)calloc (fh->f_procs_per_group, sizeof (int*)); + if (NULL == aggr_data[i]->blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); + if (NULL == aggr_data[i]->displs_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + + aggr_data[i]->global_buf = (char *) malloc (bytes_per_cycle); + aggr_data[i]->prev_global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == aggr_data[i]->global_buf || NULL == aggr_data[i]->prev_global_buf){ + opal_output(1, "OUT OF MEMORY"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + sizeof(ompi_datatype_t *)); + aggr_data[i]->prev_recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + sizeof(ompi_datatype_t *)); + if (NULL == aggr_data[i]->recvtype || NULL == aggr_data[i]->prev_recvtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for(l=0;lf_procs_per_group;l++){ + aggr_data[i]->recvtype[l] = MPI_DATATYPE_NULL; + aggr_data[i]->prev_recvtype[l] = MPI_DATATYPE_NULL; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + } + + reqs1 = (ompi_request_t **)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); + reqs2 = (ompi_request_t 
**)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); + if ( NULL == reqs1 || NULL == reqs2 ) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for (l=0,i=0; i < dynamic_gen2_num_io_procs; i++ ) { + for ( j=0; j< (fh->f_procs_per_group+1); j++ ) { + reqs1[l] = MPI_REQUEST_NULL; + reqs2[l] = MPI_REQUEST_NULL; + l++; + } + } + + curr_reqs = reqs1; + prev_reqs = reqs2; + + /* Initialize communication for iteration 0 */ + if ( cycles > 0 ) { + for ( i=0; if_rank, aggr_data[i], + &curr_reqs[i*(fh->f_procs_per_group + 1)] ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + } + } + + + for (index = 1; index < cycles; index++) { + SWAP_REQUESTS(curr_reqs,prev_reqs); + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + + /* Initialize communication for iteration i */ + for ( i=0; if_rank, aggr_data[i], + &curr_reqs[i*(fh->f_procs_per_group + 1)] ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + } + + /* Finish communication for iteration i-1 */ + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + prev_reqs, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + + /* Write data for iteration i-1 */ + for ( i=0; iprev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) { + free (aggr_data[i]->prev_send_buf); + } + } + + } /* end for (index = 0; index < cycles; index++) */ + + + /* Finish communication for iteration i = cycles-1 */ + if ( cycles > 0 ) { + SWAP_REQUESTS(curr_reqs,prev_reqs); + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + prev_reqs, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + /* Write data for iteration i=cycles-1 */ + for ( i=0; iprev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) { + free (aggr_data[i]->prev_send_buf); + } + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + 
end_exch = MPI_Wtime(); + exch_write += end_exch - start_exch; + nentry.time[0] = write_time; + nentry.time[1] = comm_time; + nentry.time[2] = exch_write; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; + else + nentry.aggregator = 0; + nentry.nprocs_for_coll = dynamic_gen2_num_io_procs; + if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){ + fh->f_register_print_entry(WRITE_PRINT_QUEUE, + nentry); + } +#endif + + +exit : + + if ( NULL != aggr_data ) { + + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + if (aggregators[i] == fh->f_rank) { + if (NULL != aggr_data[i]->recvtype){ + for (j =0; j< aggr_data[i]->procs_per_group; j++) { + if ( MPI_DATATYPE_NULL != aggr_data[i]->recvtype[j] ) { + ompi_datatype_destroy(&aggr_data[i]->recvtype[j]); + } + if ( MPI_DATATYPE_NULL != aggr_data[i]->prev_recvtype[j] ) { + ompi_datatype_destroy(&aggr_data[i]->prev_recvtype[j]); + } + + } + free(aggr_data[i]->recvtype); + free(aggr_data[i]->prev_recvtype); + } + + free (aggr_data[i]->disp_index); + free (aggr_data[i]->global_buf); + free (aggr_data[i]->prev_global_buf); + for(l=0;lprocs_per_group;l++){ + free (aggr_data[i]->blocklen_per_process[l]); + free (aggr_data[i]->displs_per_process[l]); + } + + free (aggr_data[i]->blocklen_per_process); + free (aggr_data[i]->displs_per_process); + } + free (aggr_data[i]->sorted); + free (aggr_data[i]->global_iov_array); + free (aggr_data[i]->fview_count); + free (aggr_data[i]->decoded_iov); + + free (aggr_data[i]); + } + free (aggr_data); + } + free(displs); + free(decoded_iov); + free(broken_counts); + free(broken_total_lengths); + free(broken_iov_counts); + free(broken_decoded_iovs); // decoded_iov arrays[i] were freed as aggr_data[i]->decoded_iov; + if ( NULL != broken_iov_arrays ) { + for (i=0; if_procs_in_group); + fh->f_procs_in_group=NULL; + fh->f_procs_per_group=0; + free(reqs1); + free(reqs2); + free(result_counts); + + + return OMPI_SUCCESS; +} + + +static int write_init (mca_io_ompio_file_t *fh, int aggregator, 
mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ) +{ + int ret=OMPI_SUCCESS; + int last_array_pos=0; + int last_pos=0; + + + if ( aggregator == fh->f_rank && aggr_data->prev_num_io_entries) { + while ( aggr_data->prev_bytes_to_write > 0 ) { + aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array, + aggr_data->prev_num_io_entries, + &last_array_pos, &last_pos, + write_chunksize ); +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_write_time = MPI_Wtime(); +#endif + if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { + free ( aggr_data->prev_io_array); + opal_output (1, "dynamic_gen2_write_all: fbtl_pwritev failed\n"); + ret = OMPI_ERROR; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_write_time = MPI_Wtime(); + write_time += end_write_time - start_write_time; +#endif + } + free ( fh->f_io_array ); + free ( aggr_data->prev_io_array); + } + +exit: + + fh->f_io_array=NULL; + fh->f_num_of_io_entries=0; + + return ret; +} + +static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_io_ompio_aggregator_data *data, + ompi_request_t **reqs ) +{ + int bytes_sent = 0; + int blocks=0, temp_pindex; + int i, j, l, ret; + int entries_per_aggregator=0; + mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; + int *sorted_file_offsets=NULL; + int temp_index=0; + MPI_Aint *memory_displacements=NULL; + int *temp_disp_index=NULL; + MPI_Aint global_count = 0; + + data->num_io_entries = 0; + data->bytes_sent = 0; + data->io_array=NULL; + data->send_buf=NULL; + /********************************************************************** + *** 7a. 
Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (aggregator == rank) { + + if (NULL != data->recvtype){ + for (i =0; i< data->procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != data->recvtype[i] ) { + ompi_datatype_destroy(&data->recvtype[i]); + data->recvtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lprocs_per_group;l++){ + data->disp_index[l] = 1; + + free(data->blocklen_per_process[l]); + free(data->displs_per_process[l]); + + data->blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + data->displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == data->displs_per_process[l] || NULL == data->blocklen_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + } /* (aggregator == rank */ + + /************************************************************************** + *** 7b. Determine the number of bytes to be actually written in this cycle + **************************************************************************/ + if (cycles-1 == index) { + data->bytes_to_write_in_cycle = data->total_bytes - data->bytes_per_cycle*index; + } + else { + data->bytes_to_write_in_cycle = data->bytes_per_cycle; + } + data->bytes_to_write = data->bytes_to_write_in_cycle; +#if DEBUG_ON + if (aggregator == rank) { + printf ("****%d: CYCLE %d Bytes %lld**********\n", + rank, + index, + data->bytes_to_write_in_cycle); + } +#endif + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + +#if DEBUG_ON + printf("bytes_to_write_in_cycle: %ld, cycle : %d\n", data->bytes_to_write_in_cycle, + index); +#endif + + /***************************************************************** + *** 7c. 
Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + + /* The blocklen and displs calculation only done at aggregators!*/ + while (data->bytes_to_write_in_cycle) { + + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = data->fview_count[0]; + for (j=0 ; jprocs_per_group ; j++) { + if (data->sorted[data->current_index] < blocks) { + data->n = j; + break; + } + else { + blocks += data->fview_count[j+1]; + } + } + + if (data->bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + + if (data->bytes_remaining <= data->bytes_to_write_in_cycle) { + /* The data fits completely into the block */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (data->global_iov_array[data->sorted[data->current_index]].iov_len + - data->bytes_remaining); + + /* In this cases the length is consumed so allocating for + next displacement and blocklength*/ + data->blocklen_per_process[data->n] = (int *) realloc + ((void *)data->blocklen_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(int)); + data->displs_per_process[data->n] = (MPI_Aint *) realloc + ((void *)data->displs_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(MPI_Aint)); + data->blocklen_per_process[data->n][data->disp_index[data->n]] = 0; + data->displs_per_process[data->n][data->disp_index[data->n]] = 0; + data->disp_index[data->n] += 1; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_remaining; + } + data->current_index ++; + data->bytes_to_write_in_cycle -= data->bytes_remaining; + data->bytes_remaining = 0; +// continue; + } + else { + /* the remaining data 
from the previous cycle is larger than the + data->bytes_to_write_in_cycle, so we have to segment again */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (data->global_iov_array[data->sorted[data->current_index]].iov_len + - data->bytes_remaining); + } + + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_to_write_in_cycle; + } + data->bytes_remaining -= data->bytes_to_write_in_cycle; + data->bytes_to_write_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (data->bytes_to_write_in_cycle < + (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base ; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_to_write_in_cycle; + + } + data->bytes_remaining = data->global_iov_array[data->sorted[data->current_index]].iov_len - + data->bytes_to_write_in_cycle; + data->bytes_to_write_in_cycle = 0; + break; + } + else { + /* Next data entry is less than data->bytes_to_write_in_cycle */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = + data->global_iov_array[data->sorted[data->current_index]].iov_len; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (OPAL_PTRDIFF_TYPE) + data->global_iov_array[data->sorted[data->current_index]].iov_base; + + /*realloc for next blocklength + and assign this 
displacement and check for next displs as + the total length of this entry has been consumed!*/ + data->blocklen_per_process[data->n] = + (int *) realloc ((void *)data->blocklen_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(int)); + data->displs_per_process[data->n] = (MPI_Aint *)realloc + ((void *)data->displs_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(MPI_Aint)); + data->blocklen_per_process[data->n][data->disp_index[data->n]] = 0; + data->displs_per_process[data->n][data->disp_index[data->n]] = 0; + data->disp_index[data->n] += 1; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->global_iov_array[data->sorted[data->current_index]].iov_len; + } + data->bytes_to_write_in_cycle -= + data->global_iov_array[data->sorted[data->current_index]].iov_len; + data->current_index ++; +// continue; + } + } + } + + + /************************************************************************* + *** 7d. Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (aggregator == rank) { + entries_per_aggregator=0; + for (i=0;iprocs_per_group; i++){ + for (j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0) + entries_per_aggregator++ ; + } + } + +#if DEBUG_ON + printf("%d: cycle: %d, bytes_sent: %d\n ",rank,index, + bytes_sent); + printf("%d : Entries per aggregator : %d\n",rank,entries_per_aggregator); +#endif + + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto 
exit; + } + + /*Moving file offsets to an IO array!*/ + temp_index = 0; + + for (i=0;iprocs_per_group; i++){ + for(j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0){ + file_offsets_for_agg[temp_index].length = + data->blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + data->displs_per_process[i][j]; + temp_index++; + +#if DEBUG_ON + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + data->procs_in_group[i],j, + data->blocklen_per_process[i][j],j, + data->displs_per_process[i][j], + rank); +#endif + } + } + } + + /* Sort the displacements for each aggregator*/ + local_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + /*create contiguous memory displacements + based on blocklens on the same displs array + and map it to this aggregator's actual + file-displacements (this is in the io-array created above)*/ + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + + memory_displacements[sorted_file_offsets[0]] = 0; + for (i=1; iprocs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + /*Now update the displacements array with memory offsets*/ + global_count = 0; + for (i=0;idispls_per_process[temp_pindex][temp_disp_index[temp_pindex]] = + memory_displacements[sorted_file_offsets[i]]; + if (temp_disp_index[temp_pindex] < data->disp_index[temp_pindex]) + temp_disp_index[temp_pindex] += 1; + else{ + printf("temp_disp_index[%d]: %d is greater than disp_index[%d]: %d\n", + temp_pindex, temp_disp_index[temp_pindex], + temp_pindex, data->disp_index[temp_pindex]); + } + global_count += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + + if (NULL != temp_disp_index){ + free(temp_disp_index); + temp_disp_index 
= NULL; + } + +#if DEBUG_ON + + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0;iprocs_per_group; i++){ + for(j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0){ + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + data->procs_in_group[i],j, + data->blocklen_per_process[i][j],j, + data->displs_per_process[i][j], + rank); + + } + } + } + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0; iprocs_per_group; i++) { + size_t datatype_size; + reqs[i] = MPI_REQUEST_NULL; + if ( 0 < data->disp_index[i] ) { + ompi_datatype_create_hindexed(data->disp_index[i], + data->blocklen_per_process[i], + data->displs_per_process[i], + MPI_BYTE, + &data->recvtype[i]); + ompi_datatype_commit(&data->recvtype[i]); + opal_datatype_type_size(&data->recvtype[i]->super, &datatype_size); + + if (datatype_size){ + ret = MCA_PML_CALL(irecv(data->global_buf, + 1, + data->recvtype[i], + data->procs_in_group[i], + FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG+index, + data->comm, + &reqs[i])); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + } + } + } /* end if (entries_per_aggr > 0 ) */ + }/* end if (aggregator == rank ) */ + + if ( data->sendbuf_is_contiguous ) { + data->send_buf = &((char*)data->buf)[data->total_bytes_written]; + } + else if (bytes_sent) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + data->send_buf = malloc (bytes_sent); + if (NULL == data->send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + remaining = bytes_sent; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (data->decoded_iov[data->iov_index].iov_base) + data->current_position; + + if (remaining >= + (data->decoded_iov[data->iov_index].iov_len - data->current_position)) { + memcpy 
(data->send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + data->decoded_iov[data->iov_index].iov_len - data->current_position); + remaining = remaining - + (data->decoded_iov[data->iov_index].iov_len - data->current_position); + temp_position = temp_position + + (data->decoded_iov[data->iov_index].iov_len - data->current_position); + data->iov_index = data->iov_index + 1; + data->current_position = 0; + } + else { + memcpy (data->send_buf+temp_position, + (IOVBASE_TYPE *) mem_address, + remaining); + data->current_position += remaining; + remaining = 0; + } + } + } + data->total_bytes_written += bytes_sent; + data->bytes_sent = bytes_sent; + /* Gather the sendbuf from each process in appropritate locations in + aggregators*/ + + if (bytes_sent){ + ret = MCA_PML_CALL(isend(data->send_buf, + bytes_sent, + MPI_BYTE, + aggregator, + FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG+index, + MCA_PML_BASE_SEND_STANDARD, + data->comm, + &reqs[data->procs_per_group])); + + + if ( OMPI_SUCCESS != ret ){ + goto exit; + } + + } + +#if DEBUG_ON + if (aggregator == rank){ + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0 ; iglobal_buf)[i]); + } +#endif + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + /********************************************************** + *** 7f. 
Create the io array, and pass it to fbtl + *********************************************************/ + + if (aggregator == rank && entries_per_aggregator>0) { + + + data->io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == data->io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + data->num_io_entries = 0; + /*First entry for every aggregator*/ + data->io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + data->io_array[0].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + data->io_array[0].memory_address = + data->global_buf+memory_displacements[sorted_file_offsets[0]]; + data->num_io_entries++; + + for (i=1;iio_array[data->num_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else { + data->io_array[data->num_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + data->io_array[data->num_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + data->io_array[data->num_io_entries].memory_address = + data->global_buf+memory_displacements[sorted_file_offsets[i]]; + data->num_io_entries++; + } + + } + +#if DEBUG_ON + printf("*************************** %d\n", num_of_io_entries); + for (i=0 ; i= rest ) { + blocklen = rest; + temp_offset = offset+rest; + temp_len = len - rest; + } + else { + blocklen = len; + temp_offset = 0; + temp_len = 0; + } + + broken_file_iovs[owner][broken_file_counts[owner]].iov_base = (void *)offset; + broken_file_iovs[owner][broken_file_counts[owner]].iov_len = blocklen; +#if DEBUG_ON + printf("%d: owner=%d b_file_iovs[%d].base=%ld .len=%d \n", rank, owner, + broken_file_counts[owner], + broken_file_iovs[owner][broken_file_counts[owner]].iov_base, + broken_file_iovs[owner][broken_file_counts[owner]].iov_len ); +#endif + do { + if ( memlen >= blocklen ) 
{ + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base = (void *) memoffset; + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len = blocklen; + memoffset += blocklen; + memlen -= blocklen; + blocklen = 0; + + if ( 0 == memlen ) { + j++; + if ( j < mem_count ) { + memoffset = (off_t) mem_iov[j].iov_base; + memlen = mem_iov[j].iov_len; + } + else + break; + } + } + else { + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base = (void *) memoffset; + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len = memlen; + blocklen -= memlen; + + j++; + if ( j < mem_count ) { + memoffset = (off_t) mem_iov[j].iov_base; + memlen = mem_iov[j].iov_len; + } + else + break; + } +#if DEBUG_ON + printf("%d: owner=%d b_mem_iovs[%d].base=%ld .len=%d\n", rank, owner, + broken_mem_counts[owner], + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base, + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len); +#endif + + broken_mem_counts[owner]++; + if ( broken_mem_counts[owner] >= max_lengths[owner][0] ) { + broken_mem_iovs[owner] = (struct iovec*) realloc ( broken_mem_iovs[owner], + mem_count * block[owner][0] * + sizeof(struct iovec )); + max_lengths[owner][0] = mem_count * block[owner][0]; + block[owner][0]++; + } + + } while ( blocklen > 0 ); + + broken_file_counts[owner]++; + if ( broken_file_counts[owner] >= max_lengths[owner][1] ) { + broken_file_iovs[owner] = (struct iovec*) realloc ( broken_file_iovs[owner], + file_count * block[owner][1] * + sizeof(struct iovec )); + max_lengths[owner][1] = file_count * block[owner][1]; + block[owner][1]++; + } + + offset = temp_offset; + len = temp_len; + } while( temp_len > 0 ); + + i++; + } + + + /* Step 2: recalculating the total lengths per aggregator */ + for ( i=0; i< stripe_count; i++ ) { + for ( j=0; jf_stripe_count; + if ( num_io_procs < 1 ) { + num_io_procs = 1; + } + } + if ( num_io_procs > fh->f_size ) { + num_io_procs = fh->f_size; + } + + fh->f_procs_per_group = fh->f_size; + fh->f_procs_in_group = 
(int *) malloc ( sizeof(int) * fh->f_size ); + if ( NULL == fh->f_procs_in_group) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size; i++ ) { + fh->f_procs_in_group[i]=i; + } + + + aggregators = (int *) malloc ( num_io_procs * sizeof(int)); + if ( NULL == aggregators ) { + // fh->procs_in_group will be freed with the fh structure. No need to do it here. + return OMPI_ERR_OUT_OF_RESOURCE; + } + for ( i=0; if_size / num_io_procs; + } + + *dynamic_gen2_num_io_procs = num_io_procs; + *ret_aggregators = aggregators; + + return OMPI_SUCCESS; +}
+
+/* Fill fh->f_io_array with the next slice of io_array, at most chunk_size bytes long, resuming at entry *ret_array_pos, byte *ret_pos. */
+int mca_fcoll_dynamic_gen2_split_iov_array ( mca_io_ompio_file_t *fh, mca_io_ompio_io_array_t *io_array, int num_entries,
+                                            int *ret_array_pos, int *ret_pos, int chunk_size ) /* returns the bytes placed in the slice, or -1 if the allocation below fails */
+{
+    /* Both cursors are read here and written back before the normal return, so repeated calls walk io_array front to back. */
+    int array_pos = *ret_array_pos;      /* index of the io_array entry currently being consumed */
+    int pos = *ret_pos;                  /* bytes of that entry already handed out */
+    size_t bytes_written = 0;            /* running total placed into fh->f_io_array by this call */
+    size_t bytes_to_write = chunk_size;  /* budget left; NOTE(review): a negative chunk_size would convert to a huge size_t */
+
+    if ( 0 == array_pos && 0 == pos ) {  /* cursor at the very start: allocate the output array, one slot per input entry */
+        fh->f_io_array = (mca_io_ompio_io_array_t *) malloc ( num_entries * sizeof(mca_io_ompio_io_array_t));
+        if ( NULL == fh->f_io_array ){
+            opal_output (1,"Could not allocate memory\n");
+            return -1;                   /* the cursors and fh->f_num_of_io_entries are left untouched on this path */
+        }
+    }
+    /* NOTE(review): with a non-zero cursor fh->f_io_array is used as-is; presumably it still holds the first call's allocation -- confirm against the caller. */
+    int i=0;                             /* next free slot in fh->f_io_array; restarts at 0 on every call */
+    while (bytes_to_write > 0 ) {
+        fh->f_io_array[i].memory_address = &(((char *)io_array[array_pos].memory_address)[pos]);
+        fh->f_io_array[i].offset = &(((char *)io_array[array_pos].offset)[pos]);  /* both addresses are advanced by the bytes already consumed */
+
+        if ( (io_array[array_pos].length - pos ) >= bytes_to_write ) {
+            fh->f_io_array[i].length = bytes_to_write;                    /* this entry covers the rest of the budget */
+        }
+        else {
+            fh->f_io_array[i].length = io_array[array_pos].length - pos;  /* take what is left of this entry */
+        }
+
+        pos += fh->f_io_array[i].length;
+        bytes_written += fh->f_io_array[i].length;
+        bytes_to_write-= fh->f_io_array[i].length;
+        i++;
+
+        if ( pos == (int)io_array[array_pos].length ) {  /* entry exhausted: step to the next one, or stop after the last */
+            pos = 0;
+            if ((array_pos + 1) < num_entries) {
+                array_pos++;
+            }
+            else {
+                break;                   /* last entry used up; array_pos stays on it and pos is 0 */
+            }
+        }
+    }
+
+    fh->f_num_of_io_entries = i;         /* number of slots filled by this call */
+    *ret_array_pos = array_pos;
+    *ret_pos = pos;
+    return bytes_written;                /* size_t narrowed to the int return type */
+}
+
+/* Store in sorted[] the indices 0..num_entries-1 ordered by ascending io_array[].offset; io_array itself is only read. */
+static int local_heap_sort (mca_io_ompio_local_io_array *io_array,
+                            int num_entries,
+                            int *sorted)  /* returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the scratch array cannot be allocated */
+{
+    int i = 0;
+    int j = 0;
+    int left = 0;
+    int right = 0;
+    int largest = 0;
+    int heap_size = num_entries - 1;     /* index of the last heap slot, not a count (compared with <= below) */
+    int temp = 0;
+    unsigned char done = 0;
+    int* temp_arr = NULL;                /* permutation of indices that is heapified instead of io_array */
+
+    temp_arr = (int*)malloc(num_entries*sizeof(int));
+    if (NULL == temp_arr) {
+        opal_output (1, "OUT OF MEMORY\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+    temp_arr[0] = 0;                     /* NOTE(review): written even if num_entries is 0; the caller visible in this file only sorts a non-empty list */
+    for (i = 1; i < num_entries; ++i) {
+        temp_arr[i] = i;
+    }
+    /* num_entries can be a large no. so NO RECURSION */
+    for (i = num_entries/2-1 ; i>=0 ; i--) {  /* phase 1: sift every internal node down so the largest offset ends at the root */
+        done = 0;
+        j = i;
+        largest = j;
+
+        while (!done) {
+            left = j*2+1;
+            right = j*2+2;
+            if ((left <= heap_size) &&
+                (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) {
+                largest = left;
+            }
+            else {
+                largest = j;
+            }
+            if ((right <= heap_size) &&
+                (io_array[temp_arr[right]].offset >
+                 io_array[temp_arr[largest]].offset)) {
+                largest = right;
+            }
+            if (largest != j) {          /* a child holds a larger offset: swap and keep sifting from that child */
+                temp = temp_arr[largest];
+                temp_arr[largest] = temp_arr[j];
+                temp_arr[j] = temp;
+                j = largest;
+            }
+            else {
+                done = 1;
+            }
+        }
+    }
+
+    for (i = num_entries-1; i >=1; --i) {  /* phase 2: move the current maximum to slot i, shrink the heap, repair it */
+        temp = temp_arr[0];
+        temp_arr[0] = temp_arr[i];
+        temp_arr[i] = temp;
+        heap_size--;
+        done = 0;
+        j = 0;
+        largest = j;
+
+        while (!done) {
+            left = j*2+1;
+            right = j*2+2;
+
+            if ((left <= heap_size) &&
+                (io_array[temp_arr[left]].offset >
+                 io_array[temp_arr[j]].offset)) {
+                largest = left;
+            }
+            else {
+                largest = j;
+            }
+            if ((right <= heap_size) &&
+                (io_array[temp_arr[right]].offset >
+                 io_array[temp_arr[largest]].offset)) {
+                largest = right;
+            }
+            if (largest != j) {
+                temp = temp_arr[largest];
+                temp_arr[largest] = temp_arr[j];
+                temp_arr[j] = temp;
+                j = largest;
+            }
+            else {
+                done = 1;
+            }
+        }
+        sorted[i] = temp_arr[i];         /* slot i is final once the heap no longer covers it */
+    }
+    sorted[0] = temp_arr[0];
+
+    if (NULL != temp_arr) {
+        free(temp_arr);
+        temp_arr = NULL;
+    }
+    return OMPI_SUCCESS;
+}
+
diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c
new file mode 100644 index 00000000000..16070a9cbf8 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include + +#include "mpi.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fcoll_base_module_1_0_0_t dynamic_gen2 = { + mca_fcoll_dynamic_gen2_module_init, + mca_fcoll_dynamic_gen2_module_finalize, + mca_fcoll_dynamic_gen2_file_read_all, + NULL, /* iread_all */ + mca_fcoll_dynamic_gen2_file_write_all, + NULL, /*iwrite_all */ + NULL, /* progress */ + NULL /* request_free */ +}; + +int +mca_fcoll_dynamic_gen2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_gen2_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ + *priority = mca_fcoll_dynamic_gen2_priority; + if (0 >= mca_fcoll_dynamic_gen2_priority) { + return NULL; + } + + if (mca_fcoll_base_query_table (fh, "dynamic_gen2")) { + if (*priority < 
50) { + *priority = 50; + } + } + + return &dynamic_gen2; +} + +int mca_fcoll_dynamic_gen2_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fcoll_dynamic_gen2_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + + +int mca_fcoll_dynamic_gen2_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic_gen2/owner.txt b/ompi/mca/fcoll/dynamic_gen2/owner.txt new file mode 100644 index 00000000000..2e9726c28a4 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: UH +status: active diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c index 0c29c98de75..0abf6963e0b 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c @@ -677,8 +677,8 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, } if (req_off < real_off + real_size) { count[i]++; - MPI_Address(read_buf+req_off-real_off, - &(others_req[i].mem_ptrs[j])); + PMPI_Address(read_buf+req_off-real_off, + &(others_req[i].mem_ptrs[j])); send_size[i] += (int)(OMPIO_MIN(real_off + real_size - req_off, (OMPI_MPI_OFFSET_TYPE)req_len)); diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c index ac42ec1ecf5..f1cd3089efd 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c @@ -766,8 +766,8 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, 
size,i, count[i]); #endif - MPI_Address(write_buf+req_off-off, - &(others_req[i].mem_ptrs[j])); + PMPI_Address(write_buf+req_off-off, + &(others_req[i].mem_ptrs[j])); #if DEBUG_ON printf("%d : mem_ptrs : %ld\n", fh->f_rank, others_req[i].mem_ptrs[j]); diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_support_fns.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_support_fns.c index f83b11fc2c5..b6b658ed3f9 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_support_fns.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_support_fns.c @@ -173,7 +173,7 @@ int mca_fcoll_two_phase_calc_aggregator(mca_io_ompio_file_t *fh, fprintf(stderr, "rank_index(%d) >= num_aggregators(%d)fd_size=%lld off=%lld\n", rank_index,num_aggregators,fd_size,off); - MPI_Abort(MPI_COMM_WORLD, 1); + ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); } diff --git a/ompi/mca/fs/base/base.h b/ompi/mca/fs/base/base.h index 21f2b75d423..b83c93890a0 100644 --- a/ompi/mca/fs/base/base.h +++ b/ompi/mca/fs/base/base.h @@ -48,6 +48,7 @@ OMPI_DECLSPEC int mca_fs_base_init_file (struct mca_io_ompio_file_t *file); OMPI_DECLSPEC int mca_fs_base_get_param (struct mca_io_ompio_file_t *file, int keyval); OMPI_DECLSPEC void mca_fs_base_get_parent_dir (char *filename, char **dirnamep); +OMPI_DECLSPEC int mca_fs_base_get_fstype(char *fname); /* * Globals */ diff --git a/ompi/mca/fs/base/fs_base_get_parent_dir.c b/ompi/mca/fs/base/fs_base_get_parent_dir.c index fcfa7d3e0a7..66b3d4636a1 100644 --- a/ompi/mca/fs/base/fs_base_get_parent_dir.c +++ b/ompi/mca/fs/base/fs_base_get_parent_dir.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -25,9 +25,11 @@ #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" +#include "opal/util/path.h" #include "ompi/mca/fs/fs.h" #include "ompi/mca/fs/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" #ifdef HAVE_SYS_STATFS_H #include /* or */ @@ -93,3 +95,29 @@ void mca_fs_base_get_parent_dir ( char *filename, char **dirnamep) *dirnamep = dir; return; } + +int mca_fs_base_get_fstype(char *fname ) +{ + int ompio_type = UFS; + char *fstype=NULL; + bool ret = opal_path_nfs ( fname, &fstype ); + + if ( false == ret ) { + char *dir; + mca_fs_base_get_parent_dir (fname, &dir ); + ret = opal_path_nfs (dir, &fstype); + if ( false == ret ) { + return ompio_type; + } + } + if ( 0 == strncasecmp(fstype, "lustre", sizeof("lustre")) ) { + ompio_type = LUSTRE; + } + else if ( 0 == strncasecmp(fstype, "pvfs2", sizeof("pvfs2"))) { + ompio_type = PVFS2; + } + + free (fstype); + return ompio_type; +} + diff --git a/ompi/mca/fs/lustre/fs_lustre.c b/ompi/mca/fs/lustre/fs_lustre.c index 6bd661b9c74..c41e349cd0a 100644 --- a/ompi/mca/fs/lustre/fs_lustre.c +++ b/ompi/mca/fs/lustre/fs_lustre.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -93,22 +93,8 @@ mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority) tmp = strchr (fh->f_filename, ':'); if (!tmp) { if (OMPIO_ROOT == fh->f_rank) { - do { - err = statfs (fh->f_filename, &fsbuf); - } while (err && (errno == ESTALE)); - - if (err && (errno == ENOENT)) { - mca_fs_base_get_parent_dir (fh->f_filename, &dir); - err = statfs (dir, &fsbuf); - free (dir); - } -#ifndef LL_SUPER_MAGIC -#define LL_SUPER_MAGIC 0x0BD00BD0 -#endif - if (fsbuf.f_type == LL_SUPER_MAGIC) { - fh->f_fstype = LUSTRE; - } - } + fh->f_fstype = mca_fs_base_get_fstype ( fh->f_filename ); + } fh->f_comm->c_coll.coll_bcast (&(fh->f_fstype), 1, MPI_INT, diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c index b873564bc86..34430d389d2 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_open.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c @@ -139,25 +139,21 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, return OMPI_ERROR; } - if (mca_fs_lustre_stripe_size > 0) { - fh->f_stripe_size = mca_fs_lustre_stripe_size; - } - else { - lump = alloc_lum(); - if (NULL == lump ){ + lump = alloc_lum(); + if (NULL == lump ){ fprintf(stderr,"Cannot allocate memory for extracting stripe size\n"); return OMPI_ERROR; - } - rc = llapi_file_get_stripe(filename, lump); - if (rc != 0) { - opal_output(1, "get_stripe failed: %d (%s)\n", errno, strerror(errno)); - return OMPI_ERROR; - } - fh->f_stripe_size = lump->lmm_stripe_size; - + } + rc = llapi_file_get_stripe(filename, lump); + if (rc != 0) { + opal_output(1, "get_stripe failed: %d (%s)\n", errno, strerror(errno)); + return OMPI_ERROR; + } + fh->f_stripe_size = lump->lmm_stripe_size; + fh->f_stripe_count = lump->lmm_stripe_count; + // if ( NULL != lump ) { // free ( lump ); // } - } return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/plfs/fs_plfs.h b/ompi/mca/fs/plfs/fs_plfs.h index 82b96981932..69221d4c9b8 100644 --- 
a/ompi/mca/fs/plfs/fs_plfs.h +++ b/ompi/mca/fs/plfs/fs_plfs.h @@ -30,7 +30,6 @@ #include extern int mca_fs_plfs_priority; -extern int mca_fs_plfs_num_hostdir; BEGIN_C_DECLS diff --git a/ompi/mca/fs/plfs/fs_plfs_component.c b/ompi/mca/fs/plfs/fs_plfs_component.c index c3ac966c80b..6df5f7db22b 100644 --- a/ompi/mca/fs/plfs/fs_plfs_component.c +++ b/ompi/mca/fs/plfs/fs_plfs_component.c @@ -39,7 +39,6 @@ const char *mca_fs_plfs_component_version_string = static int plfs_register(void); int mca_fs_plfs_priority = 20; -int mca_fs_plfs_num_hostdir = -1; /* * Instantiate the public struct with all of our public information @@ -77,12 +76,6 @@ plfs_register(void) MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_plfs_priority); - mca_fs_plfs_num_hostdir = -1; - (void) mca_base_component_var_register(&mca_fs_plfs_component.fsm_version, - "num_hostdir", "number of host directories of a file over plfs", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_plfs_num_hostdir); return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/plfs/fs_plfs_file_close.c b/ompi/mca/fs/plfs/fs_plfs_file_close.c index 9a14b3752c9..03b92af91af 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_close.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_close.c @@ -49,7 +49,9 @@ mca_fs_plfs_file_close (mca_io_ompio_file_t *fh) getcwd( wpath, sizeof(wpath) ); sprintf( wpath,"%s/%s",wpath,fh->f_filename ); - if(-1 == access(fh->f_filename, F_OK)) { + plfs_ret = plfs_access(wpath, F_OK); + if ( PLFS_SUCCESS != plfs_ret ) { + opal_output(0, "fs_plfs_file_close: Error in plfs_access:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; // file doesn't exist } @@ -66,7 +68,14 @@ mca_fs_plfs_file_close (mca_io_ompio_file_t *fh) return OMPI_ERROR; } - plfs_ret = plfs_close(fh->f_fs_ptr, 0, 0, amode ,NULL, &flags); + plfs_ret = plfs_sync(fh->f_fs_ptr); + if (PLFS_SUCCESS != plfs_ret) { + opal_output(0, "fs_plfs_file_close: Error in plfs_sync:\n%s\n", 
strplfserr(plfs_ret)); + return OMPI_ERROR; + } + + + plfs_ret = plfs_close(fh->f_fs_ptr, fh->f_rank, 0, amode ,NULL, &flags); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fs_plfs_file_close: Error in plfs_close:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; diff --git a/ompi/mca/fs/plfs/fs_plfs_file_open.c b/ompi/mca/fs/plfs/fs_plfs_file_open.c index 351f24c0ec5..c8dd294820a 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_open.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_open.c @@ -53,8 +53,6 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, plfs_error_t plfs_ret; Plfs_fd *pfd = NULL; char wpath[1024]; - size_t len = sizeof(int); - char key[] = "num_hostdirs"; rank = ompi_comm_rank ( comm ); @@ -89,7 +87,7 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, if (access_mode & MPI_MODE_CREATE) amode = amode | O_CREAT; - plfs_ret = plfs_open( &pfd, wpath, amode, 0, perm, NULL ); + plfs_ret = plfs_open( &pfd, wpath, amode, fh->f_rank, perm, NULL ); fh->f_fs_ptr = pfd; } @@ -99,7 +97,7 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, } if (0 != rank) { - plfs_ret = plfs_open( &pfd, wpath, amode, 0, perm, NULL ); + plfs_ret = plfs_open( &pfd, wpath, amode, fh->f_rank, perm, NULL ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fs_plfs_file_open: Error in plfs_open:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; @@ -109,12 +107,5 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, } } - if (mca_fs_plfs_num_hostdir > 0) { - plfs_ret = plfs_setxattr( pfd, &mca_fs_plfs_num_hostdir, key, len ); - if (PLFS_SUCCESS != plfs_ret) { - opal_output(0, "fs_plfs_file_open: Error in plfs_setxattr:\n%s\n", strplfserr(plfs_ret)); - return OMPI_ERROR; - } - } return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.c b/ompi/mca/fs/pvfs2/fs_pvfs2.c index c28ce4fae3a..7f8abb9ec56 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -96,18 +96,7 @@ mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) tmp = strchr (fh->f_filename, ':'); if (!tmp) { if (OMPIO_ROOT == fh->f_rank) { - do { - err = statfs (fh->f_filename, &fsbuf); - } while (err && (errno == ESTALE)); - - if (err && (errno == ENOENT)) { - mca_fs_base_get_parent_dir (fh->f_filename, &dir); - err = statfs (dir, &fsbuf); - free (dir); - } - if (fsbuf.f_type == PVFS2_SUPER_MAGIC) { - fh->f_fstype = PVFS2; - } + fh->f_fstype = mca_fs_base_get_fstype ( fh->f_filename ); } fh->f_comm->c_coll.coll_bcast (&(fh->f_fstype), 1, diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_component.c b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c index 319ca605df8..ef9bf933cc2 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_component.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c @@ -13,6 +13,8 @@ * Copyright (c) 2008-2013 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +70,7 @@ mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component = { .fsm_init_query = mca_fs_pvfs2_component_init_query, /* get thread level */ .fsm_file_query = mca_fs_pvfs2_component_file_query, /* get priority and actions */ .fsm_file_unquery = mca_fs_pvfs2_component_file_unquery, /* undo what was done by previous function */ -};. 
+}; static int pvfs2_register(void) diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c index f95e7f8aa94..23d6b928762 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -180,6 +180,7 @@ mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, if (fs_pvfs2_stripe_size > 0 && fs_pvfs2_stripe_width > 0) { fh->f_stripe_size = fs_pvfs2_stripe_size; + fh->f_stripe_count = fs_pvfs2_stripe_width; } return OMPI_SUCCESS; diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index 1f9fe5c1234..28ab8fab8ff 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -91,5 +91,8 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, } } + fh->f_stripe_size=0; + fh->f_stripe_count=1; + return OMPI_SUCCESS; } diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index cb5a50a5c17..7d93402606a 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -1030,13 +1030,29 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, fh->f_flags |= OMPIO_AGGREGATOR_IS_SET; if (-1 == num_aggregators) { - mca_io_ompio_create_groups(fh,bytes_per_proc); + if ( SIMPLE == mca_io_ompio_grouping_option || + NO_REFINEMENT == mca_io_ompio_grouping_option ) { + fh->f_aggregator_index = 0; + fh->f_final_num_aggrs = fh->f_init_num_aggrs; + fh->f_procs_per_group = fh->f_init_procs_per_group; + + fh->f_procs_in_group = (int*)malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == fh->f_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (j=0 ; jf_procs_per_group ; j++) { + fh->f_procs_in_group[j] = fh->f_init_procs_in_group[j]; + } + } + else { + mca_io_ompio_create_groups(fh,bytes_per_proc); + } return OMPI_SUCCESS; } //Forced number of aggregators - else - { /* calculate the offset at which each group of processes will start */ 
procs_per_group = ceil ((float)fh->f_size/num_aggregators); @@ -1062,7 +1078,6 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, fh->f_final_num_aggrs = num_aggregators; return OMPI_SUCCESS; - } } @@ -1987,6 +2002,7 @@ int ompi_io_ompio_empty_print_queue(int queue_type){ ret = ompi_io_ompio_set_print_queue(&q, queue_type); assert (ret != OMPI_ERROR); + (void)ret; // silence compiler warning if (q->count == 0) return 1; else @@ -2003,6 +2019,7 @@ int ompi_io_ompio_full_print_queue(int queue_type){ ret = ompi_io_ompio_set_print_queue(&q, queue_type); assert ( ret != OMPI_ERROR); + (void)ret; // silence compiler warning if (q->count < QUEUESIZE) return 0; else diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 703ed0bbc85..f7ef9d64321 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -101,15 +101,22 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OMPIO_MERGE 1 #define OMPIO_SPLIT 2 #define OMPIO_RETAIN 3 + #define DATA_VOLUME 1 #define UNIFORM_DISTRIBUTION 2 -#define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 #define CONTIGUITY 3 -#define OMPIO_CONTG_THRESHOLD 1048576 #define OPTIMIZE_GROUPING 4 -#define OMPIO_PROCS_PER_GROUP_TAG 0 -#define OMPIO_PROCS_IN_GROUP_TAG 1 -#define OMPIO_MERGE_THRESHOLD 0.5 +#define SIMPLE 5 +#define NO_REFINEMENT 6 + + +#define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 +#define OMPIO_CONTG_THRESHOLD 1048576 +#define OMPIO_CONTG_FACTOR 8 +#define OMPIO_DEFAULT_STRIPE_SIZE 1048576 +#define OMPIO_PROCS_PER_GROUP_TAG 0 +#define OMPIO_PROCS_IN_GROUP_TAG 1 +#define OMPIO_MERGE_THRESHOLD 0.5 /*---------------------------*/ @@ -299,6 +306,7 @@ struct mca_io_ompio_file_t { void *f_fs_ptr; int f_atomicity; size_t f_stripe_size; + int f_stripe_count; size_t f_cc_size; int f_bytes_per_agg; enum ompio_fs_type f_fstype; @@ -543,6 +551,9 @@ int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh); int mca_io_ompio_fview_based_grouping(mca_io_ompio_file_t *fh, 
int *num_groups, contg *contg_groups); +int mca_io_ompio_simple_grouping(mca_io_ompio_file_t *fh, + int *num_groups, + contg *contg_groups); int mca_io_ompio_finalize_initial_grouping(mca_io_ompio_file_t *fh, int num_groups, diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 4c2c0868211..af8918985ca 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -38,7 +38,7 @@ int mca_io_ompio_record_offset_info = 0; int mca_io_ompio_coll_timing_info = 0; int mca_io_ompio_sharedfp_lazy_open = 1; -int mca_io_ompio_grouping_option=0; +int mca_io_ompio_grouping_option=5; /* * Private functions @@ -202,10 +202,13 @@ static int register_component(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_sharedfp_lazy_open); - mca_io_ompio_grouping_option = 0; + mca_io_ompio_grouping_option = 5; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "grouping_option", - "Option for grouping of processes in the aggregator selection", + "Option for grouping of processes in the aggregator selection " + "1: Data volume based grouping 2: maximizing group size uniformity 3: maximimze " + "data contiguity 4: hybrid optimization 5: simple (default) " + "6: skip refinement step", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 0f5d75a31c0..91cbf949bfa 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -105,6 +105,8 @@ int mca_io_ompio_set_view_internal(mca_io_ompio_file_t *fh, fh->f_disp = disp; fh->f_offset = disp; fh->f_total_bytes = 0; + fh->f_index_in_file_view=0; + fh->f_position_in_file_view=0; ompi_io_ompio_decode_datatype (fh, newfiletype, @@ -148,19 +150,29 @@ int mca_io_ompio_set_view_internal(mca_io_ompio_file_t *fh, } } - if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh, + 
if ( SIMPLE != mca_io_ompio_grouping_option ) { + if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh, &num_groups, contg_groups)){ - opal_output(1, "mca_io_ompio_fview_based_grouping() failed\n"); - free(contg_groups); - return OMPI_ERROR; + opal_output(1, "mca_io_ompio_fview_based_grouping() failed\n"); + free(contg_groups); + return OMPI_ERROR; + } } - if( !( (fh->f_comm->c_flags & OMPI_COMM_CART) && - (num_groups == 1 || num_groups == fh->f_size)) ) { - mca_io_ompio_finalize_initial_grouping(fh, - num_groups, - contg_groups); + else { + if( OMPI_SUCCESS != mca_io_ompio_simple_grouping(fh, + &num_groups, + contg_groups)){ + opal_output(1, "mca_io_ompio_simple_grouping() failed\n"); + free(contg_groups); + return OMPI_ERROR; + } } + + + mca_io_ompio_finalize_initial_grouping(fh, + num_groups, + contg_groups); for( i = 0; i < fh->f_size; i++){ free(contg_groups[i].procs_in_contg_group); } @@ -231,7 +243,7 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fp, OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) { - int uniform = 0, global_uniform = 0; + int uniform = 0; OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0}; OMPI_MPI_OFFSET_TYPE global_avg[3] = {0,0,0}; int i = 0; @@ -268,6 +280,10 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) global_avg[0] = global_avg[0]/fh->f_size; global_avg[1] = global_avg[1]/fh->f_size; +#if 0 + /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. 
*/ + int global_uniform=0; + if ( global_avg[0] == avg[0] && global_avg[1] == avg[1] && 0 == avg[2] && @@ -293,10 +309,52 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) /* yes, everybody agrees on having a uniform file view */ fh->f_flags |= OMPIO_UNIFORM_FVIEW; } - +#endif return global_avg[0]; } +int mca_io_ompio_simple_grouping(mca_io_ompio_file_t *fh, + int *num_groups, + contg *contg_groups) +{ + size_t stripe_size = (size_t) fh->f_stripe_size; + int group_size = 0; + int k=0, p=0, g=0; + int total_procs = 0; + + if ( 0 < fh->f_stripe_size ) { + stripe_size = OMPIO_DEFAULT_STRIPE_SIZE; + } + + if ( 0 != fh->f_cc_size && stripe_size > fh->f_cc_size ) { + group_size = (((int)stripe_size/(int)fh->f_cc_size) > fh->f_size ) ? fh->f_size : ((int)stripe_size/(int)fh->f_cc_size); + *num_groups = fh->f_size / group_size; + } + else if ( fh->f_cc_size <= OMPIO_CONTG_FACTOR * stripe_size) { + *num_groups = fh->f_size/OMPIO_CONTG_FACTOR > 0 ? (fh->f_size/OMPIO_CONTG_FACTOR) : 1 ; + group_size = OMPIO_CONTG_FACTOR; + } + else { + *num_groups = fh->f_size; + group_size = 1; + } + + for ( k=0, p=0; p<*num_groups; p++ ) { + if ( p == (*num_groups - 1) ) { + contg_groups[p].procs_per_contg_group = fh->f_size - total_procs; + } + else { + contg_groups[p].procs_per_contg_group = group_size; + total_procs +=group_size; + } + for ( g=0; gf_rank; - if( OMPIO_ROOT == fh->f_rank){ - start_offsets_lens = (OMPI_MPI_OFFSET_TYPE* )malloc (3 * fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); - if (NULL == start_offsets_lens) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - end_offsets = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); - if (NULL == end_offsets) { - opal_output (1, "OUT OF MEMORY\n"); - free(start_offsets_lens); - return OMPI_ERR_OUT_OF_RESOURCE; - } - + start_offsets_lens = (OMPI_MPI_OFFSET_TYPE* )malloc (3 * fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); + if (NULL == start_offsets_lens) { + 
opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; } - //Gather start offsets across processes in a group on aggregator - fh->f_comm->c_coll.coll_gather (start_offset_len, - 3, - OMPI_OFFSET_DATATYPE, - start_offsets_lens, - 3, - OMPI_OFFSET_DATATYPE, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_gather_module); - + end_offsets = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); + if (NULL == end_offsets) { + opal_output (1, "OUT OF MEMORY\n"); + free(start_offsets_lens); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + //Allgather start offsets across processes in a group on aggregator + fh->f_comm->c_coll.coll_allgather (start_offset_len, + 3, + OMPI_OFFSET_DATATYPE, + start_offsets_lens, + 3, + OMPI_OFFSET_DATATYPE, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + //Calculate contg chunk size and contg subgroups - if(OMPIO_ROOT == fh->f_rank){ - for( k = 0 ; k < fh->f_size; k++){ - end_offsets[k] = start_offsets_lens[3*k] + start_offsets_lens[3*k+1]; - contg_groups[k].contg_chunk_size = 0; + for( k = 0 ; k < fh->f_size; k++){ + end_offsets[k] = start_offsets_lens[3*k] + start_offsets_lens[3*k+1]; + contg_groups[k].contg_chunk_size = 0; + } + k = 0; + while( k < fh->f_size){ + if( k == 0){ + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; } - k = 0; - while( k < fh->f_size){ - if( k == 0){ - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } - else if( start_offsets_lens[3*k] == end_offsets[k - 1] ){ - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } - else{ - p++; - 
g = 0; - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } + else if( start_offsets_lens[3*k] == end_offsets[k - 1] ){ + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; + } + else{ + p++; + g = 0; + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; } - - *num_groups = p+1; - if (NULL != start_offsets_lens) { - free (start_offsets_lens); - start_offsets_lens = NULL; - } - if (NULL != end_offsets) { - free (end_offsets); - end_offsets = NULL; - } } - - //bcast num_groups to all procs - fh->f_comm->c_coll.coll_bcast (num_groups, - 1, - MPI_INT, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_bcast_module); - - + + *num_groups = p+1; + free (start_offsets_lens); + free (end_offsets); + return OMPI_SUCCESS; } @@ -408,105 +446,34 @@ int mca_io_ompio_finalize_initial_grouping(mca_io_ompio_file_t *fh, int z = 0; int y = 0; - int r = 0; - - MPI_Request *sendreq = NULL , *req = NULL; - - - req = (MPI_Request *)malloc (2* sizeof(MPI_Request)); - if (NULL == req) { - return OMPI_ERR_OUT_OF_RESOURCE; - } fh->f_init_num_aggrs = num_groups; fh->f_init_aggr_list = (int*)malloc (fh->f_init_num_aggrs * sizeof(int)); if (NULL == fh->f_init_aggr_list) { opal_output (1, "OUT OF MEMORY\n"); - free(req); return OMPI_ERR_OUT_OF_RESOURCE; } - if(OMPIO_ROOT == fh->f_rank){ - sendreq = (MPI_Request *)malloc ( 2 *fh->f_size * sizeof(MPI_Request)); - if (NULL == sendreq) { - free(req); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for( z = 0 ;z < num_groups; z++){ - for( y = 0; y < contg_groups[z].procs_per_contg_group; y++){ - 
MCA_PML_CALL(isend(&contg_groups[z].procs_per_contg_group, - 1, - MPI_INT, - contg_groups[z].procs_in_contg_group[y], - OMPIO_PROCS_PER_GROUP_TAG, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &sendreq[r++])); - - //send initial grouping distribution to all processes in the group - MCA_PML_CALL(isend(contg_groups[z].procs_in_contg_group, - contg_groups[z].procs_per_contg_group, - MPI_INT, - contg_groups[z].procs_in_contg_group[y], - OMPIO_PROCS_IN_GROUP_TAG, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &sendreq[r++])); - } - } + for( z = 0 ;z < num_groups; z++){ + for( y = 0; y < contg_groups[z].procs_per_contg_group; y++){ + if ( fh->f_rank == contg_groups[z].procs_in_contg_group[y] ) { + fh->f_init_procs_per_group = contg_groups[z].procs_per_contg_group; + fh->f_init_procs_in_group = (int*)malloc (fh->f_init_procs_per_group * sizeof(int)); + if (NULL == fh->f_init_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memcpy ( fh->f_init_procs_in_group, contg_groups[z].procs_in_contg_group, + contg_groups[z].procs_per_contg_group * sizeof (int)); + + } + } } - //All processes receive initial procs per group from OMPIO_ROOT - MCA_PML_CALL(irecv(&fh->f_init_procs_per_group, - 1, - MPI_INT, - OMPIO_ROOT, - OMPIO_PROCS_PER_GROUP_TAG, - fh->f_comm, - &req[0])); - - ompi_request_wait (&req[0], MPI_STATUS_IGNORE); - fh->f_init_procs_in_group = (int*)malloc (fh->f_init_procs_per_group * sizeof(int)); - if (NULL == fh->f_init_procs_in_group) { - opal_output (1, "OUT OF MEMORY\n"); - free(req); - if (NULL != sendreq) { - free(sendreq); - } - return OMPI_ERR_OUT_OF_RESOURCE; + for( z = 0 ;z < num_groups; z++){ + fh->f_init_aggr_list[z] = contg_groups[z].procs_in_contg_group[0]; } - //All processes receive initial process distribution from OMPIO_ROOT - MCA_PML_CALL(irecv(fh->f_init_procs_in_group, - fh->f_init_procs_per_group, - MPI_INT, - OMPIO_ROOT, - OMPIO_PROCS_IN_GROUP_TAG, - fh->f_comm, - &req[1])); - - ompi_request_wait 
(&req[1], MPI_STATUS_IGNORE); - free (req); - if(OMPIO_ROOT == fh->f_rank){ - ompi_request_wait_all (r, sendreq, MPI_STATUSES_IGNORE); - free (sendreq); - } - - - /*set initial aggregator list */ - //OMPIO_ROOT broadcasts aggr list - if(OMPIO_ROOT == fh->f_rank){ - for( z = 0 ;z < num_groups; z++){ - fh->f_init_aggr_list[z] = contg_groups[z].procs_in_contg_group[0]; - } - } - - fh->f_comm->c_coll.coll_bcast (fh->f_init_aggr_list, - num_groups, - MPI_INT, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_bcast_module); + return OMPI_SUCCESS; } diff --git a/ompi/mca/io/romio314/configure.m4 b/ompi/mca/io/romio314/configure.m4 index 431d489e23e..6ebe85263f0 100644 --- a/ompi/mca/io/romio314/configure.m4 +++ b/ompi/mca/io/romio314/configure.m4 @@ -52,7 +52,7 @@ AC_DEFUN([MCA_ompi_io_romio314_CONFIG],[ $2], [AC_MSG_RESULT([yes]) - AS_IF([test -n "$with_io_romio_flags" -a "$with_io_romio_flags" != "no"], + AS_IF([test -n "$with_io_romio_flags" && test "$with_io_romio_flags" != "no"], [io_romio314_flags="$with_io_romio_flags $io_romio314_flags"], [io_romio314_flags=]) # If ROMIO is going to end up in a DSO, all we need is @@ -69,7 +69,7 @@ AC_DEFUN([MCA_ompi_io_romio314_CONFIG],[ AS_IF([test "$enable_static" = "yes"], [io_romio314_static=enable], [io_romio314_static=disable])]) - AS_IF([test -n "$prefix" -a "$prefix" != "NONE"], + AS_IF([test -n "$prefix" && test "$prefix" != "NONE"], [io_romio314_prefix_arg="--prefix=$prefix"], [io_romio314_prefix_arg=]) diff --git a/ompi/mca/mtl/mxm/mtl_mxm.c b/ompi/mca/mtl/mxm/mtl_mxm.c index 09f9c2bb4e7..27de674a9bf 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm.c +++ b/ompi/mca/mtl/mxm/mtl_mxm.c @@ -220,8 +220,8 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address { char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version); char *modex_name = malloc(strlen(modex_component_name) + 5); - unsigned char *modex_buf_ptr; - size_t modex_cur_size; + uint8_t *modex_buf_ptr; + 
int32_t modex_cur_size; size_t modex_buf_size; size_t *address_len_buf_ptr; int modex_name_id = 0; @@ -233,7 +233,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address /* Receive address length */ sprintf(modex_name, "%s-len", modex_component_name); OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, - (char**)&address_len_buf_ptr, + (uint8_t **)&address_len_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { MXM_ERROR("Failed to receive ep address length"); @@ -254,7 +254,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address while (modex_buf_size < *address_len_p) { sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, - (char**)&modex_buf_ptr, + &modex_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { MXM_ERROR("Open MPI couldn't distribute EP connection details"); @@ -598,7 +598,7 @@ int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, size_t i; #if MXM_API >= MXM_VERSION(3,1) - if (ompi_mtl_mxm.bulk_disconnect && nprocs == ompi_proc_world_size ()) { + if (ompi_mtl_mxm.bulk_disconnect && ((int)nprocs) == ompi_proc_world_size ()) { mxm_ep_powerdown(ompi_mtl_mxm.ep); } #endif diff --git a/ompi/mca/mtl/ofi/help-mtl-ofi.txt b/ompi/mca/mtl/ofi/help-mtl-ofi.txt index 84752d9d391..2338d548f01 100644 --- a/ompi/mca/mtl/ofi/help-mtl-ofi.txt +++ b/ompi/mca/mtl/ofi/help-mtl-ofi.txt @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2013-2014 Intel, Inc. All rights reserved +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved # # $COPYRIGHT$ # @@ -8,12 +8,3 @@ # # $HEADER$ # -[ofi init] -Initialization of OFI library failed. - - Error: %s -# -[debug level] -Unable to set OFI debug level. 
- - Error: %s diff --git a/ompi/mca/mtl/ofi/mtl_ofi.c b/ompi/mca/mtl/ofi/mtl_ofi.c index 1f4abb72ba8..ed6aae6bc44 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.c +++ b/ompi/mca/mtl/ofi/mtl_ofi.c @@ -110,6 +110,15 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, */ for (i = 0; i < nprocs; ++i) { endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t); + if (NULL == endpoint) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl/ofi: could not allocate endpoint" + " structure\n", + __FILE__, __LINE__); + ret = OMPI_ERROR; + goto bail; + } + endpoint->mtl_ofi_module = &ompi_mtl_ofi; endpoint->peer_fiaddr = fi_addrs[i]; diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index bb577051fc3..1128aca3d26 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -50,10 +50,6 @@ BEGIN_C_DECLS extern mca_mtl_ofi_module_t ompi_mtl_ofi; extern mca_base_framework_t ompi_mtl_base_framework; -extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, - size_t nprocs, - struct ompi_proc_t **procs); - extern int ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t **procs); @@ -235,7 +231,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ ompi_proc = ompi_comm_peer_lookup(comm, dest); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ompi_ret) return ompi_ret; @@ -266,6 +262,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_trecv failed: %s(%zd)", __FILE__, __LINE__, fi_strerror(-ret), ret); 
+ free(ack_req); return ompi_mtl_ofi_get_error(ret); } } else { @@ -284,6 +281,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_tinject failed: %s(%zd)", __FILE__, __LINE__, fi_strerror(-ret), ret); + if (ack_req) { + fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx); + free(ack_req); + } return ompi_mtl_ofi_get_error(ret); } @@ -460,7 +461,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, if (ompi_mtl_ofi.any_addr == ofi_req->remote_addr) { src = MTL_OFI_GET_SOURCE(wc->tag); ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(ofi_req->mtl, ompi_proc); ofi_req->remote_addr = endpoint->peer_fiaddr; } MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep, @@ -532,7 +533,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup(comm, src); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_addr = endpoint->peer_fiaddr; } else { remote_addr = ompi_mtl_ofi.any_addr; @@ -744,7 +745,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl, */ if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup( comm, src ); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_proc = endpoint->peer_fiaddr; } @@ -829,7 +830,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, */ if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup( comm, src ); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_proc = endpoint->peer_fiaddr; } @@ -864,11 +865,13 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, * The search request completed but no matching message 
was found. */ *matched = 0; + free(ofi_req); return OMPI_SUCCESS; } else if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_trecvmsg failed: %s(%zd)", __FILE__, __LINE__, fi_strerror(-ret), ret); + free(ofi_req); return ompi_mtl_ofi_get_error(ret); } @@ -894,6 +897,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, } else { (*message) = MPI_MESSAGE_NULL; + free(ofi_req); } return OMPI_SUCCESS; @@ -961,7 +965,6 @@ ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, return OMPI_SUCCESS; } - END_C_DECLS #endif /* MTL_OFI_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 4a50de81450..1469fe767b7 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights @@ -240,6 +240,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, hints->domain_attr->threading = FI_THREAD_UNSPEC; hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; + hints->domain_attr->resource_mgmt = FI_RM_ENABLED; + hints->domain_attr->av_type = FI_AV_MAP; /** * FI_VERSION provides binary backward and forward compatibility support diff --git a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h index 2799d495b58..788d0919168 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * $COPYRIGHT$ * @@ -11,10 +11,12 @@ #ifndef OMPI_MTL_OFI_ENDPOINT_H #define OMPI_MTL_OFI_ENDPOINT_H -#include "mtl_ofi.h" - BEGIN_C_DECLS +extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t **procs); + OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint_t); /** @@ -35,7 +37,15 @@ struct mca_mtl_ofi_endpoint_t { }; typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t; -OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint); + +static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { + ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; +} END_C_DECLS #endif diff --git a/ompi/mca/mtl/ofi/mtl_ofi_request.h b/ompi/mca/mtl/ofi/mtl_ofi_request.h index ee544073cc7..5e2faad6456 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_request.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_request.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * $COPYRIGHT$ * @@ -55,6 +55,10 @@ struct ompi_mtl_ofi_request_t { /* lookup source of an ANY_SOURCE Recv */ struct ompi_communicator_t *comm; + /** Reference to the MTL used to lookup */ + /* source of an ANY_SOURCE Recv */ + struct mca_mtl_base_module_t* mtl; + /** Pack buffer */ void *buffer; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index e56a4398965..1b1bdb1e1c5 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -84,7 +84,7 @@ typedef struct mca_mtl_ofi_component_t { { \ match_bits = contextid; \ match_bits = (match_bits << 16); \ - match_bits |= source; \ + match_bits |= (uint64_t)source; \ match_bits = (match_bits << 32); \ match_bits |= (MTL_OFI_TAG_MASK & tag) | type; \ } @@ -106,7 +106,7 @@ typedef struct mca_mtl_ofi_component_t { match_bits = (match_bits << 32); \ mask_bits |= MTL_OFI_SOURCE_MASK; \ } else { \ - match_bits |= source; \ + match_bits |= (uint64_t)source; \ match_bits = (match_bits << 32); \ } \ \ diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index 41a9a6d6652..9985d912690 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -178,6 +178,7 @@ portals4_init_interface(void) me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; + ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, @@ -279,7 +280,7 @@ create_maptable(size_t nprocs, maptable[i].phys.pid = modex_id->phys.pid; maptable[i].phys.nid = modex_id->phys.nid; opal_output_verbose(50, ompi_mtl_base_framework.framework_output, - "logical: global rank=%d pid=%d nid=%d\n", + "logical: global rank=%d pid=%x nid=%x\n", (int)i, maptable[i].phys.pid, maptable[i].phys.nid); } @@ -311,6 +312,8 @@ create_endpoint(ompi_proc_t *proc) return OMPI_ERR_OUT_OF_RESOURCE; } else { if (ompi_mtl_portals4.use_logical) { + endpoint->phys.nid = 0; + endpoint->phys.pid = 
0; endpoint->rank = proc->super.proc_name.vpid; } else { int ret; @@ -552,15 +555,31 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl) #endif ompi_mtl_portals4_recv_short_fini(); - PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); - PtlMDRelease(ompi_mtl_portals4.zero_md_h); - PtlMDRelease(ompi_mtl_portals4.send_md_h); + if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) { + PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { + PtlMDRelease(ompi_mtl_portals4.zero_md_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { + PtlMDRelease(ompi_mtl_portals4.send_md_h); + } + if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); + } + if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) { + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) { + PtlEQFree(ompi_mtl_portals4.send_eq_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) { + PtlEQFree(ompi_mtl_portals4.recv_eq_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.ni_h, PTL_INVALID_HANDLE)) { + PtlNIFini(ompi_mtl_portals4.ni_h); + } - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); - PtlEQFree(ompi_mtl_portals4.send_eq_h); - PtlEQFree(ompi_mtl_portals4.recv_eq_h); - PtlNIFini(ompi_mtl_portals4.ni_h); PtlFini(); return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 2cde423a313..731e60188b9 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -202,18 +202,26 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; ((int)((match_bits & MTL_PORTALS4_SOURCE_MASK) >> 24)) +/* hda_data bit manipulation 
+ * + * 0 1234567 01234567 01234567 0123 4567 01234567 01234567 01234567 01234567 + * | | | + * ^| | context id | message tag + * || | | + * +---- is_sync + */ + #define MTL_PORTALS4_SYNC_MSG 0x8000000000000000ULL -#define MTL_PORTALS4_SET_HDR_DATA(hdr_data, opcount, length, sync) \ +#define MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, sync) \ { \ hdr_data = (sync) ? 1 : 0; \ - hdr_data = (hdr_data << 15); \ - hdr_data |= opcount & 0x7FFFULL; \ - hdr_data = (hdr_data << 48); \ - hdr_data |= (length & 0xFFFFFFFFFFFFULL); \ + hdr_data = (hdr_data << 39); \ + hdr_data |= contextid; \ + hdr_data = (hdr_data << 24); \ + hdr_data |= (MTL_PORTALS4_TAG_MASK & tag); \ } -#define MTL_PORTALS4_GET_LENGTH(hdr_data) ((size_t)(hdr_data & 0xFFFFFFFFFFFFULL)) #define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index fbeda2124e7..a87f72087eb 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -41,7 +41,7 @@ completion_fn(ptl_event_t *ev, ompi_mtl_portals4_base_request_t *ptl_base_reques ptl_request->status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); ptl_request->status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); ptl_request->status.MPI_ERROR = MPI_SUCCESS; - ptl_request->status._ucount = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); + ptl_request->status._ucount += ev->mlength; if (ev->type != PTL_EVENT_SEARCH) { ptl_request->message = ompi_mtl_portals4_message_alloc(ev); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index de4b4834533..b40edef0412 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -35,29 +35,45 @@ #include "mtl_portals4_message.h" static int -read_msg(void *start, ptl_size_t length, ptl_process_t target, +triggered_read_msg(void *start, ptl_size_t length, 
ptl_process_t target, ptl_match_bits_t match_bits, ptl_size_t remote_offset, ompi_mtl_portals4_recv_request_t *request) { - ptl_md_t md; int ret; - /* FIX ME: This needs to be on the send eq... */ - md.start = start; - md.length = length; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; + ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &request->ct_h); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PtlGet failed: %d", + __FILE__, __LINE__, ret); + return OMPI_ERR_OUT_OF_RESOURCE; + } - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &request->md_h); + ret = PtlTriggeredGet(ompi_mtl_portals4.send_md_h, + (ptl_size_t) start, + length, + target, + ompi_mtl_portals4.read_idx, + match_bits, + remote_offset, + request, + request->ct_h, 1); if (OPAL_UNLIKELY(PTL_OK != ret)) { + PtlCTFree(request->ct_h); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d", + "%s:%d: PtlTriggeredGet failed: %d", __FILE__, __LINE__, ret); return OMPI_ERR_OUT_OF_RESOURCE; } + return OMPI_SUCCESS; +} + +static int +read_msg(void *start, ptl_size_t length, ptl_process_t target, + ptl_match_bits_t match_bits, ptl_size_t remote_offset, + ompi_mtl_portals4_recv_request_t *request) +{ + int ret; #if OMPI_MTL_PORTALS4_FLOW_CONTROL while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { @@ -66,9 +82,9 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, } #endif - ret = PtlGet(request->md_h, - 0, - md.length, + ret = PtlGet(ompi_mtl_portals4.send_md_h, + (ptl_size_t) start, + length, target, ompi_mtl_portals4.read_idx, match_bits, @@ -78,7 +94,6 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); - PtlMDRelease(request->md_h); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -113,59 
+128,61 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, goto callback_error; } - ptl_request->me_h = PTL_INVALID_HANDLE; + if (!ptl_request->is_triggered) { + ptl_request->me_h = PTL_INVALID_HANDLE; - msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); - ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = - MTL_PORTALS4_GET_SOURCE(ev->match_bits); - ptl_request->super.super.ompi_req->req_status.MPI_TAG = - MTL_PORTALS4_GET_TAG(ev->match_bits); - if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "truncate expected: %ld %ld", - msg_length, ptl_request->delivery_len); - ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; - } + msg_length = ev->mlength; + ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = + MTL_PORTALS4_GET_SOURCE(ev->match_bits); + ptl_request->super.super.ompi_req->req_status.MPI_TAG = + MTL_PORTALS4_GET_TAG(ev->match_bits); + if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "truncate expected: %ld %ld", + msg_length, ptl_request->delivery_len); + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; + } #if OPAL_ENABLE_DEBUG - ptl_request->hdr_data = ev->hdr_data; + ptl_request->hdr_data = ev->hdr_data; #endif - if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { - /* If it's not a short message and we're doing rndv, we + if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { + /* If it's not a short message and we're doing rndv, we only have the first part of the message. Issue the get to pull the second part of the message. */ - ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, - ((msg_length > ptl_request->delivery_len) ? 
- ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, - ev->initiator, - ev->hdr_data, - ompi_mtl_portals4.eager_limit, - ptl_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); - goto callback_error; - } + ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, + ((msg_length > ptl_request->delivery_len) ? + ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, + ev->initiator, + ev->hdr_data, + ompi_mtl_portals4.eager_limit, + ptl_request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); + goto callback_error; + } - } else { - /* If we're either using the eager protocol or were a + } else { + /* If we're either using the eager protocol or were a short message, all data has been received, so complete the message. */ - ret = ompi_mtl_datatype_unpack(ptl_request->convertor, - ev->start, - ev->mlength); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: ompi_mtl_datatype_unpack failed: %d", - __FILE__, __LINE__, ret); - ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; - } - ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; + ret = ompi_mtl_datatype_unpack(ptl_request->convertor, + ev->start, + ev->mlength); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_mtl_datatype_unpack failed: %d", + __FILE__, __LINE__, ret); + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; + } + ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) completed, expected", - ptl_request->opcount, ptl_request->hdr_data)); - ptl_request->super.super.completion_callback(&ptl_request->super.super); + 
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) completed, expected", + ptl_request->opcount, ptl_request->hdr_data)); + ptl_request->super.super.completion_callback(&ptl_request->super.super); + } } break; @@ -178,12 +195,14 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); - PtlMDRelease(ptl_request->md_h); goto callback_error; } + if (ptl_request->is_triggered) + PtlCTFree(ptl_request->ct_h); + /* set the received length in the status, now that we know - excatly how much data was sent. */ + exactly how much data was sent. */ ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; if (ompi_mtl_portals4.protocol == rndv) { ptl_request->super.super.ompi_req->req_status._ucount += @@ -208,7 +227,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; } - PtlMDRelease(ptl_request->md_h); OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) completed, reply", @@ -228,95 +246,96 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, goto callback_error; } - ptl_request->me_h = PTL_INVALID_HANDLE; + if (!ptl_request->is_triggered) { + ptl_request->me_h = PTL_INVALID_HANDLE; - msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); - ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = - MTL_PORTALS4_GET_SOURCE(ev->match_bits); - ptl_request->super.super.ompi_req->req_status.MPI_TAG = - MTL_PORTALS4_GET_TAG(ev->match_bits); - if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "truncate unexpected: %ld %ld %d", - msg_length, ptl_request->delivery_len, - MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)); - ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; - } 
+ msg_length = ev->mlength; + ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = + MTL_PORTALS4_GET_SOURCE(ev->match_bits); + ptl_request->super.super.ompi_req->req_status.MPI_TAG = + MTL_PORTALS4_GET_TAG(ev->match_bits); + if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "truncate unexpected: %ld %ld %d", + msg_length, ptl_request->delivery_len, + MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)); + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; + } #if OPAL_ENABLE_DEBUG - ptl_request->hdr_data = ev->hdr_data; + ptl_request->hdr_data = ev->hdr_data; #endif - /* overflow case. Short messages have the buffer stashed + /* overflow case. Short messages have the buffer stashed somewhere. Long messages left in buffer at the source */ - if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) { - ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - if (ev->mlength > 0) { - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data; - iov.iov_base = (char*) ev->start; - iov.iov_len = ev->mlength; - max_data = iov.iov_len; - - ret = opal_convertor_unpack(ptl_request->convertor, - &iov, &iov_count, - &max_data ); - if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); - if (OPAL_UNLIKELY(ret < 0)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: opal_convertor_unpack failed: %d", - __FILE__, __LINE__, ret); - goto callback_error; + if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) { + ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; + if (ev->mlength > 0) { + struct iovec iov; + uint32_t iov_count = 1; + size_t max_data; + iov.iov_base = (char*) ev->start; + iov.iov_len = ev->mlength; + max_data = iov.iov_len; + + ret = opal_convertor_unpack(ptl_request->convertor, + &iov, &iov_count, + &max_data ); + if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); + if (OPAL_UNLIKELY(ret < 
0)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: opal_convertor_unpack failed: %d", + __FILE__, __LINE__, ret); + goto callback_error; + } } - } - /* if it's a sync, send the ack */ - if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) sending sync ack", - ptl_request->opcount, ptl_request->hdr_data)); - ret = PtlPut(ompi_mtl_portals4.zero_md_h, - 0, - 0, - PTL_NO_ACK_REQ, - ev->initiator, - ompi_mtl_portals4.read_idx, - ev->hdr_data, - 0, - NULL, - 0); - if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlPut failed: %d", - __FILE__, __LINE__, ret); - goto callback_error; + /* if it's a sync, send the ack */ + if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) sending sync ack", + ptl_request->opcount, ptl_request->hdr_data)); + ret = PtlPut(ompi_mtl_portals4.zero_md_h, + 0, + 0, + PTL_NO_ACK_REQ, + ev->initiator, + ompi_mtl_portals4.read_idx, + ev->hdr_data, + 0, + NULL, + 0); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PtlPut failed: %d", + __FILE__, __LINE__, ret); + goto callback_error; + } } - } - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) completed, unexpected short (0x%lx)", - ptl_request->opcount, ptl_request->hdr_data, (long) ev->start)); - ptl_request->super.super.completion_callback(&ptl_request->super.super); - - } else { - if (ev->mlength > 0) { - /* if rndv or triggered, copy the eager part to the right place */ - memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength); - } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) completed, unexpected short (0x%lx)", + ptl_request->opcount, ptl_request->hdr_data, (long) ev->start)); + 
ptl_request->super.super.completion_callback(&ptl_request->super.super); + + } else { + if (ev->mlength > 0) { + /* if rndv or triggered, copy the eager part to the right place */ + memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength); + } - ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, - ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ev->mlength, - ev->initiator, - ev->hdr_data, - ev->mlength, - ptl_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); - goto callback_error; + ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, + ((msg_length > ptl_request->delivery_len) ? + ptl_request->delivery_len : msg_length) - ev->mlength, + ev->initiator, + ev->hdr_data, + ev->mlength, + ptl_request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); + goto callback_error; + } } } - break; case PTL_EVENT_LINK: @@ -348,6 +367,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, mca_mtl_request_t *mtl_request) { ptl_match_bits_t match_bits, ignore_bits; + ptl_hdr_data_t hdr_data; int ret = OMPI_SUCCESS; ptl_process_t remote_proc; ompi_mtl_portals4_recv_request_t *ptl_request = @@ -374,11 +394,28 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, src, tag); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, comm->c_contextid, 0); + ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } + ptl_request->is_triggered = + ((ompi_mtl_portals4.protocol == eager) || + (ompi_mtl_portals4.eager_limit >= length) || + (MPI_ANY_SOURCE == src) || + (MPI_ANY_TAG == tag)) ? 
false : true; + + if (ptl_request->is_triggered) { + ret = triggered_read_msg(ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, + ptl_request->delivery_len - ompi_mtl_portals4.eager_limit, + remote_proc, + hdr_data, + ompi_mtl_portals4.eager_limit, + ptl_request); + } + ptl_request->super.type = portals4_req_recv; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; #if OPAL_ENABLE_DEBUG @@ -393,20 +430,26 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n", + "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", ptl_request->opcount, remote_proc.phys.nid, remote_proc.phys.pid, - (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request)); + (int64_t)length, match_bits, ignore_bits, hdr_data, (unsigned long) ptl_request)); me.start = start; me.length = length; - me.ct_handle = PTL_CT_NONE; + if (ptl_request->is_triggered) + me.ct_handle = ptl_request->ct_h; + else + me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE; + if (ptl_request->is_triggered) + me.options |= PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; + if (length <= ompi_mtl_portals4.eager_limit) { me.options |= PTL_ME_EVENT_LINK_DISABLE; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index 7a90ff46537..a615fb5b3ba 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -68,8 +68,9 @@ typedef struct ompi_mtl_portals4_send_request_t ompi_mtl_portals4_send_request_t struct ompi_mtl_portals4_recv_request_t { ompi_mtl_portals4_base_request_t super; void *buffer_ptr; - ptl_handle_md_t md_h; ptl_handle_me_t me_h; + ptl_handle_ct_t ct_h; + 
bool is_triggered; struct opal_convertor_t *convertor; void *delivery_ptr; size_t delivery_len; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 4ee2e775322..647d3fad96a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -189,8 +189,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SHORT_MSG); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, - (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0); if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { me.start = NULL; @@ -274,7 +273,7 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_LONG_MSG); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, 0); me.start = start; me.length = length; diff --git a/ompi/mca/mtl/psm2/Makefile.am b/ompi/mca/mtl/psm2/Makefile.am index 145213340c6..fa3c5201bb6 100644 --- a/ompi/mca/mtl/psm2/Makefile.am +++ b/ompi/mca/mtl/psm2/Makefile.am @@ -22,7 +22,7 @@ EXTRA_DIST = post_configure.sh AM_CPPFLAGS = $(mtl_psm2_CPPFLAGS) -dist_ompidata_DATA = help-mtl-psm.txt +dist_ompidata_DATA = help-mtl-psm2.txt mtl_psm2_sources = \ mtl_psm2.c \ diff --git a/ompi/mca/mtl/psm2/help-mtl-psm.txt b/ompi/mca/mtl/psm2/help-mtl-psm2.txt similarity index 62% rename from ompi/mca/mtl/psm2/help-mtl-psm.txt rename to ompi/mca/mtl/psm2/help-mtl-psm2.txt index 23c3d75b0d2..16c5116a2f9 100644 --- a/ompi/mca/mtl/psm2/help-mtl-psm.txt +++ b/ompi/mca/mtl/psm2/help-mtl-psm2.txt @@ -1,24 +1,25 @@ # -*- text -*- # # Copyright (C) 2009. QLogic Corporation. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # -[psm init] -Initialization of PSM library failed. +[psm2 init] +Initialization of PSM2 library failed. Error: %s # [debug level] -Unable to set PSM debug level. +Unable to set PSM2 debug level. Error: %s # [unable to open endpoint] -PSM was unable to open an endpoint. Please make sure that the network link is +PSM2 was unable to open an endpoint. Please make sure that the network link is active on the node and the hardware is functioning. Error: %s @@ -30,10 +31,10 @@ the environment). Local host: %s # [error polling network] -Error %s occurred in attempting to make network progress (psm_mq_ipeek). +Error %s occurred in attempting to make network progress (psm2_mq_ipeek). # [error posting receive] -Unable to post application receive buffer (psm_mq_irecv or psm_mq_imrecv). +Unable to post application receive buffer (psm2_mq_irecv or psm2_mq_imrecv). Error: %s Buffer: %p diff --git a/ompi/mca/mtl/psm2/mtl_psm2.c b/ompi/mca/mtl/psm2/mtl_psm2.c index 34fe8ae8923..55d0dde4e18 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2.c +++ b/ompi/mca/mtl/psm2/mtl_psm2.c @@ -36,7 +36,7 @@ mca_mtl_psm2_module_t ompi_mtl_psm2 = { .super = { - /* NTH: PSM supports 16 bit context ids */ + /* NTH: PSM2 supports 16 bit context ids */ .mtl_max_contextid = (1UL << 16) - 1, .mtl_max_tag = (1UL << 30), /* must allow negatives */ @@ -59,27 +59,27 @@ mca_mtl_psm2_module_t ompi_mtl_psm2 = { }; static -psm_error_t -ompi_mtl_psm2_errhandler(psm_ep_t ep, const psm_error_t error, - const char *error_string, psm_error_token_t token) +psm2_error_t +ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error, + const char *error_string, psm2_error_token_t token) { switch (error) { - /* We don't want PSM to default to exiting when the following errors occur */ - case PSM_EP_DEVICE_FAILURE: - case PSM_EP_NO_DEVICE: - case PSM_EP_NO_PORTS_AVAIL: - case PSM_EP_NO_NETWORK: - case PSM_EP_INVALID_UUID_KEY: - 
opal_show_help("help-mtl-psm.txt", + /* We don't want PSM2 to default to exiting when the following errors occur */ + case PSM2_EP_DEVICE_FAILURE: + case PSM2_EP_NO_DEVICE: + case PSM2_EP_NO_PORTS_AVAIL: + case PSM2_EP_NO_NETWORK: + case PSM2_EP_INVALID_UUID_KEY: + opal_show_help("help-mtl-psm2.txt", "unable to open endpoint", true, - psm_error_get_string(error)); + psm2_error_get_string(error)); break; /* We can't handle any other errors than the ones above */ default: - opal_output(0, "Open MPI detected an unexpected PSM error in opening " + opal_output(0, "Open MPI detected an unexpected PSM2 error in opening " "an endpoint: %s\n", error_string); - return psm_error_defer(token); + return psm2_error_defer(token); break; } return error; @@ -88,24 +88,24 @@ ompi_mtl_psm2_errhandler(psm_ep_t ep, const psm_error_t error, int ompi_mtl_psm2_progress( void ); int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { - psm_error_t err; - psm_ep_t ep; /* endpoint handle */ - psm_mq_t mq; - psm_epid_t epid; /* unique lid+port identifier */ - psm_uuid_t unique_job_key; - struct psm_ep_open_opts ep_opt; + psm2_error_t err; + psm2_ep_t ep; /* endpoint handle */ + psm2_mq_t mq; + psm2_epid_t epid; /* unique lid+port identifier */ + psm2_uuid_t unique_job_key; + struct psm2_ep_open_opts ep_opt; unsigned long long *uu = (unsigned long long *) unique_job_key; char *generated_key; char env_string[256]; int rc; generated_key = getenv("OMPI_MCA_orte_precondition_transports"); - memset(uu, 0, sizeof(psm_uuid_t)); + memset(uu, 0, sizeof(psm2_uuid_t)); if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) { - opal_show_help("help-mtl-psm.txt", + opal_show_help("help-mtl-psm2.txt", "no uuid present", true, generated_key ? 
"could not be parsed from" : "not present in", ompi_process_info.nodename); @@ -114,9 +114,9 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { } /* Handle our own errors for opening endpoints */ - psm_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler); + psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler); - /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware + /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware * contexts correctly. */ snprintf(env_string, sizeof(env_string), "%d", local_rank); @@ -125,31 +125,31 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { setenv("MPI_LOCALNRANKS", env_string, 0); /* Setup the endpoint options. */ - psm_ep_open_opts_get_defaults(&ep_opt); + psm2_ep_open_opts_get_defaults(&ep_opt); ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9; - ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */ + ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */ - /* Open PSM endpoint */ - err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); + /* Open PSM2 endpoint */ + err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { - opal_show_help("help-mtl-psm.txt", + opal_show_help("help-mtl-psm2.txt", "unable to open endpoint", true, - psm_error_get_string(err)); + psm2_error_get_string(err)); return OMPI_ERROR; } /* Future errors are handled by the default error handler */ - psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_DEFAULT); + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); - err = psm_mq_init(ep, + err = psm2_mq_init(ep, 0xffff000000000000ULL, NULL, 0, &mq); if (err) { - opal_show_help("help-mtl-psm.txt", - "psm init", true, - psm_error_get_string(err)); + opal_show_help("help-mtl-psm2.txt", + "psm2 init", true, + psm2_error_get_string(err)); return OMPI_ERROR; } @@ -160,7 +160,7 @@ int 
ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_mtl_psm2_component.super.mtl_version, &ompi_mtl_psm2.epid, - sizeof(psm_epid_t)); + sizeof(psm2_epid_t)); if (OMPI_SUCCESS != rc) { opal_output(0, "Open MPI couldn't send PSM2 epid to head node process"); @@ -168,7 +168,7 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { } - /* register the psm progress function */ + /* register the psm2 progress function */ opal_progress_register(ompi_mtl_psm2_progress); return OMPI_SUCCESS; @@ -176,29 +176,29 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { int ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) { - psm_error_t err; + psm2_error_t err; opal_progress_unregister(ompi_mtl_psm2_progress); /* free resources */ - err = psm_mq_finalize(ompi_mtl_psm2.mq); + err = psm2_mq_finalize(ompi_mtl_psm2.mq); if (err) { - opal_output(0, "Error in psm_mq_finalize (error %s)\n", - psm_error_get_string(err)); + opal_output(0, "Error in psm2_mq_finalize (error %s)\n", + psm2_error_get_string(err)); return OMPI_ERROR; } - err = psm_ep_close(ompi_mtl_psm2.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9); + err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9); if (err) { - opal_output(0, "Error in psm_ep_close (error %s)\n", - psm_error_get_string(err)); + opal_output(0, "Error in psm2_ep_close (error %s)\n", + psm2_error_get_string(err)); return OMPI_ERROR; } - err = psm_finalize(); + err = psm2_finalize(); if (err) { - opal_output(0, "Error in psm_finalize (error %s)\n", - psm_error_get_string(err)); + opal_output(0, "Error in psm2_finalize (error %s)\n", + psm2_error_get_string(err)); return OMPI_ERROR; } @@ -207,18 +207,18 @@ ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) { static const char * -ompi_mtl_psm2_connect_error_msg(psm_error_t err) +ompi_mtl_psm2_connect_error_msg(psm2_error_t err) { switch (err) { /* See if we expect the error */ - case 
PSM_EPID_UNREACHABLE: - case PSM_EPID_INVALID_NODE: - case PSM_EPID_INVALID_MTU: - case PSM_EPID_INVALID_UUID_KEY: - case PSM_EPID_INVALID_VERSION: - case PSM_EPID_INVALID_CONNECT: - return psm_error_get_string(err); + case PSM2_EPID_UNREACHABLE: + case PSM2_EPID_INVALID_NODE: + case PSM2_EPID_INVALID_MTU: + case PSM2_EPID_INVALID_UUID_KEY: + case PSM2_EPID_INVALID_VERSION: + case PSM2_EPID_INVALID_CONNECT: + return psm2_error_get_string(err); break; - case PSM_EPID_UNKNOWN: + case PSM2_EPID_UNKNOWN: return "Connect status could not be determined " "because of other errors"; default: @@ -241,23 +241,23 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, { int i,j; int rc; - psm_epid_t *epids_in = NULL; + psm2_epid_t *epids_in = NULL; int *mask_in = NULL; - psm_epid_t *epid; - psm_epaddr_t *epaddrs_out = NULL; - psm_error_t *errs_out = NULL, err; + psm2_epid_t *epid; + psm2_epaddr_t *epaddrs_out = NULL; + psm2_error_t *errs_out = NULL, err; size_t size; - int proc_errors[PSM_ERROR_LAST] = { 0 }; + int proc_errors[PSM2_ERROR_LAST] = { 0 }; int timeout_in_secs; assert(mtl == &ompi_mtl_psm2.super); rc = OMPI_ERR_OUT_OF_RESOURCE; - errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t)); + errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t)); if (errs_out == NULL) { goto bail; } - epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t)); + epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t)); if (epids_in == NULL) { goto bail; } @@ -265,7 +265,7 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, if (mask_in == NULL) { goto bail; } - epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t)); + epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t)); if (epaddrs_out == NULL) { goto bail; } @@ -281,7 +281,7 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version, &procs[i]->super.proc_name, (void**)&epid, &size); - if (rc 
!= OMPI_SUCCESS || size != sizeof(psm_epid_t)) { + if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) { return OMPI_ERROR; } epids_in[i] = *epid; @@ -290,9 +290,9 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs); - psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_NOP); + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP); - err = psm_ep_connect(ompi_mtl_psm2.ep, + err = psm2_ep_connect(ompi_mtl_psm2.ep, nprocs, epids_in, mask_in, @@ -302,19 +302,19 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, if (err) { char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err); if (errstr == NULL) { - opal_output(0, "PSM returned unhandled/unknown connect error: %s\n", - psm_error_get_string(err)); + opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n", + psm2_error_get_string(err)); } for (i = 0; i < (int) nprocs; i++) { if (0 == mask_in[i]) { continue; } - psm_error_t thiserr = errs_out[i]; + psm2_error_t thiserr = errs_out[i]; errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr); if (proc_errors[thiserr] == 0) { proc_errors[thiserr] = 1; - opal_output(0, "PSM EP connect error (%s):", + opal_output(0, "PSM2 EP connect error (%s):", errstr ? errstr : "unknown connect error"); for (j = 0; j < (int) nprocs; j++) { if (errs_out[j] == thiserr) { @@ -330,9 +330,9 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, } else { /* Default error handling is enabled, errors will not be returned to - * user. PSM prints the error and the offending endpoint's hostname + * user. 
PSM2 prints the error and the offending endpoint's hostname * and exits with -1 */ - psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_DEFAULT); + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); /* Fill in endpoint data */ for (i = 0; i < (int) nprocs; i++) { @@ -393,41 +393,41 @@ ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl, int ompi_mtl_psm2_progress( void ) { - psm_error_t err; + psm2_error_t err; mca_mtl_psm2_request_t* mtl_psm2_request; - psm_mq_status2_t psm_status; - psm_mq_req_t req; + psm2_mq_status2_t psm2_status; + psm2_mq_req_t req; int completed = 1; do { - err = psm_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL); - if (err == PSM_MQ_INCOMPLETE) { + err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL); + if (err == PSM2_MQ_INCOMPLETE) { return completed; - } else if (err != PSM_OK) { + } else if (err != PSM2_OK) { goto error; } completed++; - err = psm_mq_test2(&req, &psm_status); - if (err != PSM_OK) { + err = psm2_mq_test2(&req, &psm2_status); + if (err != PSM2_OK) { goto error; } - mtl_psm2_request = (mca_mtl_psm2_request_t*) psm_status.context; + mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context; if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) { mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE = - psm_status.msg_tag.tag2; + psm2_status.msg_tag.tag1; mtl_psm2_request->super.ompi_req->req_status.MPI_TAG = - psm_status.msg_tag.tag1; + psm2_status.msg_tag.tag0; mtl_psm2_request->super.ompi_req->req_status._ucount = - psm_status.nbytes; + psm2_status.nbytes; ompi_mtl_datatype_unpack(mtl_psm2_request->convertor, mtl_psm2_request->buf, - psm_status.msg_length); + psm2_status.msg_length); } if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) { @@ -436,12 +436,12 @@ int ompi_mtl_psm2_progress( void ) { } } - switch (psm_status.error_code) { - case PSM_OK: + switch (psm2_status.error_code) { + case PSM2_OK: mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; break; - case 
PSM_MQ_TRUNCATION: + case PSM2_MQ_TRUNCATION: mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; break; @@ -456,8 +456,8 @@ int ompi_mtl_psm2_progress( void ) { while (1); error: - opal_show_help("help-mtl-psm.txt", + opal_show_help("help-mtl-psm2.txt", "error polling network", true, - psm_error_get_string(err)); + psm2_error_get_string(err)); return 1; } diff --git a/ompi/mca/mtl/psm2/mtl_psm2_cancel.c b/ompi/mca/mtl/psm2/mtl_psm2_cancel.c index dad4649dd02..22a8f827c83 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_cancel.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_cancel.c @@ -26,28 +26,28 @@ int ompi_mtl_psm2_cancel(struct mca_mtl_base_module_t* mtl, struct mca_mtl_request_t *mtl_request, int flag) { - psm_error_t err; - psm_mq_status_t status; + psm2_error_t err; + psm2_mq_status_t status; mca_mtl_psm2_request_t *mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; - /* PSM does not support canceling sends */ + /* PSM2 does not support canceling sends */ if(OMPI_mtl_psm2_ISEND == mtl_psm2_request->type) { return OMPI_SUCCESS; } - err = psm_mq_cancel(&mtl_psm2_request->psm_request); - if(PSM_OK == err) { - err = psm_mq_test(&mtl_psm2_request->psm_request, &status); - if(PSM_OK == err) { + err = psm2_mq_cancel(&mtl_psm2_request->psm2_request); + if(PSM2_OK == err) { + err = psm2_mq_test(&mtl_psm2_request->psm2_request, &status); + if(PSM2_OK == err) { mtl_request->ompi_req->req_status._cancelled = true; mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super); return OMPI_SUCCESS; } else { return OMPI_ERROR; } - } else if(PSM_MQ_INCOMPLETE == err) { + } else if(PSM2_MQ_INCOMPLETE == err) { return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index 28174728a91..3ae41513602 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -32,7 +32,7 @@ #include "mtl_psm2_types.h" #include "mtl_psm2_request.h" -#include "psm.h" +#include 
"psm2.h" #include #include @@ -80,7 +80,7 @@ ompi_mtl_psm2_component_register(void) ompi_mtl_psm2.connect_timeout = 180; (void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version, "connect_timeout", - "PSM connection timeout value in seconds", + "PSM2 connection timeout value in seconds", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, @@ -116,7 +116,7 @@ static int ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority) { /* - * if we get here it means that PSM is available so give high priority + * if we get here it means that PSM2 is available so give high priority */ *priority = param_priority; @@ -165,14 +165,14 @@ static mca_mtl_base_module_t * ompi_mtl_psm2_component_init(bool enable_progress_threads, bool enable_mpi_threads) { - psm_error_t err; - int verno_major = PSM_VERNO_MAJOR; - int verno_minor = PSM_VERNO_MINOR; + psm2_error_t err; + int verno_major = PSM2_VERNO_MAJOR; + int verno_minor = PSM2_VERNO_MINOR; int local_rank = -1, num_local_procs = 0; int num_total_procs = 0; /* Compute the total number of processes on this host and our local rank - * on that node. We need to provide PSM with these values so it can + * on that node. We need to provide PSM2 with these values so it can * allocate hardware contexts appropriately across processes. 
*/ if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) { @@ -190,27 +190,27 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, return NULL; } - err = psm_error_register_handler(NULL /* no ep */, - PSM_ERRHANDLER_NOP); + err = psm2_error_register_handler(NULL /* no ep */, + PSM2_ERRHANDLER_NOP); if (err) { - opal_output(0, "Error in psm_error_register_handler (error %s)\n", - psm_error_get_string(err)); + opal_output(0, "Error in psm2_error_register_handler (error %s)\n", + psm2_error_get_string(err)); return NULL; } if (num_local_procs == num_total_procs) { - setenv("PSM_DEVICES", "self,shm", 0); + setenv("PSM2_DEVICES", "self,shm", 0); } - err = psm_init(&verno_major, &verno_minor); + err = psm2_init(&verno_major, &verno_minor); if (err) { - opal_show_help("help-mtl-psm.txt", - "psm init", true, - psm_error_get_string(err)); + opal_show_help("help-mtl-psm2.txt", + "psm2 init", true, + psm2_error_get_string(err)); return NULL; } - /* Complete PSM initialization */ + /* Complete PSM2 initialization */ ompi_mtl_psm2_module_init(local_rank, num_local_procs); ompi_mtl_psm2.super.mtl_request_size = diff --git a/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h index aeb6bccadc1..d90ca227d98 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h @@ -45,17 +45,17 @@ struct mca_mtl_psm2_endpoint_t { struct mca_mtl_psm2_module_t* mtl_psm2_module; /**< MTL instance that created this connection */ - psm_epid_t peer_epid; + psm2_epid_t peer_epid; /**< The unique epid for the opened port */ - psm_epaddr_t peer_addr; + psm2_epaddr_t peer_addr; /**< The connected endpoint handle*/ }; typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t; OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint); -static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +static inline mca_mtl_psm2_endpoint_t *ompi_mtl_psm2_get_endpoint (struct 
mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) { if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_probe.c b/ompi/mca/mtl/psm2/mtl_psm2_probe.c index 7bcb358fcaf..b81317507be 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_probe.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_probe.c @@ -35,26 +35,26 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, int *flag, struct ompi_status_public_t *status) { - psm_mq_tag_t mqtag, tagsel; - psm_mq_status2_t mqstat; - psm_error_t err; + psm2_mq_tag_t mqtag, tagsel; + psm2_mq_status2_t mqstat; + psm2_error_t err; - PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); - err = psm_mq_iprobe2(ompi_mtl_psm2.mq, - PSM_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat); - if (err == PSM_OK) { + err = psm2_mq_iprobe2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat); + if (err == PSM2_OK) { *flag = 1; if(MPI_STATUS_IGNORE != status) { - status->MPI_SOURCE = mqstat.msg_tag.tag2; - status->MPI_TAG = mqstat.msg_tag.tag1; + status->MPI_SOURCE = mqstat.msg_tag.tag1; + status->MPI_TAG = mqstat.msg_tag.tag0; status->_ucount = mqstat.nbytes; switch (mqstat.error_code) { - case PSM_OK: + case PSM2_OK: status->MPI_ERROR = OMPI_SUCCESS; break; - case PSM_MQ_TRUNCATION: + case PSM2_MQ_TRUNCATION: status->MPI_ERROR = MPI_ERR_TRUNCATE; break; default: @@ -64,7 +64,7 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, return OMPI_SUCCESS; } - else if (err == PSM_MQ_INCOMPLETE) { + else if (err == PSM2_MQ_INCOMPLETE) { *flag = 0; return OMPI_SUCCESS; } @@ -83,27 +83,27 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, struct ompi_status_public_t *status) { struct ompi_message_t* msg; - psm_mq_tag_t mqtag, tagsel; - psm_mq_status2_t mqstat; - psm_mq_req_t mqreq; - psm_error_t err; + psm2_mq_tag_t mqtag, tagsel; + psm2_mq_status2_t mqstat; + 
psm2_mq_req_t mqreq; + psm2_error_t err; - PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); - err = psm_mq_improbe2(ompi_mtl_psm2.mq, - PSM_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat); - if (err == PSM_OK) { + err = psm2_mq_improbe2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat); + if (err == PSM2_OK) { if(MPI_STATUS_IGNORE != status) { - status->MPI_SOURCE = mqstat.msg_tag.tag2; - status->MPI_TAG = mqstat.msg_tag.tag1; + status->MPI_SOURCE = mqstat.msg_tag.tag1; + status->MPI_TAG = mqstat.msg_tag.tag0; status->_ucount = mqstat.nbytes; switch (mqstat.error_code) { - case PSM_OK: + case PSM2_OK: status->MPI_ERROR = OMPI_SUCCESS; break; - case PSM_MQ_TRUNCATION: + case PSM2_MQ_TRUNCATION: status->MPI_ERROR = MPI_ERR_TRUNCATE; break; default: @@ -118,13 +118,13 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, msg->comm = comm; msg->req_ptr = mqreq; - msg->peer = mqstat.msg_tag.tag2; + msg->peer = mqstat.msg_tag.tag1; msg->count = mqstat.nbytes; *message = msg; *matched = 1; return OMPI_SUCCESS; - } else if(err == PSM_MQ_INCOMPLETE) { + } else if(err == PSM2_MQ_INCOMPLETE) { *matched = 0; *message = MPI_MESSAGE_NULL; return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_recv.c b/ompi/mca/mtl/psm2/mtl_psm2_recv.c index 94c0a955ded..a62e3db3bb6 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_recv.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_recv.c @@ -39,10 +39,10 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, struct mca_mtl_request_t *mtl_request) { int ret; - psm_error_t err; + psm2_error_t err; mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; - psm_mq_tag_t mqtag; - psm_mq_tag_t tagsel; + psm2_mq_tag_t mqtag; + psm2_mq_tag_t tagsel; size_t length; ret = ompi_mtl_datatype_recv_buf(convertor, @@ -56,22 +56,22 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, mtl_psm2_request->convertor = convertor; 
mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; - PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); - err = psm_mq_irecv2(ompi_mtl_psm2.mq, - PSM_MQ_ANY_ADDR, + err = psm2_mq_irecv2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, 0, mtl_psm2_request->buf, length, mtl_psm2_request, - &mtl_psm2_request->psm_request); + &mtl_psm2_request->psm2_request); if (err) { - opal_show_help("help-mtl-psm.txt", + opal_show_help("help-mtl-psm2.txt", "error posting receive", true, - psm_error_get_string(err), + psm2_error_get_string(err), mtl_psm2_request->buf, length); return OMPI_ERROR; } @@ -89,11 +89,11 @@ ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl, mca_mtl_psm2_request_t *mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; size_t length; - psm_error_t err; + psm2_error_t err; int ret; - mtl_psm2_request->psm_request = - (psm_mq_req_t)(*message)->req_ptr; + mtl_psm2_request->psm2_request = + (psm2_mq_req_t)(*message)->req_ptr; ret = ompi_mtl_datatype_recv_buf(convertor, &mtl_psm2_request->buf, @@ -107,14 +107,14 @@ ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl, mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; - err = psm_mq_imrecv(ompi_mtl_psm2.mq, 0, + err = psm2_mq_imrecv(ompi_mtl_psm2.mq, 0, mtl_psm2_request->buf, length, mtl_psm2_request, - &mtl_psm2_request->psm_request); + &mtl_psm2_request->psm2_request); if(err) { - opal_show_help("help-mtl-psm.txt", + opal_show_help("help-mtl-psm2.txt", "error posting receive", true, - psm_error_get_string(err), + psm2_error_get_string(err), mtl_psm2_request->buf, length); return OMPI_ERROR; } diff --git a/ompi/mca/mtl/psm2/mtl_psm2_request.h b/ompi/mca/mtl/psm2/mtl_psm2_request.h index bc669eec8ce..7e6410afb37 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_request.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_request.h @@ -32,8 +32,8 @@ typedef enum { struct mca_mtl_psm2_request_t { struct mca_mtl_request_t super; mca_mtl_psm2_request_type_t type; - 
psm_mq_req_t psm_request; - /* psm_segment_t psm_segment[1]; */ + psm2_mq_req_t psm2_request; + /* psm2_segment_t psm2_segment[1]; */ void *buf; size_t length; struct opal_convertor_t *convertor; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_send.c b/ompi/mca/mtl/psm2/mtl_psm2_send.c index 73cf7698737..d4ed8136bf6 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_send.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_send.c @@ -36,18 +36,18 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, struct opal_convertor_t *convertor, mca_pml_base_send_mode_t mode) { - psm_error_t err; + psm2_error_t err; mca_mtl_psm2_request_t mtl_psm2_request; - psm_mq_tag_t mqtag; + psm2_mq_tag_t mqtag; uint32_t flags = 0; int ret; size_t length; ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); - mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); + mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); assert(mtl == &ompi_mtl_psm2.super); - PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, &mtl_psm2_request.buf, @@ -62,10 +62,10 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, if (OMPI_SUCCESS != ret) return ret; if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) - flags |= PSM_MQ_FLAG_SENDSYNC; + flags |= PSM2_MQ_FLAG_SENDSYNC; - err = psm_mq_send2(ompi_mtl_psm2.mq, - psm_endpoint->peer_addr, + err = psm2_mq_send2(ompi_mtl_psm2.mq, + psm2_endpoint->peer_addr, flags, &mqtag, mtl_psm2_request.buf, @@ -75,7 +75,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, free(mtl_psm2_request.buf); } - return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR; + return err == PSM2_OK ? 
OMPI_SUCCESS : OMPI_ERROR; } int @@ -88,18 +88,18 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, bool blocking, mca_mtl_request_t * mtl_request) { - psm_error_t psm_error; - psm_mq_tag_t mqtag; + psm2_error_t psm2_error; + psm2_mq_tag_t mqtag; uint32_t flags = 0; int ret; mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; size_t length; ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); - mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); + mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); assert(mtl == &ompi_mtl_psm2.super); - PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, @@ -114,16 +114,16 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, if (OMPI_SUCCESS != ret) return ret; if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) - flags |= PSM_MQ_FLAG_SENDSYNC; + flags |= PSM2_MQ_FLAG_SENDSYNC; - psm_error = psm_mq_isend2(ompi_mtl_psm2.mq, - psm_endpoint->peer_addr, + psm2_error = psm2_mq_isend2(ompi_mtl_psm2.mq, + psm2_endpoint->peer_addr, flags, &mqtag, mtl_psm2_request->buf, length, mtl_psm2_request, - &mtl_psm2_request->psm_request); + &mtl_psm2_request->psm2_request); - return psm_error == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR; + return psm2_error == PSM2_OK ? 
OMPI_SUCCESS : OMPI_ERROR; } diff --git a/ompi/mca/mtl/psm2/mtl_psm2_types.h b/ompi/mca/mtl/psm2/mtl_psm2_types.h index a269f0a89c9..31f0deb7ca1 100755 --- a/ompi/mca/mtl/psm2/mtl_psm2_types.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_types.h @@ -45,10 +45,10 @@ struct mca_mtl_psm2_module_t { int32_t connect_timeout; - psm_ep_t ep; - psm_mq_t mq; - psm_epid_t epid; - psm_epaddr_t epaddr; + psm2_ep_t ep; + psm2_mq_t mq; + psm2_epid_t epid; + psm2_epaddr_t epaddr; }; typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t; @@ -62,29 +62,29 @@ typedef struct mca_mtl_psm2_component_t mca_mtl_psm2_component_t; OMPI_DECLSPEC extern mca_mtl_psm2_component_t mca_mtl_psm2_component; -#define PSM_MAKE_MQTAG(ctxt,rank,utag,tag) \ +#define PSM2_MAKE_MQTAG(ctxt,rank,utag,tag) \ do { \ - (tag).tag0 = ctxt; \ - (tag).tag1 = utag; \ - (tag).tag2 = rank; \ + (tag).tag0 = utag; \ + (tag).tag1 = rank; \ + (tag).tag2 = ctxt; \ } while (0) -#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, _tagsel) \ - do { \ - (tag).tag0 = user_ctxt; \ - (tag).tag1 = user_tag; \ - (tag).tag2 = user_rank; \ +#define PSM2_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, _tagsel) \ + do { \ + (tag).tag0 = user_tag; \ + (tag).tag1 = user_rank; \ + (tag).tag2 = user_ctxt; \ (_tagsel).tag0 = 0xffffffffULL; \ (_tagsel).tag1 = 0xffffffffULL; \ (_tagsel).tag2 = 0xffffffffULL; \ if((user_tag) == MPI_ANY_TAG) \ { \ - (_tagsel).tag1 = 0x80000000ULL; \ - (tag).tag1 = 0x00000000ULL; \ + (_tagsel).tag0 = 0x80000000ULL; \ + (tag).tag0 = 0x00000000ULL; \ } \ if((user_rank) == MPI_ANY_SOURCE) \ { \ - (_tagsel).tag2 = 0x00000000ULL; \ + (_tagsel).tag1 = 0x00000000ULL; \ } \ } while (0) diff --git a/ompi/mca/mtl/psm2/post_configure.sh b/ompi/mca/mtl/psm2/post_configure.sh index 07cf9bddda6..c47eb335b2f 100644 --- a/ompi/mca/mtl/psm2/post_configure.sh +++ b/ompi/mca/mtl/psm2/post_configure.sh @@ -1 +1 @@ -DIRECT_CALL_HEADER="ompi/mca/mtl/psm2/mtl_psm.h" +DIRECT_CALL_HEADER="ompi/mca/mtl/psm2/mtl_psm2.h" diff --git 
a/ompi/mca/op/x86/.opal_unignore b/ompi/mca/op/x86/.opal_unignore index 814285c7e50..97b20ffb20d 100644 --- a/ompi/mca/op/x86/.opal_unignore +++ b/ompi/mca/op/x86/.opal_unignore @@ -1 +1 @@ -jsquyres +rhc diff --git a/ompi/mca/osc/base/osc_base_obj_convert.h b/ompi/mca/osc/base/osc_base_obj_convert.h index 4163670a085..c6514bbbe00 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.h +++ b/ompi/mca/osc/base/osc_base_obj_convert.h @@ -76,7 +76,7 @@ static inline ompi_op_t * ompi_osc_base_op_create(int op_id) { - ompi_op_t *op = MPI_Op_f2c(op_id); + ompi_op_t *op = PMPI_Op_f2c(op_id); OBJ_RETAIN(op); return op; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 51b14b7057b..68ca022b7ac 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -149,19 +149,20 @@ struct ompi_osc_pt2pt_module_t { uint32_t *epoch_outgoing_frag_count; /** cyclic counter for a unique tage for long messages. 
*/ - unsigned int tag_counter; + uint32_t tag_counter; + uint32_t rtag_counter; /* Number of outgoing fragments that have completed since the begining of time */ - uint32_t outgoing_frag_count; + volatile uint32_t outgoing_frag_count; /* Next outgoing fragment count at which we want a signal on cond */ - uint32_t outgoing_frag_signal_count; + volatile uint32_t outgoing_frag_signal_count; /* Number of incoming fragments that have completed since the begining of time */ - uint32_t active_incoming_frag_count; + volatile uint32_t active_incoming_frag_count; /* Next incoming buffer count at which we want a signal on cond */ - uint32_t active_incoming_frag_signal_count; + volatile uint32_t active_incoming_frag_signal_count; /** Number of targets locked/being locked */ unsigned int passive_target_access_epoch; @@ -408,14 +409,6 @@ int ompi_osc_pt2pt_component_irecv(ompi_osc_pt2pt_module_t *module, int tag, struct ompi_communicator_t *comm); -int ompi_osc_pt2pt_component_isend(ompi_osc_pt2pt_module_t *module, - const void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int dest, - int tag, - struct ompi_communicator_t *comm); - /** * ompi_osc_pt2pt_progress_pending_acc: * @@ -657,13 +650,18 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module) /* the LSB of the tag is used be the receiver to determine if the message is a passive or active target (ie, where to mark completion). */ - int tmp = module->tag_counter + !!(module->passive_target_access_epoch); - - module->tag_counter = (module->tag_counter + 2) & OSC_PT2PT_FRAG_MASK; - - return tmp; + int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4); + return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); } +static inline int get_rtag(ompi_osc_pt2pt_module_t *module) +{ + /* the LSB of the tag is used be the receiver to determine if the + message is a passive or active target (ie, where to mark + completion). 
*/ + int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->rtag_counter, 4); + return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); +} /** * ompi_osc_pt2pt_accumulate_lock: * diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index e169addb549..b8b04796e15 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -147,6 +147,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) /* short-circuit the noprecede case */ if (0 != (assert & MPI_MODE_NOPRECEDE)) { + module->comm->c_coll.coll_barrier (module->comm, module->comm->c_coll.coll_barrier_module); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc pt2pt: fence end (short circuit)")); return ret; @@ -211,7 +212,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_osc_pt2pt_sync_t *sync = &module->all_sync; - OPAL_THREAD_LOCK(&sync->lock); + OPAL_THREAD_LOCK(&module->lock); /* check if we are already in an access epoch */ if (ompi_osc_pt2pt_access_epoch_active (module)) { diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index 5bb3a070cfc..1205767f016 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -34,27 +34,58 @@ #include /* progress an OSC request */ +static int ompi_osc_pt2pt_comm_complete (ompi_request_t *request) +{ + ompi_osc_pt2pt_module_t *module = + (ompi_osc_pt2pt_module_t*) request->req_complete_cb_data; + + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, + "isend_completion_cb called")); + + mark_outgoing_completion(module); + + /* put this request on the garbage colletion list */ + osc_pt2pt_gc_add_request (module, request); + + return OMPI_SUCCESS; +} + static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request) { ompi_osc_pt2pt_request_t *pt2pt_request = (ompi_osc_pt2pt_request_t *) request->req_complete_cb_data; - ompi_osc_pt2pt_module_t *module = pt2pt_request->module; OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_req_comm_complete called tag = %d", request->req_status.MPI_TAG)); - mark_outgoing_completion (module); + /* update the cbdata for ompi_osc_pt2pt_comm_complete */ + request->req_complete_cb_data = pt2pt_request->module; if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) { ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR); } - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); + return ompi_osc_pt2pt_comm_complete (request); +} - return OMPI_SUCCESS; +static inline int ompi_osc_pt2pt_data_isend (ompi_osc_pt2pt_module_t *module, const void *buf, + size_t count, ompi_datatype_t *datatype, int dest, + int tag, ompi_osc_pt2pt_request_t *request) +{ + /* increment the outgoing send count */ + ompi_osc_signal_outgoing (module, dest, 1); + + if (NULL != request) 
{ + ++request->outstanding_requests; + return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, module->comm, + ompi_osc_pt2pt_req_comm_complete, request); + } + + return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, module->comm, + ompi_osc_pt2pt_comm_complete, module); } + static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request) { ompi_datatype_t *datatype = (ompi_datatype_t *) request->req_complete_cb_data; @@ -190,8 +221,8 @@ static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, c ((unsigned long) target_disp * module->disp_unit); int ret; - /* if we are in active target mode wait until all post messages arrive */ - ompi_osc_pt2pt_sync_wait (pt2pt_sync); + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: starting local " + "get accumulate")); ompi_osc_pt2pt_accumulate_lock (module); @@ -222,6 +253,9 @@ static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, c ompi_osc_pt2pt_accumulate_unlock (module); + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: local get " + "accumulate complete")); + if (request) { /* NTH: is it ok to use an ompi error code here? 
*/ ompi_osc_pt2pt_request_complete (request, ret); @@ -282,14 +316,14 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ payload_len = origin_dt->super.size * origin_count; frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -301,9 +335,8 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ tag = get_tag(module); } - /* flush will be called at the end of this function. make sure all post messages have - * arrived. 
*/ - if ((is_long_msg || request) && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) { + if (is_long_msg) { + /* wait for eager sends to be active before starting a long put */ ompi_osc_pt2pt_sync_wait (pt2pt_sync); } @@ -361,18 +394,8 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ header->tag = tag; osc_pt2pt_hton(header, proc); - /* increase the outgoing signal count */ - ompi_osc_signal_outgoing (module, target, 1); - - if (request) { - request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt, - target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete, - request); - } else { - ret = ompi_osc_pt2pt_component_isend (module,origin_addr, origin_count, origin_dt, target, tag, - module->comm); - } + ret = ompi_osc_pt2pt_data_isend (module,origin_addr, origin_count, origin_dt, target, tag, + request); } } while (0); @@ -380,14 +403,7 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (request || is_long_msg) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int @@ -459,14 +475,14 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, payload_len = origin_dt->super.size * origin_count; frag_len = sizeof(*header) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true); if (OMPI_SUCCESS != ret) { frag_len = sizeof(*header) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request); if (OMPI_SUCCESS != ret) { /* allocate 
space for the header plus space to store ddt_len */ frag_len = sizeof(*header) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -475,12 +491,11 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, } is_long_msg = true; - tag = get_tag (module); + tag = get_rtag (module); } - /* flush will be called at the end of this function. make sure all post messages have - * arrived. */ - if ((is_long_msg || request) && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) { + if (is_long_msg) { + /* wait for synchronization before posting a long message */ ompi_osc_pt2pt_sync_wait (pt2pt_sync); } @@ -538,18 +553,8 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "acc: starting long accumulate with tag %d", tag)); - /* increment the outgoing send count */ - ompi_osc_signal_outgoing (module, target, 1); - - if (request) { - request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt, - target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete, - request); - } else { - ret = ompi_osc_pt2pt_component_isend (module, origin_addr, origin_count, origin_dt, target, tag, - module->comm); - } + ret = ompi_osc_pt2pt_data_isend (module, origin_addr, origin_count, origin_dt, target, tag, + request); } } while (0); @@ -561,14 +566,7 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (is_long_msg || request) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return 
ompi_osc_pt2pt_frag_finish(module, frag); } int @@ -639,7 +637,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar } frag_len = sizeof(ompi_osc_pt2pt_header_cswap_t) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, false); if (OMPI_SUCCESS != ret) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -671,9 +669,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar return ret; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - return ret; + return ompi_osc_pt2pt_frag_finish (module, frag); } @@ -787,11 +783,11 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co ddt_len = ompi_datatype_pack_description_length(target_dt); frag_len = sizeof(ompi_osc_pt2pt_header_get_t) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, release_req); if (OMPI_SUCCESS != ret) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, release_req); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -804,9 +800,8 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co /* for bookkeeping the get is "outgoing" */ ompi_osc_signal_outgoing (module, target, 1); - /* flush will be called at the end of this function. make sure all post messages have - * arrived. 
*/ - if (!release_req && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) { + if (!release_req) { + /* wait for epoch to begin before starting rget operation */ ompi_osc_pt2pt_sync_wait (pt2pt_sync); } @@ -857,14 +852,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co *request = &pt2pt_request->super; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (!release_req) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int ompi_osc_pt2pt_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, @@ -977,6 +965,11 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin return OMPI_SUCCESS; } + if (!release_req) { + /* wait for epoch to begin before starting operation */ + ompi_osc_pt2pt_sync_wait (pt2pt_sync); + } + /* optimize the self case. TODO: optimize the local case */ if (ompi_comm_rank (module->comm) == target_rank) { *request = &pt2pt_request->super; @@ -1003,14 +996,14 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin } frag_len = sizeof(*header) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, false, release_req); if (OMPI_SUCCESS != ret) { frag_len = sizeof(*header) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req); if (OMPI_SUCCESS != ret) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(*header) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req); if 
(OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -1030,12 +1023,6 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin /* increment the number of outgoing fragments */ ompi_osc_signal_outgoing (module, target_rank, pt2pt_request->outstanding_requests); - /* flush will be called at the end of this function. make sure all post messages have - * arrived. */ - if (!release_req && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) { - ompi_osc_pt2pt_sync_wait (pt2pt_sync); - } - header = (ompi_osc_pt2pt_header_acc_t *) ptr; header->base.flags = 0; header->len = frag_len; @@ -1100,14 +1087,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin *request = (ompi_request_t *) pt2pt_request; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (!release_req) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target_rank); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int ompi_osc_pt2pt_get_accumulate(const void *origin_addr, int origin_count, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index 6a8f53ebc80..41bbe187b55 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -290,6 +290,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* fill in the function pointer part */ memcpy(module, &ompi_osc_pt2pt_module_template, sizeof(ompi_osc_base_module_t)); + module->rtag_counter = 2; /* initialize the objects, so that always free in cleanup */ OBJ_CONSTRUCT(&module->lock, opal_mutex_t); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 6883d79a4a8..dd8152e4286 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -8,7 +8,7 @@ * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -213,7 +213,7 @@ int ompi_osc_pt2pt_control_send (ompi_osc_pt2pt_module_t *module, int target, char *ptr; int ret; - ret = ompi_osc_pt2pt_frag_alloc(module, target, len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, len, &frag, &ptr, false, true); if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { memcpy (ptr, data, len); @@ -1618,6 +1618,7 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request) OPAL_THREAD_UNLOCK(&ompi_request_lock); assert(incoming_length >= sizeof(ompi_osc_pt2pt_header_base_t)); + (void)incoming_length; // silence compiler warning OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "received pt2pt callback for fragment. 
source = %d, count = %u, type = 0x%x", @@ -1682,33 +1683,6 @@ int ompi_osc_pt2pt_component_irecv (ompi_osc_pt2pt_module_t *module, void *buf, osc_pt2pt_incoming_req_complete, module); } - -static int -isend_completion_cb(ompi_request_t *request) -{ - ompi_osc_pt2pt_module_t *module = - (ompi_osc_pt2pt_module_t*) request->req_complete_cb_data; - - OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, - "isend_completion_cb called")); - - mark_outgoing_completion(module); - - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - - return OMPI_SUCCESS; -} - - -int ompi_osc_pt2pt_component_isend (ompi_osc_pt2pt_module_t *module, const void *buf, - size_t count, struct ompi_datatype_t *datatype, - int dest, int tag, struct ompi_communicator_t *comm) -{ - return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, comm, - isend_completion_cb, module); -} - int ompi_osc_pt2pt_isend_w_cb (const void *ptr, int count, ompi_datatype_t *datatype, int target, int tag, ompi_communicator_t *comm, ompi_request_complete_fn_t cb, void *ctx) { diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index 515ce82fdf8..da51b7db276 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -33,7 +33,8 @@ struct ompi_osc_pt2pt_frag_t { char *top; /* Number of operations which have started writing into the frag, but not yet completed doing so */ - int32_t pending; + volatile int32_t pending; + int32_t pending_long_sends; ompi_osc_pt2pt_frag_header_t *header; ompi_osc_pt2pt_module_t *module; }; @@ -44,16 +45,74 @@ extern int ompi_osc_pt2pt_frag_start(ompi_osc_pt2pt_module_t *module, ompi_osc_p extern int ompi_osc_pt2pt_frag_flush_target(ompi_osc_pt2pt_module_t *module, int target); extern int ompi_osc_pt2pt_frag_flush_all(ompi_osc_pt2pt_module_t *module); +static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module, + ompi_osc_pt2pt_frag_t* buffer) +{ + opal_atomic_wmb (); + if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { + opal_atomic_mb (); + return ompi_osc_pt2pt_frag_start(module, buffer); + } + + return OMPI_SUCCESS; +} + +static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (ompi_osc_pt2pt_module_t *module, + ompi_osc_pt2pt_peer_t *peer, + size_t request_len) +{ + ompi_osc_pt2pt_frag_t *curr; + + /* to ensure ordering flush the buffer on the peer */ + curr = peer->active_frag; + if (NULL != curr && opal_atomic_cmpset (&peer->active_frag, curr, NULL)) { + /* If there's something pending, the pending finish will + start the buffer. Otherwise, we need to start it now. 
*/ + int ret = ompi_osc_pt2pt_frag_finish (module, curr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return NULL; + } + } + + curr = (ompi_osc_pt2pt_frag_t *) opal_free_list_get (&mca_osc_pt2pt_component.frags); + if (OPAL_UNLIKELY(NULL == curr)) { + return NULL; + } + + curr->target = peer->rank; + + curr->header = (ompi_osc_pt2pt_frag_header_t*) curr->buffer; + curr->top = (char*) (curr->header + 1); + curr->remain_len = mca_osc_pt2pt_component.buffer_size; + curr->module = module; + curr->pending = 1; + + curr->header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_FRAG; + curr->header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; + if (module->passive_target_access_epoch) { + curr->header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; + } + curr->header->source = ompi_comm_rank(module->comm); + curr->header->num_ops = 1; + + return curr; +} + /* - * Note: module lock must be held during this operation + * Note: this function takes the module lock + * + * buffered sends will cache the fragment on the peer object associated with the + * target. unbuffered-sends will cause the target fragment to be flushed and + * will not be cached on the peer. this causes the fragment to be flushed as + * soon as it is sent. this allows request-based rma fragments to be completed + * so MPI_Test/MPI_Wait/etc will work as expected. */ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, int target, size_t request_len, ompi_osc_pt2pt_frag_t **buffer, - char **ptr) + char **ptr, bool long_send, bool buffered) { ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); ompi_osc_pt2pt_frag_t *curr; - int ret; /* osc pt2pt headers can have 64-bit values. 
these will need to be aligned * on an 8-byte boundary on some architectures so we up align the allocation @@ -64,51 +123,33 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in return OMPI_ERR_OUT_OF_RESOURCE; } + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, + "attempting to allocate buffer for %lu bytes to target %d. long send: %d, " + "buffered: %d", (unsigned long) request_len, target, long_send, buffered)); + OPAL_THREAD_LOCK(&module->lock); - curr = peer->active_frag; - if (NULL == curr || curr->remain_len < request_len) { - opal_free_list_item_t *item = NULL; - - if (NULL != curr) { - curr->remain_len = 0; - peer->active_frag = NULL; - opal_atomic_mb (); - - /* If there's something pending, the pending finish will - start the buffer. Otherwise, we need to start it now. */ - if (0 == OPAL_THREAD_ADD32(&curr->pending, -1)) { - ret = ompi_osc_pt2pt_frag_start(module, curr); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } + if (buffered) { + curr = peer->active_frag; + if (NULL == curr || curr->remain_len < request_len || (long_send && curr->pending_long_sends == 32)) { + curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); + if (OPAL_UNLIKELY(NULL == curr)) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; } - } - - item = opal_free_list_get (&mca_osc_pt2pt_component.frags); - if (OPAL_UNLIKELY(NULL == item)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - curr = peer->active_frag = (ompi_osc_pt2pt_frag_t*) item; - - curr->target = target; - - curr->header = (ompi_osc_pt2pt_frag_header_t*) curr->buffer; - curr->top = (char*) (curr->header + 1); - curr->remain_len = mca_osc_pt2pt_component.buffer_size; - curr->module = module; - curr->pending = 1; - curr->header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_FRAG; - curr->header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; - if (module->passive_target_access_epoch) { - curr->header->base.flags |= 
OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; + curr->pending_long_sends = long_send; + peer->active_frag = curr; + } else { + OPAL_THREAD_ADD32(&curr->header->num_ops, 1); + curr->pending_long_sends += long_send; } - curr->header->source = ompi_comm_rank(module->comm); - curr->header->num_ops = 0; - if (curr->remain_len < request_len) { + OPAL_THREAD_ADD32(&curr->pending, 1); + } else { + curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); + if (OPAL_UNLIKELY(NULL == curr)) { OPAL_THREAD_UNLOCK(&module->lock); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } } @@ -117,24 +158,8 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in curr->top += request_len; curr->remain_len -= request_len; - OPAL_THREAD_UNLOCK(&module->lock); - - OPAL_THREAD_ADD32(&curr->pending, 1); - OPAL_THREAD_ADD32(&curr->header->num_ops, 1); - return OMPI_SUCCESS; -} - - -/* - * Note: module lock must be held for this operation - */ -static inline int ompi_osc_pt2pt_frag_finish(ompi_osc_pt2pt_module_t *module, - ompi_osc_pt2pt_frag_t* buffer) -{ - if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { - return ompi_osc_pt2pt_frag_start(module, buffer); - } + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 0ddc4cf326e..099aa564624 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. 
@@ -244,6 +244,8 @@ static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module } } + } else { + lock->eager_send_active = true; } return OMPI_SUCCESS; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_request.c b/ompi/mca/osc/pt2pt/osc_pt2pt_request.c index eddccf5b426..6741036e110 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_request.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_request.c @@ -51,6 +51,7 @@ request_construct(ompi_osc_pt2pt_request_t *request) request->super.req_status._cancelled = 0; request->super.req_free = request_free; request->super.req_cancel = request_cancel; + request->outstanding_requests = 0; } OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_request_t, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_request.h b/ompi/mca/osc/pt2pt/osc_pt2pt_request.h index 07b9d53093e..dee5c86892d 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_request.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_request.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -57,6 +57,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_request_t); #define OMPI_OSC_PT2PT_REQUEST_RETURN(req) \ do { \ OMPI_REQUEST_FINI(&(req)->super); \ + (req)->outstanding_requests = 0; \ opal_free_list_return (&mca_osc_pt2pt_component.requests, \ (opal_free_list_item_t *) (req)); \ } while (0) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h index eee29645c22..f4e4adcae0a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -163,8 +163,10 @@ static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync) { int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1); if (0 == new_value) { + OPAL_THREAD_LOCK(&sync->lock); sync->eager_send_active = true; opal_condition_broadcast (&sync->cond); + OPAL_THREAD_UNLOCK(&sync->lock); } } diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index ef0b018409f..1400d396a7f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -180,8 +180,8 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) int my_rank = ompi_comm_rank (module->comm); ompi_osc_rdma_state_t *state = module->state; volatile bool atomic_complete; - ompi_osc_rdma_frag_t *frag; - osc_rdma_counter_t *temp; + ompi_osc_rdma_frag_t *frag = NULL; + osc_rdma_counter_t *temp = NULL; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post: %p, %d, %s", (void*) group, assert, win->w_name); @@ -193,7 +193,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) /* save the group */ OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); OPAL_THREAD_LOCK(&module->lock); @@ -371,7 +370,6 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win /* save the group */ OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); if (!(assert & MPI_MODE_NOCHECK)) { /* look through list of pending posts */ @@ -421,9 +419,11 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_sync_t *sync = &module->all_sync; + ompi_osc_rdma_frag_t *frag = NULL; ompi_osc_rdma_peer_t **peers; + void *scratch_lock = NULL; ompi_group_t *group; - int group_size; + int group_size, ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, 
"complete: %s", win->w_name); @@ -440,7 +440,6 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) sync->epoch_active = false; /* phase 2 cleanup group */ - ompi_group_decrement_proc_count(group); OBJ_RELEASE(group); peers = sync->peer_list.peers; @@ -457,11 +456,18 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) ompi_osc_rdma_sync_rdma_complete (sync); + if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags)) { + /* need a temporary buffer for performing fetching atomics */ + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + } + /* for each process in the group increment their number of complete messages */ for (int i = 0 ; i < group_size ; ++i) { ompi_osc_rdma_peer_t *peer = peers[i]; intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs); - int ret; if (!ompi_osc_rdma_peer_local_state (peer)) { do { @@ -471,8 +477,8 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) ompi_osc_rdma_atomic_complete, NULL, NULL); } else { /* don't care about the read value so use the scratch lock */ - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, &module->state->scratch_lock, - target, module->state_handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock, + target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL); } @@ -485,6 +491,10 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) } } + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } + /* release our reference to peers in this group */ ompi_osc_rdma_release_peers (peers, group_size); @@ -526,7 +536,6 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win) module->pw_group = NULL; OPAL_THREAD_UNLOCK(&module->lock); - ompi_group_decrement_proc_count(group); 
OBJ_RELEASE(group); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait complete"); @@ -571,7 +580,6 @@ int ompi_osc_rdma_test_atomic (ompi_win_t *win, int *flag) module->pw_group = NULL; OPAL_THREAD_UNLOCK(&(module->lock)); - ompi_group_decrement_proc_count(group); OBJ_RELEASE(group); return OMPI_SUCCESS; diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 3b9f8f8de27..5d9335613ce 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -15,6 +15,11 @@ #include "osc_rdma_dynamic.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" +#include "opal/align.h" + +static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, + mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, + ompi_osc_rdma_request_t *request); static void ompi_osc_get_data_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, mca_btl_base_registration_handle_t *local_handle, @@ -136,7 +141,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc ompi_osc_rdma_peer_t *peer, uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, int remote_count, ompi_datatype_t *remote_datatype, ompi_osc_rdma_request_t *request, const size_t max_rdma_len, - const ompi_osc_rdma_fn_t rdma_fn,const bool alloc_reqs) + const ompi_osc_rdma_fn_t rdma_fn, const bool alloc_reqs) { ompi_osc_rdma_module_t *module = sync->module; struct iovec local_iovec[OMPI_OSC_RDMA_DECODE_MAX], remote_iovec[OMPI_OSC_RDMA_DECODE_MAX]; @@ -484,7 +489,7 @@ static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_r 
ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); - OPAL_THREAD_SCOPED_LOCK(&sync->lock, opal_list_append (&sync->aggregations, (opal_list_item_t *) aggregation)); + opal_list_append (&sync->aggregations, (opal_list_item_t *) aggregation); return OMPI_SUCCESS; } @@ -575,11 +580,13 @@ static void ompi_osc_rdma_get_complete (struct mca_btl_base_module_t *btl, struc assert (OPAL_SUCCESS == status); - if (NULL != frag) { + if (request->buffer || NULL != frag) { if (OPAL_LIKELY(OMPI_SUCCESS == status)) { memcpy (origin_addr, (void *) source, request->len); } + } + if (NULL != frag) { ompi_osc_rdma_frag_complete (frag); } else { ompi_osc_rdma_deregister (sync->module, local_handle); @@ -621,6 +628,27 @@ int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer) } +static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, + mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, + ompi_osc_rdma_request_t *request) { + ompi_osc_rdma_module_t *module = sync->module; + ompi_osc_rdma_request_t *subreq; + int ret; + + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, subreq); + subreq->internal = true; + subreq->type = OMPI_OSC_RDMA_TYPE_RDMA; + subreq->parent_request = request; + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + + ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OMPI_OSC_RDMA_REQUEST_RETURN(subreq); + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + } + + return ret; +} static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, @@ -639,33 +667,81 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p aligned_source_bound = (source_address + size 
+ btl_alignment_mask) & ~btl_alignment_mask; aligned_len = aligned_source_bound - aligned_source_base; - request->offset = source_address - aligned_source_base; - request->len = size; - request->origin_addr = target_buffer; - request->sync = sync; - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating get of %lu bytes from remote ptr %" PRIx64 " to local ptr %p", size, source_address, target_buffer); if ((module->selected_btl->btl_register_mem && size > module->selected_btl->btl_get_local_registration_threshold) || (((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) { + ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* check for alignment */ - if (!(((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) { - (void) ompi_osc_rdma_register (module, peer->data_endpoint, target_buffer, size, MCA_BTL_REG_FLAG_LOCAL_WRITE, + if (OMPI_ERR_VALUE_OUT_OF_BOUNDS == ret) { + /* region is too large for a buffered read */ + size_t subsize; + + if ((source_address & btl_alignment_mask) && (source_address & btl_alignment_mask) == ((intptr_t) target_buffer & btl_alignment_mask)) { + /* remote region has the same alignment but the base is not aligned. 
perform a small + * buffered get of the beginning of the remote region */ + aligned_source_base = OPAL_ALIGN(source_address, module->selected_btl->btl_get_alignment, osc_rdma_base_t); + subsize = (size_t) (aligned_source_base - source_address); + + ret = ompi_osc_rdma_get_partial (sync, peer, source_address, source_handle, target_buffer, subsize, request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + source_address += subsize; + target_buffer = (void *) ((intptr_t) target_buffer + subsize); + size -= subsize; + + aligned_len = aligned_source_bound - aligned_source_base; + } + + if (!(((uint64_t) target_buffer | source_address) & btl_alignment_mask) && + (size & btl_alignment_mask)) { + /* remote region bases are aligned but the bounds are not. perform a + * small buffered get of the end of the remote region */ + aligned_len = size & ~btl_alignment_mask; + subsize = size - aligned_len; + size = aligned_len; + ret = ompi_osc_rdma_get_partial (sync, peer, source_address + aligned_len, source_handle, + (void *) ((intptr_t) target_buffer + aligned_len), subsize, request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + } + /* (remaining) user request is now correctly aligned */ + } + + if ((((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) { + /* local and remote alignments differ */ + request->buffer = ptr = malloc (aligned_len); + } else { + ptr = target_buffer; + } + + if (NULL != ptr) { + (void) ompi_osc_rdma_register (module, peer->data_endpoint, ptr, aligned_len, MCA_BTL_REG_FLAG_LOCAL_WRITE, &local_handle); } if (OPAL_UNLIKELY(NULL == local_handle)) { - return OMPI_ERR_OUT_OF_RESOURCE; + free (request->buffer); + request->buffer = NULL; + return ret; } } else { - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "using internal buffer %p in fragment %p for get", ptr, (void *) frag); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "using internal buffer %p in fragment %p for get of size %lu bytes, source address 
0x%lx", + ptr, (void *) frag, aligned_len, (unsigned long) aligned_source_base); local_handle = frag->handle; } } + request->offset = source_address - aligned_source_base; + request->len = size; + request->origin_addr = target_buffer; + request->sync = sync; + ompi_osc_rdma_sync_rdma_inc (sync); do { diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 066e7777fc9..1f5913289ff 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -9,11 +9,13 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +38,11 @@ #include "opal/threads/mutex.h" #include "opal/util/arch.h" +#include "opal/util/argv.h" #include "opal/align.h" +#if OPAL_CUDA_SUPPORT +#include "opal/datatype/opal_datatype_cuda.h" +#endif /* OPAL_CUDA_SUPPORT */ #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" @@ -259,14 +265,21 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads, } OBJ_CONSTRUCT(&mca_osc_rdma_component.aggregate, opal_free_list_t); - ret = opal_free_list_init (&mca_osc_rdma_component.aggregate, - sizeof(ompi_osc_rdma_aggregation_t), 8, - OBJ_CLASS(ompi_osc_rdma_aggregation_t), 0, 0, - 32, 128, 32, NULL, 0, NULL, NULL, NULL); - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "%s:%d: opal_free_list_init failed: %d\n", - __FILE__, __LINE__, ret); + + if (!enable_mpi_threads && mca_osc_rdma_component.aggregation_limit) { + ret = opal_free_list_init (&mca_osc_rdma_component.aggregate, + sizeof(ompi_osc_rdma_aggregation_t), 8, + OBJ_CLASS(ompi_osc_rdma_aggregation_t), 0, 0, + 32, 128, 32, NULL, 0, NULL, NULL, NULL); + + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: opal_free_list_init failed: %d\n", + __FILE__, __LINE__, ret); + } + } else { + /* only enable put aggregation when not using threads */ + mca_osc_rdma_component.aggregation_limit = 0; } return ret; @@ -303,6 +316,15 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s return -1; } +#if OPAL_CUDA_SUPPORT + /* GPU buffers are not supported by the rdma component */ + if (MPI_WIN_FLAVOR_CREATE == flavor) { + if (opal_cuda_check_bufs(*base, NULL)) { + return -1; + } + } +#endif /* OPAL_CUDA_SUPPORT */ + if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) { return -1; } @@ -605,7 +627,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s peer->state_handle = 
(mca_btl_base_registration_handle_t *) state_region->btl_handle_data; } peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i); - peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_rank); + peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank); } /* finish setting up the local peer structure */ @@ -985,7 +1007,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, } /* initialize the objects, so that always free in cleanup */ - OBJ_CONSTRUCT(&module->lock, opal_mutex_t); + OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t); OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); OBJ_CONSTRUCT(&module->peer_lock, opal_mutex_t); diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.h b/ompi/mca/osc/rdma/osc_rdma_frag.h index 6a5215f770f..e9636a24d25 100644 --- a/ompi/mca/osc/rdma/osc_rdma_frag.h +++ b/ompi/mca/osc/rdma/osc_rdma_frag.h @@ -60,7 +60,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size request_len = OPAL_ALIGN(request_len, 8, size_t); if (request_len > (mca_osc_rdma_component.buffer_size >> 1)) { - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_VALUE_OUT_OF_BOUNDS; } OPAL_THREAD_LOCK(&module->lock); @@ -73,9 +73,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size module->rdma_frag = NULL; if (curr) { - OPAL_THREAD_UNLOCK(&module->lock); ompi_osc_rdma_frag_complete (curr); - OPAL_THREAD_LOCK(&module->lock); } item = opal_free_list_get (&mca_osc_rdma_component.frags); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 5d33f09e660..67e78a2a68e 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -49,8 +49,8 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod ompi_osc_rdma_lock_t value, ptrdiff_t offset) { 
uint64_t lock = (uint64_t) (intptr_t) peer->state + offset; - void *temp = &module->state->scratch_lock; volatile bool atomic_complete = false; + void *temp = NULL; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing shared lock %" PRIx64 " on peer %d. value 0x%lx", lock, @@ -58,24 +58,36 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod /* spin until the lock has been acquired */ if (!ompi_osc_rdma_peer_local_state (peer)) { + ompi_osc_rdma_frag_t *frag = NULL; + if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) { ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, (intptr_t) lock, peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); } else { - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock, module->state_handle, - peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER, - ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock, + frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, + MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, + NULL); } if (OPAL_SUCCESS == ret) { while (!atomic_complete) { ompi_osc_rdma_progress (module); } - } else if (1 == OPAL_SUCCESS) { + } else if (1 == ret) { ret = OMPI_SUCCESS; } + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } + return ret; } else { (void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value); @@ -105,7 +117,7 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod { uint64_t lock = (uint64_t) peer->state + offset; volatile bool 
atomic_complete; - ompi_osc_rdma_lock_t *temp; + ompi_osc_rdma_lock_t *temp = NULL; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring shared lock %" PRIx64 " on peer %d. value 0x%lx", lock, @@ -279,32 +291,40 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t * ptrdiff_t offset) { uint64_t lock = (uint64_t) (intptr_t) peer->state + offset; - void *temp = &module->state->scratch_lock; volatile bool atomic_complete = false; + void *temp = NULL; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank); if (!ompi_osc_rdma_peer_local_state (peer)) { + ompi_osc_rdma_frag_t *frag = NULL; + if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) { ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, lock, peer->state_handle, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); } else { - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, module->state_handle, + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); } - if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) { - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "error releasing exclusive lock"); - return ret; - } - if (OPAL_SUCCESS == ret) { while (!atomic_complete) { ompi_osc_rdma_progress (module); } + } else if (1 == ret) { + ret = OMPI_SUCCESS; + } + + if (frag) { + ompi_osc_rdma_frag_complete (frag); } } else { ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock); diff --git 
a/ompi/mca/osc/rdma/osc_rdma_peer.c b/ompi/mca/osc/rdma/osc_rdma_peer.c index 8b59607cf94..f7a7bde7700 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.c +++ b/ompi/mca/osc/rdma/osc_rdma_peer.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -9,6 +11,10 @@ * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include +#endif + #include "osc_rdma_comm.h" #include "ompi/mca/bml/base/base.h" @@ -179,6 +185,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "peer %d: remote base region: 0x%" PRIx64 ", size: %" PRId64 ", flags: 0x%x, disp_unit: %d", peer->rank, base_region->base, base_region->len, peer->flags, disp_unit); + (void)disp_unit; // silence compiler warning if (ompi_osc_rdma_peer_local_base (peer)) { /* for now we store the local address in the standard place. 
do no overwrite it */ diff --git a/ompi/mca/osc/rdma/osc_rdma_request.c b/ompi/mca/osc/rdma/osc_rdma_request.c index a5a8bb8084e..86b088b1b6a 100644 --- a/ompi/mca/osc/rdma/osc_rdma_request.c +++ b/ompi/mca/osc/rdma/osc_rdma_request.c @@ -59,7 +59,10 @@ static void request_construct(ompi_osc_rdma_request_t *request) request->super.req_free = request_free; request->super.req_cancel = request_cancel; request->super.req_complete_cb = request_complete; - request->parent_request = 0; + request->parent_request = NULL; + request->buffer = NULL; + request->internal = false; + request->outstanding_requests = 0; OBJ_CONSTRUCT(&request->convertor, opal_convertor_t); } diff --git a/ompi/mca/osc/rdma/osc_rdma_request.h b/ompi/mca/osc/rdma/osc_rdma_request.h index ad10e4c69a5..3cec365a7aa 100644 --- a/ompi/mca/osc/rdma/osc_rdma_request.h +++ b/ompi/mca/osc/rdma/osc_rdma_request.h @@ -60,6 +60,7 @@ struct ompi_osc_rdma_request_t { /** synchronization object */ struct ompi_osc_rdma_sync_t *sync; + void *buffer; }; typedef struct ompi_osc_rdma_request_t ompi_osc_rdma_request_t; OBJ_CLASS_DECLARATION(ompi_osc_rdma_request_t); @@ -78,18 +79,19 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_request_t); req = (ompi_osc_rdma_request_t*) item; \ OMPI_REQUEST_INIT(&req->super, false); \ req->super.req_mpi_object.win = module->win; \ - req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ req->module = rmodule; \ - req->internal = false; \ - req->outstanding_requests = 0; \ - req->parent_request = NULL; \ req->peer = (rpeer); \ } while (0) #define OMPI_OSC_RDMA_REQUEST_RETURN(req) \ do { \ OMPI_REQUEST_FINI(&(req)->super); \ + free ((req)->buffer); \ + (req)->buffer = NULL; \ + (req)->parent_request = NULL; \ + (req)->internal = false; \ + (req)->outstanding_requests = 0; \ opal_free_list_return (&mca_osc_rdma_component.requests, \ (opal_free_list_item_t *) (req)); \ } while (0) diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 
47d99155764..123238d0209 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -151,8 +151,6 @@ struct ompi_osc_rdma_state_t { ompi_osc_rdma_lock_t local_lock; /** lock for the accumulate state to ensure ordering and consistency */ ompi_osc_rdma_lock_t accumulate_lock; - /** persistent scratch space for fetch and op/cswap when the result is not needed */ - ompi_osc_rdma_lock_t scratch_lock; /** current index to post to. compare-and-swap must be used to ensure * the index is free */ osc_rdma_counter_t post_index; diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 672850be996..fdd1117eafd 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -315,6 +315,8 @@ ompi_osc_sm_test(struct ompi_win_t *win, OBJ_RELEASE(module->post_group); module->post_group = NULL; *flag = 1; + } else { + *flag = 0; } OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 2e2948e7a2d..6eebf0e11c9 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -208,10 +208,9 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->posts[0] = (uint64_t *) (module->posts + 1); } else { unsigned long total, *rbuf; - char *data_file; int i, flag; size_t pagesize; - size_t state_size; + size_t state_size; int posts_size, post_size = (comm_size + 63) / 64; OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, @@ -245,22 +244,24 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit total += rbuf[i]; } - if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s", - ompi_process_info.job_session_dir, - ompi_comm_get_cid(module->comm), - ompi_process_info.nodename) < 0) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - /* user opal/shmem directly to create a shared memory segment */ state_size = sizeof(ompi_osc_sm_global_state_t) + sizeof(ompi_osc_sm_node_state_t) * comm_size; posts_size = comm_size * post_size * sizeof (uint64_t); - if (0 == ompi_comm_rank (module->comm)) { - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size + posts_size); - if (OPAL_SUCCESS != ret) { - goto error; - } - } + if (0 == ompi_comm_rank (module->comm)) { + char *data_file; + if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s", + ompi_process_info.proc_session_dir, + ompi_comm_get_cid(module->comm), + ompi_process_info.nodename) < 0) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size + posts_size); + free(data_file); + if (OPAL_SUCCESS != ret) { + goto error; + } + } ret = module->comm->c_coll.coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, module->comm, module->comm->c_coll.coll_bcast_module); diff --git a/ompi/mca/pml/bfo/pml_bfo_cuda.c b/ompi/mca/pml/bfo/pml_bfo_cuda.c index 9c593cd691d..eb35b226e0e 100644 --- a/ompi/mca/pml/bfo/pml_bfo_cuda.c +++ b/ompi/mca/pml/bfo/pml_bfo_cuda.c @@ -11,7 +11,7 @@ * 
All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +50,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size) { int rc; -#if OPAL_CUDA_SUPPORT_41 sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { unsigned char *base; @@ -81,10 +80,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq, sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0); } -#else - /* Just do the rendezvous but set initial data to be sent to zero */ - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0); -#endif /* OPAL_CUDA_SUPPORT_41 */ return rc; } diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h index 2c697910d9c..db04c7a77f6 100644 --- a/ompi/mca/pml/cm/pml_cm.h +++ b/ompi/mca/pml/cm/pml_cm.h @@ -16,6 +16,10 @@ #ifndef PML_CM_H #define PML_CM_H +#ifdef HAVE_ALLOCA_H +#include +#endif + #include "ompi_config.h" #include "ompi/request/request.h" #include "ompi/mca/pml/pml.h" diff --git a/ompi/mca/pml/monitoring/Makefile.am b/ompi/mca/pml/monitoring/Makefile.am new file mode 100644 index 00000000000..517af90c0fd --- /dev/null +++ b/ompi/mca/pml/monitoring/Makefile.am @@ -0,0 +1,38 @@ +# +# Copyright (c) 2013-2015 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2013-2015 Inria. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +monitoring_sources = \ + pml_monitoring.c \ + pml_monitoring.h \ + pml_monitoring_comm.c \ + pml_monitoring_component.c \ + pml_monitoring_iprobe.c \ + pml_monitoring_irecv.c \ + pml_monitoring_isend.c \ + pml_monitoring_start.c + +if MCA_BUILD_ompi_pml_monitoring_DSO +component_noinst = +component_install = mca_pml_monitoring.la +else +component_noinst = libmca_pml_monitoring.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pml_monitoring_la_SOURCES = $(monitoring_sources) +mca_pml_monitoring_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pml_monitoring_la_SOURCES = $(monitoring_sources) +libmca_pml_monitoring_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/monitoring/README b/ompi/mca/pml/monitoring/README new file mode 100644 index 00000000000..8361027d658 --- /dev/null +++ b/ompi/mca/pml/monitoring/README @@ -0,0 +1,181 @@ + + Copyright (c) 2013-2015 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. + Copyright (c) 2013-2015 Inria. All rights reserved. + $COPYRIGHT$ + + Additional copyrights may follow + + $HEADER$ + +=========================================================================== + +Low level communication monitoring interface in Open MPI + +Introduction +------------ +This interface traces and monitors all messages sent by MPI before they go to the +communication channels. At that level all communications are point-to-point communications: +collectives are already decomposed into send and receive calls. + +The monitoring is stored internally by each process and output on stderr at the end of the +application (during MPI_Finalize()). + + +Enabling the monitoring +----------------------- +To enable the monitoring add --mca pml_monitoring_enable x to the mpirun command line. 
+If x = 1 it monitors internal and external tags indifferently and aggregates everything. +If x = 2 it monitors internal tags and external tags separately. +If x = 0 the monitoring is disabled. +Other values of x are not supported. + +Internal tags are tags < 0. They are used to tag send and receive coming from +collective operations or from protocol communications + +External tags are tags >=0. They are used by the application in point-to-point communication. + +Therefore, distinguishing external and internal tags helps to distinguish between point-to-point +and other communication (mainly collectives). + +Output format +------------- +The output of the monitoring looks like (with --mca pml_monitoring_enable 2): +I 0 1 108 bytes 27 msgs sent +E 0 1 1012 bytes 30 msgs sent +E 0 2 23052 bytes 61 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 208 bytes 52 msgs sent +E 1 0 860 bytes 24 msgs sent +E 1 3 2552 bytes 56 msgs sent +I 2 3 104 bytes 26 msgs sent +E 2 0 22804 bytes 49 msgs sent +E 2 3 860 bytes 24 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 204 bytes 51 msgs sent +E 3 1 2304 bytes 44 msgs sent +E 3 2 860 bytes 24 msgs sent + +Where: + - the first column distinguishes internal (I) and external (E) tags. + - the second column is the sender rank + - the third column is the receiver rank + - the fourth column is the number of bytes sent + - the last column is the number of messages. + +In this example process 0 has sent 27 messages to process 1 using point-to-point calls +for 108 bytes and 30 messages with collectives and protocol related communication +for 1012 bytes to process 1. + +If the monitoring was called with --mca pml_monitoring_enable 1 everything is aggregated +under the internal tags. 
With the above example, you have: +I 0 1 1120 bytes 57 msgs sent +I 0 2 23052 bytes 61 msgs sent +I 1 0 860 bytes 24 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 2760 bytes 108 msgs sent +I 2 0 22804 bytes 49 msgs sent +I 2 3 964 bytes 50 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 2508 bytes 95 msgs sent +I 3 2 860 bytes 24 msgs sent + +Monitoring phases +----------------- +If one wants to monitor phases of the application, it is possible to flush the monitoring +at the application level. In this case all the monitoring since the last flush is stored +by every process in a file. + +An example of how to flush such monitoring is given in test/monitoring/monitoring_test.c + +Moreover, all the different flushed phases are aggregated at runtime and output at the end +of the application as described above. + +Example +------- +A working example is given in test/monitoring/monitoring_test.c +It features MPI_COMM_WORLD monitoring, sub-communicator monitoring, collective and +point-to-point communication monitoring and phases monitoring + +To compile: +> make monitoring_test + +Helper scripts +-------------- +Two perl scripts are provided in test/monitoring +- aggregate_profile.pl is for aggregating monitoring phases of different processes + This script aggregates the profiles generated by the flush_monitoring function. + The files need to be in the given format: name_<phase_id>_<process_id>.prof + They are then aggregated by phases. + If one needs the profile of all the phases he can concatenate the different files, + or use the output of the monitoring system done at MPI_Finalize + in the example it should be called as: + ./aggregate_profile.pl prof/phase to generate + prof/phase_1.prof + prof/phase_2.prof + +- profile2mat.pl is for transforming the monitoring output into a communication matrix. + It takes a profile file and aggregates all the recorded communications into matrices. 
+ It generates a matrix for the number of messages (msg), + for the total bytes transmitted (size) and + the average number of bytes per message (avg) + + The output matrix is symmetric + +Do not forget to set the execute permission on these scripts. + +For instance, the provided examples store phases output in ./prof + +If you type: +> mpirun -np 4 --mca pml_monitoring_enable 2 ./monitoring_test +you should have the following output +Proc 3 flushing monitoring to: ./prof/phase_1_3.prof +Proc 0 flushing monitoring to: ./prof/phase_1_0.prof +Proc 2 flushing monitoring to: ./prof/phase_1_2.prof +Proc 1 flushing monitoring to: ./prof/phase_1_1.prof +Proc 1 flushing monitoring to: ./prof/phase_2_1.prof +Proc 3 flushing monitoring to: ./prof/phase_2_3.prof +Proc 0 flushing monitoring to: ./prof/phase_2_0.prof +Proc 2 flushing monitoring to: ./prof/phase_2_2.prof +I 2 3 104 bytes 26 msgs sent +E 2 0 22804 bytes 49 msgs sent +E 2 3 860 bytes 24 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 204 bytes 51 msgs sent +E 3 1 2304 bytes 44 msgs sent +E 3 2 860 bytes 24 msgs sent +I 0 1 108 bytes 27 msgs sent +E 0 1 1012 bytes 30 msgs sent +E 0 2 23052 bytes 61 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 208 bytes 52 msgs sent +E 1 0 860 bytes 24 msgs sent +E 1 3 2552 bytes 56 msgs sent + +you can parse the phases with: +> ./aggregate_profile.pl prof/phase +Building prof/phase_1.prof +Building prof/phase_2.prof + +And you can build the different communication matrices of phase 1 with: +> ./profile2mat.pl prof/phase_1.prof +prof/phase_1.prof -> all +prof/phase_1_size_all.mat +prof/phase_1_msg_all.mat +prof/phase_1_avg_all.mat + +prof/phase_1.prof -> external +prof/phase_1_size_external.mat +prof/phase_1_msg_external.mat +prof/phase_1_avg_external.mat + +prof/phase_1.prof -> internal +prof/phase_1_size_internal.mat +prof/phase_1_msg_internal.mat +prof/phase_1_avg_internal.mat + +Credit +------ +Designed by George Bosilca and +Emmanuel Jeannot diff --git 
a/ompi/mca/pml/monitoring/pml_monitoring.c b/ompi/mca/pml/monitoring/pml_monitoring.c new file mode 100644 index 00000000000..cd848b5f4ce --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include "opal/class/opal_hash_table.h" + +/* array for stroring monitoring data*/ +uint64_t* sent_data = NULL; +uint64_t* messages_count = NULL; +uint64_t* filtered_sent_data = NULL; +uint64_t* filtered_messages_count = NULL; + +static int init_done = 0; +static int nbprocs = -1; +static int my_rank = -1; +opal_hash_table_t *translation_ht = NULL; + + +mca_pml_monitoring_module_t mca_pml_monitoring = { + mca_pml_monitoring_add_procs, + mca_pml_monitoring_del_procs, + mca_pml_monitoring_enable, + NULL, + mca_pml_monitoring_add_comm, + mca_pml_monitoring_del_comm, + mca_pml_monitoring_irecv_init, + mca_pml_monitoring_irecv, + mca_pml_monitoring_recv, + mca_pml_monitoring_isend_init, + mca_pml_monitoring_isend, + mca_pml_monitoring_send, + mca_pml_monitoring_iprobe, + mca_pml_monitoring_probe, + mca_pml_monitoring_start, + mca_pml_monitoring_improbe, + mca_pml_monitoring_mprobe, + mca_pml_monitoring_imrecv, + mca_pml_monitoring_mrecv, + mca_pml_monitoring_dump, + NULL, + 65535, + INT_MAX +}; + +int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs) +{ + /** + * Create the monitoring hashtable only for my MPI_COMM_WORLD. We choose + * to ignore by now all other processes. 
+ */ + if(NULL == translation_ht) { + size_t i; + uint64_t key; + opal_process_name_t tmp; + + nbprocs = nprocs; + + translation_ht = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(translation_ht, 2048); + + + for( i = 0; i < nprocs; i++ ) { + /* rank : ompi_proc_local_proc in procs */ + if( procs[i] == ompi_proc_local_proc) + my_rank = i; + /* Extract the peer procname from the procs array */ + if( ompi_proc_is_sentinel(procs[i]) ) { + tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]); + } else { + tmp = procs[i]->super.proc_name; + } + key = *((uint64_t*)&tmp); + /* store the rank (in COMM_WORLD) of the process + with its name (a uniq opal ID) as key in the hash table*/ + if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(translation_ht, + key, (void*)(uintptr_t)i) ) { + return OMPI_ERR_OUT_OF_RESOURCE; /* failed to allocate memory or growing the hash table */ + } + } + } + return pml_selected_module.pml_add_procs(procs, nprocs); +} + + +int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, + size_t nprocs) +{ + return pml_selected_module.pml_del_procs(procs, nprocs); +} + +int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, + int verbose) +{ + return pml_selected_module.pml_dump(comm, verbose); +} + + +void finalize_monitoring( void ) +{ + free(filtered_sent_data); + free(filtered_messages_count); + free(sent_data); + free(messages_count); + opal_hash_table_remove_all( translation_ht ); + free(translation_ht); + +} + +static void initialize_monitoring( void ) +{ + sent_data = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); + messages_count = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); + filtered_sent_data = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); + filtered_messages_count = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); + + init_done = 1; +} + +void mca_pml_monitoring_reset( void ) +{ + if( !init_done ) return; + memset(sent_data, 0, nbprocs * sizeof(uint64_t)); + memset(messages_count, 0, nbprocs * sizeof(uint64_t)); + 
memset(filtered_sent_data, 0, nbprocs * sizeof(uint64_t)); + memset(filtered_messages_count, 0, nbprocs * sizeof(uint64_t)); +} +/* Account for one message of data_size bytes sent to peer world_rank: no-op while filter_monitoring() reports 0, lazily runs initialize_monitoring(), then updates either the filtered or the regular counters. */ +void monitor_send_data(int world_rank, size_t data_size, int tag) +{ + if( 0 == filter_monitoring() ) return; /* right now the monitoring is not started */ + + if ( !init_done ) + initialize_monitoring(); + + /* distinguishes positive and negative tags if requested. NOTE(review): negative tags go to the filtered counters only when the state is exactly 1, yet output_monitoring() returns before printing those counters when the state is 1 -- confirm which state is meant to enable filtering */ + if((tag<0) && (1 == filter_monitoring())){ + filtered_sent_data[world_rank] += data_size; + filtered_messages_count[world_rank]++; + } else { /* if filtered monitoring is not activated data is aggregated indifferently */ + sent_data[world_rank] += data_size; + messages_count[world_rank]++; + } +} +/* Copy the per-peer message counters into value (one uint64_t per rank of the communicator). Returns OMPI_ERROR unless obj_handle is the world communicator and the counters exist. */ +int mca_pml_monitoring_get_messages_count (const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size; + uint64_t *values = (uint64_t*) value; + int i; + + if(comm != &ompi_mpi_comm_world.comm || NULL == messages_count) + return OMPI_ERROR; + comm_size = ompi_comm_size (comm); /* queried only after comm has been validated */ + for (i = 0 ; i < comm_size ; ++i) { + values[i] = messages_count[i]; + } + + return OMPI_SUCCESS; +} +/* Copy the per-peer byte counters into value (one uint64_t per rank of the communicator). Returns OMPI_ERROR unless obj_handle is the world communicator and the counters exist. */ +int mca_pml_monitoring_get_messages_size (const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size; + uint64_t *values = (uint64_t*) value; + int i; + + if(comm != &ompi_mpi_comm_world.comm || NULL == sent_data) + return OMPI_ERROR; + comm_size = ompi_comm_size (comm); /* queried only after comm has been validated */ + for (i = 0 ; i < comm_size ; ++i) { + values[i] = sent_data[i]; + } + + return OMPI_SUCCESS; +} +/* Print to pf one "I" line per peer with a non-zero regular byte counter, then (unless filter_monitoring() is 1) one "E" line per peer with a non-zero filtered byte counter. */ +static void output_monitoring( FILE *pf ) +{ + if( 0 == filter_monitoring() ) return; /* if disabled do nothing */ + + for (int i = 0 ; i < nbprocs ; i++) { + if(sent_data[i] > 0) { + fprintf(pf, "I\t%d\t%d\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", + my_rank, i, sent_data[i], messages_count[i]); + } + } + + if( 1 == filter_monitoring() ) return; + + for (int i = 0 ; i 
< nbprocs ; i++) { + if(filtered_sent_data[i] > 0) { + fprintf(pf, "E\t%d\t%d\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", + my_rank, i, filtered_sent_data[i], filtered_messages_count[i]); + } + } +} + + +/* + Flushes the monitoring into filename (stderr when filename is NULL). Returns 0 on success, -1 when init_done is not set or the file cannot be opened. + Useful for phases (see example in test/monitoring) +*/ +int ompi_mca_pml_monitoring_flush(char* filename) +{ + FILE *pf = stderr; + + if ( !init_done ) return -1; + + if( NULL != filename ) + pf = fopen(filename, "w"); + + if(!pf) + return -1; + /* never hand a NULL pointer to %s: name the default stream instead */ + fprintf(stderr, "Proc %d flushing monitoring to: %s\n", my_rank, (NULL != filename) ? filename : "stderr"); + output_monitoring( pf ); + + if( NULL != filename ) + fclose(pf); + return 0; +} diff --git a/ompi/mca/pml/monitoring/pml_monitoring.h b/ompi/mca/pml/monitoring/pml_monitoring.h new file mode 100644 index 00000000000..efd9a5b0686 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2015 Bull SAS. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PML_MONITORING_H +#define MCA_PML_MONITORING_H + +BEGIN_C_DECLS + +#include +#include +#include +#include +#include +#include + +typedef mca_pml_base_module_t mca_pml_monitoring_module_t; + +extern mca_pml_base_component_t pml_selected_component; +extern mca_pml_base_module_t pml_selected_module; +extern mca_pml_monitoring_module_t mca_pml_monitoring; +OMPI_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_monitoring_component; + +/* + * PML interface functions. 
+ */ + +extern int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_del_comm(struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs); + +extern int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, + size_t nprocs); + +extern int mca_pml_monitoring_enable(bool enable); + +extern int mca_pml_monitoring_iprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_probe(int dst, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_improbe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + struct ompi_message_t **message, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_mprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_isend_init(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_isend(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_send(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_irecv_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_irecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct 
ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_recv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status); + +extern int mca_pml_monitoring_imrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_mrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status); + +extern int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, + int verbose); + +extern int mca_pml_monitoring_start(size_t count, + ompi_request_t** requests); + +int mca_pml_monitoring_get_messages_count (const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +int mca_pml_monitoring_get_messages_size (const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +void finalize_monitoring( void ); +int filter_monitoring( void ); +void mca_pml_monitoring_reset( void ); +int ompi_mca_pml_monitoring_flush(char* filename); +void monitor_send_data(int world_rank, size_t data_size, int tag); + +END_C_DECLS + +#endif /* MCA_PML_MONITORING_H */ diff --git a/ompi/mca/pml/monitoring/pml_monitoring_comm.c b/ompi/mca/pml/monitoring/pml_monitoring_comm.c new file mode 100644 index 00000000000..1200f7ad714 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_comm.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm) +{ + return pml_selected_module.pml_add_comm(comm); +} + +int mca_pml_monitoring_del_comm(struct ompi_communicator_t* comm) +{ + return pml_selected_module.pml_del_comm(comm); +} diff --git a/ompi/mca/pml/monitoring/pml_monitoring_component.c b/ompi/mca/pml/monitoring/pml_monitoring_component.c new file mode 100644 index 00000000000..017728ddfa1 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_component.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include + +static int mca_pml_monitoring_enabled = 0; +static int mca_pml_monitoring_active = 0; +static int mca_pml_monitoring_current_state = 0; +static char* mca_pml_monitoring_current_filename = NULL; +mca_pml_base_component_t pml_selected_component = {{0}}; +mca_pml_base_module_t pml_selected_module = {0}; + +/* Return the current status of the monitoring system 0 if off, 1 if the + * seperation between internal tags and external tags is enabled. Any other + * positive value if the segregation between point-to-point and collective is + * disabled. 
+ */ +int filter_monitoring( void ) +{ + return mca_pml_monitoring_current_state; +} + +static int +mca_pml_monitoring_set_flush(struct mca_base_pvar_t *pvar, const void *value, void *obj) +{ + if( NULL != mca_pml_monitoring_current_filename ) + free(mca_pml_monitoring_current_filename); + if( NULL == value ) /* No more output */ + mca_pml_monitoring_current_filename = NULL; + else { + mca_pml_monitoring_current_filename = strdup((char*)value); + if( NULL == mca_pml_monitoring_current_filename ) + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} + +static int +mca_pml_monitoring_get_flush(const struct mca_base_pvar_t *pvar, void *value, void *obj) +{ + return OMPI_SUCCESS; +} + +static int +mca_pml_monitoring_notify_flush(struct mca_base_pvar_t *pvar, mca_base_pvar_event_t event, + void *obj, int *count) +{ + switch (event) { + case MCA_BASE_PVAR_HANDLE_BIND: + mca_pml_monitoring_reset(); + *count = (NULL == mca_pml_monitoring_current_filename ? 0 : strlen(mca_pml_monitoring_current_filename)); + case MCA_BASE_PVAR_HANDLE_UNBIND: + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_START: + mca_pml_monitoring_current_state = mca_pml_monitoring_enabled; + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_STOP: + if( 0 == ompi_mca_pml_monitoring_flush(mca_pml_monitoring_current_filename) ) + return OMPI_SUCCESS; + } + return OMPI_ERROR; +} + +static int +mca_pml_monitoring_messages_notify(mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj_handle, + int *count) +{ + switch (event) { + case MCA_BASE_PVAR_HANDLE_BIND: + /* Return the size of the communicator as the number of values */ + *count = ompi_comm_size ((ompi_communicator_t *) obj_handle); + case MCA_BASE_PVAR_HANDLE_UNBIND: + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_START: + mca_pml_monitoring_current_state = mca_pml_monitoring_enabled; + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_STOP: + mca_pml_monitoring_current_state = 0; + return OMPI_SUCCESS; + } + + return OMPI_ERROR; +} + +int 
mca_pml_monitoring_enable(bool enable) +{ + /* If we reach this point we were successful at hijacking the interface of + * the real PML, and we are now correctly interleaved between the upper + * layer and the real PML. Register the three monitoring pvars (flush, messages_count, messages_size), then forward the enable request to the real PML. + */ + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "flush", "Flush the monitoring information " + "in the provided file", OPAL_INFO_LVL_1, MCA_BASE_PVAR_CLASS_GENERIC, + MCA_BASE_VAR_TYPE_STRING, NULL, MPI_T_BIND_NO_OBJECT, + 0, + mca_pml_monitoring_get_flush, mca_pml_monitoring_set_flush, + mca_pml_monitoring_notify_flush, &mca_pml_monitoring_component); + + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_count", "Number of messages " + "sent to each peer in a communicator", OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_pml_monitoring_get_messages_count, NULL, mca_pml_monitoring_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_size", "Size of messages " + "sent to each peer in a communicator", OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_pml_monitoring_get_messages_size, NULL, mca_pml_monitoring_messages_notify, NULL); + + return pml_selected_module.pml_enable(enable); +} + +static int mca_pml_monitoring_component_open(void) +{ + if( mca_pml_monitoring_enabled ) { + opal_pointer_array_add(&mca_pml_base_pml, + strdup(mca_pml_monitoring_component.pmlm_version.mca_component_name)); + } + return OMPI_SUCCESS; +} + +static int mca_pml_monitoring_component_close(void) +{ + if( NULL != mca_pml_monitoring_current_filename ) { + free(mca_pml_monitoring_current_filename); + mca_pml_monitoring_current_filename = NULL; + } + if( !mca_pml_monitoring_enabled ) + return OMPI_SUCCESS; + + /** + * If this component is already active, then we are currently monitoring the execution + * and this close if 
the one from MPI_Finalize. Do the clean up and release the extra + * reference on ourselves. + */ + if( mca_pml_monitoring_active ) { /* Already active, turn off */ + pml_selected_component.pmlm_version.mca_close_component(); + memset(&pml_selected_component, 0, sizeof(mca_pml_base_component_t)); + memset(&pml_selected_module, 0, sizeof(mca_pml_base_module_t)); + mca_base_component_repository_release((mca_base_component_t*)&mca_pml_monitoring_component); + mca_pml_monitoring_active = 0; + return OMPI_SUCCESS; + } + + /** + * We are supposed to monitor the execution. Save the winner PML component and + * module, and swap it with ourselves. Increase our refcount so that we are + * not dlclose. + */ + if( OPAL_SUCCESS != mca_base_component_repository_retain_component(mca_pml_monitoring_component.pmlm_version.mca_type_name, + mca_pml_monitoring_component.pmlm_version.mca_component_name) ) { + return OMPI_ERROR; + } + + /* Save a copy of the selected PML */ + pml_selected_component = mca_pml_base_selected_component; + pml_selected_module = mca_pml; + /* Install our interception layer */ + mca_pml_base_selected_component = mca_pml_monitoring_component; + mca_pml = mca_pml_monitoring; + /* Restore some of the original valued: progress, flags, tags and context id */ + mca_pml.pml_progress = pml_selected_module.pml_progress; + mca_pml.pml_max_contextid = pml_selected_module.pml_max_contextid; + mca_pml.pml_max_tag = pml_selected_module.pml_max_tag; + mca_pml.pml_flags = pml_selected_module.pml_flags; + + mca_pml_monitoring_active = 1; + + return OMPI_SUCCESS; +} + +static mca_pml_base_module_t* +mca_pml_monitoring_component_init(int* priority, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + if( mca_pml_monitoring_enabled ) { + *priority = 0; /* I'm up but don't select me */ + return &mca_pml_monitoring; + } + return NULL; +} + +static int mca_pml_monitoring_component_finish(void) +{ + if( mca_pml_monitoring_enabled && mca_pml_monitoring_active ) { + /* Free 
internal data structure */ + finalize_monitoring(); + /* Call the original PML and then close */ + mca_pml_monitoring_active = 0; + mca_pml_monitoring_enabled = 0; + /* Restore the original PML */ + mca_pml_base_selected_component = pml_selected_component; + mca_pml = pml_selected_module; + /* Redirect the close call to the original PML */ + pml_selected_component.pmlm_finalize(); + /** + * We should never release the last ref on the current component or face forever punishement. + */ + /* mca_base_component_repository_release(&mca_pml_monitoring_component.pmlm_version); */ + } + return OMPI_SUCCESS; +} + +static int mca_pml_monitoring_component_register(void) +{ + (void)mca_base_component_var_register(&mca_pml_monitoring_component.pmlm_version, "enable", + "Enable the monitoring at the PML level. A value of 0 will disable the monitoring (default). " + "A value of 1 will aggregate all monitoring information (point-to-point and collective). " + "Any other value will enable filtered monitoring", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_4, + MCA_BASE_VAR_SCOPE_READONLY, &mca_pml_monitoring_enabled); + + return OMPI_SUCCESS; +} + +mca_pml_base_component_2_0_0_t mca_pml_monitoring_component = { + + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + .pmlm_version = { + MCA_PML_BASE_VERSION_2_0_0, + + .mca_component_name = "monitoring", /* MCA component name */ + .mca_component_major_version = OMPI_MAJOR_VERSION, /* MCA component major version */ + .mca_component_minor_version = OMPI_MINOR_VERSION, /* MCA component minor version */ + .mca_component_release_version = OMPI_RELEASE_VERSION, /* MCA component release version */ + .mca_open_component = mca_pml_monitoring_component_open, /* component open */ + .mca_close_component = mca_pml_monitoring_component_close, /* component close */ + .mca_register_component_params = mca_pml_monitoring_component_register + }, + .pmlm_data = { + /* The component is 
checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .pmlm_init = mca_pml_monitoring_component_init, /* component init */ + .pmlm_finalize = mca_pml_monitoring_component_finish /* component finalize */ + +}; + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c new file mode 100644 index 00000000000..ec34cb5d27c --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + + +/* EJ: nothing to do here */ + +int mca_pml_monitoring_iprobe( int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + ompi_status_public_t* status ) +{ + return pml_selected_module.pml_iprobe(dst, tag, comm, + matched, status); +} + +int mca_pml_monitoring_probe( int dst, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ) +{ + return pml_selected_module.pml_probe(dst, tag, comm, status); +} + +int mca_pml_monitoring_improbe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + struct ompi_message_t **message, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_improbe(dst, tag, comm, + matched, message, status); +} + + +int mca_pml_monitoring_mprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_mprobe(dst, tag, comm, message, status); +} + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_irecv.c b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c new file mode 100644 index 00000000000..91b247c7c53 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2013-2015 
The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + + +/* EJ: loging is done on the sender. Nothing to do here */ + +int mca_pml_monitoring_irecv_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_irecv_init(buf, count, datatype, + src, tag, comm, request); +} + + +int mca_pml_monitoring_irecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_irecv(buf, count, datatype, + src, tag, comm, request); +} + + +int mca_pml_monitoring_recv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_recv(buf, count, datatype, + src, tag, comm, status); +} + + +int mca_pml_monitoring_imrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_imrecv(buf, count, datatype, + message, request); +} + + +int mca_pml_monitoring_mrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status) + +{ + return pml_selected_module.pml_mrecv(buf, count, datatype, + message, status); +} + + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_isend.c b/ompi/mca/pml/monitoring/pml_monitoring_isend.c new file mode 100644 index 00000000000..dd413169bee --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_isend.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of 
Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +extern opal_hash_table_t *translation_ht; +/* Pure forward to the real PML's isend_init; nothing is recorded here. */ +int mca_pml_monitoring_isend_init(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_isend_init(buf, count, datatype, + dst, tag, mode, comm, request); +} +/* Record count * type-size bytes against the destination's world rank (when the destination is found in translation_ht), then forward to the real PML's isend. */ +int mca_pml_monitoring_isend(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + + /* find the processor of the destination */ + ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst, true); + void *rank_entry = NULL; /* pointer-sized slot: the lookup stores a whole void*, which would overrun an int */ + + /* find its name*/ + uint64_t key = *((uint64_t*)&(proc->super.proc_name)); + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, &rank_entry)) { + size_t type_size, data_size; + ompi_datatype_type_size(datatype, &type_size); + data_size = count*type_size; + monitor_send_data((int)(uintptr_t)rank_entry, data_size, tag); + } + + return pml_selected_module.pml_isend(buf, count, datatype, + dst, tag, mode, comm, request); +} +/* Record count * type-size bytes against the destination's world rank (when the destination is found in translation_ht), then forward to the real PML's send. */ +int mca_pml_monitoring_send(const void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm) +{ + + ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst, true); + void *rank_entry = NULL; /* pointer-sized slot: the lookup stores a whole void*, which would overrun an int */ + uint64_t key = *((uint64_t*) &(proc->super.proc_name)); + + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + */ + if(OPAL_SUCCESS == 
opal_hash_table_get_value_uint64(translation_ht, key, &rank_entry)) { + size_t type_size, data_size; + ompi_datatype_type_size(datatype, &type_size); + data_size = count*type_size; + monitor_send_data((int)(uintptr_t)rank_entry, data_size, tag); + } + + + return pml_selected_module.pml_send(buf, count, datatype, + dst, tag, mode, comm); +} + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_start.c b/ompi/mca/pml/monitoring/pml_monitoring_start.c new file mode 100644 index 00000000000..fbdebac1c27 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_start.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include + +extern opal_hash_table_t *translation_ht; + +/* manage persistent requests: record every PML send request found in the array (with tag 1), then forward the whole array to the real PML's start */ +int mca_pml_monitoring_start(size_t count, + ompi_request_t** requests) +{ + size_t i; + + for( i = 0; i < count; i++ ) { + mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i]; + ompi_proc_t *proc; + void *rank_entry = NULL; /* pointer-sized slot: the lookup stores a whole void*, which would overrun an int */ + + if(NULL == pml_request) { + continue; + } + if(OMPI_REQUEST_PML != requests[i]->req_type) { + continue; + } + if(MCA_PML_REQUEST_SEND != pml_request->req_type) { + continue; + } + + proc = ompi_group_get_proc_ptr(pml_request->req_comm->c_remote_group, pml_request->req_peer, true); + uint64_t key = *((uint64_t*) &(proc->super.proc_name)); + + + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + */ + if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, &rank_entry)) { + size_t type_size, data_size; + ompi_datatype_type_size(pml_request->req_datatype, &type_size); + data_size = pml_request->req_count * type_size; + monitor_send_data((int)(uintptr_t)rank_entry, data_size, 1); + } + } + return pml_selected_module.pml_start(count, 
requests); +} + diff --git a/ompi/mca/pml/ob1/pml_ob1_cuda.c b/ompi/mca/pml/ob1/pml_ob1_cuda.c index a44a8b377c8..12ad396363d 100644 --- a/ompi/mca/pml/ob1/pml_ob1_cuda.c +++ b/ompi/mca/pml/ob1/pml_ob1_cuda.c @@ -56,7 +56,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size) { int rc; -#if OPAL_CUDA_SUPPORT_41 #if OPAL_CUDA_GDR_SUPPORT /* With some BTLs, switch to RNDV from RGET at large messages */ if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && @@ -95,10 +94,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq, sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0); } -#else - /* Just do the rendezvous but set initial data to be sent to zero */ - rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0); -#endif /* OPAL_CUDA_SUPPORT_41 */ return rc; } diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index dba3c22875c..893e6cebec3 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -198,7 +198,7 @@ int mca_pml_ob1_send(const void *buf, return rc; } - /* free the request and return. 
don't care if it completes now */ + ompi_request_wait_completion (brequest); ompi_request_free (&brequest); return OMPI_SUCCESS; } diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 6f01a2007e5..d9073a59eaa 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -175,6 +175,7 @@ int mca_pml_ucx_init(void) OBJ_CONSTRUCT(&ompi_pml_ucx.convs, mca_pml_ucx_freelist_t); /* Create a completed request to be returned from isend */ + OBJ_CONSTRUCT(&ompi_pml_ucx.completed_send_req, ompi_request_t); mca_pml_ucx_completed_request_init(&ompi_pml_ucx.completed_send_req); opal_progress_register(mca_pml_ucx_progress); @@ -191,7 +192,10 @@ int mca_pml_ucx_cleanup(void) opal_progress_unregister(mca_pml_ucx_progress); + ompi_pml_ucx.completed_send_req.req_state = OMPI_REQUEST_INVALID; OMPI_REQUEST_FINI(&ompi_pml_ucx.completed_send_req); + OBJ_DESTRUCT(&ompi_pml_ucx.completed_send_req); + OBJ_DESTRUCT(&ompi_pml_ucx.convs); OBJ_DESTRUCT(&ompi_pml_ucx.persistent_reqs); @@ -203,6 +207,44 @@ int mca_pml_ucx_cleanup(void) return OMPI_SUCCESS; } +ucp_ep_h mca_pml_ucx_add_proc(ompi_communicator_t *comm, int dst) +{ + ucp_address_t *address; + ucs_status_t status; + size_t addrlen; + ucp_ep_h ep; + int ret; + + ompi_proc_t *proc0 = ompi_comm_peer_lookup(comm, 0); + ompi_proc_t *proc_peer = ompi_comm_peer_lookup(comm, dst); + + /* Note, mca_pml_base_pml_check_selected, doesn't use 3rd argument */ + if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("ucx", + &proc0, + dst))) { + return NULL; + } + + ret = mca_pml_ucx_recv_worker_address(proc_peer, &address, &addrlen); + if (ret < 0) { + PML_UCX_ERROR("Failed to receive worker address from proc: %d", proc_peer->super.proc_name.vpid); + return NULL; + } + + PML_UCX_VERBOSE(2, "connecting to proc. 
%d", proc_peer->super.proc_name.vpid); + status = ucp_ep_create(ompi_pml_ucx.ucp_worker, address, &ep); + free(address); + if (UCS_OK != status) { + PML_UCX_ERROR("Failed to connect to proc: %d, %s", proc_peer->super.proc_name.vpid, + ucs_status_string(status)); + return NULL; + } + + proc_peer->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = ep; + + return ep; +} + int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) { ucp_address_t *address; @@ -221,6 +263,7 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) for (i = 0; i < nprocs; ++i) { ret = mca_pml_ucx_recv_worker_address(procs[i], &address, &addrlen); if (ret < 0) { + PML_UCX_ERROR("Failed to receive worker address from proc: %d", procs[i]->super.proc_name.vpid); return ret; } @@ -234,7 +277,8 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) free(address); if (UCS_OK != status) { - PML_UCX_ERROR("Failed to connect"); + PML_UCX_ERROR("Failed to connect to proc: %d, %s", procs[i]->super.proc_name.vpid, + ucs_status_string(status)); return OMPI_ERROR; } @@ -257,6 +301,7 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs) } procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = NULL; } + opal_pmix.fence(NULL, 0); return OMPI_SUCCESS; } @@ -381,6 +426,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src return OMPI_ERROR; } + ucp_worker_progress(ompi_pml_ucx.ucp_worker); while (!req->req_complete) { opal_progress(); } @@ -420,7 +466,7 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat struct ompi_request_t **request) { mca_pml_ucx_persistent_request_t *req; - + ucp_ep_h ep; req = (mca_pml_ucx_persistent_request_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.persistent_reqs); if (req == NULL) { @@ -430,6 +476,12 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat PML_UCX_TRACE_SEND("isend_init request *%p=%p", buf, count, datatype, dst, tag, mode, 
comm, (void*)request, (void*)req) + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + req->ompi.req_state = OMPI_REQUEST_INACTIVE; req->flags = MCA_PML_UCX_REQUEST_FLAG_SEND; req->buffer = (void *)buf; @@ -437,7 +489,7 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat req->datatype = mca_pml_ucx_get_datatype(datatype); req->tag = PML_UCX_MAKE_SEND_TAG(tag, comm); req->send.mode = mode; - req->send.ep = mca_pml_ucx_get_ep(comm, dst); + req->send.ep = ep; *request = &req->ompi; return OMPI_SUCCESS; @@ -449,13 +501,20 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, struct ompi_request_t **request) { ompi_request_t *req; + ucp_ep_h ep; PML_UCX_TRACE_SEND("isend request *%p", buf, count, datatype, dst, tag, mode, comm, (void*)request) /* TODO special care to sync/buffered send */ - req = (ompi_request_t*)ucp_tag_send_nb(mca_pml_ucx_get_ep(comm, dst), buf, count, + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + + req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), PML_UCX_MAKE_SEND_TAG(tag, comm), mca_pml_ucx_send_completion); @@ -478,19 +537,27 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i struct ompi_communicator_t* comm) { ompi_request_t *req; + ucp_ep_h ep; PML_UCX_TRACE_SEND("%s", buf, count, datatype, dst, tag, mode, comm, "send"); /* TODO special care to sync/buffered send */ - req = (ompi_request_t*)ucp_tag_send_nb(mca_pml_ucx_get_ep(comm, dst), buf, count, + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + + req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), 
PML_UCX_MAKE_SEND_TAG(tag, comm), mca_pml_ucx_send_completion); - if (req == NULL) { + if (OPAL_LIKELY(req == NULL)) { return OMPI_SUCCESS; } else if (!UCS_PTR_IS_ERR(req)) { PML_UCX_VERBOSE(8, "got request %p", (void*)req); + ucp_worker_progress(ompi_pml_ucx.ucp_worker); ompi_request_wait(&req, MPI_STATUS_IGNORE); return OMPI_SUCCESS; } else { @@ -557,7 +624,7 @@ int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, 1, &info); if (ucp_msg != NULL) { PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); - PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, ucp_msg); + PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); *matched = 1; mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); } else if (UCS_PTR_STATUS(ucp_msg) == UCS_ERR_NO_MESSAGE) { @@ -582,7 +649,7 @@ int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, 1, &info); if (ucp_msg != NULL) { PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); - PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, ucp_msg); + PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); return OMPI_SUCCESS; } @@ -686,13 +753,14 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) if (tmp_req->req_complete) { /* tmp_req is already completed */ PML_UCX_VERBOSE(8, "completing persistent request %p", (void*)preq); - mca_pml_ucx_persistent_requset_complete(preq, tmp_req); + mca_pml_ucx_persistent_request_complete(preq, tmp_req); } else { /* tmp_req would be completed by callback and trigger completion * of preq */ PML_UCX_VERBOSE(8, "temporary request %p will complete persistent request %p", (void*)tmp_req, (void*)preq); tmp_req->req_complete_cb_data = preq; + preq->tmp_req = tmp_req; } OPAL_THREAD_UNLOCK(&ompi_request_lock); } else { diff --git a/ompi/mca/pml/ucx/pml_ucx.h b/ompi/mca/pml/ucx/pml_ucx.h index 8a696e8111a..2f50cb27770 100644 --- 
a/ompi/mca/pml/ucx/pml_ucx.h +++ b/ompi/mca/pml/ucx/pml_ucx.h @@ -69,13 +69,15 @@ extern mca_pml_ucx_module_t ompi_pml_ucx; _PML_UCX_QUOTE(_x) #define PML_UCX_ERROR(...) \ - opal_output_verbose(0, ompi_pml_ucx.output, "Error: " __FILE__ ":" \ - PML_UCX_QUOTE(__LINE__) __VA_ARGS__) + opal_output_verbose(0, ompi_pml_ucx.output, \ + __FILE__ ":" PML_UCX_QUOTE(__LINE__) \ + " Error: " __VA_ARGS__) #define PML_UCX_VERBOSE(_level, ... ) \ if (((_level) <= PML_UCX_MAX_VERBOSE) && ((_level) <= ompi_pml_ucx.verbose)) { \ - opal_output_verbose(_level, ompi_pml_ucx.output, __FILE__ ":" \ - PML_UCX_QUOTE(__LINE__) __VA_ARGS__); \ + opal_output_verbose(_level, ompi_pml_ucx.output, \ + __FILE__ ":" PML_UCX_QUOTE(__LINE__) " " \ + __VA_ARGS__); \ } int mca_pml_ucx_open(void); @@ -83,6 +85,7 @@ int mca_pml_ucx_close(void); int mca_pml_ucx_init(void); int mca_pml_ucx_cleanup(void); +ucp_ep_h mca_pml_ucx_add_proc(ompi_communicator_t *comm, int dst); int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs); int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs); @@ -144,4 +147,5 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests); int mca_pml_ucx_dump(struct ompi_communicator_t* comm, int verbose); + #endif /* PML_UCX_H_ */ diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index e1ad331c90b..a7a6d58529d 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -25,6 +25,12 @@ static int mca_pml_ucx_request_free(ompi_request_t **rptr) return OMPI_SUCCESS; } +static int mca_pml_ucx_request_cancel(ompi_request_t *req, int flag) +{ + ucp_request_cancel(ompi_pml_ucx.ucp_worker, req); + return OMPI_SUCCESS; +} + void mca_pml_ucx_send_completion(void *request, ucs_status_t status) { ompi_request_t *req = request; @@ -55,12 +61,20 @@ void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, OPAL_THREAD_UNLOCK(&ompi_request_lock); } -void 
mca_pml_ucx_persistent_requset_complete(mca_pml_ucx_persistent_request_t *preq, - ompi_request_t *tmp_req) +static void mca_pml_ucx_persistent_request_detach(mca_pml_ucx_persistent_request_t *preq, + ompi_request_t *tmp_req) +{ + tmp_req->req_complete_cb_data = NULL; + preq->tmp_req = NULL; +} + +inline void +mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, + ompi_request_t *tmp_req) { preq->ompi.req_status = tmp_req->req_status; ompi_request_complete(&preq->ompi, true); - tmp_req->req_complete_cb_data = NULL; + mca_pml_ucx_persistent_request_detach(preq, tmp_req); mca_pml_ucx_request_reset(tmp_req); ucp_request_release(tmp_req); } @@ -73,7 +87,8 @@ static inline void mca_pml_ucx_preq_completion(ompi_request_t *tmp_req) ompi_request_complete(tmp_req, false); preq = (mca_pml_ucx_persistent_request_t*)tmp_req->req_complete_cb_data; if (preq != NULL) { - mca_pml_ucx_persistent_requset_complete(preq, tmp_req); + PML_UCX_ASSERT(preq->tmp_req != NULL); + mca_pml_ucx_persistent_request_complete(preq, tmp_req); } OPAL_THREAD_UNLOCK(&ompi_request_lock); } @@ -111,8 +126,6 @@ static void mca_pml_ucx_request_init_common(ompi_request_t* ompi_req, OMPI_REQUEST_INIT(ompi_req, req_persistent); ompi_req->req_type = OMPI_REQUEST_PML; ompi_req->req_state = state; - ompi_req->req_complete_cb = NULL; - ompi_req->req_complete_cb_data = NULL; ompi_req->req_free = req_free; ompi_req->req_cancel = req_cancel; } @@ -120,34 +133,56 @@ static void mca_pml_ucx_request_init_common(ompi_request_t* ompi_req, void mca_pml_ucx_request_init(void *request) { ompi_request_t* ompi_req = request; + OBJ_CONSTRUCT(ompi_req, ompi_request_t); mca_pml_ucx_request_init_common(ompi_req, false, OMPI_REQUEST_ACTIVE, - mca_pml_ucx_request_free, NULL); + mca_pml_ucx_request_free, + mca_pml_ucx_request_cancel); } void mca_pml_ucx_request_cleanup(void *request) { ompi_request_t* ompi_req = request; + ompi_req->req_state = OMPI_REQUEST_INVALID; OMPI_REQUEST_FINI(ompi_req); + 
OBJ_DESTRUCT(ompi_req); } static int mca_pml_ucx_persistent_request_free(ompi_request_t **rptr) { - mca_pml_ucx_persistent_request_t* req = (mca_pml_ucx_persistent_request_t*)*rptr; + mca_pml_ucx_persistent_request_t* preq = (mca_pml_ucx_persistent_request_t*)*rptr; + ompi_request_t *tmp_req = preq->tmp_req; + preq->ompi.req_state = OMPI_REQUEST_INVALID; + if (tmp_req != NULL) { + mca_pml_ucx_persistent_request_detach(preq, tmp_req); + ucp_request_release(tmp_req); + } + PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &preq->ompi.super); *rptr = MPI_REQUEST_NULL; - req->ompi.req_state = OMPI_REQUEST_INVALID; - PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &req->ompi.super); + return OMPI_SUCCESS; +} + +static int mca_pml_ucx_persistent_request_cancel(ompi_request_t *req, int flag) +{ + mca_pml_ucx_persistent_request_t* preq = (mca_pml_ucx_persistent_request_t*)req; + + if (preq->tmp_req != NULL) { + ucp_request_cancel(ompi_pml_ucx.ucp_worker, preq->tmp_req); + } return OMPI_SUCCESS; } static void mca_pml_ucx_persisternt_request_construct(mca_pml_ucx_persistent_request_t* req) { mca_pml_ucx_request_init_common(&req->ompi, true, OMPI_REQUEST_INACTIVE, - mca_pml_ucx_persistent_request_free, NULL); + mca_pml_ucx_persistent_request_free, + mca_pml_ucx_persistent_request_cancel); + req->tmp_req = NULL; } static void mca_pml_ucx_persisternt_request_destruct(mca_pml_ucx_persistent_request_t* req) { + req->ompi.req_state = OMPI_REQUEST_INVALID; OMPI_REQUEST_FINI(&req->ompi); } diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index dfd91f31e4b..bfa30190215 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -89,6 +89,7 @@ enum { struct pml_ucx_persistent_request { ompi_request_t ompi; + ompi_request_t *tmp_req; unsigned flags; void *buffer; size_t count; @@ -114,7 +115,7 @@ void mca_pml_ucx_psend_completion(void *request, ucs_status_t status); void mca_pml_ucx_precv_completion(void 
*request, ucs_status_t status, ucp_tag_recv_info_t *info); -void mca_pml_ucx_persistent_requset_complete(mca_pml_ucx_persistent_request_t *preq, +void mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, ompi_request_t *tmp_req); void mca_pml_ucx_completed_request_init(ompi_request_t *ompi_req); @@ -126,7 +127,12 @@ void mca_pml_ucx_request_cleanup(void *request); static inline ucp_ep_h mca_pml_ucx_get_ep(ompi_communicator_t *comm, int dst) { - return ompi_comm_peer_lookup(comm, dst)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; + ucp_ep_h ep = ompi_comm_peer_lookup(comm,dst)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; + if (OPAL_UNLIKELY(NULL == ep)) { + ep = mca_pml_ucx_add_proc(comm, dst); + } + + return ep; } static inline void mca_pml_ucx_request_reset(ompi_request_t *req) diff --git a/ompi/mca/pml/v/pml_v_output.h b/ompi/mca/pml/v/pml_v_output.h index 13c9c1e4821..77bb5b14055 100644 --- a/ompi/mca/pml/v/pml_v_output.h +++ b/ompi/mca/pml/v/pml_v_output.h @@ -30,6 +30,7 @@ static inline void V_OUTPUT_ERR(const char *fmt, ... ) va_start(list, fmt); ret = vasprintf(&str, fmt, list); assert(-1 != ret); + (void)ret; // silence compiler warning opal_output(0, "%s", str); free(str); va_end(list); diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index 436519ef2a3..0ad43146344 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -1,5 +1,5 @@ /* - * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -9,6 +9,10 @@ * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + #include "pml_yalla.h" #include "pml_yalla_request.h" @@ -66,14 +70,14 @@ static int send_ep_address(void) address = alloca(addrlen); error = mxm_ep_get_address(ompi_pml_yalla.mxm_ep, address, &addrlen); if (MXM_OK != error) { - PML_YALLA_ERROR("Failed to get EP address"); + PML_YALLA_ERROR("%s", "Failed to get EP address"); return OMPI_ERROR; } OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_yalla_component.pmlm_version, address, addrlen); if (OMPI_SUCCESS != rc) { - PML_YALLA_ERROR("Open MPI couldn't distribute EP connection details"); + PML_YALLA_ERROR("%s", "Open MPI couldn't distribute EP connection details"); return OMPI_ERROR; } @@ -87,7 +91,7 @@ static int recv_ep_address(ompi_proc_t *proc, void **address_p, size_t *addrlen_ OPAL_MODEX_RECV(rc, &mca_pml_yalla_component.pmlm_version, &proc->super.proc_name, address_p, addrlen_p); if (rc < 0) { - PML_YALLA_ERROR("Failed to receive EP address"); + PML_YALLA_ERROR("%s", "Failed to receive EP address"); } return rc; } @@ -103,18 +107,18 @@ int mca_pml_yalla_open(void) { mxm_error_t error; - PML_YALLA_VERBOSE(1, "mca_pml_yalla_open"); + PML_YALLA_VERBOSE(1, "%s", "mca_pml_yalla_open"); /* Set memory hooks */ if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & opal_mem_hooks_support_level())) { - PML_YALLA_VERBOSE(1, "enabling on-demand memory mapping"); + PML_YALLA_VERBOSE(1, "%s", "enabling on-demand memory mapping"); opal_setenv("MXM_MPI_MEM_ON_DEMAND_MAP", "y", false, &environ); ompi_pml_yalla.using_mem_hooks = 1; } else { - PML_YALLA_VERBOSE(1, "disabling on-demand memory mapping"); + PML_YALLA_VERBOSE(1, "%s", "disabling on-demand memory mapping"); ompi_pml_yalla.using_mem_hooks = 0; } opal_setenv("MXM_MPI_SINGLE_THREAD", ompi_mpi_thread_multiple ?
"n" : "y", @@ -137,7 +141,7 @@ int mca_pml_yalla_open(void) int mca_pml_yalla_close(void) { - PML_YALLA_VERBOSE(1, "mca_pml_yalla_close"); + PML_YALLA_VERBOSE(1, "%s", "mca_pml_yalla_close"); if (ompi_pml_yalla.ctx_opts != NULL) { mxm_config_free_context_opts(ompi_pml_yalla.ctx_opts); @@ -157,7 +161,7 @@ int mca_pml_yalla_init(void) mxm_error_t error; int rc; - PML_YALLA_VERBOSE(1, "mca_pml_yalla_init"); + PML_YALLA_VERBOSE(1, "%s", "mca_pml_yalla_init"); if (ompi_pml_yalla.using_mem_hooks) { opal_mem_hooks_register_release(mca_pml_yalla_mem_release_cb, NULL); @@ -188,7 +192,7 @@ int mca_pml_yalla_init(void) int mca_pml_yalla_cleanup(void) { - PML_YALLA_VERBOSE(1, "mca_pml_yalla_cleanup"); + PML_YALLA_VERBOSE(1, "%s", "mca_pml_yalla_cleanup"); opal_progress_unregister(mca_pml_yalla_progress); @@ -241,7 +245,7 @@ int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs) free(address); if (MXM_OK != error) { - PML_YALLA_ERROR("Failed to connect"); + PML_YALLA_ERROR("%s", "Failed to connect"); return OMPI_ERROR; } @@ -256,7 +260,7 @@ int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs) size_t i; if (ompi_mpi_finalized) { - PML_YALLA_VERBOSE(3, "using bulk powerdown"); + PML_YALLA_VERBOSE(3, "%s", "using bulk powerdown"); mxm_ep_powerdown(ompi_pml_yalla.mxm_ep); } @@ -303,7 +307,7 @@ int mca_pml_yalla_del_comm(struct ompi_communicator_t* comm) mxm_mq_h mq = (void*)comm->c_pml_comm; if (ompi_pml_yalla.mxm_context == NULL) { - PML_YALLA_ERROR("Destroying communicator after MXM context is destroyed"); + PML_YALLA_ERROR("%s", "Destroying communicator after MXM context is destroyed"); return OMPI_ERROR; } @@ -390,7 +394,7 @@ int mca_pml_yalla_isend_init(const void *buf, size_t count, ompi_datatype_t *dat { mca_pml_yalla_send_request_t *sreq; - sreq = MCA_PML_YALLA_SREQ_INIT(buf, count, datatype, dst, tag, mode, comm, + sreq = MCA_PML_YALLA_SREQ_INIT((void *)buf, count, datatype, dst, tag, mode, comm, OMPI_REQUEST_INACTIVE); 
sreq->super.ompi.req_persistent = true; sreq->super.flags = MCA_PML_YALLA_REQUEST_FLAG_SEND; @@ -459,7 +463,7 @@ int mca_pml_yalla_isend(const void *buf, size_t count, ompi_datatype_t *datatype mxm_error_t error; int rc; - sreq = MCA_PML_YALLA_SREQ_INIT(buf, count, datatype, dst, tag, mode, comm, + sreq = MCA_PML_YALLA_SREQ_INIT((void *)buf, count, datatype, dst, tag, mode, comm, OMPI_REQUEST_ACTIVE); sreq->super.ompi.req_persistent = false; sreq->super.flags = 0; @@ -493,7 +497,7 @@ int mca_pml_yalla_send(const void *buf, size_t count, ompi_datatype_t *datatype, mxm_send_req_t sreq; mxm_error_t error; - PML_YALLA_INIT_MXM_SEND_REQ(&sreq, buf, count, datatype, dst, tag, mode, comm, send); + PML_YALLA_INIT_MXM_SEND_REQ(&sreq, (void *)buf, count, datatype, dst, tag, mode, comm, send); PML_YALLA_INIT_BLOCKING_MXM_SEND_REQ(&sreq); PML_YALLA_VERBOSE(8, "send to %d tag %d dtype %s count %zu", dst, tag, diff --git a/ompi/mca/pml/yalla/pml_yalla_request.h b/ompi/mca/pml/yalla/pml_yalla_request.h index 915bfe51ccb..0ccc026c0c5 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.h +++ b/ompi/mca/pml/yalla/pml_yalla_request.h @@ -25,7 +25,15 @@ struct pml_yalla_base_request { ompi_request_t ompi; mca_pml_yalla_convertor_t *convertor; int flags; - mxm_req_base_t mxm_base[0]; /* overlaps with base of send/recv */ + /* overlaps with base of send/recv + * In ISO C90, you would have to give contents a length of 1, + * which means either you waste space or complicate the argument to malloc. 
+ * Note: + * - 1 was the portable way to go, though it was rather strange + * - 0 was better at indicating intent, but not legal as far as + * the Standard was concerned and supported as an extension by some compilers (including gcc) + */ + mxm_req_base_t mxm_base[1]; }; struct pml_yalla_send_request { @@ -126,28 +134,26 @@ void mca_pml_yalla_init_reqs(void); } \ } -#define MCA_PML_YALLA_RREQ_INIT(_buf, _count, _datatype, _src, _tag, _comm, _state) \ - ({ \ - mca_pml_yalla_recv_request_t *rreq = (mca_pml_yalla_recv_request_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.recv_reqs); \ - \ - PML_YALLA_INIT_OMPI_REQ(&rreq->super.ompi, _comm, _state); \ - PML_YALLA_INIT_MXM_RECV_REQ(&rreq->mxm, _buf, _count, _datatype, _src, _tag, \ - _comm, irecv, rreq); \ - rreq; \ - }) - -#define MCA_PML_YALLA_SREQ_INIT(_buf, _count, _datatype, _dst, _tag, _mode, _comm, _state) \ - ({ \ - mca_pml_yalla_send_request_t *sreq = (mca_pml_yalla_send_request_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.send_reqs); \ - \ - PML_YALLA_INIT_OMPI_REQ(&sreq->super.ompi, _comm, _state); \ - PML_YALLA_INIT_MXM_SEND_REQ(&sreq->mxm, _buf, _count, _datatype, _dst, _tag, \ - mode, _comm, isend, sreq); \ - sreq->super.ompi.req_status.MPI_TAG = _tag; \ - sreq->super.ompi.req_status.MPI_SOURCE = (_comm)->c_my_rank; \ - sreq->super.ompi.req_status._ucount = _count; \ - sreq; \ - }) +static inline mca_pml_yalla_recv_request_t* MCA_PML_YALLA_RREQ_INIT(void *_buf, size_t _count, ompi_datatype_t *_datatype, + int _src, int _tag, struct ompi_communicator_t* _comm, int _state) +{ + mca_pml_yalla_recv_request_t *rreq = (mca_pml_yalla_recv_request_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.recv_reqs); + PML_YALLA_INIT_OMPI_REQ(&rreq->super.ompi, _comm, _state); + PML_YALLA_INIT_MXM_RECV_REQ(&rreq->mxm, _buf, _count, _datatype, _src, _tag, _comm, irecv, rreq); + return rreq; +} + +static inline mca_pml_yalla_send_request_t* MCA_PML_YALLA_SREQ_INIT(void *_buf, size_t _count, ompi_datatype_t *_datatype, + int _dst, int 
_tag, mca_pml_base_send_mode_t _mode, struct ompi_communicator_t* _comm, int _state) +{ + mca_pml_yalla_send_request_t *sreq = (mca_pml_yalla_send_request_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.send_reqs); + PML_YALLA_INIT_OMPI_REQ(&sreq->super.ompi, _comm, _state); + PML_YALLA_INIT_MXM_SEND_REQ(&sreq->mxm, _buf, _count, _datatype, _dst, _tag, _mode, _comm, isend, sreq); + sreq->super.ompi.req_status.MPI_TAG = _tag; + sreq->super.ompi.req_status.MPI_SOURCE = (_comm)->c_my_rank; + sreq->super.ompi.req_status._ucount = _count; + return sreq; +} #define PML_YALLA_INIT_MXM_PROBE_REQ(_rreq, _rank, _tag, _comm) \ { \ @@ -184,6 +190,7 @@ void mca_pml_yalla_init_reqs(void); (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ break; \ case MXM_ERR_CANCELED: \ + (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ (_mpi_status)->_cancelled = true; \ break; \ case MXM_ERR_MESSAGE_TRUNCATED: \ diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index a5796276ba9..9e5c8b3ea3e 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -3,8 +3,9 @@ * All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +59,10 @@ typedef orte_ns_cmp_bitmask_t ompi_rte_cmp_bitmask_t; #define OMPI_RTE_CMP_JOBID ORTE_NS_CMP_JOBID #define OMPI_RTE_CMP_VPID ORTE_NS_CMP_VPID #define OMPI_RTE_CMP_ALL ORTE_NS_CMP_ALL +#define OMPI_LOCAL_JOBID(jobid) ORTE_LOCAL_JOBID(jobid) +#define OMPI_JOB_FAMILY(jobid) ORTE_JOB_FAMILY(jobid) +#define OMPI_CONSTRUCT_JOBID(family,local) ORTE_CONSTRUCT_JOBID(family,local) + /* This is the DSS tag to serialize a proc name */ #define OMPI_NAME ORTE_NAME #define OMPI_PROCESS_NAME_HTON ORTE_PROCESS_NAME_HTON @@ -83,16 +88,10 @@ typedef orte_local_rank_t ompi_local_rank_t; OMPI_DECLSPEC void __opal_attribute_noreturn__ ompi_rte_abort(int error_code, char *fmt, ...); #define ompi_rte_abort_peers(a, b, c) orte_errmgr.abort_peers(a, b, c) -#define OMPI_RTE_ERRHANDLER_FIRST ORTE_ERRMGR_CALLBACK_FIRST -#define OMPI_RTE_ERRHANDLER_LAST ORTE_ERRMGR_CALLBACK_LAST -#define OMPI_RTE_ERRHANDLER_PREPEND ORTE_ERRMGR_CALLBACK_PREPEND -#define OMPI_RTE_ERRHANDLER_APPEND ORTE_ERRMGR_CALLBACK_APPEND -typedef orte_error_t ompi_rte_error_report_t; -#define ompi_rte_register_errhandler(a, b) orte_errmgr.register_error_callback(a, b) #define OMPI_ERROR_LOG ORTE_ERROR_LOG /* Init and finalize objects and operations */ -#define ompi_rte_init(a, b) orte_init(a, b, ORTE_PROC_MPI) +OMPI_DECLSPEC int ompi_rte_init(int *pargc, char ***pargv); #define ompi_rte_finalize() orte_finalize() OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void); diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index c82b25ce400..1a678380f0b 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -52,6 +52,79 @@ extern ompi_rte_orte_component_t mca_rte_orte_component; +typedef struct { + volatile bool active; + int status; + int errhandler; +} errhandler_t; + +static void register_cbfunc(int status, int errhndler, void *cbdata) +{ + errhandler_t *cd = (errhandler_t*)cbdata; + 
cd->status = status; + cd->errhandler = errhndler; + cd->active = false; +} + +static volatile bool wait_for_release = true; +static int errhandler = -1; + +static void notify_cbfunc(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(cbdata); + } + wait_for_release = false; +} + + +int ompi_rte_init(int *pargc, char ***pargv) +{ + int rc; + opal_list_t info; + opal_value_t val; + errhandler_t cd; + + if (ORTE_SUCCESS != (rc = orte_init(pargc, pargv, ORTE_PROC_MPI))) { + return rc; + } + + if (!orte_standalone_operation) { + /* register to receive any debugger release */ + OBJ_CONSTRUCT(&info, opal_list_t); + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = strdup(OPAL_PMIX_ERROR_NAME); + val.type = OPAL_INT; + val.data.integer = OPAL_ERR_DEBUGGER_RELEASE; + opal_list_append(&info, &val.super); + cd.status = ORTE_ERROR; + cd.errhandler = -1; + cd.active = true; + + opal_pmix.register_errhandler(&info, notify_cbfunc, register_cbfunc, &cd); + + /* let the MPI progress engine run while we wait for + * registration to complete */ + OMPI_WAIT_FOR_COMPLETION(cd.active); + /* safely deconstruct the list */ + opal_list_remove_first(&info); + OBJ_DESTRUCT(&val); + OBJ_DESTRUCT(&info); + if (OPAL_SUCCESS != cd.status) { + /* ouch - we are doomed */ + ORTE_ERROR_LOG(cd.status); + return OMPI_ERROR; + } + errhandler = cd.errhandler; + } + + return OMPI_SUCCESS; +} + void ompi_rte_abort(int error_code, char *fmt, ...) { va_list arglist; @@ -100,10 +173,10 @@ void ompi_rte_abort(int error_code, char *fmt, ...) * attaching debuggers -- see big comment in * orte/tools/orterun/debuggers.c explaining the two scenarios. 
*/ + void ompi_rte_wait_for_debugger(void) { int debugger; - orte_rml_recv_cb_t xfer; /* See lengthy comment in orte/tools/orterun/debuggers.c about orte_in_parallel_debugger */ @@ -117,12 +190,12 @@ void ompi_rte_wait_for_debugger(void) /* if not, just return */ return; } - /* if we are being debugged, then we need to find * the correct plug-ins */ ompi_debugger_setup_dlls(); + /* wait for the debugger to attach */ if (orte_standalone_operation) { /* spin until debugger attaches and releases us */ while (MPIR_debug_gate == 0) { @@ -133,23 +206,9 @@ void ompi_rte_wait_for_debugger(void) #endif } } else { - /* only the rank=0 proc waits for either a message from the - * HNP or for the debugger to attach - everyone else will just - * spin in * the grpcomm barrier in ompi_mpi_init until rank=0 - * joins them. - */ - if (0 != ORTE_PROC_MY_NAME->vpid) { - return; - } - - /* VPID 0 waits for a message from the HNP */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD, - ORTE_RML_TAG_DEBUGGER_RELEASE, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* let the MPI progress engine run while we wait */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); + /* now wait for the notification to occur */ + OMPI_WAIT_FOR_COMPLETION(wait_for_release); + /* deregister the errhandler */ + opal_pmix.deregister_errhandler(errhandler, NULL, NULL); } } diff --git a/ompi/mca/rte/rte.h b/ompi/mca/rte/rte.h index bc14cbc476d..6929f957342 100644 --- a/ompi/mca/rte/rte.h +++ b/ompi/mca/rte/rte.h @@ -3,7 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -111,8 +111,6 @@ * 2. int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs) - * Abort the specified list of peers * 3. 
OMPI_ERROR_LOG(rc) - print error message regarding the given return code - * 4. ompi_rte_register_errhandler - register a callback function for the RTE - * to report asynchronous errors to the caller * * (e) Init and finalize objects and operations * 1. ompi_rte_init - a function to initialize the RTE. The function diff --git a/ompi/mca/sbgp/ibnet/configure.m4 b/ompi/mca/sbgp/ibnet/configure.m4 index ae324cac9e1..6fdb24fa40d 100644 --- a/ompi/mca/sbgp/ibnet/configure.m4 +++ b/ompi/mca/sbgp/ibnet/configure.m4 @@ -2,6 +2,8 @@ # # Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. # Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,7 +28,7 @@ AC_DEFUN([MCA_ompi_sbgp_ibnet_CONFIG],[ OPAL_CHECK_OPENFABRICS([sbgp_ibnet], [sbgp_ofa_happy="yes"]) OPAL_CHECK_MLNX_OPENFABRICS([sbgp_ibnet], [sbgp_mlnx_ofed_happy="yes"]) - AS_IF([test "$sbgp_ofa_happy" = "yes" -a "$sbgp_mlnx_ofed_happy" = "yes"], + AS_IF([test "$sbgp_ofa_happy" = "yes" && test "$sbgp_mlnx_ofed_happy" = "yes"], [$1], [$2]) diff --git a/ompi/mca/sharedfp/addproc/.opal_unignore b/ompi/mca/sharedfp/addproc/.opal_unignore index debe198de7d..e69de29bb2d 100644 --- a/ompi/mca/sharedfp/addproc/.opal_unignore +++ b/ompi/mca/sharedfp/addproc/.opal_unignore @@ -1 +0,0 @@ -gabriel diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 976f9e7e0ce..6d3d9406339 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -101,7 +101,7 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, if( MPI_UNWEIGHTED != destweights ) { if( NULL != topo->outw ) free(topo->outw); } - free(topo); + OBJ_RELEASE(topo); } ompi_comm_free(newcomm); 
return err; diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index b318cb37ea4..6c31d1fa980 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -236,10 +236,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, /* Then, we need to know if the processes are bound */ /* We make the hypothesis that all processes are in */ /* the same state : all bound or none bound */ - hwloc_err = hwloc_topology_init(&opal_hwloc_topology); - if (-1 == hwloc_err) goto fallback; - hwloc_err = hwloc_topology_load(opal_hwloc_topology); - if (-1 == hwloc_err) goto fallback; + assert(NULL != opal_hwloc_topology); root_obj = hwloc_get_root_obj(opal_hwloc_topology); if (NULL == root_obj) goto fallback; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c index 2892c4da110..ce3c6b172b6 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c @@ -47,7 +47,7 @@ int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t ** } /* Send Rank, receive max buffer size and max_clock back */ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + rank = ompi_comm_rank(&ompi_mpi_comm_world.comm); rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD, MCA_PML_BASE_SEND_STANDARD, diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index 5ffb8eeed45..cbca901d614 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -26,7 +26,7 @@ SUBDIRS = profile -# if OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols +# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols # to be replaced by PMPI_*. 
# In this directory, we need it to be 0 diff --git a/ompi/mpi/c/alltoall.c b/ompi/mpi/c/alltoall.c index 09468f24e0a..14bd63f57ae 100644 --- a/ompi/mpi/c/alltoall.c +++ b/ompi/mpi/c/alltoall.c @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -92,12 +94,11 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - /* Do we need to do anything? Per MPI standard the (v3.1 page 168 line 48) - * the amount of data sent must be equal to the amount of data received. - */ - ompi_datatype_type_size(recvtype, &recvtype_size); - if( (0 == recvcount) || (0 == recvtype_size) ) { - return MPI_SUCCESS; + if (! OMPI_COMM_IS_INTER(comm)) { + ompi_datatype_type_size(recvtype, &recvtype_size); + if( (0 == recvcount) || (0 == recvtype_size) ) { + return MPI_SUCCESS; + } } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpi/c/comm_remote_group.c b/ompi/mpi/c/comm_remote_group.c index 7ec955c8f09..576ed5a6fa5 100644 --- a/ompi/mpi/c/comm_remote_group.c +++ b/ompi/mpi/c/comm_remote_group.c @@ -69,6 +69,5 @@ int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group) } *group = (MPI_Group) comm->c_remote_group; - ompi_group_increment_proc_count(*group); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/finalized.c b/ompi/mpi/c/finalized.c index 5cfd618d59d..dc6e8a6bab0 100644 --- a/ompi/mpi/c/finalized.c +++ b/ompi/mpi/c/finalized.c @@ -11,7 +11,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,8 +39,6 @@ static const char FUNC_NAME[] = "MPI_Finalized"; int MPI_Finalized(int *flag) { - MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); /* We must obtain the lock to guarnatee consistent values of @@ -63,7 +62,10 @@ int MPI_Finalized(int *flag) FUNC_NAME); } else { opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); - return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, + /* We have no MPI object here so call ompi_errhandler_invoke + * directly */ + return ompi_errhandler_invoke(NULL, NULL, -1, + ompi_errcode_get_mpi_code(MPI_ERR_ARG), FUNC_NAME); } } diff --git a/ompi/mpi/c/get_address.c b/ompi/mpi/c/get_address.c index a49963f639d..08a91da63f0 100644 --- a/ompi/mpi/c/get_address.c +++ b/ompi/mpi/c/get_address.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -44,7 +44,7 @@ int MPI_Get_address(const void *location, MPI_Aint *address) if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == location || NULL == address) { + if (NULL == address) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } diff --git a/ompi/mpi/c/get_library_version.c b/ompi/mpi/c/get_library_version.c index e699a47957b..e66bb09bd18 100644 --- a/ompi/mpi/c/get_library_version.c +++ b/ompi/mpi/c/get_library_version.c @@ -12,6 +12,7 @@ * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc.
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +40,6 @@ static const char FUNC_NAME[] = "MPI_Get_library_version"; int MPI_Get_library_version(char *version, int *resultlen) { int len_left; - MPI_Comm null = MPI_COMM_NULL; char *ptr, tmp[MPI_MAX_LIBRARY_VERSION_STRING]; OPAL_CR_NOOP_PROGRESS(); @@ -62,7 +62,10 @@ int MPI_Get_library_version(char *version, int *resultlen) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { - return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, + /* We have no MPI object here so call ompi_errhandler_invoke + * directly */ + return ompi_errhandler_invoke(NULL, NULL, -1, + ompi_errcode_get_mpi_code(MPI_ERR_ARG), FUNC_NAME); } } diff --git a/ompi/mpi/c/get_version.c b/ompi/mpi/c/get_version.c index 99feb899cc9..def7dd90454 100644 --- a/ompi/mpi/c/get_version.c +++ b/ompi/mpi/c/get_version.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,8 +38,6 @@ static const char FUNC_NAME[] = "MPI_Get_version"; int MPI_Get_version(int *version, int *subversion) { - MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); if (MPI_PARAM_CHECK) { @@ -59,7 +58,10 @@ int MPI_Get_version(int *version, int *subversion) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { - return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, + /* We have no MPI object here so call ompi_errhandler_invoke + * directly */ + return ompi_errhandler_invoke(NULL, NULL, -1, + ompi_errcode_get_mpi_code(MPI_ERR_ARG), FUNC_NAME); } } diff --git a/ompi/mpi/c/initialized.c b/ompi/mpi/c/initialized.c index 30c8e74f62b..459b764af12 100644 --- a/ompi/mpi/c/initialized.c +++ b/ompi/mpi/c/initialized.c @@ -11,7 +11,8 @@ * All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,8 +39,6 @@ static const char FUNC_NAME[] = "MPI_Initialized"; int MPI_Initialized(int *flag) { - MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); /* We must obtain the lock to guarnatee consistent values of @@ -63,7 +62,10 @@ int MPI_Initialized(int *flag) FUNC_NAME); } else { opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); - return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, + /* We have no MPI object here so call ompi_errhandler_invoke + * directly */ + return ompi_errhandler_invoke(NULL, NULL, -1, + ompi_errcode_get_mpi_code(MPI_ERR_ARG), FUNC_NAME); } } diff --git a/ompi/mpi/c/intercomm_create.c b/ompi/mpi/c/intercomm_create.c index 1da9b55c570..8346a75ec11 100644 --- a/ompi/mpi/c/intercomm_create.c +++ b/ompi/mpi/c/intercomm_create.c @@ -171,10 +171,9 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, /* put group elements in the list */ for (j = 0; j < rsize; j++) { new_group_pointer->grp_proc_pointers[j] = rprocs[j]; + OBJ_RETAIN(rprocs[j]); } - ompi_group_increment_proc_count(new_group_pointer); - rc = ompi_comm_set ( &newcomp, /* new comm */ local_comm, /* old comm */ local_comm->c_local_group->grp_proc_count, /* local_size */ @@ -196,7 +195,6 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, goto err_exit; } - ompi_group_decrement_proc_count (new_group_pointer); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; diff --git a/ompi/mpi/c/intercomm_merge.c b/ompi/mpi/c/intercomm_merge.c index 64a6d476b4c..b0cfb2dcde1 100644 --- a/ompi/mpi/c/intercomm_merge.c +++ b/ompi/mpi/c/intercomm_merge.c @@ -114,7 +114,6 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high,
goto exit; } - ompi_group_decrement_proc_count(new_group_pointer); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; diff --git a/ompi/mpi/c/type_commit.c b/ompi/mpi/c/type_commit.c index 747bcccf1d2..64c795b6360 100644 --- a/ompi/mpi/c/type_commit.c +++ b/ompi/mpi/c/type_commit.c @@ -40,21 +40,21 @@ static const char FUNC_NAME[] = "MPI_Type_commit"; int MPI_Type_commit(MPI_Datatype *type) { - int rc; + int rc; - MEMCHECKER( - memchecker_datatype(*type); - ); + MEMCHECKER( + memchecker_datatype(*type); + ); - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == type || NULL == *type || MPI_DATATYPE_NULL == *type) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == type || NULL == *type || MPI_DATATYPE_NULL == *type) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); + } } - } - OPAL_CR_ENTER_LIBRARY(); + OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_commit( type ); - OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); + rc = ompi_datatype_commit( type ); + OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_set_attr.c b/ompi/mpi/c/type_set_attr.c index 071bf0b6cd3..afa5813f78f 100644 --- a/ompi/mpi/c/type_set_attr.c +++ b/ompi/mpi/c/type_set_attr.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -48,16 +48,14 @@ int MPI_Type_set_attr (MPI_Datatype type, memchecker_datatype(type); ); - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == type || MPI_DATATYPE_NULL == type) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); - } else if (NULL == attribute_val) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == type || MPI_DATATYPE_NULL == type) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); + } } - } - OPAL_CR_ENTER_LIBRARY(); + OPAL_CR_ENTER_LIBRARY(); ret = ompi_attr_set_c(TYPE_ATTR, type, &type->d_keyhash, type_keyval, attribute_val, false); diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index 60a07585bad..9f4795f192c 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -41,7 +41,15 @@ double MPI_Wtick(void) OPAL_CR_NOOP_PROGRESS(); #if OPAL_TIMER_CYCLE_NATIVE - return opal_timer_base_get_freq(); + { + opal_timer_t freq = opal_timer_base_get_freq(); + if (0 == freq) { + /* That should never happen, but if it does, return a bogus value + * rather than crashing with a division by zero */ + return (double)0.0; + } + return (double)1.0 / (double)freq; + } #elif OPAL_TIMER_USEC_NATIVE return 0.000001; #else diff --git a/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h b/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h index cd997dbc92d..3c3b16cf39e 100644 --- a/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h +++ b/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h @@ -4,6 +4,8 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2013 Los Alamos National Security, LLC. All rights ! reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -60,8 +62,31 @@ interface integer :: ierr end subroutine MPI_COMM_NULL_DELETE_FN + subroutine MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, attribute_val_in, & + attribute_val_out, flag, ierr ) + implicit none + include 'mpif-config.h' + integer :: oldcomm + integer :: comm_keyval + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_COMM_DUP_FN + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ subroutine MPI_TYPE_DUP_FN( oldtype, type_keyval, extra_state, & + attribute_val_in, attribute_val_out, & + flag, ierr ) + implicit none + include 'mpif-config.h' + integer :: oldtype + integer :: type_keyval + integer(KIND=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_TYPE_DUP_FN + subroutine MPI_TYPE_NULL_COPY_FN( type, type_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) @@ -86,6 +111,18 @@ interface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + subroutine MPI_WIN_DUP_FN( oldwin, win_keyval, extra_state, & + attribute_val_in, attribute_val_out, & + flag, ierr) + implicit none + include 'mpif-config.h' + integer :: oldwin + integer :: win_keyval + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_WIN_DUP_FN + subroutine MPI_WIN_NULL_COPY_FN( window, win_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) diff --git a/ompi/mpi/fortran/base/gen-mpi-mangling.pl b/ompi/mpi/fortran/base/gen-mpi-mangling.pl index d061eed89dc..96294f9fa9e 100755 --- a/ompi/mpi/fortran/base/gen-mpi-mangling.pl +++ b/ompi/mpi/fortran/base/gen-mpi-mangling.pl @@ -62,13 +62,13 @@ f_name => "MPI_IN_PLACE", }; $fortran->{unweighted} = { - c_type => "int", + c_type => "int *", c_name => "mpi_fortran_unweighted", f_type => "integer", f_name => "MPI_UNWEIGHTED", }; $fortran->{weights_empty} = { - c_type => "int", + c_type => "int *", c_name => "mpi_fortran_weights_empty", f_type => "integer", f_name => "MPI_WEIGHTS_EMPTY", diff --git a/ompi/mpi/fortran/common_sym_whitelist.txt b/ompi/mpi/fortran/common_sym_whitelist.txt new file mode 100644 index 00000000000..1b21fe88059 --- /dev/null +++ b/ompi/mpi/fortran/common_sym_whitelist.txt @@ -0,0 +1,65 @@ +# Open MPI's Fortran libraries have a bunch of deliberate common +# symbols. Whitelist them. 
+ompi_f08_mpi_2complex +ompi_f08_mpi_2double_complex +ompi_f08_mpi_2double_precision +ompi_f08_mpi_2integer +ompi_f08_mpi_2real +ompi_f08_mpi_aint +ompi_f08_mpi_band +ompi_f08_mpi_bor +ompi_f08_mpi_bxor +ompi_f08_mpi_byte +ompi_f08_mpi_character +ompi_f08_mpi_comm_null +ompi_f08_mpi_comm_self +ompi_f08_mpi_comm_world +ompi_f08_mpi_complex +ompi_f08_mpi_complex8 +ompi_f08_mpi_complex16 +ompi_f08_mpi_complex32 +ompi_f08_mpi_datatype_null +ompi_f08_mpi_double_complex +ompi_f08_mpi_double_precision +ompi_f08_mpi_errhandler_null +ompi_f08_mpi_errors_are_fatal +ompi_f08_mpi_errors_return +ompi_f08_mpi_file_null +ompi_f08_mpi_group_empty +ompi_f08_mpi_group_null +ompi_f08_mpi_info_env +ompi_f08_mpi_info_null +ompi_f08_mpi_integer +ompi_f08_mpi_integer1 +ompi_f08_mpi_integer16 +ompi_f08_mpi_integer2 +ompi_f08_mpi_integer4 +ompi_f08_mpi_integer8 +ompi_f08_mpi_land +ompi_f08_mpi_lb +ompi_f08_mpi_logical +ompi_f08_mpi_logical1 +ompi_f08_mpi_logical2 +ompi_f08_mpi_logical4 +ompi_f08_mpi_logical8 +ompi_f08_mpi_lor +ompi_f08_mpi_lxor +ompi_f08_mpi_max +ompi_f08_mpi_maxloc +ompi_f08_mpi_message_no_proc +ompi_f08_mpi_message_null +ompi_f08_mpi_min +ompi_f08_mpi_minloc +ompi_f08_mpi_op_null +ompi_f08_mpi_packed +ompi_f08_mpi_prod +ompi_f08_mpi_real +ompi_f08_mpi_real16 +ompi_f08_mpi_real2 +ompi_f08_mpi_real4 +ompi_f08_mpi_real8 +ompi_f08_mpi_replace +ompi_f08_mpi_request_null +ompi_f08_mpi_sum +ompi_f08_mpi_ub +ompi_f08_mpi_win_null diff --git a/ompi/mpi/fortran/mpif-h/get_address_f.c b/ompi/mpi/fortran/mpif-h/get_address_f.c index bb37b43094a..4c19d61dfdc 100644 --- a/ompi/mpi/fortran/mpif-h/get_address_f.c +++ b/ompi/mpi/fortran/mpif-h/get_address_f.c @@ -22,6 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -71,7 +72,7 @@ void ompi_get_address_f(char *location, MPI_Aint *address, MPI_Fint *ierr) int c_ierr; MPI_Aint c_address; - c_ierr = 
PMPI_Get_address(location, &c_address); + c_ierr = PMPI_Get_address(OMPI_F2C_BOTTOM(location), &c_address); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/use-mpi-f08/Makefile.am b/ompi/mpi/fortran/use-mpi-f08/Makefile.am index b5ec7695b6f..78137e38653 100644 --- a/ompi/mpi/fortran/use-mpi-f08/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08/Makefile.am @@ -7,7 +7,7 @@ # Copyright (c) 2012-2013 Inria. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2015 Research Organization for Information Science +# Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # # $COPYRIGHT$ @@ -43,8 +43,6 @@ noinst_LTLIBRARIES = $(module_sentinel_file) mpi-f08.lo: $(module_sentinel_file) mpi-f08.lo: mpi-f08.F90 mpi-f08.lo: mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h -mpi-f08.lo: attr-fn-f08-callback-interfaces.h -mpi-f08.lo: conversion-fn-null-f08-interface.h mpi-f08.lo: sizeof_f08.h # @@ -800,13 +798,8 @@ libmpi_usempif08_la_SOURCES = \ $(pmpi_api_files) \ mpi-f-interfaces-bind.h \ pmpi-f-interfaces-bind.h \ - attr-fn-f08-callback-interfaces.h \ - conversion-fn-null-f08-interface.h \ mpi-f08.F90 \ - mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h \ - attr-fn-f08-callback-interfaces.h \ buffer_detach.c \ - conversion-fn-null-f08-interface.h \ constants.h \ constants.c @@ -846,8 +839,6 @@ $(pmpi_api_lo_files): mpi-f08.lo mpi-f08.lo: $(module_sentinel_file) $(SIZEOF_H) mpi-f08.lo: mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h -mpi-f08.lo: attr-fn-f08-callback-interfaces.h -mpi-f08.lo: conversion-fn-null-f08-interface.h ########################################################################### @@ -857,6 +848,7 @@ libforce_usempif08_internal_modules_to_be_built_la_SOURCES = \ mpi-f08-types.F90 \ mpi-f08-interfaces.F90 \ mpi-f08-interfaces-callbacks.F90 \ + mpi-f08-callbacks.F90 \ 
pmpi-f08-interfaces.F90 config_h = \ @@ -876,6 +868,9 @@ mpi-f08-interfaces.lo: mpi-f08-interfaces-callbacks.lo mpi-f08-interfaces-callbacks.lo: $(config_h) mpi-f08-interfaces-callbacks.lo: mpi-f08-interfaces-callbacks.F90 mpi-f08-interfaces-callbacks.lo: mpi-f08-types.lo +mpi-f08-callbacks.lo: $(config_h) +mpi-f08-callbacks.lo: mpi-f08-callbacks.F90 +mpi-f08-callbacks.lo: mpi-f08-types.lo pmpi-f08-interfaces.lo: $(config_h) pmpi-f08-interfaces.lo: pmpi-f08-interfaces.F90 pmpi-f08-interfaces.lo: mpi-f08-interfaces-callbacks.lo diff --git a/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h b/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h deleted file mode 100644 index c7502ac7cec..00000000000 --- a/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h +++ /dev/null @@ -1,152 +0,0 @@ -! -*- f90 -*- -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2013 Los Alamos National Security, LLC. All rights -! reserved. -! Copyright (c) 2015 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! -! F08 handle (e.g., Type(MPI_Comm)) pre-defined attribute callback -! function interfaces -! - -interface - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- - subroutine MPI_NULL_COPY_FN( comm, comm_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval, extra_state - integer :: attribute_val_in, attribute_val_out, ierr - logical :: flag - end subroutine MPI_NULL_COPY_FN - - subroutine MPI_NULL_DELETE_FN( comm, comm_keyval, attribute_val_out, & - extra_state, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval, attribute_val_out, extra_state, ierr - end subroutine MPI_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - subroutine MPI_COMM_NULL_COPY_FN( comm, comm_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_comm_null_copy_fn_f") - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_COMM_NULL_COPY_FN - - subroutine MPI_COMM_DUP_FN( comm, comm_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_comm_dup_fn_f") - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_COMM_DUP_FN - - subroutine MPI_COMM_NULL_DELETE_FN(comm, comm_keyval, attribute_val_out, & - extra_state, ierr ) BIND(C,name="ompi_comm_null_delete_fn_f") - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_COMM_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- - subroutine MPI_TYPE_NULL_COPY_FN( type, type_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_type_null_copy_fn_f") - use mpi_f08_types - implicit none - type(MPI_Datatype) :: type - integer :: type_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_TYPE_NULL_COPY_FN - - subroutine MPI_TYPE_DUP_FN( type, type_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_type_dup_fn_f") - use mpi_f08_types - implicit none - type(MPI_Datatype) :: type - integer :: type_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_TYPE_DUP_FN - - subroutine MPI_TYPE_NULL_DELETE_FN( type, type_keyval, attribute_val_out, & - extra_state, ierr ) BIND(C,name="ompi_type_null_delete_fn_f") - use mpi_f08_types - implicit none - type(MPI_Datatype) :: type - integer :: type_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_TYPE_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- - subroutine MPI_WIN_NULL_COPY_FN( window, win_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_win_null_copy_fn_f") - use mpi_f08_types - implicit none - type(MPI_Win) :: window - integer :: win_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_WIN_NULL_COPY_FN - - subroutine MPI_WIN_DUP_FN( window, win_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) BIND(C,name="ompi_win_dup_fn_f") - use mpi_f08_types - implicit none - type(MPI_Win) :: window - integer :: win_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_WIN_DUP_FN - - subroutine MPI_WIN_NULL_DELETE_FN( window, win_keyval, attribute_val_out, & - extra_state, ierr ) BIND(C,name="ompi_win_null_delete_fn_f") - use mpi_f08_types - implicit none - type(MPI_Win) :: window - integer :: win_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_WIN_NULL_DELETE_FN - -end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h b/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h deleted file mode 100644 index c653de6f49b..00000000000 --- a/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h +++ /dev/null @@ -1,35 +0,0 @@ -! -*- f90 -*- -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! Note about these declarations: these are "external" functions in -! mpif-common.h. However, if we don't declare them here, compilers will add -! them to the "mpi" module namespace, and result in linker errors if MPI -! F90 applications try to use them. because the implementations of -! these functions are not in the MPI module namespace -- they're the F77 -! 
functions. - -! -! F08 handle pre-defined conversion callback function interface -! - -interface - - subroutine MPI_CONVERSION_FN_NULL(userbuf, datatype, count, filebuf, & - position, extra_state, ierror) - use mpi_f08_types - implicit none - character(len=*), intent(in) :: filebuf - character(len=*), intent(out) :: userbuf - type(MPI_Datatype) :: datatype - integer, intent(in) :: count, ierror - integer(kind=MPI_OFFSET_KIND), intent(in) :: position - integer(kind=MPI_ADDRESS_KIND), intent(in) :: extra_state - end subroutine MPI_CONVERSION_FN_NULL - -end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 new file mode 100644 index 00000000000..d992702ef2e --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 @@ -0,0 +1,142 @@ +! -*- f90 -*- +! Copyright (c) 2016 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +module mpi_f08_callbacks + +! MPI3.1, p270, 5-19 + +contains + +subroutine MPI_COMM_DUP_FN(oldcomm,comm_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: oldcomm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. + attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_COMM_NULL_COPY_FN(oldcomm,comm_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: oldcomm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. 
+ ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_COMM_NULL_DELETE_FN(comm,comm_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: comm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_DUP_FN(oldtype,type_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: oldtype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. + attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_NULL_COPY_FN(oldtype,type_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: oldtype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_NULL_DELETE_FN(datatype,type_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: datatype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_DUP_FN(oldwin,win_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: oldwin + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. 
+ attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_NULL_COPY_FN(oldwin,win_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: oldwin + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_NULL_DELETE_FN(win,win_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: win + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_CONVERSION_FN_NULL(userbuf, datatype, count, & + filebuf, position, extra_state, ierror) + use, intrinsic :: iso_c_binding, only : c_ptr + use mpi_f08_types + implicit none + type(c_ptr), value :: userbuf, filebuf + type(MPI_Datatype) :: datatype + integer :: count, ierror + integer(kind=MPI_OFFSET_KIND) :: position + integer(kind=MPI_ADDRESS_KIND) :: extra_state + + ! Do nothing +end subroutine + +end module mpi_f08_callbacks diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 index 1665b7339d0..47801afefe3 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 @@ -2,7 +2,7 @@ ! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2015 Research Organization for Information Science +! Copyright (c) 2015-2016 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ @@ -66,7 +66,7 @@ SUBROUTINE MPI_User_function(invec, inoutvec, len, datatype) OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Comm_copy_attr_function(oldcomm,comm_keyval,extra_state, & - attribute_val_in,attribute_val_out,flag,ierror) BIND(C) + attribute_val_in,attribute_val_out,flag,ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Comm) :: oldcomm @@ -78,7 +78,7 @@ SUBROUTINE MPI_Comm_copy_attr_function(oldcomm,comm_keyval,extra_state, & OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Comm_delete_attr_function(comm,comm_keyval, & - attribute_val, extra_state, ierror) BIND(C) + attribute_val, extra_state, ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Comm) :: comm @@ -89,7 +89,7 @@ SUBROUTINE MPI_Comm_delete_attr_function(comm,comm_keyval, & OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Win_copy_attr_function(oldwin,win_keyval,extra_state, & - attribute_val_in,attribute_val_out,flag,ierror) BIND(C) + attribute_val_in,attribute_val_out,flag,ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Win) :: oldwin @@ -101,7 +101,7 @@ SUBROUTINE MPI_Win_copy_attr_function(oldwin,win_keyval,extra_state, & OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Win_delete_attr_function(win,win_keyval,attribute_val, & - extra_state,ierror) BIND(C) + extra_state,ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Win) :: win @@ -112,7 +112,7 @@ SUBROUTINE MPI_Win_delete_attr_function(win,win_keyval,attribute_val, & OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Type_copy_attr_function(oldtype,type_keyval,extra_state, & - attribute_val_in,attribute_val_out,flag,ierror) BIND(C) + attribute_val_in,attribute_val_out,flag,ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Datatype) :: oldtype @@ -124,7 +124,7 @@ SUBROUTINE MPI_Type_copy_attr_function(oldtype,type_keyval,extra_state, & OMPI_ABSTRACT INTERFACE SUBROUTINE MPI_Type_delete_attr_function(datatype,type_keyval, & - attribute_val,extra_state,ierror) BIND(C) + attribute_val,extra_state,ierror) USE mpi_f08_types IMPLICIT NONE TYPE(MPI_Datatype) :: datatype diff --git 
a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 index a7afe220167..43b6cb09109 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 @@ -13,6 +13,8 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2016 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -27,6 +29,7 @@ module mpi_f08 use mpi_f08_types use mpi_f08_interfaces ! this module contains the mpi_f08 interface declarations use pmpi_f08_interfaces ! this module contains the pmpi_f08 interface declarations + use mpi_f08_callbacks ! this module contains the mpi_f08 attribute callback subroutines ! ! Declaration of the interfaces to the ompi impl files @@ -35,14 +38,6 @@ module mpi_f08 #include "mpi-f-interfaces-bind.h" #include "pmpi-f-interfaces-bind.h" -! The MPI attribute callback functions - - include "attr-fn-f08-callback-interfaces.h" - -! The MPI_CONVERSION_FN_NULL function - - include "conversion-fn-null-f08-interface.h" - ! 
The sizeof interfaces include "sizeof_f08.h" diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in index c9fe16e5bfa..a636245f16a 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in @@ -3636,12 +3636,12 @@ subroutine MPI_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recv @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: request @@ -3658,12 +3658,12 @@ subroutine PMPI_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, rec @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: request @@ -4698,12 +4698,12 @@ subroutine 
MPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvb @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: ierror @@ -4719,12 +4719,12 @@ subroutine PMPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recv @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h index 64218aa21a7..a57a3b47e0e 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h @@ -3379,7 +3379,7 @@ subroutine MPI_Dist_graph_create(comm_old, n, sources, degrees, destinations, & integer, dimension(n), intent(in) :: degrees integer, dimension(n), intent(in) :: destinations 
integer, dimension(n), intent(in) :: weights - logical, intent(in) :: info + integer, intent(in) :: info logical, intent(in) :: reorder integer, intent(out) :: comm_dist_graph integer, intent(out) :: ierror @@ -3400,7 +3400,7 @@ subroutine MPI_Dist_graph_create_adjacent(comm_old, indegree, sources, sourcewei integer, intent(in) :: outdegree integer, dimension(outdegree), intent(in) :: destinations integer, dimension(outdegree), intent(in) :: destweights - logical, intent(in) :: info + integer, intent(in) :: info logical, intent(in) :: reorder integer, intent(out) :: comm_dist_graph integer, intent(out) :: ierror diff --git a/ompi/mpi/java/c/Makefile.am b/ompi/mpi/java/c/Makefile.am index 95615ea41f8..a0a5c6ac980 100644 --- a/ompi/mpi/java/c/Makefile.am +++ b/ompi/mpi/java/c/Makefile.am @@ -1,9 +1,11 @@ # -*- makefile -*- # # Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,7 +46,7 @@ libmpi_java_la_SOURCES = \ mpi_Status.c \ mpi_Win.c -libmpi_java_la_LIBADD = $(top_builddir)/ompi/libmpi.la +libmpi_java_la_LIBADD = -ldl $(top_builddir)/ompi/libmpi.la libmpi_java_la_LDFLAGS = -version-info $(libmpi_java_so_version) endif diff --git a/ompi/mpi/java/c/mpi_MPI.c b/ompi/mpi/java/c/mpi_MPI.c index 5b3a39e1368..40bb28b5563 100644 --- a/ompi/mpi/java/c/mpi_MPI.c +++ b/ompi/mpi/java/c/mpi_MPI.c @@ -14,6 +14,8 @@ * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -62,8 +64,13 @@ #ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif +#ifdef HAVE_DLFCN_H #include <dlfcn.h> +#endif #include +#ifdef HAVE_LIBGEN_H +#include <libgen.h> +#endif #include "opal/util/output.h" #include "opal/datatype/opal_convertor.h" @@ -126,7 +133,27 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) { libmpi = dlopen("libmpi." OPAL_DYN_LIB_SUFFIX, RTLD_NOW | RTLD_GLOBAL); - if(libmpi == NULL) +#if defined(HAVE_DL_INFO) && defined(HAVE_LIBGEN_H) + /* + * OS X El Capitan does not propagate DYLD_LIBRARY_PATH to children any more + * so if previous dlopen failed, try to open libmpi in the same directory + * as the current libmpi_java + */ + if(NULL == libmpi) { + Dl_info info; + if(0 != dladdr((void *)JNI_OnLoad, &info)) { + char libmpipath[OPAL_PATH_MAX]; + char *libmpijavapath = strdup(info.dli_fname); + if (NULL != libmpijavapath) { + snprintf(libmpipath, OPAL_PATH_MAX-1, "%s/libmpi." OPAL_DYN_LIB_SUFFIX, dirname(libmpijavapath)); + free(libmpijavapath); + libmpi = dlopen(libmpipath, RTLD_NOW | RTLD_GLOBAL); + } + } + } +#endif + + if(NULL == libmpi) { fprintf(stderr, "Java bindings failed to load libmpi: %s\n",dlerror()); exit(1); diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in index 202c49a49dc..ae211b84adb 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in @@ -15,15 +15,15 @@ .nf #include int MPI_Neighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\f[]P, MPI_Datatype \fIsendtype\fP, + const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, const int\fI recvcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP) int MPI_Ineighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\f[]P, MPI_Datatype \fIsendtype\fP, + const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, void
*\fIrecvbuf\fP, const int\fI recvcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, - MPI_Request \fI*request\fP) + MPI_Request \fI*request\fP) .fi .SH Fortran Syntax diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in index 5dc22abe036..e71731b9951 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in @@ -16,14 +16,14 @@ .nf #include int MPI_Neighbor_alltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], - void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP) + const MPI_Aint \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP) int MPI_Ineighbor_alltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], - void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) + const MPI_Aint \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax @@ -34,16 +34,18 @@ MPI_NEIGHBOR_ALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) 
\fISDISPLS(*), RDISPLS(*)\fP INTEGER \fICOMM, IERROR\fP MPI_INEIGHBOR_ALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP INTEGER \fICOMM, REQUEST, IERROR\fP .fi diff --git a/ompi/mpi/man/man3/MPI_Testsome.3in b/ompi/mpi/man/man3/MPI_Testsome.3in index 92c8d36ddaf..2e1a6c67211 100644 --- a/ompi/mpi/man/man3/MPI_Testsome.3in +++ b/ompi/mpi/man/man3/MPI_Testsome.3in @@ -1,7 +1,7 @@ .\" -*- nroff -*- .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation -.\" Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. .\" $COPYRIGHT$ .TH MPI_Testsome 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -13,7 +13,8 @@ .nf #include int MPI_Testsome(int \fIincount\fP, MPI_Request \fIarray_of_requests[]\fP, - int\fI *outcount\fP, int\fI array_of_indices[]\fP, MPI_Status\fI array_of_statuses[]\fP) + int\fI *outcount\fP, int\fI array_of_indices[]\fP, + MPI_Status\fI array_of_statuses[]\fP) .fi .SH Fortran Syntax @@ -43,7 +44,8 @@ MPI_Testsome(\fIincount\fP, \fIarray_of_requests\fP, \fIoutcount\fP, \fIarray_of .nf #include static int Request::Testsome(int \fIincount\fP, Request - \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], Status \fIarray_of_statuses\fP[]) + \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], + Status \fIarray_of_statuses\fP[]) static int Request::Testsome(int \fIincount\fP, Request \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[]) @@ -76,7 +78,20 @@ Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -Behaves like MPI_Waitsome, except that it returns immediately. If no operation has completed it returns outcount = 0. If there is no active handle in the list, it returns outcount = MPI_UNDEFINED. +Behaves like MPI_Waitsome, except that it returns immediately. +.sp +Returns in outcount the number of requests from the list +array_of_requests that have completed. Returns in the first outcount +locations of the array array_of_indices the indices of these +operations (index within the array array_of_requests; the array is +indexed from 0 in C and from 1 in Fortran). Returns in the first +outcount locations of the array array_of_statuses the status for these +completed operations. If a request that completed was allocated by a +nonblocking communication call, then it is deallocated, and the +associated handle is set to MPI_REQUEST_NULL. +.sp +If no operation has completed it returns outcount = 0. If there is no +active handle in the list, it returns outcount = MPI_UNDEFINED. .sp MPI_Testsome is a local operation, which returns immediately, whereas MPI_Waitsome blocks until a communication completes, if it was passed a list that contains at least one active handle. Both calls fulfill a fairness requirement: If a request for a receive repeatedly appears in a list of requests passed to MPI_Waitsome or MPI_Testsome, and a matching send has been posted, then the receive will eventually succeed unless the send is satisfied by another receive; send requests also fulfill this fairness requirement. 
.sp diff --git a/ompi/mpi/tool/cvar_read.c b/ompi/mpi/tool/cvar_read.c index 2b7e839fd51..843ca493f82 100644 --- a/ompi/mpi/tool/cvar_read.c +++ b/ompi/mpi/tool/cvar_read.c @@ -56,7 +56,7 @@ int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf) ((unsigned long long *) buf)[0] = value->ullval; break; case MCA_BASE_VAR_TYPE_SIZE_T: - ((int *) buf)[0] = value->sizetval; + ((size_t *) buf)[0] = value->sizetval; break; case MCA_BASE_VAR_TYPE_BOOL: ((int *) buf)[0] = value->boolval; diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index c00576b2896..56397a28c22 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -14,8 +14,10 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,6 +52,7 @@ ompi_proc_t* ompi_proc_local_proc = NULL; static void ompi_proc_construct(ompi_proc_t* proc); static void ompi_proc_destruct(ompi_proc_t* proc); +static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name); OBJ_CLASS_INSTANCE( ompi_proc_t, @@ -84,10 +87,10 @@ void ompi_proc_destruct(ompi_proc_t* proc) if (NULL != proc->super.proc_hostname) { free(proc->super.proc_hostname); } - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc); opal_hash_table_remove_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name)); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); } /** @@ -126,14 +129,15 @@ static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t * retrieving the hostname (if below the modex cutoff), determining the * remote architecture, and calculating the locality of the process. 
*/ -static int ompi_proc_complete_init_single (ompi_proc_t *proc) +int ompi_proc_complete_init_single (ompi_proc_t *proc) { uint16_t u16, *u16ptr; int ret; u16ptr = &u16; - if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid) { + if ((OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid == OMPI_PROC_MY_NAME->jobid) && + (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid)) { /* nothing else to do */ return OMPI_SUCCESS; } @@ -198,6 +202,33 @@ opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name) return NULL; } +static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name) +{ + ompi_proc_t *proc = NULL; + int ret; + + /* double-check that another competing thread has not added this proc */ + ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc); + if (OPAL_SUCCESS == ret) { + goto exit; + } + + /* allocate a new ompi_proc_t object for the process and insert it into the process table */ + ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + /* allocation fail */ + goto exit; + } + + /* finish filling in the important proc data fields */ + ret = ompi_proc_complete_init_single (proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + goto exit; + } +exit: + return proc; +} + opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name) { ompi_proc_t *proc = NULL; @@ -209,29 +240,9 @@ opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name) return &proc->super; } - OPAL_THREAD_LOCK(&ompi_proc_lock); - do { - /* double-check that another competing thread has not added this proc */ - ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc); - if (OPAL_SUCCESS == ret) { - break; - } - - /* allocate a new ompi_proc_t object for the process and insert it into the process table */ - ret = ompi_proc_allocate 
(proc_name.jobid, proc_name.vpid, &proc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* allocation fail */ - break; - } - - /* finish filling in the important proc data fields */ - ret = ompi_proc_complete_init_single (proc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - proc = NULL; - break; - } - } while (0); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); + proc = ompi_proc_for_name_nolock (proc_name); + opal_mutex_unlock (&ompi_proc_lock); return (opal_proc_t *) proc; } @@ -319,7 +330,7 @@ int ompi_proc_complete_init(void) ompi_proc_t *proc; int ret, errcode = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { ret = ompi_proc_complete_init_single (proc); @@ -328,7 +339,7 @@ int ompi_proc_complete_init(void) break; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) { uint16_t u16, *u16ptr; @@ -419,7 +430,7 @@ ompi_proc_t **ompi_proc_get_allocated (size_t *size) my_name = *OMPI_CAST_RTE_NAME(&ompi_proc_local_proc->super.proc_name); /* First count how many match this jobid */ - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) { ++count; @@ -429,7 +440,7 @@ ompi_proc_t **ompi_proc_get_allocated (size_t *size) /* allocate an array */ procs = (ompi_proc_t**) malloc(count * sizeof(ompi_proc_t*)); if (NULL == procs) { - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return NULL; } @@ -454,7 +465,7 @@ ompi_proc_t **ompi_proc_get_allocated (size_t *size) procs[count++] = proc; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); *size = count; return procs; @@ -518,7 +529,7 @@ ompi_proc_t** ompi_proc_all(size_t* size) 
return NULL; } - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { /* We know this isn't consistent with the behavior in ompi_proc_world, * but we are leaving the RETAIN for now because the code using this function @@ -529,7 +540,7 @@ ompi_proc_t** ompi_proc_all(size_t* size) OBJ_RETAIN(proc); procs[count++] = proc; } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); *size = count; return procs; } @@ -560,14 +571,14 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name ) /* return the proc-struct which matches this jobid+process id */ mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) { rproc = proc; break; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rproc; } @@ -579,7 +590,7 @@ int ompi_proc_refresh(void) ompi_vpid_t i = 0; int ret=OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { /* Does not change: proc->super.proc_name.vpid */ @@ -602,7 +613,7 @@ int ompi_proc_refresh(void) } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return ret; } @@ -612,8 +623,9 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf) { int rc; + char *nspace; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); /* cycle through the provided array, packing the OMPI level * data for each proc. This data may or may not be included @@ -628,26 +640,44 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, * can be sent. 
*/ for (int i = 0 ; i < proclistsize ; ++i) { - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME); + ompi_proc_t *proc = proclist[i]; + + if (ompi_proc_is_sentinel (proc)) { + proc = ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((uintptr_t) proc)); + } + + /* send proc name */ + rc = opal_dss.pack(buf, &(proc->super.proc_name), 1, OMPI_NAME); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + opal_mutex_unlock (&ompi_proc_lock); + return rc; + } + /* retrieve and send the corresponding nspace for this job + * as the remote side may not know the translation */ + nspace = (char*)opal_pmix.get_nspace(proc->super.proc_name.jobid); + rc = opal_dss.pack(buf, &nspace, 1, OPAL_STRING); if(rc != OPAL_SUCCESS) { OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rc; } - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_arch), 1, OPAL_UINT32); + /* pack architecture flag */ + rc = opal_dss.pack(buf, &(proc->super.proc_arch), 1, OPAL_UINT32); if(rc != OPAL_SUCCESS) { OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rc; } - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_hostname), 1, OPAL_STRING); + /* pass the name of the host this proc is on */ + rc = opal_dss.pack(buf, &(proc->super.proc_hostname), 1, OPAL_STRING); if(rc != OPAL_SUCCESS) { OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rc; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return OMPI_SUCCESS; } @@ -659,7 +689,7 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) /* return the proc-struct which matches this jobid+process id */ mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, 
&proc->super.proc_name, name)) { rproc = proc; @@ -674,15 +704,10 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) if (NULL == rproc) { *isnew = true; rproc = OBJ_NEW(ompi_proc_t); - if (NULL != rproc) { - opal_list_append(&ompi_proc_list, (opal_list_item_t*)rproc); - *OMPI_CAST_RTE_NAME(&rproc->super.proc_name) = *name; - } - /* caller had better fill in the rest of the proc, or there's - going to be pain later... */ + ompi_proc_allocate (name->jobid, name->vpid, &rproc); } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rproc; } @@ -719,6 +744,7 @@ ompi_proc_unpack(opal_buffer_t* buf, char *new_hostname; bool isnew = false; int rc; + char *nspace; rc = opal_dss.unpack(buf, &new_name, &count, OMPI_NAME); if (rc != OPAL_SUCCESS) { @@ -727,6 +753,15 @@ ompi_proc_unpack(opal_buffer_t* buf, free(newprocs); return rc; } + rc = opal_dss.unpack(buf, &nspace, &count, OPAL_STRING); + if (rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + free(plist); + free(newprocs); + return rc; + } + opal_pmix.register_jobid(new_name.jobid, nspace); + free(nspace); rc = opal_dss.unpack(buf, &new_arch, &count, OPAL_UINT32); if (rc != OPAL_SUCCESS) { OMPI_ERROR_LOG(rc); diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index 0117b3f0025..5e44ba75273 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -13,6 +13,8 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -120,6 +122,18 @@ OMPI_DECLSPEC int ompi_proc_init(void); */ OMPI_DECLSPEC int ompi_proc_complete_init(void); +/** + * Complete filling up the proc information (arch, name and locality) for + * a given proc. This function is to be called only after the modex exchange + * has been completed. 
+ * + * @param[in] proc the proc whose information will be filled up + * + * @retval OMPI_SUCCESS All information correctly set. + * @retval OMPI_ERROR Some info could not be initialized. + */ +OMPI_DECLSPEC int ompi_proc_complete_init_single(ompi_proc_t* proc); + /** * Finalize the OMPI Process subsystem * @@ -367,16 +381,66 @@ static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc) return (intptr_t) proc & 0x1; } -static inline intptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) +#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P +/* + * we assume an ompi_proc_t is at least aligned on two bytes, + * so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle + * this pointer as a sentinel instead of a pointer. + * a sentinel can be seen as an uint64_t with the following format : + * - bit 0 : 1 + * - bits 1-15 : local jobid + * - bits 16-31 : job family + * - bits 32-63 : vpid + */ +static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) +{ + uintptr_t tmp, sentinel = 0; + /* local jobid must fit in 15 bits */ + assert(! 
(OMPI_LOCAL_JOBID(name.jobid) & 0x8000)); + sentinel |= 0x1; + tmp = (uintptr_t)OMPI_LOCAL_JOBID(name.jobid); + sentinel |= ((tmp << 1) & 0xfffe); + tmp = (uintptr_t)OMPI_JOB_FAMILY(name.jobid); + sentinel |= ((tmp << 16) & 0xffff0000); + tmp = (uintptr_t)name.vpid; + sentinel |= ((tmp << 32) & 0xffffffff00000000); + return sentinel; +} + +static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) +{ + opal_process_name_t name; + uint32_t local, family; + uint32_t vpid; + assert(sentinel & 0x1); + local = (sentinel >> 1) & 0x7fff; + family = (sentinel >> 16) & 0xffff; + vpid = (sentinel >> 32) & 0xffffffff; + name.jobid = OMPI_CONSTRUCT_JOBID(family,local); + name.vpid = vpid; + return name; +} +#elif 4 == SIZEOF_VOID_P +/* + * currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid + * so we only store the first 31 bits of the vpid + */ +static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) { - return (*((intptr_t *) &name) << 1) | 0x1; + assert(OMPI_PROC_MY_NAME->jobid == name.jobid); + return (uintptr_t)((name.vpid <<1) | 0x1); } -static inline opal_process_name_t ompi_proc_sentinel_to_name (intptr_t sentinel) +static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) { - sentinel >>= 1; - return *((opal_process_name_t *) &sentinel); + opal_process_name_t name; + name.jobid = OMPI_PROC_MY_NAME->jobid; + name.vpid = sentinel >> 1; + return name; } +#else +#error unsupported pointer size +#endif END_C_DECLS diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 7638a01a9ea..0b768907bc3 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -16,6 +16,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +42,7 @@ #include #include "opal/mca/backtrace/backtrace.h" +#include "opal/runtime/opal_params.h" #include "ompi/communicator/communicator.h" #include "ompi/runtime/mpiruntime.h" @@ -137,11 +140,11 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* Should we print a stack trace? Not aggregated because they might be different on all processes. */ - if (ompi_mpi_abort_print_stack) { + if (opal_abort_print_stack) { char **messages; int len, i; - if (OMPI_SUCCESS == opal_backtrace_buffer(&messages, &len)) { + if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) { for (i = 0; i < len; ++i) { fprintf(stderr, "[%s:%d] [%d] func:%s\n", host, (int) pid, i, messages[i]); @@ -161,7 +164,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, if (errcode < 0 || asprintf(&msg, "[%s:%d] aborting with MPI error %s%s", host, (int) pid, ompi_mpi_errnum_get_string(errcode), - ompi_mpi_abort_print_stack ? + opal_abort_print_stack ? " (stack trace available on stderr)" : "") < 0) { msg = NULL; } @@ -172,9 +175,9 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* Should we wait for a while before aborting? 
*/ - if (0 != ompi_mpi_abort_delay) { - if (ompi_mpi_abort_delay < 0) { - fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter mpi_abort_delay is < 0)\n", + if (0 != opal_abort_delay) { + if (opal_abort_delay < 0) { + fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter opal_abort_delay is < 0)\n", host, (int) pid); fflush(stderr); while (1) { @@ -182,10 +185,10 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, } } else { fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n", - host, (int) pid, ompi_mpi_abort_delay); + host, (int) pid, opal_abort_delay); do { sleep(1); - } while (--ompi_mpi_abort_delay > 0); + } while (--opal_abort_delay > 0); } } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 4c0391d970e..7b4444576cf 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -18,7 +18,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ @@ -98,7 +98,7 @@ #endif #include "ompi/runtime/ompi_cr.h" -#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 +#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 && MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT #include "opal/mca/memory/linux/memory_linux.h" /* So this sucks, but with OPAL in its own library that is brought in implicity from libmpi, there are times when the malloc initialize @@ -106,7 +106,7 @@ from here, since any MPI code is going to call MPI_Init... 
*/ OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = opal_memory_linux_malloc_init_hook; -#endif +#endif /* defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 && MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT */ /* This is required for the boundaries of the hash tables used to store * the F90 types returned by the MPI_Type_create_f90_XXX functions. @@ -378,6 +378,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) size_t nprocs; char *error = NULL; char *cmd=NULL, *av=NULL; + ompi_errhandler_errtrk_t errtrk; OPAL_TIMING_DECLARE(tm); OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); @@ -504,11 +505,18 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } } - /* Register the default errhandler callback - RTE will ignore if it - * doesn't support this capability - */ - ompi_rte_register_errhandler(ompi_errhandler_runtime_callback, - OMPI_RTE_ERRHANDLER_LAST); + /* Register the default errhandler callback */ + errtrk.status = OPAL_ERROR; + errtrk.active = true; + opal_pmix.register_errhandler(NULL, ompi_errhandler_callback, + ompi_errhandler_registration_callback, + (void*)&errtrk); + OMPI_WAIT_FOR_COMPLETION(errtrk.active); + if (OPAL_SUCCESS != errtrk.status) { + error = "Error handler registration"; + ret = errtrk.status; + goto error; + } /* Figure out the final MPI thread levels. If we were not compiled for support for MPI threads, then don't allow @@ -639,10 +647,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs - * in our job, so no proc array is passed. If a barrier is required, - * the "modex" function will perform it internally - */ - OPAL_MODEX(NULL, 1); + * in our job. 
If a barrier is required, the "modex" function will + * perform it internally */ + OPAL_MODEX(); OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 68bbd94709f..5997231d189 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -10,11 +10,13 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -53,8 +55,6 @@ int ompi_debug_show_mpi_alloc_mem_leaks = 0; bool ompi_debug_no_free_handles = false; bool ompi_mpi_show_mca_params = false; char *ompi_mpi_show_mca_params_file = NULL; -bool ompi_mpi_abort_print_stack = false; -int ompi_mpi_abort_delay = 0; bool ompi_mpi_keep_fqdn_hostnames = false; bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE); bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE); @@ -64,7 +64,9 @@ int ompi_mpi_event_tick_rate = -1; char *ompi_mpi_show_mca_params_string = NULL; bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE); bool ompi_mpi_preconnect_mpi = false; -uint32_t ompi_add_procs_cutoff = 1024; + +#define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0 +uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; bool ompi_mpi_dynamics_enabled = true; static bool show_default_mca_params = false; @@ -206,33 +208,6 @@ int ompi_mpi_register_params(void) /* User-level process pinning controls */ - /* MPI_ABORT controls */ - 
ompi_mpi_abort_delay = 0; - (void) mca_base_var_register("ompi", "mpi", NULL, "abort_delay", - "If nonzero, print out an identifying message when MPI_ABORT is invoked (hostname, PID of the process that called MPI_ABORT) and delay for that many seconds before exiting (a negative delay value means to never abort). This allows attaching of a debugger before quitting the job.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_mpi_abort_delay); - - ompi_mpi_abort_print_stack = false; - (void) mca_base_var_register("ompi", "mpi", NULL, "abort_print_stack", - "If nonzero, print out a stack trace when MPI_ABORT is invoked", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - /* If we do not have stack trace - capability, make this a constant - MCA variable */ -#if OPAL_WANT_PRETTY_PRINT_STACKTRACE - 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, -#else - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_CONSTANT, -#endif - &ompi_mpi_abort_print_stack); - ompi_mpi_preconnect_mpi = false; value = mca_base_var_register("ompi", "mpi", NULL, "preconnect_mpi", "Whether to force MPI processes to fully " @@ -290,12 +265,12 @@ int ompi_mpi_register_params(void) ompi_rte_abort(1, NULL); } - ompi_add_procs_cutoff = 1024; + ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; (void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff", "Maximum world size for pre-allocating resources for all " "remote processes. 
Increasing this limit may improve " - "communication performance at the cost of memory usage " - "(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, + "communication performance at the cost of memory usage", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_add_procs_cutoff); @@ -307,6 +282,18 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_dynamics_enabled); + value = mca_base_var_find ("opal", "opal", NULL, "abort_delay"); + if (0 <= value) { + (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } + + value = mca_base_var_find ("opal", "opal", NULL, "abort_print_stack"); + if (0 <= value) { + (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_print_stack", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } + return OMPI_SUCCESS; } diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 9db001ef6f5..1b4a5aeac73 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -124,11 +124,6 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage; */ OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage; -/** - * Cutoff point for retrieving hostnames - */ -OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff; - /** * Cutoff point for calling add_procs for all processes */ diff --git a/ompi/win/win.c b/ompi/win/win.c index a4629a3a420..6a371e69735 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -145,7 +145,6 @@ static int alloc_window(struct ompi_communicator_t *comm, ompi_info_t *info, int /* setup data that is independent of osc component */ group = comm->c_local_group; OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); win->w_group = group; *win_out = win; @@ -366,7 +365,6 @@ ompi_win_get_name(ompi_win_t *win, char *win_name, int *length) int ompi_win_group(ompi_win_t *win, ompi_group_t **group) { OBJ_RETAIN(win->w_group); - ompi_group_increment_proc_count(win->w_group); *group 
= win->w_group; return OMPI_SUCCESS; @@ -406,7 +404,6 @@ ompi_win_destruct(ompi_win_t *win) } if (NULL != win->w_group) { - ompi_group_decrement_proc_count(win->w_group); OBJ_RELEASE(win->w_group); } diff --git a/opal/class/opal_free_list.c b/opal/class/opal_free_list.c index ff6a0c3f516..b509fe28407 100644 --- a/opal/class/opal_free_list.c +++ b/opal/class/opal_free_list.c @@ -121,6 +121,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_ return OPAL_ERROR; } + if (frag_class && frag_size < frag_class->cls_sizeof) { + frag_size = frag_class->cls_sizeof; + } + if (frag_size > flist->fl_frag_size) { flist->fl_frag_size = frag_size; } @@ -164,9 +168,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) return OPAL_ERR_TEMP_OUT_OF_RESOURCE; } - head_size = (NULL == flist->fl_mpool) ? flist->fl_frag_size: - flist->fl_frag_class->cls_sizeof; - head_size = OPAL_ALIGN(head_size, flist->fl_frag_alignment, size_t); + head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t); /* calculate head allocation size */ alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) + diff --git a/opal/class/opal_hotel.c b/opal/class/opal_hotel.c index 0fd8f1ea03f..2a02c7e552f 100644 --- a/opal/class/opal_hotel.c +++ b/opal/class/opal_hotel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ @@ -24,12 +24,22 @@ static void local_eviction_callback(int fd, short flags, void *arg) (opal_hotel_room_eviction_callback_arg_t*) arg; void *occupant = eargs->hotel->rooms[eargs->room_num].occupant; - /* Remove the occupant from the room and invoke the user callback - to tell them that they were evicted */ - opal_hotel_checkout(eargs->hotel, eargs->room_num); - eargs->hotel->evict_callback_fn(eargs->hotel, - eargs->room_num, - occupant); + /* Remove the occurpant from the room. + + Do not change this logic without also changing the same logic + in opal_hotel_checkout() and + opal_hotel_checkout_and_return_occupant(). */ + opal_hotel_t *hotel = eargs->hotel; + opal_hotel_room_t *room = &(hotel->rooms[eargs->room_num]); + room->occupant = NULL; + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = eargs->room_num; + + /* Invoke the user callback to tell them that they were evicted */ + hotel->evict_callback_fn(hotel, + eargs->room_num, + occupant); } diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index 23aec8d3ca0..d4b25f24aca 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -146,6 +146,11 @@ OBJ_CLASS_DECLARATION(opal_hotel_t); * will be set - occupants will remain checked into the hotel until * explicitly checked out. * + * Also note: the eviction_callback_fn should absolutely not call any + * of the hotel checkout functions. Specifically: the occupant has + * already been ("forcibly") checked out *before* the + * eviction_callback_fn is invoked. 
+ * * @return OPAL_SUCCESS if all initializations were succesful. Otherwise, * the error indicate what went wrong in the function. */ @@ -244,6 +249,9 @@ static inline void opal_hotel_checkout(opal_hotel_t *hotel, int room_num) /* If there's an occupant in the room, check them out */ room = &(hotel->rooms[room_num]); if (OPAL_LIKELY(NULL != room->occupant)) { + /* Do not change this logic without also changing the same + logic in opal_hotel_checkout_and_return_occupant() and + opal_hotel.c:local_eviction_callback(). */ room->occupant = NULL; if (NULL != hotel->evbase) { opal_event_del(&(room->eviction_timer_event)); @@ -280,6 +288,9 @@ static inline void opal_hotel_checkout_and_return_occupant(opal_hotel_t *hotel, room = &(hotel->rooms[room_num]); if (OPAL_LIKELY(NULL != room->occupant)) { opal_output (10, "checking out occupant %p from room num %d", room->occupant, room_num); + /* Do not change this logic without also changing the same + logic in opal_hotel_checkout() and + opal_hotel.c:local_eviction_callback(). */ *occupant = room->occupant; room->occupant = NULL; if (NULL != hotel->evbase) { diff --git a/opal/class/opal_list.c b/opal/class/opal_list.c index ed20659a089..e0a5112c38a 100644 --- a/opal/class/opal_list.c +++ b/opal/class/opal_list.c @@ -97,17 +97,11 @@ static void opal_list_construct(opal_list_t *list) /* - * Release the list items in the list. - * Reset list pointers to be NULL + * Reset all the pointers to be NULL -- do not actually destroy + * anything. 
*/ static void opal_list_destruct(opal_list_t *list) { - opal_list_item_t *it; - - while (NULL != (it = opal_list_remove_first(list))) { - OBJ_RELEASE(it); - } - opal_list_construct(list); } diff --git a/opal/class/opal_list.h b/opal/class/opal_list.h index dd21763dbb8..bb80afaf491 100644 --- a/opal/class/opal_list.h +++ b/opal/class/opal_list.h @@ -168,22 +168,26 @@ typedef struct opal_list_t opal_list_t; * * @param[in] list List to destruct or release */ -#define OPAL_LIST_DESTRUCT(list) \ - do { \ - opal_list_item_t *it; \ - while (NULL != (it = opal_list_remove_first(list))) { \ - OBJ_RELEASE(it); \ - } \ - OBJ_DESTRUCT(list); \ +#define OPAL_LIST_DESTRUCT(list) \ + do { \ + opal_list_item_t *it; \ + if (1 == ((opal_object_t*)(list))->obj_reference_count) { \ + while (NULL != (it = opal_list_remove_first(list))) { \ + OBJ_RELEASE(it); \ + } \ + } \ + OBJ_DESTRUCT(list); \ } while(0); -#define OPAL_LIST_RELEASE(list) \ - do { \ - opal_list_item_t *it; \ - while (NULL != (it = opal_list_remove_first(list))) { \ - OBJ_RELEASE(it); \ - } \ - OBJ_RELEASE(list); \ +#define OPAL_LIST_RELEASE(list) \ + do { \ + opal_list_item_t *it; \ + if (1 == ((opal_object_t*)(list))->obj_reference_count) { \ + while (NULL != (it = opal_list_remove_first(list))) { \ + OBJ_RELEASE(it); \ + } \ + } \ + OBJ_RELEASE(list); \ } while(0); diff --git a/opal/common_sym_whitelist.txt b/opal/common_sym_whitelist.txt new file mode 100644 index 00000000000..d3c525b54d9 --- /dev/null +++ b/opal/common_sym_whitelist.txt @@ -0,0 +1,6 @@ +# Ignore symbols in libopen-pal that are auto-generated and we can't +# do anything about them (e.g., flex/bison symbols). 
+opal_show_help_yyleng +opal_show_help_yytext +opal_util_keyval_yyleng +opal_util_keyval_yytext diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 1c10efd1aa8..d5481283183 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -474,7 +474,10 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } #else #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ - assert(0 == (bdt_mask)) +{ \ + assert(0 == (bdt_mask)); \ + (void)bdt_mask; /* silence compiler warning */ \ +} #endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ /** diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index 8ff6d1644d1..956a1d46bcb 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -330,7 +330,7 @@ COPY_TYPE_HETEROGENEOUS( float12, long double ) #if SIZEOF_FLOAT == 16 COPY_TYPE_HETEROGENEOUS( float16, float ) -#elif SIZEOF_DOUBLE == 8 +#elif SIZEOF_DOUBLE == 16 COPY_TYPE_HETEROGENEOUS( float16, double ) #elif HAVE_LONG_DOUBLE && SIZEOF_LONG_DOUBLE == 16 COPY_TYPE_HETEROGENEOUS( float16, long double ) diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index cf00a690c56..25f014ead0d 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -329,6 +329,25 @@ OPAL_DECLSPEC opal_datatype_t* opal_datatype_create_from_packed_description( void** packed_buffer, struct opal_proc_t* remote_processor ); +/* Compute the span in memory of count datatypes. This function help with temporary + * memory allocations for receiving already typed data (such as those used for reduce + * operations). This span is the distance between the minimum and the maximum byte + * in the memory layout of count datatypes, or in other terms the memory needed to + * allocate count times the datatype without the gap in the beginning and at the end. + * + * Returns: the memory span of count repetition of the datatype, and in the gap + * argument, the number of bytes of the gap at the beginning. + */ +static inline OPAL_PTRDIFF_TYPE +opal_datatype_span( const opal_datatype_t* pData, int64_t count, + OPAL_PTRDIFF_TYPE* gap) +{ + OPAL_PTRDIFF_TYPE extent = (pData->ub - pData->lb); + OPAL_PTRDIFF_TYPE true_extent = (pData->true_ub - pData->true_lb); + *gap = pData->true_lb; + return true_extent + (count - 1) * extent; +} + #if OPAL_ENABLE_DEBUG /* * Set a breakpoint to this function in your favorite debugger diff --git a/opal/datatype/opal_datatype_copy.c b/opal/datatype/opal_datatype_copy.c index 700bad3ac20..1e2c5f70000 100644 --- a/opal/datatype/opal_datatype_copy.c +++ b/opal/datatype/opal_datatype_copy.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,9 +25,6 @@ #include "opal_config.h" #include -#ifdef HAVE_ALLOCA_H -#include -#endif #include #include "opal/prefetch.h" diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 7519674765c..d4ed216a5d3 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,6 +13,10 @@ * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include +#endif + #if !defined(MEM_OP_NAME) #error #endif /* !defined((MEM_OP_NAME) */ diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index b52719bcfc3..5b66e4df595 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -11,7 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,9 +24,6 @@ #include "opal_config.h" #include -#ifdef HAVE_ALLOCA_H -#include -#endif #include #include "opal/datatype/opal_datatype.h" diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index 0e1907c32b3..f5e51b86f9d 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. 
All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -26,10 +26,6 @@ #include #include -#ifdef HAVE_ALLOCA_H -#include -#endif - #include "opal/datatype/opal_datatype.h" #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_internal.h" diff --git a/opal/datatype/opal_datatype_resize.c b/opal/datatype/opal_datatype_resize.c index 71347d0b5de..b239c675b02 100644 --- a/opal/datatype/opal_datatype_resize.c +++ b/opal/datatype/opal_datatype_resize.c @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,9 +23,6 @@ int32_t opal_datatype_resize( opal_datatype_t* type, OPAL_PTRDIFF_TYPE lb, OPAL_ type->lb = lb; type->ub = lb + extent; - type->true_lb += lb; - type->true_ub += lb; - type->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; if( (extent == (OPAL_PTRDIFF_TYPE)type->size) && (type->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c index 0329d2b03ba..20ae1f0fe75 100644 --- a/opal/dss/dss_compare.c +++ b/opal/dss/dss_compare.c @@ -10,9 +10,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -305,7 +305,64 @@ int opal_dss_compare_node_stat(opal_node_stats_t *value1, opal_node_stats_t *val /* OPAL_VALUE */ int opal_dss_compare_value(opal_value_t *value1, opal_value_t *value2, opal_data_type_t type) { - return OPAL_EQUAL; /* eventually compare field to field */ + if (NULL == value1 && NULL == value2) { + return OPAL_EQUAL; + } + if (NULL == value2) { + return OPAL_VALUE1_GREATER; + } + if (NULL == value1) { + return OPAL_VALUE2_GREATER; + } + if (value1->type != value2->type) { + opal_output(0, "COMPARE-OPAL-VALUE: INCONSISTENT TYPE %d vs %d", (int)value1->type, (int)value2->type); + return OPAL_EQUAL; + } + switch (value1->type) { + case OPAL_BYTE: + return opal_dss_compare_byte((char *)&value1->data.byte, (char *)&value2->data.byte, type); + case OPAL_STRING: + return opal_dss_compare_string(value1->data.string, value2->data.string, type); + case OPAL_PID: + return opal_dss_compare_pid(&value1->data.pid, &value2->data.pid, type); + case OPAL_INT: + return opal_dss_compare_int(&value1->data.integer, &value2->data.integer, type); + case OPAL_INT8: + return opal_dss_compare_int8(&value1->data.int8, &value2->data.int8, type); + case OPAL_INT16: + return opal_dss_compare_int16(&value1->data.int16, &value2->data.int16, type); + case OPAL_INT32: + return opal_dss_compare_int32(&value1->data.int32, &value2->data.int32, type); + case OPAL_INT64: + return opal_dss_compare_int64(&value1->data.int64, &value2->data.int64, type); + case OPAL_UINT: + return opal_dss_compare_uint(&value1->data.uint, &value2->data.uint, type); + case OPAL_UINT8: + return opal_dss_compare_uint8(&value1->data.uint8, &value2->data.uint8, type); + case OPAL_UINT16: + return opal_dss_compare_uint16(&value1->data.uint16, &value2->data.uint16, type); + case OPAL_UINT32: + return opal_dss_compare_uint32(&value1->data.uint32, &value2->data.uint32, type); + case OPAL_UINT64: + return 
opal_dss_compare_uint64(&value1->data.uint64, &value2->data.uint64, type); + case OPAL_BYTE_OBJECT: + return opal_dss_compare_byte_object(&value1->data.bo, &value2->data.bo, type); + case OPAL_SIZE: + return opal_dss_compare_size(&value1->data.size, &value2->data.size, type); + case OPAL_FLOAT: + return opal_dss_compare_float(&value1->data.fval, &value2->data.fval, type); + case OPAL_DOUBLE: + return opal_dss_compare_double(&value1->data.dval, &value2->data.dval, type); + case OPAL_BOOL: + return opal_dss_compare_bool(&value1->data.flag, &value2->data.flag, type); + case OPAL_TIMEVAL: + return opal_dss_compare_timeval(&value1->data.tv, &value2->data.tv, type); + case OPAL_NAME: + return opal_dss_compare_name(&value1->data.name, &value2->data.name, type); + default: + opal_output(0, "COMPARE-OPAL-VALUE: UNSUPPORTED TYPE %d", (int)value1->type); + return OPAL_EQUAL; + } } /* OPAL_BUFFER */ @@ -392,3 +449,12 @@ int opal_dss_compare_jobid(opal_jobid_t *value1, return OPAL_EQUAL; } +int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c index f0d1544ba59..839ddc648b9 100644 --- a/opal/dss/dss_copy.c +++ b/opal/dss/dss_copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -61,6 +61,7 @@ int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type) case OPAL_INT: case OPAL_UINT: + case OPAL_STATUS: datasize = sizeof(int); break; diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h index b00a37ffd0d..2c1e3af73b2 100644 --- a/opal/dss/dss_internal.h +++ b/opal/dss/dss_internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -332,6 +332,9 @@ int opal_dss_pack_jobid(opal_buffer_t *buffer, const void *src, int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type); +int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + /* * Internal unpack functions */ @@ -401,6 +404,8 @@ int opal_dss_unpack_jobid(opal_buffer_t *buffer, void *dest, int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest, int32_t *num_vals, opal_data_type_t type); +int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); /* * Internal copy functions @@ -497,6 +502,8 @@ int opal_dss_compare_jobid(opal_jobid_t *value1, opal_jobid_t *value2, opal_data_type_t type); +int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type); + /* * Internal print functions */ @@ -536,6 +543,7 @@ int opal_dss_print_time(char **output, char *prefix, time_t *src, opal_data_type int opal_dss_print_name(char **output, char *prefix, opal_process_name_t *name, opal_data_type_t type); int opal_dss_print_jobid(char **output, char *prefix, opal_process_name_t *src, 
opal_data_type_t type); int opal_dss_print_vpid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type); +int opal_dss_print_status(char **output, char *prefix, int *src, opal_data_type_t type); /* diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c index 628806b570e..366cf2586a9 100644 --- a/opal/dss/dss_open_close.c +++ b/opal/dss/dss_open_close.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -611,6 +611,17 @@ int opal_dss_open(void) return rc; } + + tmp = OPAL_STATUS; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_status, + opal_dss_unpack_status, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_status, + (opal_dss_print_fn_t)opal_dss_print_status, + OPAL_DSS_UNSTRUCTURED, + "OPAL_STATUS", &tmp))) { + return rc; + } /* All done */ opal_dss_initialized = true; diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c index a68ad12930c..396c351d256 100644 --- a/opal/dss/dss_pack.c +++ b/opal/dss/dss_pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -1240,3 +1240,20 @@ int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src, return ret; } +/* + * STATUS + */ +int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* Turn around and pack the real type */ + ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_INT); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + } + + return ret; +} + diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c index ece4572eec4..f8e413efead 100644 --- a/opal/dss/dss_print.c +++ b/opal/dss/dss_print.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -25,6 +25,7 @@ #include "opal_stdint.h" #include +#include "opal/util/error.h" #include "opal/dss/dss_internal.h" int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type) @@ -1060,3 +1061,29 @@ int opal_dss_print_vpid(char **output, char *prefix, return OPAL_SUCCESS; } + +int opal_dss_print_status(char **output, char *prefix, + int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_STATUS\tValue: NULL pointer", prefx); + if (prefx != prefix) { + free(prefx); + } + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_STATUS\tValue: %s", prefx, opal_strerror(*src)); + if (prefx != prefix) { + free(prefx); + } + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h index 8c1bad91efd..8424d2b9084 100644 --- a/opal/dss/dss_types.h +++ b/opal/dss/dss_types.h @@ -13,9 +13,9 @@ * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,6 +47,7 @@ typedef struct { opal_jobid_t jobid; opal_vpid_t vpid; } opal_process_name_t; +#define OPAL_SIZEOF_PROCESS_NAME_T 8 BEGIN_C_DECLS @@ -119,6 +120,8 @@ typedef struct { #define OPAL_NAME (opal_data_type_t) 50 #define OPAL_JOBID (opal_data_type_t) 51 #define OPAL_VPID (opal_data_type_t) 52 +#define OPAL_STATUS (opal_data_type_t) 53 + /* OPAL Dynamic */ #define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 100 @@ -244,6 +247,7 @@ typedef struct { float fval; double dval; struct timeval tv; + int status; opal_process_name_t name; opal_bool_array_t flag_array; opal_uint8_array_t byte_array; diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c index 99e62d097f7..4f66e5aacb2 100644 --- a/opal/dss/dss_unpack.c +++ b/opal/dss/dss_unpack.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -1519,3 +1519,20 @@ int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest, return ret; } + +/* + * STATUS + */ +int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + + /* Turn around and unpack the real type */ + ret = opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_INT); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + } + + return ret; +} diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 82c046946f8..bb0a575d501 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,7 +80,8 @@ enum { OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50), OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51), OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52), - OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53) + OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53), + OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/base/base.h b/opal/mca/base/base.h index 8eb29408f99..1fdcbd899d7 100644 --- a/opal/mca/base/base.h +++ b/opal/mca/base/base.h @@ -13,6 +13,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -35,6 +37,7 @@ #include "opal/mca/base/mca_base_var.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/util/cmd_line.h" +#include "opal/util/output.h" BEGIN_C_DECLS diff --git a/opal/mca/base/mca_base_component_repository.c b/opal/mca/base/mca_base_component_repository.c index f98e484fa9b..f1497f68360 100644 --- a/opal/mca/base/mca_base_component_repository.c +++ b/opal/mca/base/mca_base_component_repository.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #include "opal/class/opal_list.h" #include "opal/mca/mca.h" @@ -280,30 +283,57 @@ static void mca_base_component_repository_release_internal (mca_base_component_r } #endif -void mca_base_component_repository_release(const mca_base_component_t *component) -{ #if OPAL_HAVE_DL_SUPPORT +static mca_base_component_repository_item_t *find_component (const char *type, const char *name) +{ mca_base_component_repository_item_t *ri; opal_list_t *component_list; int ret; - ret = opal_hash_table_get_value_ptr (&mca_base_component_repository, component->mca_type_name, - strlen (component->mca_type_name), (void **) &component_list); + ret = opal_hash_table_get_value_ptr (&mca_base_component_repository, type, + strlen (type), (void **) &component_list); if (OPAL_SUCCESS != ret) { /* component does not exist in the repository */ - return; + return NULL; } OPAL_LIST_FOREACH(ri, component_list, mca_base_component_repository_item_t) { - if (0 == strcmp (ri->ri_name, component->mca_component_name)) { - /* go ahead and dlclose the component if it is open */ - mca_base_component_repository_release_internal (ri); - break; + if (0 == strcmp (ri->ri_name, name)) { + return ri; } } + + return NULL; +} +#endif + +void mca_base_component_repository_release(const mca_base_component_t *component) +{ +#if OPAL_HAVE_DL_SUPPORT + mca_base_component_repository_item_t *ri; + + ri = find_component (component->mca_type_name, 
component->mca_component_name); + if (NULL != ri && !(--ri->ri_refcnt)) { + mca_base_component_repository_release_internal (ri); + } #endif } +int mca_base_component_repository_retain_component (const char *type, const char *name) +{ +#if OPAL_HAVE_DL_SUPPORT + mca_base_component_repository_item_t *ri = find_component(type, name); + + if (NULL != ri) { + ++ri->ri_refcnt; + return OPAL_SUCCESS; + } + + return OPAL_ERR_NOT_FOUND; +#else + return OPAL_ERR_NOT_SUPPORTED; +#endif +} int mca_base_component_repository_open (mca_base_framework_t *framework, mca_base_component_repository_item_t *ri) @@ -443,6 +473,7 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, component to be closed later. */ ri->ri_component_struct = mitem->cli_component = component_struct; + ri->ri_refcnt = 1; opal_list_append(&framework->framework_components, &mitem->super); opal_output_verbose (MCA_BASE_VERBOSE_INFO, 0, "mca_base_component_repository_open: opened dynamic %s MCA " diff --git a/opal/mca/base/mca_base_component_repository.h b/opal/mca/base/mca_base_component_repository.h index d480bb8cf4b..290c83c83c3 100644 --- a/opal/mca/base/mca_base_component_repository.h +++ b/opal/mca/base/mca_base_component_repository.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,6 +53,8 @@ struct mca_base_component_repository_item_t { opal_dl_handle_t *ri_dlhandle; const mca_base_component_t *ri_component_struct; + + int ri_refcnt; }; typedef struct mca_base_component_repository_item_t mca_base_component_repository_item_t; @@ -102,7 +107,25 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, mca_base_component_repository_item_t *ri); -void mca_base_component_repository_release(const mca_base_component_t *component); +/** + * @brief Reduce the reference count of a component and dlclose it if necessary + */ +void mca_base_component_repository_release (const mca_base_component_t *component); + +/** + * @brief Increase the reference count of a component + * + * Each component repository item starts with a reference count of 0. This ensures that + * when a framework closes it's components the repository items are all correctly + * dlclosed. This function can be used to prevent the dlclose if a component is needed + * after its framework has closed the associated component. Users of this function + * should call mca_base_component_repository_release() once they are finished with the + * component. + * + * @note all components are automatically unloaded by the + * mca_base_component_repository_finalize() call. + */ +int mca_base_component_repository_retain_component (const char *type, const char *name); END_C_DECLS diff --git a/opal/mca/base/mca_base_pvar.c b/opal/mca/base/mca_base_pvar.c index b7cf0e2c768..e5bda920378 100644 --- a/opal/mca/base/mca_base_pvar.c +++ b/opal/mca/base/mca_base_pvar.c @@ -707,7 +707,8 @@ int mca_base_pvar_handle_write_value (mca_base_pvar_handle_t *handle, const void return OPAL_ERR_PERM; } - /* TODO -- actually write the variable. this will likely require a pvar lock */ + /* write the value directly from the variable. 
*/ + ret = handle->pvar->set_value (handle->pvar, value, handle->obj_handle); ret = mca_base_pvar_handle_update (handle); if (OPAL_SUCCESS != ret) { diff --git a/opal/mca/base/mca_base_var_group.c b/opal/mca/base/mca_base_var_group.c index 6d4ea00637a..bbbd6166b03 100644 --- a/opal/mca/base/mca_base_var_group.c +++ b/opal/mca/base/mca_base_var_group.c @@ -274,12 +274,6 @@ static int group_register (const char *project_name, const char *framework_name, } } - /* avoid groups of the form opal_opal, ompi_ompi, etc */ - if (NULL != project_name && NULL != framework_name && - (0 == strcmp (project_name, framework_name))) { - project_name = NULL; - } - /* build the group name */ ret = mca_base_var_generate_full_name4 (NULL, project_name, framework_name, component_name, &group->group_full_name); diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index f138d9fa61a..2f858d36c9b 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -11,8 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. 
@@ -210,7 +210,6 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq) return OPAL_ERROR; } - OPAL_THREAD_LOCK(&device->device_lock); if (!device->progress) { int rc; device->progress = true; @@ -219,7 +218,6 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq) return rc; } } - OPAL_THREAD_UNLOCK(&device->device_lock); #endif } #ifdef HAVE_IBV_RESIZE_CQ @@ -356,8 +354,10 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } else #endif { + opal_mutex_lock(&openib_btl->device->device_lock); openib_btl->qps[qp].u.srq_qp.srq = ibv_create_srq(openib_btl->device->ib_pd, &attr); + opal_mutex_unlock(&openib_btl->device->device_lock); } if (NULL == openib_btl->qps[qp].u.srq_qp.srq) { mca_btl_openib_show_init_error(__FILE__, __LINE__, @@ -403,15 +403,32 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } } + openib_btl->srqs_created = true; + return OPAL_SUCCESS; } -static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs) +static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl) +{ + int rc = OPAL_SUCCESS; + opal_mutex_lock(&openib_btl->ib_lock); + if (!openib_btl->srqs_created && + (mca_btl_openib_component.num_srq_qps > 0 || + mca_btl_openib_component.num_xrc_qps > 0)) { + rc = create_srq(openib_btl); + } + opal_mutex_unlock(&openib_btl->ib_lock); + return rc; +} + + +static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl) { uint32_t send_cqes, recv_cqes; int rc = OPAL_SUCCESS, qp; mca_btl_openib_device_t *device = openib_btl->device; + opal_mutex_lock(&openib_btl->ib_lock); /* figure out reasonable sizes for completion queues */ for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { if(BTL_OPENIB_QP_TYPE_SRQ(qp)) { @@ -419,11 +436,14 @@ static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num; } else { send_cqes = 
(mca_btl_openib_component.qp_infos[qp].rd_num + - mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs; + mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * openib_btl->num_peers; recv_cqes = send_cqes; } + + opal_mutex_lock(&openib_btl->device->device_lock); openib_btl->device->cq_size[qp_cq_prio(qp)] += recv_cqes; openib_btl->device->cq_size[BTL_OPENIB_LP_CQ] += send_cqes; + opal_mutex_unlock(&openib_btl->device->device_lock); } rc = adjust_cq(device, BTL_OPENIB_HP_CQ); @@ -436,14 +456,8 @@ static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl goto out; } - if (0 == openib_btl->num_peers && - (mca_btl_openib_component.num_srq_qps > 0 || - mca_btl_openib_component.num_xrc_qps > 0)) { - rc = create_srq(openib_btl); - } - - openib_btl->num_peers += nprocs; out: + opal_mutex_unlock(&openib_btl->ib_lock); return rc; } @@ -606,10 +620,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, static int prepare_device_for_use (mca_btl_openib_device_t *device) { mca_btl_openib_frag_init_data_t *init_data; - int rc, length; + int rc = OPAL_SUCCESS, length; + + opal_mutex_lock(&device->device_lock); if (device->ready_for_use) { - return OPAL_SUCCESS; + goto exit; } /* For each btl module that we made - find every @@ -630,7 +646,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_device_qp_t)); if (NULL == device->qps) { BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } for (int qp_index = 0 ; qp_index < mca_btl_openib_component.num_qps ; qp_index++) { @@ -662,13 +679,15 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.num_xrc_qps, ibv_get_device_name(device->ib_dev), opal_process_info.nodename); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } if (MCA_BTL_XRC_ENABLED) { if (OPAL_SUCCESS != 
mca_btl_openib_open_xrc_domain(device)) { BTL_ERROR(("XRC Internal error. Failed to open xrc domain")); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } #endif @@ -683,7 +702,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_endpoint_t*)); if(NULL == device->eager_rdma_buffers) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } } @@ -696,7 +716,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) device->eager_rdma_buffers = NULL; } BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } length = sizeof(mca_btl_openib_header_t) + @@ -724,7 +745,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return rc; + goto exit; } /* setup all the qps */ @@ -732,7 +753,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); if (NULL == init_data) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } /* Initialize pool of send fragments */ @@ -765,7 +787,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return OPAL_ERROR; + goto exit; } init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); @@ -787,15 +809,184 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.ib_free_list_inc, device->mpool, 0, NULL, mca_btl_openib_frag_init, init_data)) { - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } device->ready_for_use = true; +exit: + opal_mutex_unlock(&device->device_lock); + return rc; +} + +static int 
init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_openib_proc_t* ib_proc, + volatile mca_btl_base_endpoint_t **endpoint_ptr, + int local_port_cnt, int btl_rank) +{ + int rem_port_cnt, matching_port = -1, j, rc; + mca_btl_base_endpoint_t *endpoint; + opal_btl_openib_connect_base_module_t *local_cpc; + opal_btl_openib_connect_base_module_data_t *remote_cpc_data; + + *endpoint_ptr = NULL; + + /* check if the remote proc has any ports that: + - on the same subnet as the local proc, and + - on that subnet, has a CPC in common with the local proc + */ + + rem_port_cnt = 0; + BTL_VERBOSE(("got %d port_infos ", ib_proc->proc_port_count)); + for (j = 0; j < (int) ib_proc->proc_port_count; j++){ + BTL_VERBOSE(("got a subnet %016" PRIx64, + ib_proc->proc_ports[j].pm_port_info.subnet_id)); + if (ib_proc->proc_ports[j].pm_port_info.subnet_id == + openib_btl->port_info.subnet_id) { + BTL_VERBOSE(("Got a matching subnet!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } + } + + if (0 == rem_port_cnt) { + /* no use trying to communicate with this endpoint */ + BTL_VERBOSE(("No matching subnet id/CPC was found, moving on.. ")); + return OPAL_ERROR; + } + + /* If this process has multiple ports on a single subnet ID, + and the report proc also has multiple ports on this same + subnet ID, the default connection pattern is: + + LOCAL REMOTE PEER + 1st port on subnet X <--> 1st port on subnet X + 2nd port on subnet X <--> 2nd port on subnet X + 3nd port on subnet X <--> 3nd port on subnet X + ...etc. + + Note that the port numbers may not be contiguous, and they + may not be the same on either side. Hence the "1st", "2nd", + "3rd, etc. notation, above. + + Hence, if the local "rank" of this module's port on the + subnet ID is greater than the total number of ports on the + peer on this same subnet, then we have no match. So skip + this connection. 
*/ + if (rem_port_cnt < local_port_cnt && btl_rank >= rem_port_cnt) { + BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. ")); + return OPAL_ERROR; + } + + /* Now that we have verified that we're on the same subnet and + the remote peer has enough ports, see if that specific port + on the peer has a matching CPC. */ + assert(btl_rank <= ib_proc->proc_port_count); + assert(matching_port != -1); + if (OPAL_SUCCESS != + opal_btl_openib_connect_base_find_match(openib_btl, + &(ib_proc->proc_ports[matching_port]), + &local_cpc, + &remote_cpc_data)) { + return OPAL_ERROR; + } + + /* The btl_proc datastructure is shared by all IB BTL + * instances that are trying to reach this destination. + * Cache the peer instance on the btl_proc. + */ + endpoint = OBJ_NEW(mca_btl_openib_endpoint_t); + assert(((opal_object_t*)endpoint)->obj_reference_count == 1); + if(NULL == endpoint) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED) { + int rem_port_cnt = 0; + for(j = 0; j < (int) ib_proc->proc_port_count; j++) { + if(ib_proc->proc_ports[j].pm_port_info.subnet_id == + openib_btl->port_info.subnet_id) { + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } + } + + assert(rem_port_cnt == btl_rank); + /* Push the subnet/lid/jobid to xrc hash */ + rc = mca_btl_openib_ib_address_add_new( + ib_proc->proc_ports[j].pm_port_info.lid, + ib_proc->proc_ports[j].pm_port_info.subnet_id, + ib_proc->proc_opal->proc_name.jobid, endpoint); + if (OPAL_SUCCESS != rc ) { + return OPAL_ERROR; + } + } +#endif + mca_btl_openib_endpoint_init(openib_btl, endpoint, + local_cpc, + &(ib_proc->proc_ports[matching_port]), + remote_cpc_data); + + rc = mca_btl_openib_proc_insert(ib_proc, endpoint); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + if(OPAL_SUCCESS != mca_btl_openib_tune_endpoint(openib_btl, endpoint)) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + /* protect device because several endpoints 
for different ib_proc's + * may be simultaneously initialized */ + opal_mutex_lock(&openib_btl->device->device_lock); + endpoint->index = opal_pointer_array_add(openib_btl->device->endpoints, (void*)endpoint); + opal_mutex_unlock(&openib_btl->device->device_lock); + + if( 0 > endpoint->index ) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + /* Tell the selected CPC that it won. NOTE: This call is + outside of / separate from mca_btl_openib_endpoint_init() + because this function likely needs the endpoint->index. */ + if (NULL != local_cpc->cbm_endpoint_init) { + rc = local_cpc->cbm_endpoint_init(endpoint); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + } + + *endpoint_ptr = endpoint; return OPAL_SUCCESS; } +static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_cnt_ptr) +{ + int port_cnt = 0, rank = -1, j; + for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){ + if(mca_btl_openib_component.openib_btls[j]->port_info.subnet_id + == openib_btl->port_info.subnet_id) { + if(openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } + } + *port_cnt_ptr = port_cnt; + return rank; +} + /* * add a proc to this btl module * creates an endpoint that is setup on the @@ -809,22 +1000,15 @@ int mca_btl_openib_add_procs( opal_bitmap_t* reachable) { mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl; - int i,j, rc, local_procs; - int rem_subnet_id_port_cnt; + size_t nprocs_new_loc = 0, nprocs_new = 0; + int i,j, rc; int lcl_subnet_id_port_cnt = 0; int btl_rank = 0; - mca_btl_base_endpoint_t* endpoint; - opal_btl_openib_connect_base_module_t *local_cpc; - opal_btl_openib_connect_base_module_data_t *remote_cpc_data; + volatile mca_btl_base_endpoint_t* endpoint; - for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){ - if(mca_btl_openib_component.openib_btls[j]->port_info.subnet_id - == openib_btl->port_info.subnet_id) { - if(openib_btl == 
mca_btl_openib_component.openib_btls[j]) { - btl_rank = lcl_subnet_id_port_cnt; - } - lcl_subnet_id_port_cnt++; - } + btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt); + if( 0 > btl_rank ){ + return OPAL_ERR_NOT_FOUND; } #if HAVE_XRC @@ -844,23 +1028,20 @@ int mca_btl_openib_add_procs( return rc; } - rc = mca_btl_openib_size_queues(openib_btl, nprocs); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("error creating cqs")); - return rc; + if (0 == openib_btl->num_peers) { + /* ensure completion queues are created before attempting to + * make a loop-back queue pair */ + rc = openib_btl_size_queues(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; + } } - for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) { + /* prepare all proc's and account them properly */ + for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) { struct opal_proc_t* proc = procs[i]; mca_btl_openib_proc_t* ib_proc; - bool found_existing = false; - int remote_matching_port; - - opal_output(-1, "add procs: adding proc %d", i); - - if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { - local_procs ++; - } #if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) /* Most current iWARP adapters (June 2008) cannot handle @@ -874,210 +1055,199 @@ int mca_btl_openib_add_procs( } #endif - if(NULL == (ib_proc = mca_btl_openib_proc_create(proc))) { + if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) { /* if we don't have connection info for this process, it's * okay because some other method might be able to reach it, * so just mark it as unreachable by us */ continue; } - OPAL_THREAD_LOCK(&ib_proc->proc_lock); - for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) { - endpoint = ib_proc->proc_endpoints[j]; - if (endpoint->endpoint_btl == openib_btl) { - found_existing = true; - break; - } - } - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); + /* account this openib_btl in this proc */ + rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl); - if 
(found_existing) { - if (reachable) { - opal_bitmap_set_bit(reachable, i); - } - peers[i] = endpoint; - continue; - } + opal_mutex_unlock( &ib_proc->proc_lock ); - /* check if the remote proc has any ports that: - - on the same subnet as the local proc, and - - on that subnet, has a CPC in common with the local proc - */ - remote_matching_port = -1; - rem_subnet_id_port_cnt = 0; - BTL_VERBOSE(("got %d port_infos ", ib_proc->proc_port_count)); - for (j = 0; j < (int) ib_proc->proc_port_count; j++){ - BTL_VERBOSE(("got a subnet %016" PRIx64, - ib_proc->proc_ports[j].pm_port_info.subnet_id)); - if (ib_proc->proc_ports[j].pm_port_info.subnet_id == - openib_btl->port_info.subnet_id) { - BTL_VERBOSE(("Got a matching subnet!")); - if (rem_subnet_id_port_cnt == btl_rank) { - remote_matching_port = j; - } - rem_subnet_id_port_cnt++; + switch( rc ){ + case OPAL_SUCCESS: + /* this is a new process to this openib btl */ + nprocs_new++; + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { + nprocs_new_loc ++; } + break; + case OPAL_ERR_RESOURCE_BUSY: + /* process was accounted earlier in this openib btl */ + break; + default: + /* unexpected error, e.g. out of mem */ + return rc; } + } - if (0 == rem_subnet_id_port_cnt) { - /* no use trying to communicate with this endpoint */ - BTL_VERBOSE(("No matching subnet id/CPC was found, moving on.. ")); - continue; - } + if (nprocs_new) { + OPAL_THREAD_ADD32(&openib_btl->num_peers, nprocs_new); - /* If this process has multiple ports on a single subnet ID, - and the report proc also has multiple ports on this same - subnet ID, the default connection pattern is: - - LOCAL REMOTE PEER - 1st port on subnet X <--> 1st port on subnet X - 2nd port on subnet X <--> 2nd port on subnet X - 3nd port on subnet X <--> 3nd port on subnet X - ...etc. - - Note that the port numbers may not be contiguous, and they - may not be the same on either side. Hence the "1st", "2nd", - "3rd, etc. notation, above. 
- - Hence, if the local "rank" of this module's port on the - subnet ID is greater than the total number of ports on the - peer on this same subnet, then we have no match. So skip - this connection. */ - if (rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt && - btl_rank >= rem_subnet_id_port_cnt) { - BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. ")); - continue; + /* adjust cq sizes given the new procs */ + rc = openib_btl_size_queues (openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; } + } - /* Now that we have verified that we're on the same subnet and - the remote peer has enough ports, see if that specific port - on the peer has a matching CPC. */ - assert(btl_rank <= ib_proc->proc_port_count); - assert(remote_matching_port != -1); - if (OPAL_SUCCESS != - opal_btl_openib_connect_base_find_match(openib_btl, - &(ib_proc->proc_ports[remote_matching_port]), - &local_cpc, - &remote_cpc_data)) { - continue; - } + rc = openib_btl_prepare (openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl module for use")); + return rc; + } - OPAL_THREAD_LOCK(&ib_proc->proc_lock); + opal_mutex_lock(&openib_btl->device->device_lock); + openib_btl->local_procs += nprocs_new_loc; + if( 0 < nprocs_new_loc ){ + openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs; + } + opal_mutex_unlock(&openib_btl->device->device_lock); - /* The btl_proc datastructure is shared by all IB BTL - * instances that are trying to reach this destination. - * Cache the peer instance on the btl_proc. 
- */ - endpoint = OBJ_NEW(mca_btl_openib_endpoint_t); - assert(((opal_object_t*)endpoint)->obj_reference_count == 1); - if(NULL == endpoint) { - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } + /* prepare endpoints */ + for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) { + struct opal_proc_t* proc = procs[i]; + mca_btl_openib_proc_t* ib_proc; + bool found_existing = false; -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED) { - int rem_port_cnt = 0; - for(j = 0; j < (int) ib_proc->proc_port_count; j++) { - if(ib_proc->proc_ports[j].pm_port_info.subnet_id == - openib_btl->port_info.subnet_id) { - if (rem_port_cnt == btl_rank) - break; - else - rem_port_cnt ++; - } - } + opal_output(-1, "add procs: adding proc %d", i); - assert(rem_port_cnt == btl_rank); - /* Push the subnet/lid/jobid to xrc hash */ - rc = mca_btl_openib_ib_address_add_new( - ib_proc->proc_ports[j].pm_port_info.lid, - ib_proc->proc_ports[j].pm_port_info.subnet_id, - proc->proc_name.jobid, endpoint); - if (OPAL_SUCCESS != rc ) { - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERROR; - } +#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) + /* Most current iWARP adapters (June 2008) cannot handle + talking to other processes on the same host (!) -- so mark + them as unreachable (need to use sm). So for the moment, + we'll just mark any local peer on an iWARP NIC as + unreachable. See trac ticket #1352. 
*/ + if (IBV_TRANSPORT_IWARP == openib_btl->device->ib_dev->transport_type && + OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { + continue; } #endif - mca_btl_openib_endpoint_init(openib_btl, endpoint, - local_cpc, - &(ib_proc->proc_ports[remote_matching_port]), - remote_cpc_data); - rc = mca_btl_openib_proc_insert(ib_proc, endpoint); - if (OPAL_SUCCESS != rc) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); + if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) { + /* if we don't have connection info for this process, it's + * okay because some other method might be able to reach it, + * so just mark it as unreachable by us */ continue; } - if(OPAL_SUCCESS != mca_btl_openib_tune_endpoint(openib_btl, endpoint)) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERROR; - } + found_existing = false; - endpoint->index = opal_pointer_array_add(openib_btl->device->endpoints, (void*)endpoint); - if( 0 > endpoint->index ) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - continue; + for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) { + endpoint = ib_proc->proc_endpoints[j]; + if (endpoint->endpoint_btl == openib_btl) { + found_existing = true; + break; + } } - /* Tell the selected CPC that it won. NOTE: This call is - outside of / separate from mca_btl_openib_endpoint_init() - because this function likely needs the endpoint->index. 
*/ - if (NULL != local_cpc->cbm_endpoint_init) { - rc = local_cpc->cbm_endpoint_init(endpoint); - if (OPAL_SUCCESS != rc) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - continue; + if( !found_existing ) { + rc = init_ib_proc_nolock(openib_btl, ib_proc, &endpoint, + lcl_subnet_id_port_cnt, btl_rank); + if( OPAL_SUCCESS == rc ){ + found_existing = true; } } + opal_mutex_unlock( &ib_proc->proc_lock ); - opal_bitmap_set_bit(reachable, i); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); + if (found_existing) { + if (reachable) { + opal_bitmap_set_bit(reachable, i); + } + peers[i] = (mca_btl_base_endpoint_t*)endpoint; + } - peers[i] = endpoint; } - openib_btl->local_procs += local_procs; - openib_btl->device->mem_reg_max /= openib_btl->local_procs; - return OPAL_SUCCESS; } struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, struct opal_proc_t *proc) { mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl; - mca_btl_base_endpoint_t *endpoint; + volatile mca_btl_base_endpoint_t *endpoint = NULL; + int local_port_cnt = 0, btl_rank, rc; mca_btl_openib_proc_t *ib_proc; - if (NULL == (ib_proc = mca_btl_openib_proc_create(proc))) { + rc = prepare_device_for_use (openib_btl->device); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib device for use")); + return NULL; + } + + if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) { /* if we don't have connection info for this process, it's * okay because some other method might be able to reach it, * so just mark it as unreachable by us */ return NULL; } - OPAL_THREAD_LOCK(&ib_proc->proc_lock); + rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl); + + switch( rc ){ + case OPAL_SUCCESS: + /* unlock first to avoid possible deadlocks */ + opal_mutex_unlock(&ib_proc->proc_lock); + + /* this is a new process to this openib btl + * account this procs if need */ + OPAL_THREAD_ADD32(&openib_btl->num_peers, 1); + rc = 
openib_btl_size_queues(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return NULL; + } + + if( OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags) ) { + opal_mutex_lock(&openib_btl->ib_lock); + openib_btl->local_procs += 1; + openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs; + opal_mutex_unlock(&openib_btl->ib_lock); + } + + /* lock process back */ + opal_mutex_lock(&ib_proc->proc_lock); + break; + case OPAL_ERR_RESOURCE_BUSY: + /* process was accounted earlier in this openib btl */ + break; + default: + /* unexpected error, e.g. out of mem */ + BTL_ERROR(("Unexpected OPAL error %d", rc)); + return NULL; + } + + rc = openib_btl_prepare(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl structure for use")); + goto exit; + } + for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) { endpoint = ib_proc->proc_endpoints[j]; if (endpoint->endpoint_btl == openib_btl) { - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return endpoint; + goto exit; } } - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - - BTL_VERBOSE(("creating new endpoint for remote process {.jobid = 0x%x, .vpid = 0x%x}", - proc->proc_name.jobid, proc->proc_name.vpid)); endpoint = NULL; - (void) mca_btl_openib_add_procs (btl, 1, &proc, &endpoint, NULL); - return endpoint; + + btl_rank = get_openib_btl_params(openib_btl, &local_port_cnt); + if( 0 > btl_rank ){ + goto exit; + } + + (void)init_ib_proc_nolock(openib_btl, ib_proc, &endpoint, + local_port_cnt, btl_rank); + +exit: + opal_mutex_unlock(&ib_proc->proc_lock); + + return (struct mca_btl_base_endpoint_t *)endpoint; } /* diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index a9c13e3d130..f5772f63606 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. @@ -300,11 +300,6 @@ struct mca_btl_openib_component_t { #if BTL_OPENIB_FAILOVER_ENABLED int verbose_failover; #endif -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - int use_memalign; - size_t memalign_threshold; - void* (*previous_malloc_hook)(size_t __size, const void*); -#endif #if OPAL_CUDA_SUPPORT bool cuda_async_send; bool cuda_async_recv; @@ -371,6 +366,9 @@ typedef struct mca_btl_openib_device_t { #endif opal_mutex_t device_lock; /* device level lock */ struct ibv_context *ib_dev_context; +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + struct ibv_exp_device_attr ib_exp_dev_attr; +#endif struct ibv_device_attr ib_dev_attr; struct ibv_pd *ib_pd; struct ibv_cq *ib_cq[2]; @@ -406,7 +404,7 @@ typedef struct mca_btl_openib_device_t { /* Maximum value supported by this device for max_inline_data */ uint32_t max_inline_data; /* Registration limit and current count */ - uint64_t mem_reg_max, mem_reg_active; + uint64_t mem_reg_max, mem_reg_max_total, mem_reg_active; /* Device is ready for use */ bool ready_for_use; /* Async event */ @@ -460,6 +458,7 @@ struct mca_btl_openib_module_t { mca_btl_base_module_t super; bool btl_inited; + bool srqs_created; /** Common information about all ports */ mca_btl_openib_modex_message_t port_info; @@ -490,6 +489,8 @@ struct mca_btl_openib_module_t { mca_btl_openib_module_qp_t * qps; int local_procs; /** number of local procs */ + + bool atomic_ops_be; /** atomic result is big endian */ }; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t; diff --git a/opal/mca/btl/openib/btl_openib_atomic.c b/opal/mca/btl/openib/btl_openib_atomic.c index 6e6698877d6..0c6460f2cf3 100644 --- 
a/opal/mca/btl/openib/btl_openib_atomic.c +++ b/opal/mca/btl/openib/btl_openib_atomic.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -27,6 +27,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st { mca_btl_openib_get_frag_t* frag = NULL; int qp = order; + int32_t rkey; int rc; frag = to_get_frag(alloc_recv_user_frag()); @@ -61,26 +62,18 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st frag->sr_desc.wr.atomic.compare_add = operand; frag->sr_desc.wr.atomic.swap = operand2; + rkey = remote_handle->rkey; + #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN) != (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey); - } else -#endif - { - frag->sr_desc.wr.atomic.rkey = remote_handle->rkey; + rkey = opal_swap_bytes4 (rkey); } - -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - frag->sr_desc.qp_type.xrc.remote_srqn = endpoint->rem_info.rem_srqs[qp].rem_srq_num; -#else - frag->sr_desc.xrc_remote_srq_num = endpoint->rem_info.rem_srqs[qp].rem_srq_num; #endif - } -#endif + frag->sr_desc.wr.atomic.rkey = rkey; + + /* NTH: the SRQ# is set in mca_btl_get_internal */ if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&endpoint->endpoint_lock); diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 07dcf6e2f1d..336fffef43b 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -11,7 +11,7 
@@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. @@ -42,7 +42,7 @@ #include #include #include -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED /* * The include of malloc.h below breaks abstractions in OMPI (by * directly including a header file from another component), but has @@ -55,7 +55,7 @@ * Internally, OMPI uses the built-in ptmalloc from the linux memory * component anyway. */ -#include "opal/mca/memory/linux/malloc.h" +#include "opal/mca/memory/linux/memory_linux.h" #endif #include "opal/mca/event/event.h" @@ -123,7 +123,6 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, * Local variables */ static mca_btl_openib_device_t *receive_queues_device = NULL; -static bool malloc_hook_set = false; static int num_devices_intentionally_ignored = 0; mca_btl_openib_component_t mca_btl_openib_component = { @@ -147,30 +146,6 @@ mca_btl_openib_component_t mca_btl_openib_component = { } }; -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED -/* This is a memory allocator hook. The purpose of this is to make - * every malloc aligned since this speeds up IB HCA work. - * There two basic cases here: - * - * 1. Memory manager for Open MPI is enabled. Then memalign below will - * be overridden by __memalign_hook which is set to - * opal_memory_linux_memalign_hook. Thus, _malloc_hook is going to - * use opal_memory_linux_memalign_hook. - * - * 2. No memory manager support. The memalign below is just regular glibc - * memalign which will be called through __malloc_hook instead of malloc. 
- */ -static void *btl_openib_malloc_hook(size_t sz, const void* caller) -{ - if (sz < mca_btl_openib_component.memalign_threshold && - malloc_hook_set) { - return mca_btl_openib_component.previous_malloc_hook(sz, caller); - } else { - return memalign(mca_btl_openib_component.use_memalign, sz); - } -} -#endif - static int btl_openib_component_register(void) { int ret; @@ -257,16 +232,6 @@ static int btl_openib_component_close(void) free(mca_btl_openib_component.default_recv_qps); } -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - /* Must check to see whether the malloc hook was set before - assigning it back because ompi_info will call _register() and - then _close() (which won't set the hook) */ - if (malloc_hook_set) { - __malloc_hook = mca_btl_openib_component.previous_malloc_hook; - malloc_hook_set = false; - } -#endif - /* close memory registration debugging output */ opal_output_close (mca_btl_openib_component.memory_registration_verbose); @@ -573,7 +538,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size, } if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_WRITE) { - access_flag |= IBV_ACCESS_REMOTE_WRITE; + access_flag |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; } if (reg->access_flags & MCA_MPOOL_ACCESS_LOCAL_WRITE) { @@ -582,7 +547,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size, #if HAVE_DECL_IBV_ATOMIC_HCA if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_ATOMIC) { - access_flag |= IBV_ACCESS_REMOTE_ATOMIC; + access_flag |= IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_LOCAL_WRITE; } #endif @@ -822,13 +787,41 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, openib_btl->super.btl_get_local_registration_threshold = 0; #if HAVE_DECL_IBV_ATOMIC_HCA - if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) { + openib_btl->atomic_ops_be = false; + +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + /* check that 8-byte atomics are supported */ + if 
(!(device->ib_exp_dev_attr.ext_atom.log_atomic_arg_sizes & (1<<3ull))) { openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS; openib_btl->super.btl_atomic_flags = 0; openib_btl->super.btl_atomic_fop = NULL; openib_btl->super.btl_atomic_cswap = NULL; - } else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) { + } +#endif + +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + switch (openib_btl->device->ib_exp_dev_attr.exp_atomic_cap) +#else + switch (openib_btl->device->ib_dev_attr.atomic_cap) +#endif + { + case IBV_ATOMIC_GLOB: openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB; + break; +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + case IBV_EXP_ATOMIC_HCA_REPLY_BE: + openib_btl->atomic_ops_be = true; + break; +#endif + case IBV_ATOMIC_HCA: + break; + case IBV_ATOMIC_NONE: + default: + /* no atomics or an unsupported atomic type */ + openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS; + openib_btl->super.btl_atomic_flags = 0; + openib_btl->super.btl_atomic_fop = NULL; + openib_btl->super.btl_atomic_cswap = NULL; } #endif @@ -1607,7 +1600,8 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) } device->mem_reg_active = 0; - device->mem_reg_max = calculate_max_reg(ibv_get_device_name(ib_dev)); + device->mem_reg_max_total = calculate_max_reg(ibv_get_device_name(ib_dev)); + device->mem_reg_max = device->mem_reg_max_total; if(( 0 == device->mem_reg_max) && mca_btl_openib_component.abort_not_enough_reg_mem) { return OPAL_ERROR; } @@ -1626,7 +1620,14 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) ibv_get_device_name(device->ib_dev), strerror(errno))); goto error; } - +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + device->ib_exp_dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1; + if(ibv_exp_query_device(device->ib_dev_context, &device->ib_exp_dev_attr)){ + BTL_ERROR(("error obtaining device attributes for %s errno says %s", + ibv_get_device_name(device->ib_dev), strerror(errno))); + goto error; 
+ } +#endif if(ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)){ BTL_ERROR(("error obtaining device attributes for %s errno says %s", ibv_get_device_name(device->ib_dev), strerror(errno))); @@ -2511,19 +2512,14 @@ btl_openib_component_init(int *num_btl_modules, *num_btl_modules = 0; num_devs = 0; -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED /* If we got this far, then setup the memory alloc hook (because we're most likely going to be using this component). The hook is to be set up as early as possible in this function since we - want most of the allocated resources be aligned.*/ - if (mca_btl_openib_component.use_memalign > 0 && - (opal_mem_hooks_support_level() & - (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { - mca_btl_openib_component.previous_malloc_hook = __malloc_hook; - __malloc_hook = btl_openib_malloc_hook; - malloc_hook_set = true; - } -#endif + want most of the allocated resources be aligned. + */ + opal_memory_linux_malloc_set_alignment(32, mca_btl_openib_module.super.btl_eager_limit); +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ /* Per https://svn.open-mpi.org/trac/ompi/ticket/1305, check to see if $sysfsdir/class/infiniband exists. 
If it does not, @@ -2924,13 +2920,6 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_component.ib_num_btls = 0; btl_openib_modex_send(); -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - /*Unset malloc hook since the component won't start*/ - if (malloc_hook_set) { - __malloc_hook = mca_btl_openib_component.previous_malloc_hook; - malloc_hook_set = false; - } -#endif if (NULL != btls) { free(btls); } @@ -3446,6 +3435,11 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, mca_btl_openib_get_frag_t *get_frag = to_get_frag(des); + /* check if atomic result needs to be byte swapped (mlx5) */ + if (openib_btl->atomic_ops_be && IBV_WC_RDMA_READ != wc->opcode) { + *((int64_t *) frag->sg_entry.addr) = ntoh64 (*((int64_t *) frag->sg_entry.addr)); + } + get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr, get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data, OPAL_SUCCESS); diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index a4f84e08927..0186f8d5e26 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved. 
@@ -579,7 +579,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) opal_output(-1, "Now we are CONNECTED"); if (MCA_BTL_XRC_ENABLED) { - OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_lock (&endpoint->ib_addr->addr_lock); if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) { /* We are not xrc master */ /* set our qp pointer to master qp */ @@ -622,7 +622,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) } } } - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_unlock (&endpoint->ib_addr->addr_lock); } diff --git a/opal/mca/btl/openib/btl_openib_get.c b/opal/mca/btl/openib/btl_openib_get.c index 2d335619c19..c8bc78105db 100644 --- a/opal/mca/btl/openib/btl_openib_get.c +++ b/opal/mca/btl/openib/btl_openib_get.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. 
@@ -92,16 +92,6 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint frag->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else - frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags); @@ -138,6 +128,19 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base int qp = to_base_frag(frag)->base.order; struct ibv_send_wr *bad_wr; +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { + /* NTH: the remote SRQ number is only available once the endpoint is connected. By + * setting the value here instead of mca_btl_openib_get we guarantee the rem_srqs + * array is initialized. 
*/ +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else + frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index d3664435496..07dcdd07c76 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -663,7 +663,7 @@ int btl_openib_register_mca_params(void) } asprintf(&default_qps, - "P,128,256,192,128:S,%u,1024,1008,64:S,%u,1024,1008,64:S,%u,1024,1008,64", + "S,128,256,192,128:S,%u,1024,1008,64:S,%u,1024,1008,64:S,%u,1024,1008,64", mid_qp_size, (uint32_t)mca_btl_openib_module.super.btl_eager_limit, (uint32_t)mca_btl_openib_module.super.btl_max_send_size); @@ -703,26 +703,19 @@ int btl_openib_register_mca_params(void) 0, &mca_btl_openib_component.gid_index, REGINT_GE_ZERO)); -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - CHECK(reg_int("memalign", NULL, - "[64 | 32 | 0] - Enable (64bit or 32bit)/Disable(0) memory" - "alignment for all malloc calls if btl openib is used.", - 32, &mca_btl_openib_component.use_memalign, - REGINT_GE_ZERO)); +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + tmp = mca_base_var_find ("opal", "memory", "linux", "memalign"); + if (0 <= tmp) { + (void) mca_base_var_register_synonym(tmp, "opal", "btl", "openib", "memalign", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } - mca_btl_openib_component.memalign_threshold = - mca_btl_openib_module.super.btl_eager_limit; - tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, - "memalign_threshold", - "Allocating memory more than btl_openib_memalign_threshhold" - "bytes will automatically be algined to the value of btl_openib_memalign bytes." 
- "memalign_threshhold defaults to the same value as mca_btl_openib_eager_limit.", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_openib_component.memalign_threshold); - if (0 > tmp) ret = tmp; -#endif + tmp = mca_base_var_find ("opal", "memory", "linux", "memalign_threshold"); + if (0 <= tmp) { + (void) mca_base_var_register_synonym(tmp, "opal", "btl", "openib", "memalign_threshold", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ /* Register any MCA params for the connect pseudo-components */ if (OPAL_SUCCESS == ret) { @@ -823,16 +816,5 @@ int btl_openib_verify_mca_params (void) } #endif -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - if (mca_btl_openib_component.use_memalign != 32 - && mca_btl_openib_component.use_memalign != 64 - && mca_btl_openib_component.use_memalign != 0){ - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "Wrong btl_openib_memalign parameter value. Allowed values: 64, 32, 0.", - "btl_openib_memalign is reset to 32"); - mca_btl_openib_component.use_memalign = 32; - } -#endif - return OPAL_SUCCESS; } diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index 27719bcdef3..f994f4aef08 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -12,6 +12,10 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +33,23 @@ #include "connect/base.h" #include "connect/connect.h" +static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem); +static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem); + +OBJ_CLASS_INSTANCE(mca_btl_openib_proc_btlptr_t, + opal_list_item_t, mca_btl_openib_proc_btl_construct, + mca_btl_openib_proc_btl_destruct); + +static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem) +{ + elem->openib_btl = NULL; +} + +static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem) +{ + elem->openib_btl = NULL; +} + static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc); static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc); @@ -44,10 +65,7 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc) ib_proc->proc_endpoints = 0; ib_proc->proc_endpoint_count = 0; OBJ_CONSTRUCT(&ib_proc->proc_lock, opal_mutex_t); - /* add to list of all proc instance */ - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); + OBJ_CONSTRUCT(&ib_proc->openib_btls, opal_list_t); } /* @@ -56,10 +74,7 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc) void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) { - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - opal_list_remove_item(&mca_btl_openib_component.ib_procs, &ib_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); + mca_btl_openib_proc_btlptr_t* elem; /* release resources */ if(NULL != ib_proc->proc_endpoints) { @@ -77,6 +92,13 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) free(ib_proc->proc_ports); } OBJ_DESTRUCT(&ib_proc->proc_lock); + + elem = 
(mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); + while( NULL != elem ){ + OBJ_RELEASE(elem); + elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); + } + OBJ_DESTRUCT(&ib_proc->openib_btls); } @@ -84,26 +106,38 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) * Look for an existing IB process instances based on the associated * opal_proc_t instance. */ -static mca_btl_openib_proc_t* mca_btl_openib_proc_lookup_proc(opal_proc_t* proc) +static mca_btl_openib_proc_t* ibproc_lookup_no_lock(opal_proc_t* proc) { mca_btl_openib_proc_t* ib_proc; - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - for(ib_proc = (mca_btl_openib_proc_t*) opal_list_get_first(&mca_btl_openib_component.ib_procs); ib_proc != (mca_btl_openib_proc_t*) opal_list_get_end(&mca_btl_openib_component.ib_procs); ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { if(ib_proc->proc_opal == proc) { - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); return ib_proc; } } - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); return NULL; } +static mca_btl_openib_proc_t* ibproc_lookup_and_lock(opal_proc_t* proc) +{ + mca_btl_openib_proc_t* ib_proc; + + /* get the process from the list */ + opal_mutex_lock(&mca_btl_openib_component.ib_lock); + ib_proc = ibproc_lookup_no_lock(proc); + opal_mutex_unlock(&mca_btl_openib_component.ib_lock); + if( NULL != ib_proc ){ + /* if we were able to find it - lock it. + * NOTE: we want to lock it outside of list locked region */ + opal_mutex_lock(&ib_proc->proc_lock); + } + return ib_proc; +} + static void inline unpack8(char **src, uint8_t *value) { /* Copy one character */ @@ -120,9 +154,9 @@ static void inline unpack8(char **src, uint8_t *value) * associated w/ a given destination on this datastructure. 
*/ -mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) +mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc) { - mca_btl_openib_proc_t* module_proc = NULL; + mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL; size_t msg_size; uint32_t size; int rc, i, j; @@ -130,21 +164,30 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) char *offset; int modex_message_size; mca_btl_openib_modex_message_t dummy; + bool is_new = false; /* Check if we have already created a IB proc * structure for this ompi process */ - module_proc = mca_btl_openib_proc_lookup_proc(proc); - if (NULL != module_proc) { + ib_proc = ibproc_lookup_and_lock(proc); + if (NULL != ib_proc) { /* Gotcha! */ - return module_proc; + return ib_proc; } - /* Oops! First time, gotta create a new IB proc + /* All initialization has to be an atomic operation. we do the following assumption: + * - we let all concurent threads to try to do the initialization; + * - when one has finished it locks ib_lock and checks if corresponding + * process is still missing; + * - if so - new proc is added, otherwise - initialized proc struct is released. + */ + + /* First time, gotta create a new IB proc * out of the opal_proc ... 
*/ - module_proc = OBJ_NEW(mca_btl_openib_proc_t); + ib_proc = OBJ_NEW(mca_btl_openib_proc_t); + /* Initialize number of peer */ - module_proc->proc_endpoint_count = 0; - module_proc->proc_opal = proc; + ib_proc->proc_endpoint_count = 0; + ib_proc->proc_opal = proc; /* query for the peer address info */ OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version, @@ -153,11 +196,10 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s", __FILE__, __LINE__, OPAL_NAME_PRINT(proc->proc_name))); - OBJ_RELEASE(module_proc); - return NULL; + goto err_exit; } if (0 == msg_size) { - return NULL; + goto err_exit; } /* Message was packed in btl_openib_component.c; the format is @@ -166,46 +208,46 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) /* Unpack the number of modules in the message */ offset = (char *) message; - unpack8(&offset, &(module_proc->proc_port_count)); - BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count)); - if (module_proc->proc_port_count > 0) { - module_proc->proc_ports = (mca_btl_openib_proc_modex_t *) + unpack8(&offset, &(ib_proc->proc_port_count)); + BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count)); + if (ib_proc->proc_port_count > 0) { + ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *) malloc(sizeof(mca_btl_openib_proc_modex_t) * - module_proc->proc_port_count); + ib_proc->proc_port_count); } else { - module_proc->proc_ports = NULL; + ib_proc->proc_ports = NULL; } /* Loop over unpacking all the ports */ - for (i = 0; i < module_proc->proc_port_count; i++) { + for (i = 0; i < ib_proc->proc_port_count; i++) { /* Unpack the modex comment message struct */ size = modex_message_size; - memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size); + memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size); #if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT - 
MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info); + MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info); #endif offset += size; BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d", i, (int)(offset-((char*)message)))); /* Unpack the number of CPCs that follow */ - unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count)); + unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count)); BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)", - i, module_proc->proc_ports[i].pm_cpc_data_count, + i, ib_proc->proc_ports[i].pm_cpc_data_count, (int)(offset-((char*)message)))); - module_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *) - calloc(module_proc->proc_ports[i].pm_cpc_data_count, + ib_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *) + calloc(ib_proc->proc_ports[i].pm_cpc_data_count, sizeof(opal_btl_openib_connect_base_module_data_t)); - if (NULL == module_proc->proc_ports[i].pm_cpc_data) { - return NULL; + if (NULL == ib_proc->proc_ports[i].pm_cpc_data) { + goto err_exit; } /* Unpack the CPCs */ - for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) { + for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) { uint8_t u8; opal_btl_openib_connect_base_module_data_t *cpcd; - cpcd = module_proc->proc_ports[i].pm_cpc_data + j; + cpcd = ib_proc->proc_ports[i].pm_cpc_data + j; unpack8(&offset, &u8); BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)", i, j, u8, (int)(offset-(char*)message))); @@ -224,7 +266,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len); if (NULL == cpcd->cbm_modex_message) { BTL_ERROR(("Failed to malloc")); - return NULL; + goto err_exit; } memcpy(cpcd->cbm_modex_message, offset, cpcd->cbm_modex_message_len); @@ -238,20 +280,52 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* 
proc) } } - if (0 == module_proc->proc_port_count) { - module_proc->proc_endpoints = NULL; + if (0 == ib_proc->proc_port_count) { + ib_proc->proc_endpoints = NULL; } else { - module_proc->proc_endpoints = (mca_btl_base_endpoint_t**) - malloc(module_proc->proc_port_count * + ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**) + malloc(ib_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*)); } - if (NULL == module_proc->proc_endpoints) { - OBJ_RELEASE(module_proc); - return NULL; + if (NULL == ib_proc->proc_endpoints) { + goto err_exit; } BTL_VERBOSE(("unpacking done!")); - return module_proc; + + /* Finally add this process to the initialized procs list */ + opal_mutex_lock(&mca_btl_openib_component.ib_lock); + + ib_proc_ret = ibproc_lookup_no_lock(proc); + if (NULL == ib_proc_ret) { + /* if process can't be found in this list - insert it locked + * it is safe to lock ib_proc here because this thread is + * the only one who knows about it so far */ + opal_mutex_lock(&ib_proc->proc_lock); + opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super); + ib_proc_ret = ib_proc; + is_new = true; + } else { + /* otherwise - release module_proc */ + OBJ_RELEASE(ib_proc); + } + opal_mutex_unlock(&mca_btl_openib_component.ib_lock); + + /* if we haven't insert the process - lock it here so we + * won't lock mca_btl_openib_component.ib_lock */ + if( !is_new ){ + opal_mutex_lock(&ib_proc_ret->proc_lock); + } + + return ib_proc_ret; + +err_exit: + + fprintf(stderr,"%d: error exit from mca_btl_openib_proc_create\n", OPAL_PROC_MY_NAME.vpid); + if( NULL != ib_proc ){ + OBJ_RELEASE(ib_proc); + } + return NULL; } int mca_btl_openib_proc_remove(opal_proc_t *proc, @@ -262,7 +336,7 @@ int mca_btl_openib_proc_remove(opal_proc_t *proc, /* Remove endpoint from the openib BTL version of the proc as well */ - ib_proc = mca_btl_openib_proc_lookup_proc(proc); + ib_proc = ibproc_lookup_and_lock(proc); if (NULL != ib_proc) { for (i = 0; i < ib_proc->proc_endpoint_count; 
++i) { if (ib_proc->proc_endpoints[i] == endpoint) { @@ -270,6 +344,7 @@ int mca_btl_openib_proc_remove(opal_proc_t *proc, if (i == ib_proc->proc_endpoint_count - 1) { --ib_proc->proc_endpoint_count; } + opal_mutex_unlock(&ib_proc->proc_lock); return OPAL_SUCCESS; } } @@ -310,3 +385,27 @@ int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc, module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint; return OPAL_SUCCESS; } + +int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc, + mca_btl_openib_module_t* openib_btl) +{ + mca_btl_openib_proc_btlptr_t* elem; + + + for(elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_first(&ib_proc->openib_btls); + elem != (mca_btl_openib_proc_btlptr_t*)opal_list_get_end(&ib_proc->openib_btls); + elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_next(elem)) { + if(elem->openib_btl == openib_btl) { + /* this is normal return meaning that this BTL has already touched this ib_proc */ + return OPAL_ERR_RESOURCE_BUSY; + } + } + + elem = OBJ_NEW(mca_btl_openib_proc_btlptr_t); + if( NULL == elem ){ + return OPAL_ERR_OUT_OF_RESOURCE; + } + elem->openib_btl = openib_btl; + opal_list_append(&ib_proc->openib_btls, &elem->super); + return OPAL_SUCCESS; +} diff --git a/opal/mca/btl/openib/btl_openib_proc.h b/opal/mca/btl/openib/btl_openib_proc.h index 0ca3c1358da..576018e5aac 100644 --- a/opal/mca/btl/openib/btl_openib_proc.h +++ b/opal/mca/btl/openib/btl_openib_proc.h @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,6 +54,19 @@ typedef struct mca_btl_openib_proc_modex_t { uint8_t pm_cpc_data_count; } mca_btl_openib_proc_modex_t; +/** + * The list element to hold pointers to openin_btls that are using this + * ib_proc. 
+ */ + +struct mca_btl_openib_proc_btlptr_t { + opal_list_item_t super; + mca_btl_openib_module_t* openib_btl; +}; +typedef struct mca_btl_openib_proc_btlptr_t mca_btl_openib_proc_btlptr_t; + +OBJ_CLASS_DECLARATION(mca_btl_openib_proc_btlptr_t); + /** * Represents the state of a remote process and the set of addresses * that it exports. Also cache an instance of mca_btl_base_endpoint_t for @@ -71,11 +86,14 @@ struct mca_btl_openib_proc_t { /** length of proc_ports array */ uint8_t proc_port_count; + /** list of openib_btl's that touched this proc **/ + opal_list_t openib_btls; + /** array of endpoints that have been created to access this proc */ - struct mca_btl_base_endpoint_t **proc_endpoints; + volatile struct mca_btl_base_endpoint_t **proc_endpoints; /** number of endpoints (length of proc_endpoints array) */ - size_t proc_endpoint_count; + volatile size_t proc_endpoint_count; /** lock to protect against concurrent access to proc state */ opal_mutex_t proc_lock; @@ -84,10 +102,13 @@ typedef struct mca_btl_openib_proc_t mca_btl_openib_proc_t; OBJ_CLASS_DECLARATION(mca_btl_openib_proc_t); -mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc); +mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc); int mca_btl_openib_proc_insert(mca_btl_openib_proc_t*, mca_btl_base_endpoint_t*); int mca_btl_openib_proc_remove(opal_proc_t* proc, mca_btl_base_endpoint_t* module_endpoint); +int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc, + mca_btl_openib_module_t* openib_btl); + END_C_DECLS diff --git a/opal/mca/btl/openib/btl_openib_put.c b/opal/mca/btl/openib/btl_openib_put.c index 25b5d3f5322..2a9ee2ddd67 100644 --- a/opal/mca/btl/openib/btl_openib_put.c +++ b/opal/mca/btl/openib/btl_openib_put.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. 
- * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -49,7 +49,7 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint qp = mca_btl_openib_component.rdma_qp; } - if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle || + if (OPAL_UNLIKELY((btl->btl_put_local_registration_threshold < size && !local_handle) || !remote_handle || size > btl->btl_put_limit)) { return OPAL_ERR_BAD_PARAM; } @@ -101,19 +101,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { - -#if OPAL_HAVE_CONNECTX_XRC - to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS - to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else -#error "that should never happen" -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags); @@ -153,6 +140,21 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base struct ibv_send_wr *bad_wr; int rc; +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { + /* NTH: the remote SRQ number is only available once the endpoint is connected. By + * setting the value here instead of mca_btl_openib_put we guarantee the rem_srqs + * array is initialized. 
*/ +#if OPAL_HAVE_CONNECTX_XRC + to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS + to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else +#error "that should never happen" +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); @@ -164,7 +166,7 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base if (0 != (rc = ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))) { qp_put_wqe(ep, qp); - return OPAL_ERROR;; + return OPAL_ERROR; } return OPAL_SUCCESS; diff --git a/opal/mca/btl/openib/btl_openib_xrc.c b/opal/mca/btl/openib/btl_openib_xrc.c index 3fc0e32c29f..1952c31b12f 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.c +++ b/opal/mca/btl/openib/btl_openib_xrc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. @@ -5,6 +6,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -122,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr) ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; ib_addr->qp = NULL; - OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t); + /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call + * into the CPC with the lock held. The alternative would be to drop the lock but the + * lock is never obtained in a critical path. 
*/ + OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t); OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t); } diff --git a/opal/mca/btl/openib/common_sym_whitelist.txt b/opal/mca/btl/openib/common_sym_whitelist.txt new file mode 100644 index 00000000000..7c16ac478d5 --- /dev/null +++ b/opal/mca/btl/openib/common_sym_whitelist.txt @@ -0,0 +1,4 @@ +# Ignore symbols in this component that are auto-generated and we +# can't do anything about them (e.g., flex/bison symbols). +btl_openib_ini_yyleng +btl_openib_ini_yytext diff --git a/opal/mca/btl/openib/configure.m4 b/opal/mca/btl/openib/configure.m4 index 3ac6a85445b..9c3d9025c6d 100644 --- a/opal/mca/btl/openib/configure.m4 +++ b/opal/mca/btl/openib/configure.m4 @@ -26,10 +26,10 @@ # MCA_btl_openib_POST_CONFIG([should_build]) # ------------------------------------------ AC_DEFUN([MCA_opal_btl_openib_POST_CONFIG], [ - AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 -a "x$btl_openib_have_xrc" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_rdmacm], [test $1 -eq 1 -a "x$btl_openib_have_rdmacm" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_dynamic_sl], [test $1 -eq 1 -a "x$btl_openib_have_opensm_devel" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_udcm], [test $1 -eq 1 -a "x$btl_openib_have_udcm" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 && test "x$btl_openib_have_xrc" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_rdmacm], [test $1 -eq 1 && test "x$btl_openib_have_rdmacm" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_dynamic_sl], [test $1 -eq 1 && test "x$btl_openib_have_opensm_devel" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_udcm], [test $1 -eq 1 && test "x$btl_openib_have_udcm" = "x1"]) ]) @@ -46,6 +46,7 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[ [btl_openib_happy="yes" OPAL_CHECK_OPENFABRICS_CM([btl_openib])], [btl_openib_happy="no"]) + OPAL_CHECK_EXP_VERBS([btl_openib], [], []) AS_IF([test "$btl_openib_happy" = "yes"], [# With the new openib flags, look for 
ibv_fork_init @@ -119,28 +120,6 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[ [enable openib BTL failover]) AM_CONDITIONAL([MCA_btl_openib_enable_failover], [test "x$btl_openib_failover_enabled" = "x1"]) - # Check for __malloc_hook availability - AC_ARG_ENABLE(btl-openib-malloc-alignment, - AC_HELP_STRING([--enable-btl-openib-malloc-alignment], [Enable support for allocated memory alignment. Default: enabled if supported, disabled otherwise.])) - - btl_openib_malloc_hooks_enabled=0 - AS_IF([test "$enable_btl_openib_malloc_alignment" != "no"], - [AC_CHECK_HEADER([malloc.h], - [AC_CHECK_FUNC([__malloc_hook], - [AC_CHECK_FUNC([__realloc_hook], - [AC_CHECK_FUNC([__free_hook], - [btl_openib_malloc_hooks_enabled=1])])])])]) - - AS_IF([test "$enable_btl_openib_malloc_alignment" = "yes" -a "$btl_openib_malloc_hooks_enabled" = "0"], - [AC_MSG_ERROR([openib malloc alignment is requested but __malloc_hook is not available])]) - AC_MSG_CHECKING([whether the openib BTL will use malloc hooks]) - AS_IF([test "$btl_openib_malloc_hooks_enabled" = "0"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - - AC_DEFINE_UNQUOTED(BTL_OPENIB_MALLOC_HOOKS_ENABLED, [$btl_openib_malloc_hooks_enabled], - [Whether the openib BTL malloc hooks are enabled]) - # make sure that CUDA-aware checks have been done AC_REQUIRE([OPAL_CHECK_CUDA]) diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index c29df267d72..2dd5caead03 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -3,12 +3,13 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * * $COPYRIGHT$ * @@ -239,6 +240,7 @@ typedef struct udcm_msg_hdr { #if HAVE_XRC /* UDCM_MESSAGE_XCONNECT, UDCM_MESSAGE_XCONNECT2 */ struct msg_xrc_connect { + opal_process_name_t rem_name; int32_t rem_ep_index; uint8_t rem_port_num; uint32_t rem_qp_num; @@ -342,11 +344,7 @@ static int udcm_xrc_start_connect (opal_btl_openib_connect_base_module_t *cpc, static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep); static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep); -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num); -#else -static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep); -#endif static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep, uint8_t msg_type); @@ -528,27 +526,24 @@ static int udcm_component_finalize(void) static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); + int32_t recv_qpn; int rc; opal_mutex_lock (&udep->udep_lock); do { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - rc = udcm_xrc_recv_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num); -#else - lcl_ep->xrc_recv_qp_num = lcl_ep->qps[0].qp->lcl_qp->qp_num; - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error connecting loopback XRC receive queue pair")); + if (OPAL_SUCCESS != (rc = udcm_endpoint_init_data 
(lcl_ep))) { + BTL_VERBOSE(("error initializing loopback endpoint cpc data")); break; } - rc = mca_btl_openib_endpoint_post_recvs (lcl_ep); + rc = udcm_xrc_send_qp_create (lcl_ep); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error posting receives for loopback queue pair")); + BTL_VERBOSE(("error creating send queue pair for loopback endpoint")); break; } + lcl_ep->rem_info.rem_index = lcl_ep->index; + rc = udcm_xrc_recv_qp_create (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, lcl_ep->qps[0].qp->lcl_psn); if (OPAL_SUCCESS != rc) { @@ -556,14 +551,35 @@ static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) break; } - rc = udcm_xrc_send_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, - lcl_ep->qps[0].qp->lcl_psn); + for (int i = 0 ; i < mca_btl_openib_component.num_xrc_qps ; ++i) { + uint32_t srq_num; +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) { + BTL_ERROR(("BTL openib UDCM internal error: can't get srq num")); + } +#else + srq_num = lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num; +#endif + lcl_ep->rem_info.rem_srqs[i].rem_srq_num = srq_num; + } + +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + recv_qpn = lcl_ep->xrc_recv_qp->qp_num; +#else + recv_qpn = lcl_ep->xrc_recv_qp_num; +#endif + + lcl_ep->ib_addr->remote_xrc_rcv_qp_num = recv_qpn; + lcl_ep->rem_info.rem_qps[0].rem_psn = lcl_ep->xrc_recv_psn; + lcl_ep->rem_info.rem_qps[0].rem_qp_num = recv_qpn; + + rc = udcm_xrc_send_qp_connect (lcl_ep, recv_qpn, lcl_ep->xrc_recv_psn); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error creating loopback XRC send queue pair")); + BTL_VERBOSE(("error connecting loopback XRC send queue pair")); break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; + BTL_VERBOSE(("successfully created loopback queue pair")); /* need to hold the endpoint lock before calling udcm_finish_connection */ OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); @@ -605,8 +621,6 @@ static int udcm_endpoint_init_self (struct 
mca_btl_base_endpoint_t *lcl_ep) break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; - /* need to hold the endpoint lock before calling udcm_finish_connection */ OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); rc = udcm_finish_connection (lcl_ep); @@ -1307,7 +1321,11 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ uint32_t max_send_wr) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); +#if HAVE_DECL_IBV_EXP_CREATE_QP + struct ibv_exp_qp_init_attr init_attr; +#else struct ibv_qp_init_attr init_attr; +#endif size_t req_inline; int rc; @@ -1328,6 +1346,34 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ } init_attr.cap.max_send_wr = max_send_wr; +#if HAVE_DECL_IBV_EXP_CREATE_QP + /* use expanded verbs qp create to enable use of mlx5 atomics */ + init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD; + init_attr.pd = m->btl->device->ib_pd; + +#if HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG + init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG; + init_attr.max_atomic_arg = sizeof (int64_t); +#endif + +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + if (IBV_EXP_ATOMIC_HCA_REPLY_BE == m->btl->device->ib_exp_dev_attr.exp_atomic_cap) { + init_attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; + init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; + } +#endif + + while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_exp_create_qp (m->btl->device->ib_dev_context, + &init_attr))) { + /* NTH: this process may be out of registered memory. try evicting an item from + the lru of this btl's mpool */ + if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) { + break; + } + } + +#else + while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd, &init_attr))) { /* NTH: this process may be out of registered memory. 
try evicting an item from @@ -1337,6 +1383,8 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ } } +#endif + if (NULL == lcl_ep->qps[qp].qp->lcl_qp) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", "ibv_create_qp failed", true, opal_process_info.nodename, @@ -2067,6 +2115,8 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m) udcm_module_post_one_recv (m, msg_num); } + opal_atomic_wmb (); + if (0 == opal_atomic_swap_32 (&m->cm_message_event_active, 1)) { opal_event_active (&m->cm_message_event, OPAL_EV_READ, 1); } @@ -2124,6 +2174,10 @@ static void *udcm_message_callback (int fd, int flags, void *context) BTL_VERBOSE(("running message thread")); + /* Mark that the callback was started */ + opal_atomic_swap_32 (&m->cm_message_event_active, 0); + opal_atomic_wmb (); + while ((item = (udcm_message_recv_t *) opal_fifo_pop_atomic (&m->cm_recv_msg_fifo))) { mca_btl_openib_endpoint_t *lcl_ep = item->msg_hdr.lcl_ep; @@ -2165,8 +2219,6 @@ static void *udcm_message_callback (int fd, int flags, void *context) BTL_VERBOSE(("exiting message thread")); - opal_atomic_swap_32 (&m->cm_message_event_active, 0); - return NULL; } @@ -2570,11 +2622,7 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep) /* mark: xrc receive qp */ /* Recv qp connect */ -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num) -#else -static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) -#endif { mca_btl_openib_module_t *openib_btl = lcl_ep->endpoint_btl; @@ -2588,9 +2636,9 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) BTL_VERBOSE(("Connecting Recv QP\n")); lcl_ep->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr); if (NULL == lcl_ep->xrc_recv_qp) { /* failed to regester the qp, so it is already die and we should create new one */ - /* Return NOT READY !!!*/ - BTL_ERROR(("Failed to register 
qp_num: %d, get error: %s (%d)\n. Replying with RNR", - qp_num, strerror(errno), errno)); + /* Return NOT READY !!!*/ + BTL_VERBOSE(("Failed to register qp_num: %d, get error: %s (%d)\n. Replying with RNR", + qp_num, strerror(errno), errno)); return OPAL_ERROR; } else { BTL_VERBOSE(("Connected to XRC Recv qp [%d]", lcl_ep->xrc_recv_qp->qp_num)); @@ -2598,13 +2646,16 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) } #else int ret; + /* silence unused variable warning */ + (void) qp_num; + BTL_VERBOSE(("Connecting receive qp: %d", lcl_ep->xrc_recv_qp_num)); ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, lcl_ep->xrc_recv_qp_num); if (ret) { /* failed to regester the qp, so it is already die and we should create new one */ /* Return NOT READY !!!*/ lcl_ep->xrc_recv_qp_num = 0; - BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d). Replying with RNR", - lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); + BTL_VERBOSE(("Failed to register qp_num: %d , get error: %s (%d). 
Replying with RNR", + lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); return OPAL_ERROR; } #endif @@ -2780,9 +2831,9 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_ return rc; } - msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); - msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; - msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME; + msg->data->hdr.data.xreq.rem_ep_index = htonl(lcl_ep->index); + msg->data->hdr.data.xreq.rem_port_num = m->modex.mm_port_num; + msg->data->hdr.data.xreq.rem_name = OPAL_PROC_MY_NAME; if (UDCM_MESSAGE_XCONNECT == msg_type) { BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num, @@ -2886,11 +2937,7 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg if (UDCM_MESSAGE_XCONNECT2 == msg_hdr->type) { response_type = UDCM_MESSAGE_XRESPONSE2; -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS rc = udcm_xrc_recv_qp_connect (lcl_ep, msg_hdr->data.xreq.rem_qp_num); -#else - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif if (OPAL_SUCCESS != rc) { /* return not ready. remote side will retry */ rej_reason = UDCM_REJ_NOT_READY; diff --git a/opal/mca/btl/openib/mca-btl-openib-device-params.ini b/opal/mca/btl/openib/mca-btl-openib-device-params.ini index 9074c1335b8..a61b91ffcb9 100644 --- a/opal/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/opal/mca/btl/openib/mca-btl-openib-device-params.ini @@ -278,6 +278,13 @@ mtu = 2048 receive_queues = P,65536,256,192,128 max_inline_data = 64 +[Intel HFI1] +vendor_id = 0x1175 +vendor_part_id = 9456,9457 +use_eager_rdma = 1 +mtu = 4096 +max_inline_data = 0 + ############################################################################ # Intel has several OUI's, including 0x8086. Amusing. 
:-) Intel has diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 6594e4c0d29..8f7871423b6 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -292,13 +292,21 @@ create_peer_and_endpoint(int interface, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); + /* + * check if create_endpoint() already created the endpoint. + * if not, create it here. + */ if (NULL == *endpoint) { *endpoint = malloc(sizeof(mca_btl_base_endpoint_t)); if (NULL == *endpoint) { return OPAL_ERR_OUT_OF_RESOURCE; } - (*endpoint)->ptl_proc.rank = proc->proc_name.vpid; } + /* + * regardless of who created the endpoint, set the rank here + * because we are using logical mapping. + */ + (*endpoint)->ptl_proc.rank = proc->proc_name.vpid; phys_peer->phys.pid = id[interface].phys.pid; phys_peer->phys.nid = id[interface].phys.nid; @@ -500,7 +508,6 @@ mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base, size : portals4_btl->super.btl_max_send_size ; } - frag->md_h = PTL_INVALID_HANDLE; frag->base.des_segment_count = 1; frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; frag->base.order = MCA_BTL_NO_ORDER; diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 2c6bb192f83..5834ac47449 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -692,8 +692,8 @@ mca_btl_portals4_component_progress(void) /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */ /* Re-issue the PtlGet (see btl_portals4_rdma.c) */ - ret = PtlGet(frag->md_h, - 0, + ret = PtlGet(portals4_btl->send_md_h, + (ptl_size_t) frag->addr, frag->length, frag->peer_proc, portals4_btl->recv_idx, @@ -704,8 +704,6 @@ mca_btl_portals4_component_progress(void) opal_output_verbose(1, 
opal_btl_base_framework.framework_output, "%s:%d: Re-issued PtlGet failed: %d", __FILE__, __LINE__, ret); - PtlMDRelease(frag->md_h); - frag->md_h = PTL_INVALID_HANDLE; return OPAL_ERROR; } @@ -724,8 +722,6 @@ mca_btl_portals4_component_progress(void) frag->rdma_cb.context, frag->rdma_cb.data, OPAL_SUCCESS); - PtlMDRelease(frag->md_h); - frag->md_h = PTL_INVALID_HANDLE; OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag); OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); diff --git a/opal/mca/btl/portals4/btl_portals4_frag.h b/opal/mca/btl/portals4/btl_portals4_frag.h index 76c610c8de1..8f3c6e49ac3 100644 --- a/opal/mca/btl/portals4/btl_portals4_frag.h +++ b/opal/mca/btl/portals4/btl_portals4_frag.h @@ -45,10 +45,9 @@ struct mca_btl_portals4_frag_t { mca_btl_base_header_t hdr; /* handle to use for communication */ ptl_handle_me_t me_h; - /* handle to use for communication */ - ptl_handle_md_t md_h; /* size of the allocated memory region -- not the amount of data we need to send */ + void *addr; size_t size; /* match bits for retransmit case */ ptl_match_bits_t match_bits; diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index 1ff765b98aa..be915d59b99 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -78,34 +78,17 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, frag->endpoint = btl_peer; frag->hdr.tag = MCA_BTL_TAG_MAX; - /* Bind the memory */ - md.start = (void *)local_address; - md.length = size; - md.options = 0; - md.eq_handle = portals4_btl->recv_eq_h; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(portals4_btl->portals_ni_h, - &md, - &frag->md_h); - - if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d", - __FILE__, __LINE__, ret); - return OPAL_ERROR; - } - frag->match_bits = remote_handle->key; - frag->length = md.length; + 
frag->addr = local_address; + frag->length = size; frag->peer_proc = btl_peer->ptl_proc; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); - ret = PtlGet(frag->md_h, - 0, - md.length, + ret = PtlGet(portals4_btl->send_md_h, + (ptl_size_t) local_address, + size, btl_peer->ptl_proc, portals4_btl->recv_idx, frag->match_bits, /* match bits */ @@ -115,8 +98,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); - PtlMDRelease(frag->md_h); - frag->md_h = PTL_INVALID_HANDLE; return OPAL_ERROR; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", diff --git a/opal/mca/btl/scif/configure.m4 b/opal/mca/btl/scif/configure.m4 index 33b292197a8..b8826b0bd11 100644 --- a/opal/mca/btl/scif/configure.m4 +++ b/opal/mca/btl/scif/configure.m4 @@ -2,6 +2,8 @@ # # Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -20,14 +22,14 @@ AC_DEFUN([MCA_opal_btl_scif_CONFIG],[ opal_btl_scif_happy="no" if test "$with_scif" != "no" ; then - if test -n "$with_scif" -a "$with_scif" != "yes" ; then + if test -n "$with_scif" && test "$with_scif" != "yes" ; then opal_check_scif_dir=$with_scif fi OPAL_CHECK_PACKAGE([btl_scif], [scif.h], [scif], [scif_open], [], [$opal_check_scif_dir], [], [opal_btl_scif_happy="yes"], []) - if test "$opal_btl_scif_happy" != "yes" -a -n "$with_scif" ; then + if test "$opal_btl_scif_happy" != "yes" && test -n "$with_scif" ; then AC_MSG_ERROR([SCIF support requested but not found. 
Aborting]) fi fi diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c index e0eee092793..d1758c9ddd0 100644 --- a/opal/mca/btl/sm/btl_sm.c +++ b/opal/mca/btl/sm/btl_sm.c @@ -30,6 +30,9 @@ #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #ifdef HAVE_FCNTL_H #include #endif /* HAVE_FCNTL_H */ diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 5dee6205848..2414f8ca520 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -11,12 +11,12 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -607,6 +607,7 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, int rc = OPAL_SUCCESS; int fd = -1; char *fname = NULL; + char *tmpfname = NULL; /* used as a temporary store so we can extract shmem_ds info */ mca_common_sm_module_t *tmp_modp = NULL; @@ -663,8 +664,19 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, * file containing all the meta info required for attach. */ /* now just write the contents of tmp_modp->shmem_ds to the full - * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size starts. */ - if (-1 == (fd = open(fname, O_CREAT | O_RDWR, 0600))) { + * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size + * starts. 
Note that we write into a temporary file first and + * then do a rename(2) to move the full file into its final + * destination. This avoids a race condition where a peer process + * might open/read part of the file before this processes finishes + * writing it (see + * https://github.com/open-mpi/ompi/issues/1230). */ + asprintf(&tmpfname, "%s.tmp", fname); + if (NULL == tmpfname) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto out; + } + if (-1 == (fd = open(tmpfname, O_CREAT | O_RDWR, 0600))) { int err = errno; opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, "open(2)", strerror(err), err); @@ -690,11 +702,20 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, /* only do this for the mpool case */ OBJ_RELEASE(tmp_modp); } + (void)close(fd); + fd = -1; + if (0 != rename(tmpfname, fname)) { + rc = OPAL_ERR_IN_ERRNO; + goto out; + } out: if (-1 != fd) { (void)close(fd); } + if (NULL != tmpfname) { + free(tmpfname); + } return rc; } @@ -743,6 +764,11 @@ mca_btl_sm_component_init(int *num_btls, int rc; #endif /* OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA */ + /* if we are in a container, then we must disqualify ourselves */ + if (NULL != getenv("OPAL_PROC_CONTAINER")) { + return NULL; + } + *num_btls = 0; /* lookup/create shared memory pool only when used */ mca_btl_sm_component.sm_mpool = NULL; diff --git a/opal/mca/btl/smcuda/configure.m4 b/opal/mca/btl/smcuda/configure.m4 index d56cb178580..016f691944f 100644 --- a/opal/mca/btl/smcuda/configure.m4 +++ b/opal/mca/btl/smcuda/configure.m4 @@ -4,7 +4,7 @@ # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -21,8 +21,8 @@ AC_DEFUN([MCA_opal_btl_smcuda_CONFIG],[ # make sure that CUDA-aware checks have been done AC_REQUIRE([OPAL_CHECK_CUDA]) - # Only build if CUDA 4.1 support is available - AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], + # Only build if CUDA support is available + AS_IF([test "x$CUDA_SUPPORT" = "x1"], [$1], [$2]) diff --git a/opal/mca/btl/tcp/btl_tcp.h b/opal/mca/btl/tcp/btl_tcp.h index a9b5ad9d4dd..6b6b0e62e38 100644 --- a/opal/mca/btl/tcp/btl_tcp.h +++ b/opal/mca/btl/tcp/btl_tcp.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. @@ -37,6 +37,9 @@ #ifdef HAVE_NETINET_IN_H #include #endif +#ifdef HAVE_UNISTD_H +#include +#endif /* Open MPI includes */ #include "opal/mca/event/event.h" diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index 02e82df4034..d9bf9b76e67 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -206,7 +207,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) cnt = readv(sd, frag->iov_ptr, num_vecs); if( 0 < cnt ) goto advance_iov_position; if( cnt == 0 ) { - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return false; } @@ -220,16 +221,16 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len, strerror(opal_socket_errno), (unsigned long) frag->iov_cnt)); btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - default: + mca_btl_tcp_endpoint_close(btl_endpoint); + return false; + default: BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - } + mca_btl_tcp_endpoint_close(btl_endpoint); + return false; + } } advance_iov_position: diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 409a10e803e..323cf367c8e 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ @@ -589,32 +589,21 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module) int count; OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); - count = opal_list_get_size(&ugni_module->ep_wait_list); - OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); + count = opal_list_get_size(&ugni_module->ep_wait_list); do { - OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list); - OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); if (endpoint != NULL) { - - endpoint->wait_listed = false; - rc = mca_btl_ugni_progress_send_wait_list (endpoint); - if (OPAL_SUCCESS != rc && false == endpoint->wait_listed) { - - endpoint->wait_listed = true; - OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); + if (OPAL_SUCCESS != rc) { opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); - OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); + } else { + endpoint->wait_listed = false; } } - - --count; - if (count == 0) break; - - } while (endpoint != NULL) ; + } while (endpoint != NULL && --count > 0) ; + OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); return rc; } diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index 49551f8a8db..fdaafffd5a4 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ @@ -158,9 +158,11 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) { rc = mca_btl_ugni_progress_send_wait_list (ep); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - ep->wait_listed = true; OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock); - opal_list_append (&ep->btl->ep_wait_list, &ep->super); + if (false == ep->wait_listed) { + opal_list_append (&ep->btl->ep_wait_list, &ep->super); + ep->wait_listed = true; + } OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock); } diff --git a/opal/mca/btl/ugni/btl_ugni_frag.h b/opal/mca/btl/ugni/btl_ugni_frag.h index c912b9abc52..2b04564c618 100644 --- a/opal/mca/btl/ugni/btl_ugni_frag.h +++ b/opal/mca/btl/ugni/btl_ugni_frag.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2013 The University of Tennessee and The University @@ -66,6 +66,7 @@ struct mca_btl_ugni_base_frag_t; typedef struct mca_btl_ugni_base_frag_t { mca_btl_base_descriptor_t base; + volatile int32_t ref_cnt; uint32_t msg_id; uint16_t hdr_size; uint16_t flags; @@ -148,6 +149,7 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep, if (OPAL_LIKELY(NULL != *frag)) { (*frag)->my_list = list; (*frag)->endpoint = ep; + (*frag)->ref_cnt = 1; return OPAL_SUCCESS; } @@ -169,10 +171,16 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag) return OPAL_SUCCESS; } -static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { - frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; +static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) { + int32_t ref_cnt; - BTL_VERBOSE(("frag complete. 
flags = %d", frag->base.des_flags)); + opal_atomic_mb (); + + ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); + if (ref_cnt) { + assert (ref_cnt > 0); + return false; + } /* call callback if specified */ if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { @@ -182,6 +190,20 @@ static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, i if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) { mca_btl_ugni_frag_return (frag); } + + return true; +} + +static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { + BTL_VERBOSE(("frag complete. flags = %d", frag->base.des_flags)); + + frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; + + mca_btl_ugni_frag_del_ref (frag, rc); +} + +static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *frag) { + return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags); } #define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \ diff --git a/opal/mca/btl/ugni/btl_ugni_send.c b/opal/mca/btl/ugni/btl_ugni_send.c index d20881aca0a..45e17ec13f9 100644 --- a/opal/mca/btl/ugni/btl_ugni_send.c +++ b/opal/mca/btl/ugni/btl_ugni_send.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
* Copyright (c) 2014 Research Organization for Information Science @@ -25,7 +25,6 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor; size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; - int flags_save = frag->base.des_flags; int rc; /* tag and len are at the same location in eager and smsg frag hdrs */ @@ -43,42 +42,48 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor, OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size)); - /* temporarily disable ownership and callback flags so we can reliably check the complete flag */ - frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + /* add a reference to prevent the fragment from being returned until after the + * completion flag is checked. */ + ++frag->ref_cnt; frag->flags &= ~MCA_BTL_UGNI_FRAG_COMPLETE; rc = mca_btl_ugni_send_frag (endpoint, frag); - - if (OPAL_LIKELY(frag->flags & MCA_BTL_UGNI_FRAG_COMPLETE)) { + if (OPAL_LIKELY(mca_btl_ugni_frag_check_complete (frag))) { /* fast path: remote side has received the frag */ - frag->base.des_flags = flags_save; - mca_btl_ugni_frag_complete (frag, OPAL_SUCCESS); + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); return 1; } - if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (flags_save & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { + if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (frag->flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { /* fast(ish) path: btl owned buffered frag. 
report send as complete */ - frag->base.des_flags = flags_save & ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + bool call_callback = !!(frag->flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + frag->flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - if (OPAL_LIKELY(flags_save & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { + if (call_callback) { frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc); } + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); + return 1; } /* slow(ish) path: remote side hasn't received the frag. call the frag's callback when we get the local smsg/msgq or remote rdma completion */ - frag->base.des_flags = flags_save | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + + mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) { /* queue up request */ if (false == endpoint->wait_listed) { OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); - opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); + if (false == endpoint->wait_listed) { + opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); + endpoint->wait_listed = true; + } OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); - endpoint->wait_listed = true; } OPAL_THREAD_LOCK(&endpoint->lock); diff --git a/opal/mca/btl/usnic/btl_usnic.h b/opal/mca/btl/usnic/btl_usnic.h index 0d815cc4389..cc094ce38f4 100644 --- a/opal/mca/btl/usnic/btl_usnic.h +++ b/opal/mca/btl/usnic/btl_usnic.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -181,6 +181,9 @@ typedef struct opal_btl_usnic_component_t { /** max completion queue entries per module */ int32_t cq_num; + /** max number of entries in AV EQ */ + int32_t av_eq_num; + /** retrans characteristics */ int retrans_timeout; diff --git a/opal/mca/btl/usnic/btl_usnic_ack.c b/opal/mca/btl/usnic/btl_usnic_ack.c index 3a6ae5baac6..4616516f525 100644 --- a/opal/mca/btl/usnic/btl_usnic_ack.c +++ b/opal/mca/btl/usnic/btl_usnic_ack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -207,8 +207,7 @@ opal_btl_usnic_ack_send( /* send the seq of the lowest item in the window that we've received */ ack->ss_base.us_btl_header->ack_seq = - endpoint->endpoint_next_contig_seq_to_recv - 1; - + SEQ_DIFF(endpoint->endpoint_next_contig_seq_to_recv, 1); ack->ss_len = sizeof(opal_btl_usnic_btl_header_t); #if MSGDEBUG1 diff --git a/opal/mca/btl/usnic/btl_usnic_ack.h b/opal/mca/btl/usnic/btl_usnic_ack.h index 0aaf8306d70..1ef85544c06 100644 --- a/opal/mca/btl/usnic/btl_usnic_ack.h +++ b/opal/mca/btl/usnic/btl_usnic_ack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -92,7 +92,7 @@ opal_btl_usnic_piggyback_ack( if (endpoint->endpoint_ack_needed) { opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint); sseg->ss_base.us_btl_header->ack_seq = - endpoint->endpoint_next_contig_seq_to_recv - 1; + SEQ_DIFF(endpoint->endpoint_next_contig_seq_to_recv, 1); sseg->ss_base.us_btl_header->ack_present = 1; #if MSGDEBUG1 opal_output(0, "Piggy-backing ACK for sequence %"UDSEQ"\n", diff --git a/opal/mca/btl/usnic/btl_usnic_cagent.c b/opal/mca/btl/usnic/btl_usnic_cagent.c index 19d01bd22f9..e71c51bf76b 100644 --- a/opal/mca/btl/usnic/btl_usnic_cagent.c +++ b/opal/mca/btl/usnic/btl_usnic_cagent.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,7 +16,9 @@ #include #include #include +#ifdef HAVE_ALLOCA_H #include +#endif #include "opal_stdint.h" #include "opal/threads/mutex.h" @@ -298,9 +302,16 @@ static void agent_sendto(int fd, char *buffer, ssize_t numbytes, } else if (rc < 0) { if (errno == EAGAIN || errno == EINTR) { continue; + } else if (errno == EPERM) { + // We're sending too fast + usleep(5); + continue; } - ABORT("Unexpected sendto() error"); + char *msg; + asprintf(&msg, "Unexpected sendto() error: errno=%d (%s)", + errno, strerror(errno)); + ABORT(msg); /* Will not return */ } diff --git a/opal/mca/btl/usnic/btl_usnic_cclient.c b/opal/mca/btl/usnic/btl_usnic_cclient.c index 13c9331cc18..f468f495a87 100644 --- a/opal/mca/btl/usnic/btl_usnic_cclient.c +++ b/opal/mca/btl/usnic/btl_usnic_cclient.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -16,7 +18,9 @@ #include #include #include +#ifdef HAVE_ALLOCA_H #include +#endif #include #include "opal_stdint.h" diff --git a/opal/mca/btl/usnic/btl_usnic_compat.h b/opal/mca/btl/usnic/btl_usnic_compat.h index 44f2263a8e7..7a19a7e8515 100644 --- a/opal/mca/btl/usnic/btl_usnic_compat.h +++ b/opal/mca/btl/usnic/btl_usnic_compat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +34,14 @@ /* Inclue the progress thread stuff */ # include "opal/runtime/opal_progress_threads.h" -/* Hhwloc is now guaranteed */ +/* Hwloc support is now guaranteed, and the rest of the code base does + not define OPAL_HAVE_HWLOC any more (because it would always be 1). + + Note: The usnic BTL still uses OPAL_HAVE_HWLOC because Cisco + continues to sync it against a v1.10-based tree (where + OPAL_HAVE_HWLOC may still be 0 or 1). Once Cisco stops syncing the + usnic BTL against v1.10.x, all the OPAL_HAVE_HWLOC code in the + usnic BTL can go away. */ # define OPAL_HAVE_HWLOC 1 # define USNIC_OUT opal_btl_base_framework.framework_output diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index b33e11df9ce..b49e51bb57f 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
@@ -87,7 +87,7 @@ #define OPAL_BTL_USNIC_NUM_COMPLETIONS 500 /* RNG buffer definition */ -opal_rng_buff_t opal_btl_usnic_rand_buff; +opal_rng_buff_t opal_btl_usnic_rand_buff = {0}; /* simulated clock */ uint64_t opal_btl_usnic_ticks = 0; @@ -956,11 +956,12 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, /* Output all of this module's values. */ const char *devname = module->fabric_info->fabric_attr->name; opal_output_verbose(5, USNIC_OUT, - "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d", + "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d", devname, module->sd_num, module->rd_num, - module->cq_num); + module->cq_num, + module->av_eq_num); opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s priority MTU = %" PRIsize_t, devname, diff --git a/opal/mca/btl/usnic/btl_usnic_endpoint.h b/opal/mca/btl/usnic/btl_usnic_endpoint.h index c76eee6d95a..11dc1793d73 100644 --- a/opal/mca/btl/usnic/btl_usnic_endpoint.h +++ b/opal/mca/btl/usnic/btl_usnic_endpoint.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,6 +69,7 @@ typedef struct opal_btl_usnic_modex_t { uint32_t ipv4_addr; /* Stored in host order */ uint32_t ports[USNIC_NUM_CHANNELS]; + /* Stored in network order */ uint32_t netmask; /* Stored in host order */ uint32_t connectivity_udp_port; diff --git a/opal/mca/btl/usnic/btl_usnic_mca.c b/opal/mca/btl/usnic/btl_usnic_mca.c index c0df778871d..910131d8f4f 100644 --- a/opal/mca/btl/usnic/btl_usnic_mca.c +++ b/opal/mca/btl/usnic/btl_usnic_mca.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2016 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. @@ -162,6 +162,7 @@ int opal_btl_usnic_component_register(void) static int prio_sd_num; static int prio_rd_num; static int cq_num; + static int av_eq_num; static int udp_port_base; static int max_tiny_msg_size; static int eager_limit; @@ -235,12 +236,16 @@ int opal_btl_usnic_component_register(void) -1, &cq_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5)); mca_btl_usnic_component.cq_num = (int32_t) cq_num; + CHECK(reg_int("av_eq_num", "Number of event queue entries for peer address resolution", + 1024, &av_eq_num, REGINT_GE_ONE, OPAL_INFO_LVL_5)); + mca_btl_usnic_component.av_eq_num = (int32_t) av_eq_num; + CHECK(reg_int("base_udp_port", "Base UDP port to use for usNIC communications. If 0, system will pick the port number. If non-zero, it will be added to each process' local rank to obtain the final port number (default: 0)", 0, &udp_port_base, REGINT_GE_ZERO, OPAL_INFO_LVL_5)); mca_btl_usnic_component.udp_port_base = (int) udp_port_base; CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame", - 1000, &mca_btl_usnic_component.retrans_timeout, + 5000, &mca_btl_usnic_component.retrans_timeout, REGINT_GE_ONE, OPAL_INFO_LVL_5)); CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)", diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 6105fd4281a..53696ce0fa3 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. 
All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved @@ -69,13 +69,14 @@ static void finalize_one_channel(opal_btl_usnic_module_t *module, /* - * Loop over all procs sent to us in add_procs and see if we want to - * add a proc/endpoint for them. + * Loop over a block of procs sent to us in add_procs and see if we + * want to add a proc/endpoint for them. */ -static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, - size_t nprocs, - opal_proc_t **procs, - mca_btl_base_endpoint_t **endpoints) +static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, + size_t block_offset, + size_t block_len, + opal_proc_t **procs, + mca_btl_base_endpoint_t **endpoints) { int rc; opal_proc_t* my_proc; @@ -87,8 +88,8 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, return OPAL_ERR_OUT_OF_RESOURCE; } - /* Loop over the procs we were given */ - for (size_t i = 0; i < nprocs; i++) { + /* Loop over a block in the procs we were given */ + for (size_t i = block_offset; i < (block_offset + block_len); i++) { struct opal_proc_t* opal_proc = procs[i]; opal_btl_usnic_proc_t* usnic_proc; mca_btl_base_endpoint_t* usnic_endpoint; @@ -97,11 +98,18 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, /* Do not create loopback usnic connections */ if (opal_proc == my_proc) { + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: not connecting to self", + module->fabric_info->fabric_attr->name); continue; } /* usNIC does not support loopback to the same machine */ if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: not connecting to %s on same server", + module->fabric_info->fabric_attr->name, + usnic_compat_proc_name_print(&opal_proc->proc_name)); continue; } @@ -114,6 +122,11 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, if (OPAL_ERR_UNREACH == rc) { /* If the peer doesn't have usnic modex info, 
then we just skip it */ + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: peer %s on %s does not have usnic modex info; skipping", + module->fabric_info->fabric_attr->name, + usnic_compat_proc_name_print(&opal_proc->proc_name), + opal_get_proc_hostname(opal_proc)); continue; } else if (OPAL_SUCCESS != rc) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -126,8 +139,10 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, &usnic_endpoint); if (OPAL_SUCCESS != rc) { opal_output_verbose(5, USNIC_OUT, - "btl:usnic:%s: unable to create endpoint for module=%p proc=%p\n", - __func__, (void *)module, (void *)usnic_proc); + "btl:usnic:add_procs:%s: unable to create endpoint to peer %s on %s", + module->fabric_info->fabric_attr->name, + usnic_compat_proc_name_print(&opal_proc->proc_name), + opal_get_proc_hostname(opal_proc)); OBJ_RELEASE(usnic_proc); continue; } @@ -143,7 +158,8 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, modex->netmask); opal_output_verbose(5, USNIC_OUT, - "btl:usnic: new usnic peer endpoint: %s, proirity port %d, data port %d", + "btl:usnic:add_procs:%s: new usnic peer endpoint: %s, proirity port %d, data port %d", + module->fabric_info->fabric_attr->name, str, modex->ports[USNIC_PRIORITY_CHANNEL], modex->ports[USNIC_DATA_CHANNEL]); @@ -195,9 +211,10 @@ static void add_procs_warn_unreachable(opal_btl_usnic_module_t *module, * invoked. Go reap them all. 
*/ static int -add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, - size_t array_len, - struct mca_btl_base_endpoint_t **endpoints) +add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module, + size_t block_offset, + size_t block_len, + struct mca_btl_base_endpoint_t **endpoints) { int ret = OPAL_SUCCESS; int num_left; @@ -205,12 +222,11 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, uint32_t event; struct fi_eq_entry entry; struct fi_eq_err_entry err_entry; - bool error_occurred = false; /* compute num fi_av_insert completions we are waiting for */ num_left = 0; - for (i = 0; i < array_len; ++i) { + for (i = block_offset; i < (block_offset + block_len); ++i) { if (NULL != endpoints[i]) { num_left += USNIC_NUM_CHANNELS; } @@ -266,7 +282,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, We therefore only want to print a pretty warning about (and OBJ_RELEASE) that endpoint the *first* time it is reported. */ - for (i = 0; i < array_len; ++i) { + for (i = block_offset; i < (block_offset + block_len); ++i) { if (endpoints[i] == context->endpoint) { add_procs_warn_unreachable(module, context->endpoint); @@ -348,7 +364,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, - If an otherwise-valid endpoint has no dest, that means we timed out trying to resolve it, so just release that endpoint. */ size_t num_endpoints_created = 0; - for (i = 0; i < array_len; i++) { + for (i = block_offset; i < (block_offset + block_len); i++) { if (NULL != endpoints[i]) { bool happy; @@ -382,6 +398,79 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, return ret; } +/* + * Create endpoints for the procs we were given in add_procs. + */ +static int add_procs_create_endpoints(struct opal_btl_usnic_module_t* module, + size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t** endpoints) +{ + /* We need to ensure that we don't overrun the libfabric AV EQ. 
+ Divide up all the peer address resolutions we need to do into a + series of blocks; insert and complete each block before moving + to the next (note: if performance mandates it, we can move to a + sliding window style of AV inserts to get better concurrency of + AV resolution). */ + + /* Leave a few empty slots in the AV EQ, just for good measure */ + if (module->av_eq_size < 8) { + opal_show_help("help-mpi-btl-usnic.txt", "fi_av_eq too small", + true, + opal_process_info.nodename, + module->av_eq_size, + 8); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + size_t eq_size = module->av_eq_size - 8; + size_t block_len = eq_size; + size_t num_av_inserts = nprocs * USNIC_NUM_CHANNELS; + size_t num_blocks = num_av_inserts / block_len; + if (num_av_inserts % block_len != 0) { + ++num_blocks; + } + + /* Per above, the blocks are expressed in terms of number of AV + inserts. Convert them to be expressed in terms of number of + procs. */ + block_len /= USNIC_NUM_CHANNELS; + + /* Per above, loop over creating the endpoints so that we do not + overrun the libfabric AV EQ. */ + int rc; + for (size_t block_offset = 0, block = 0; block < num_blocks; + block_offset += block_len, ++block) { + /* Adjust for the last block */ + if (block_len > (nprocs - block_offset)) { + block_len = nprocs - block_offset; + } + + /* First, create endpoints (and procs, if they're not already + created) for the usnic-reachable procs we were given. */ + rc = add_procs_block_create_endpoints(module, + block_offset, block_len, + procs, endpoints); + if (OPAL_SUCCESS != rc) { + return rc; + } + + /* For each endpoint that was created, we initiated the + process to create NUM_CHANNELS fi_addrs. Go finish all of + those. This will be the final determination of whether we + can use the endpoint or not because we'll find out if each + endpoint is reachable or not. 
*/ + rc = add_procs_block_reap_fi_av_inserts(module, + block_offset, block_len, + endpoints); + if (OPAL_SUCCESS != rc) { + return rc; + } + } + + return OPAL_SUCCESS; +} + /* * Add procs to this BTL module, receiving endpoint information from * the modex. This is done in 2 phases: @@ -408,23 +497,13 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module, opal_btl_usnic_module_t* module = (opal_btl_usnic_module_t*) base_module; int rc; - /* First, create endpoints (and procs, if they're not already - created) for all the usnic-reachable procs we were given. */ + /* Go create the endpoints (including all relevant address + resolution) */ rc = add_procs_create_endpoints(module, nprocs, procs, endpoints); if (OPAL_SUCCESS != rc) { goto fail; } - /* For each endpoint that was created, we initiated the process to - create NUM_CHANNELS fi_addrs. Go finish all of those. This - will be the final determination of whether we can use the - endpoint or not because we'll find out if each endpoint is - reachable or not. */ - rc = add_procs_reap_fi_av_inserts(module, nprocs, endpoints); - if (OPAL_SUCCESS != rc) { - goto fail; - } - /* Find all the endpoints with a complete set of USD destinations and mark them as reachable */ for (size_t i = 0; NULL != reachable && i < nprocs; ++i) { @@ -1205,7 +1284,7 @@ usnic_send( /* assign length */ sseg->ss_len = sizeof(opal_btl_usnic_btl_header_t) + frag->sf_size; - sseg->ss_channel = USNIC_PRIORITY_CHANNEL; + sseg->ss_channel = USNIC_DATA_CHANNEL; sseg->ss_base.us_btl_header->tag = tag; #if MSGDEBUG1 opal_output(0, "INLINE send, sseg=%p", (void *)sseg); @@ -1831,6 +1910,7 @@ static void init_queue_lengths(opal_btl_usnic_module_t *module) } else { module->cq_num = mca_btl_usnic_component.cq_num; } + module->av_eq_num = mca_btl_usnic_component.av_eq_num; /* * Queue sizes for priority channel scale with # of endpoint. 
A @@ -2018,12 +2098,15 @@ static int init_channels(opal_btl_usnic_module_t *module) } memset(&eq_attr, 0, sizeof(eq_attr)); - eq_attr.size = 1024; + eq_attr.size = module->av_eq_num; eq_attr.wait_obj = FI_WAIT_UNSPEC; rc = fi_eq_open(module->fabric, &eq_attr, &module->av_eq, NULL); if (rc != OPAL_SUCCESS) { goto destroy; } + // Save the size of the created EQ + module->av_eq_size = eq_attr.size; + eq_attr.wait_obj = FI_WAIT_FD; rc = fi_eq_open(module->fabric, &eq_attr, &module->dom_eq, NULL); if (rc != OPAL_SUCCESS) { diff --git a/opal/mca/btl/usnic/btl_usnic_module.h b/opal/mca/btl/usnic/btl_usnic_module.h index 4f23eeac447..b4f5d0c7390 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.h +++ b/opal/mca/btl/usnic/btl_usnic_module.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,6 +110,8 @@ typedef struct opal_btl_usnic_module_t { struct fid_eq *av_eq; struct fid_av *av; + size_t av_eq_size; + mca_btl_base_module_error_cb_fn_t pml_error_callback; /* Information about the events */ @@ -127,6 +129,7 @@ typedef struct opal_btl_usnic_module_t { int sd_num; int rd_num; int cq_num; + int av_eq_num; int prio_sd_num; int prio_rd_num; diff --git a/opal/mca/btl/usnic/btl_usnic_send.h b/opal/mca/btl/usnic/btl_usnic_send.h index 796008d2f7d..2020544f205 100644 --- a/opal/mca/btl/usnic/btl_usnic_send.h +++ b/opal/mca/btl/usnic/btl_usnic_send.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -76,6 +76,8 @@ opal_btl_usnic_post_segment( sseg->ss_len); #endif + assert(channel_id == USNIC_DATA_CHANNEL); + /* Send the segment */ ret = fi_send(channel->ep, sseg->ss_ptr, @@ -126,6 +128,8 @@ opal_btl_usnic_post_ack( sseg->ss_len); #endif + assert(channel_id == USNIC_PRIORITY_CHANNEL); + ret = fi_send(channel->ep, sseg->ss_ptr, sseg->ss_len + mca_btl_usnic_component.prefix_send_offset, diff --git a/opal/mca/btl/usnic/btl_usnic_stats.c b/opal/mca/btl/usnic/btl_usnic_stats.c index 18f24aa7c55..9c3acac868c 100644 --- a/opal/mca/btl/usnic/btl_usnic_stats.c +++ b/opal/mca/btl/usnic/btl_usnic_stats.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -82,10 +82,12 @@ void opal_btl_usnic_print_stats( char tmp[128], str[2048]; /* The usuals */ - snprintf(str, sizeof(str), "%s:MCW:%3u, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu OA/DA %4lu/%4lu CRC:%4lu ", + snprintf(str, sizeof(str), "%s:MCW:%3u, %s, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu OA/DA %4lu/%4lu CRC:%4lu ", prefix, opal_proc_local_get()->proc_name.vpid, + module->fabric_info->fabric_attr->name, + module->stats.num_total_sends, module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends, module->mod_channels[USNIC_DATA_CHANNEL].num_channel_sends, @@ -143,8 +145,9 @@ void opal_btl_usnic_print_stats( /* Number of un-acked sends (i.e., sends for which we're still waiting for ACK) */ send_unacked = - endpoint->endpoint_next_seq_to_send - - endpoint->endpoint_ack_seq_rcvd - 1; + SEQ_DIFF(endpoint->endpoint_next_seq_to_send, + SEQ_DIFF(endpoint->endpoint_ack_seq_rcvd, 1)); + if (send_unacked > su_max) su_max = 
send_unacked; if (send_unacked < su_min) su_min = send_unacked; @@ -194,11 +197,6 @@ static void usnic_stats_callback(int fd, short flags, void *arg) opal_btl_usnic_print_stats(module, tmp, /*reset=*/mca_btl_usnic_component.stats_relative); - - /* In OMPI v1.6, we have to re-add this event (because there's an - old libevent in OMPI v1.6) */ - opal_event_add(&(module->stats.timer_event), - &(module->stats.timeout)); } /* diff --git a/opal/mca/btl/usnic/btl_usnic_util.c b/opal/mca/btl/usnic/btl_usnic_util.c index 1039bf00196..9c1db480cd4 100644 --- a/opal/mca/btl/usnic/btl_usnic_util.c +++ b/opal/mca/btl/usnic/btl_usnic_util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -65,7 +65,7 @@ void opal_btl_usnic_util_abort(const char *msg, const char *file, int line) opal_show_help("help-mpi-btl-usnic.txt", "internal error after init", true, opal_process_info.nodename, - msg, file, line); + file, line, msg); opal_btl_usnic_exit(NULL); /* Never returns */ @@ -115,24 +115,27 @@ opal_btl_usnic_dump_hex(void *vaddr, int len) * using inet_ntop()). 
*/ void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen, - uint32_t addr, uint32_t netmask) + uint32_t addr_be, uint32_t netmask_be) { int prefixlen; + uint32_t netmask = ntohl(netmask_be); + uint32_t addr = ntohl(addr_be); uint8_t *p = (uint8_t*) &addr; + if (netmask != 0) { prefixlen = 33 - ffs(netmask); snprintf(out, maxlen, "%u.%u.%u.%u/%u", - p[0], - p[1], - p[2], p[3], + p[2], + p[1], + p[0], prefixlen); } else { snprintf(out, maxlen, "%u.%u.%u.%u", - p[0], - p[1], + p[3], p[2], - p[3]); + p[1], + p[0]); } } diff --git a/opal/mca/btl/usnic/btl_usnic_util.h b/opal/mca/btl/usnic/btl_usnic_util.h index 4fd08f93b93..389deafd652 100644 --- a/opal/mca/btl/usnic/btl_usnic_util.h +++ b/opal/mca/btl/usnic/btl_usnic_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -113,7 +113,7 @@ void opal_btl_usnic_util_abort(const char *msg, const char *file, int line); * expected to be in network byte order. */ void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen, - uint32_t addr, uint32_t netmask); + uint32_t addr_be, uint32_t netmask_be); void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen); diff --git a/opal/mca/btl/usnic/help-mpi-btl-usnic.txt b/opal/mca/btl/usnic/help-mpi-btl-usnic.txt index d6efab02681..055b0954b77 100644 --- a/opal/mca/btl/usnic/help-mpi-btl-usnic.txt +++ b/opal/mca/btl/usnic/help-mpi-btl-usnic.txt @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. # # $COPYRIGHT$ # @@ -77,10 +77,9 @@ something wrong with the usNIC or OpenFabrics configuration on this server. 
Server: %s - Message: %s File: %s Line: %d - Error: %s + Message: %s # [check_reg_mem_basics fail] The usNIC BTL failed to initialize while trying to register some @@ -241,6 +240,19 @@ abort. usNIC interface: %s Current ARP timeout: %d (btl_usnic_arp_timeout MCA param) # +[fi_av_eq too small] +The usnic BTL was told to create an address resolution queue that was +too small via the mca_btl_usnic_av_eq_num MCA parameter. This +parameter controls how many outstanding peer address resolutions can +be outstanding at a time. Larger values allow more concurrent address +resolutions, but consume more memory. + + Server: %s + av_eq_num param value: %d + av_eq_num minimum value: %d + +Your job will likely either perform poorly, or will abort. +# [unreachable peer IP] WARNING: Open MPI failed to find a route to a peer IP address via a specific usNIC interface. This usually indicates a problem in the IP diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 12ce6351f8b..90eedba4308 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -14,6 +14,8 @@ * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -263,6 +265,14 @@ mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* bt struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, uint32_t flags); +/** + * Return a segment allocated by this BTL. + * + * @param btl (IN) BTL module + * @param segment (IN) Allocated segment. 
+ */ +int mca_btl_vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des); + END_C_DECLS diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 2f46785ff4e..586ec0da21c 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -297,10 +297,13 @@ static int mca_btl_vader_component_close(void) OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); - if (NULL != mca_btl_vader_component.my_segment) { + if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism && + NULL != mca_btl_vader_component.my_segment) { munmap (mca_btl_vader_component.my_segment, mca_btl_vader_component.segment_size); } + mca_btl_vader_component.my_segment = NULL; + #if OPAL_BTL_VADER_HAVE_KNEM mca_btl_vader_knem_fini (); #endif diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index 0f3dfddf5c7..59ef0ecb903 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -43,8 +43,6 @@ static int vader_register_error_cb (struct mca_btl_base_module_t* btl, static int vader_finalize (struct mca_btl_base_module_t* btl); -static int vader_free (struct mca_btl_base_module_t* btl, mca_btl_base_descriptor_t* des); - static struct mca_btl_base_descriptor_t *vader_prepare_src ( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -69,7 +67,7 @@ mca_btl_vader_t mca_btl_vader = { .btl_del_procs = vader_del_procs, .btl_finalize = vader_finalize, .btl_alloc = mca_btl_vader_alloc, - .btl_free = vader_free, + .btl_free = mca_btl_vader_free, .btl_prepare_src = vader_prepare_src, .btl_send = mca_btl_vader_send, .btl_sendi = mca_btl_vader_sendi, @@ -411,7 +409,7 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl * @param btl (IN) BTL module * @param segment (IN) Allocated segment. 
*/ -static int vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des) +int mca_btl_vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des) { MCA_BTL_VADER_FRAG_RETURN((mca_btl_vader_frag_t *) des); diff --git a/opal/mca/btl/vader/btl_vader_sendi.c b/opal/mca/btl/vader/btl_vader_sendi.c index be9768d53c0..4b48560fb20 100644 --- a/opal/mca/btl/vader/btl_vader_sendi.c +++ b/opal/mca/btl/vader/btl_vader_sendi.c @@ -14,6 +14,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -105,6 +107,8 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl, if (!vader_fifo_write_ep (frag->hdr, endpoint)) { if (descriptor) { *descriptor = &frag->base; + } else { + mca_btl_vader_free (btl, &frag->base); } return OPAL_ERR_OUT_OF_RESOURCE; } diff --git a/opal/mca/btl/vader/configure.m4 b/opal/mca/btl/vader/configure.m4 index bd6fa606632..6d9e2650663 100644 --- a/opal/mca/btl/vader/configure.m4 +++ b/opal/mca/btl/vader/configure.m4 @@ -6,6 +6,8 @@ # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -33,18 +35,18 @@ AC_DEFUN([OPAL_CHECK_XPMEM], [ opal_check_xpmem_happy="no" if test ! "$with_xpmem" = "no" ; then - if test ! -z "$with_xpmem" -a "$with_xpmem" != "yes" ; then + if test ! -z "$with_xpmem" && test "$with_xpmem" != "yes" ; then opal_check_xpmem_dir="$with_xpmem" fi - if test ! -z "$with_xpmem_libdir" -a "$with_xpmem_libdir" != "yes" ; then + if test ! 
-z "$with_xpmem_libdir" && test "$with_xpmem_libdir" != "yes" ; then opal_check_xpmem_libdir="$with_xpmem_libdir" fi OPAL_CHECK_PACKAGE([$1],[xpmem.h],[xpmem],[xpmem_make],[], [$opal_check_xpmem_dir],[$opal_check_xpmem_libdir], [opal_check_xpmem_happy="yes"], []) - if test "$opal_check_xpmem_happy" = "no" -a -n "$with_xpmem" -a "$with_xpmem" != "yes" ; then + if test "$opal_check_xpmem_happy" = "no" && test -n "$with_xpmem" && test "$with_xpmem" != "yes" ; then AC_MSG_ERROR([XPMEM support requested but not found. Aborting]) fi fi diff --git a/opal/mca/common/cuda/Makefile.am b/opal/mca/common/cuda/Makefile.am index 76b830ac6a7..38b0434c1d5 100644 --- a/opal/mca/common/cuda/Makefile.am +++ b/opal/mca/common/cuda/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -82,7 +82,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_cuda_la_SOURCES = $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_cuda_la_LDFLAGS = \ - -version-info $(libmca_common_cuda_so_version) + -version-info $(libmca_opal_common_cuda_so_version) lib@OPAL_LIB_PREFIX@mca_common_cuda_la_LIBADD = $(common_cuda_LIBS) lib@OPAL_LIB_PREFIX@mca_common_cuda_noinst_la_SOURCES = $(headers) $(sources) diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index bf966747a50..0afe0dd94a2 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -10,7 +10,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,9 +22,7 @@ /** * This file contains various support functions for doing CUDA - * operations. Some of the features are only available in CUDA 4.1 - * and later, so some code is conditionalized around the - * OPAL_CUDA_SUPPORT_41 macro. + * operations. */ #include "opal_config.h" @@ -36,6 +36,7 @@ #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/util/proc.h" +#include "opal/util/argv.h" #include "opal/mca/mpool/base/base.h" #include "opal/runtime/opal_params.h" @@ -88,13 +89,11 @@ struct cudaFunctionTable { int (*cuEventDestroy)(CUevent); int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int); int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr); -#if OPAL_CUDA_SUPPORT_41 int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent); int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle); int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int); int (*cuIpcCloseMemHandle)(CUdeviceptr); int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr); -#endif /* OPAL_CUDA_SUPPORT_41 */ int (*cuCtxGetDevice)(CUdevice *); int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice); int (*cuDeviceGet)(CUdevice *, int); @@ -156,7 +155,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t, NULL, NULL); -#if OPAL_CUDA_SUPPORT_41 static int mca_common_cuda_async = 1; static int mca_common_cuda_cumemcpy_async; #if OPAL_ENABLE_DEBUG @@ -223,8 +221,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ; #define CUDA_DUMP_EVTHANDLE(a) #endif /* OPAL_ENABLE_DEBUG */ -#endif /* OPAL_CUDA_SUPPORT_41 */ - /* This is a seperate function so we can see these variables with ompi_info and * also set them with the tools interface */ void mca_common_cuda_register_mca_variables(void) @@ -263,7 +259,6 @@ void 
mca_common_cuda_register_mca_variables(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_common_cuda_warning); -#if OPAL_CUDA_SUPPORT_41 /* Use this flag to test async vs sync copies */ mca_common_cuda_async = 1; (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async", @@ -280,7 +275,6 @@ void mca_common_cuda_register_mca_variables(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &cuda_event_max); -#endif /* OPAL_CUDA_SUPPORT_41 */ /* Use this flag to test cuMemcpyAsync vs cuMemcpy */ mca_common_cuda_cumemcpy_async = 1; @@ -465,13 +459,11 @@ int mca_common_cuda_stage_one_init(void) OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree); OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc); OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange); -#if OPAL_CUDA_SUPPORT_41 OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle); -#endif /* OPAL_CUDA_SUPPORT_41 */ OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice); OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer); OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet); @@ -595,7 +587,6 @@ static int mca_common_cuda_stage_three_init(void) return OPAL_ERROR; } -#if OPAL_CUDA_SUPPORT_41 if (true == mca_common_cuda_enabled) { /* Set up an array to store outstanding IPC async copy events */ cuda_event_ipc_num_used = 0; @@ -633,7 +624,6 @@ static int mca_common_cuda_stage_three_init(void) } } -#endif /* OPAL_CUDA_SUPPORT_41 */ if (true == mca_common_cuda_enabled) { /* Set up an array to store outstanding async dtoh events. Used on the * sending side for asynchronous copies. */ @@ -1006,7 +996,6 @@ void mca_common_cuda_unregister(void *ptr, char *msg) { } } -#if OPAL_CUDA_SUPPORT_41 /* * Get the memory handle of a local section of memory that can be sent * to the remote size so it can access the memory. 
This is the @@ -1739,8 +1728,6 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) { } #endif /* OPAL_ENABLE_DEBUG */ -#endif /* OPAL_CUDA_SUPPORT_41 */ - /* Routines that get plugged into the opal datatype code */ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor) { diff --git a/opal/mca/common/sm/Makefile.am b/opal/mca/common/sm/Makefile.am index bb270f5a788..46e8b258bb5 100644 --- a/opal/mca/common/sm/Makefile.am +++ b/opal/mca/common/sm/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2013 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ @@ -84,7 +84,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_sm_la_SOURCES = \ $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_sm_la_LDFLAGS = \ - -version-info $(libmca_common_sm_so_version) + -version-info $(libmca_opal_common_sm_so_version) lib@OPAL_LIB_PREFIX@mca_common_sm_noinst_la_SOURCES = \ $(headers) $(sources) diff --git a/opal/mca/common/ugni/Makefile.am b/opal/mca/common/ugni/Makefile.am index 0053474b8bb..ac7482c345c 100644 --- a/opal/mca/common/ugni/Makefile.am +++ b/opal/mca/common/ugni/Makefile.am @@ -35,7 +35,7 @@ lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(headers) $(ugni_SOURCES) nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES) lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LIBADD = $(common_ugni_LIBS) lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LDFLAGS = \ - -version-info $(libmca_common_ugni_so_version) \ + -version-info $(libmca_opal_common_ugni_so_version) \ $(common_ugni_LDFLAGS) lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \ diff --git a/opal/mca/common/verbs/Makefile.am b/opal/mca/common/verbs/Makefile.am index 
43c70a19c12..8cd08eb2ac0 100644 --- a/opal/mca/common/verbs/Makefile.am +++ b/opal/mca/common/verbs/Makefile.am @@ -49,7 +49,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_verbs_la_SOURCES = $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_CPPFLAGS = $(common_verbs_CPPFLAGS) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_LDFLAGS = \ - -version-info $(libmca_common_verbs_so_version) \ + -version-info $(libmca_opal_common_verbs_so_version) \ $(common_verbs_LDFLAGS) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_LIBADD = $(common_verbs_LIBS) lib@OPAL_LIB_PREFIX@mca_common_verbs_noinst_la_SOURCES = $(headers) $(sources) diff --git a/opal/mca/common/verbs/common_verbs_basics.c b/opal/mca/common/verbs/common_verbs_basics.c index 4c1a0438ae2..bd23f08ea3d 100644 --- a/opal/mca/common/verbs/common_verbs_basics.c +++ b/opal/mca/common/verbs/common_verbs_basics.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -21,7 +21,9 @@ #include #endif +#if OPAL_COMMON_VERBS_USNIC_HAPPY #include "opal/mca/common/verbs_usnic/common_verbs_usnic.h" +#endif /* This is crummy, but doesn't work on all platforms with all compilers. Specifically, trying to include it @@ -91,12 +93,14 @@ int opal_common_verbs_fork_test(void) } #endif +#if OPAL_COMMON_VERBS_USNIC_HAPPY /* Now register any necessary fake libibverbs drivers. We piggyback loading these fake drivers on the fork test because they must be loaded before ibv_get_device_list() is invoked. Note that this routine is in a different common component (see comments over there for an explanation why). 
*/ opal_common_verbs_usnic_register_fake_drivers(); +#endif return ret; } diff --git a/opal/mca/common/verbs_usnic/configure.m4 b/opal/mca/common/verbs_usnic/configure.m4 index 17d076c62dd..68fed9404b8 100644 --- a/opal/mca/common/verbs_usnic/configure.m4 +++ b/opal/mca/common/verbs_usnic/configure.m4 @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. # Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. # $COPYRIGHT$ @@ -20,6 +20,22 @@ # $HEADER$ # +# +# This component is a workaround to a bug in libibverbs that prints a +# dire warning that usNIC devices are not supported (of course not -- +# usNIC devices provide functionality through libfabric, not +# libibverbs). This component was written before a better workaround +# was created: a "no op" libibverbs plugin for usNIC devices +# (https://github.com/cisco/libusnic_verbs, and is also available in +# binary form on cisco.com). +# +# Hence, this component no longer builds by default. It's still +# available if a user specifically asks for it (e.g., if they do not +# want to install the "no op" libibverbs plugin), but it's not the +# default. This component also has the side-effect of making +# libopen-pal.so depend on libibverbs.so, which can be annoying for +# packagers (which is another reason it isn't built by default any +# more). 
# # This component must be linked statically into libopen-pal because it # registers a provider for libibverbs at run time, and there's no @@ -38,12 +54,26 @@ AC_DEFUN([MCA_opal_common_verbs_usnic_COMPILE_MODE], [ # ------------------------------------------------ AC_DEFUN([MCA_opal_common_verbs_usnic_CONFIG],[ AC_CONFIG_FILES([opal/mca/common/verbs_usnic/Makefile]) - common_verbs_usnic_happy="no" + common_verbs_usnic_happy=0 + + AC_ARG_WITH(verbs-usnic, + AC_HELP_STRING([--with-verbs-usnic], + [Add support in Open MPI to defeat a seemingly dire warning message from libibverbs that Cisco usNIC devices are not supported. This support is not compiled by default because you can also avoid this libibverbs bug by installing the libibverbs_usnic "no op" plugin, available from https://github.com/cisco/libusnic_verbs or in binary form from cisco.com])) + + AS_IF([test "$with_verbs_usnic" = "yes"], + [common_verbs_usnic_happy=1]) + + AS_IF([test $common_verbs_usnic_happy -eq 1], + [OPAL_CHECK_OPENFABRICS([common_verbs_usnic], + [common_verbs_usnic_happy=1], + [common_verbs_usnic_happy=0]) + ]) - OPAL_CHECK_OPENFABRICS([common_verbs_usnic], - [common_verbs_usnic_happy="yes"]) + AC_DEFINE_UNQUOTED([OPAL_COMMON_VERBS_USNIC_HAPPY], + [$common_verbs_usnic_happy], + [Whether the common/verbs_usnic component is being built or not]) - AS_IF([test "$common_verbs_usnic_happy" = "yes"], + AS_IF([test $common_verbs_usnic_happy -eq 1], [$1], [$2]) diff --git a/opal/mca/crs/blcr/configure.m4 b/opal/mca/crs/blcr/configure.m4 index f252d33e434..3aea23106bc 100644 --- a/opal/mca/crs/blcr/configure.m4 +++ b/opal/mca/crs/blcr/configure.m4 @@ -10,6 +10,8 @@ # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved.
# $COPYRIGHT$ # # Additional copyrights may follow @@ -40,14 +42,14 @@ AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ AS_IF([test "$opal_want_ft_cr" = "0"], [$2 check_crs_blcr_good="no" - AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "no"], + AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "no"], [AC_MSG_WARN([BLCR support requested, but FT support not requested. You need to specify the --with-ft=cr configure option.]) AC_MSG_ERROR([Aborting.])]) ], [check_crs_blcr_good="yes"]) # If we do not want BLCR, then do not compile it - AS_IF([test "$with_blcr" = "no" -o "$check_crs_blcr_good" = "no"], + AS_IF([test "$with_blcr" = "no" || test "$check_crs_blcr_good" = "no"], [$2 check_crs_blcr_good="no"], [check_crs_blcr_good="yes"]) @@ -60,10 +62,10 @@ AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ # Determine the search paths for the headers and libraries AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "yes"], + [AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "yes"], [check_crs_blcr_dir="$with_blcr" check_crs_blcr_dir_msg="$with_blcr (from --with-blcr)"]) - AS_IF([test ! -z "$with_blcr_libdir" -a "$with_blcr_libdir" != "yes"], + AS_IF([test ! 
-z "$with_blcr_libdir" && test "$with_blcr_libdir" != "yes"], [check_crs_blcr_libdir="$with_blcr_libdir" check_crs_blcr_libdir_msg="$with_blcr_libdir (from --with-blcr-libdir)"]) ]) @@ -174,7 +176,7 @@ AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ # # Require either a working cr_request_file() or cr_request_checkpoint() function # - AS_IF([test "$crs_blcr_have_working_cr_request" = "0" -a "$crs_blcr_have_cr_request_checkpoint" = "0"], + AS_IF([test "$crs_blcr_have_working_cr_request" = "0" && test "$crs_blcr_have_cr_request_checkpoint" = "0"], [$2 check_crs_blcr_good="no" AC_MSG_WARN([The BLCR CRS component requires either the cr_request_checkpoint() or cr_request_file() functions])]) @@ -194,7 +196,7 @@ AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ AC_SUBST([crs_blcr_LDFLAGS]) AC_SUBST([crs_blcr_LIBS]) $1], - [AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "no"], + [AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "no"], [AC_MSG_WARN([BLCR support requested but not found. Perhaps you need to specify the location of the BLCR libraries.]) AC_MSG_ERROR([Aborting.])]) $3]) diff --git a/opal/mca/crs/criu/configure.m4 b/opal/mca/crs/criu/configure.m4 index d1f28bbdc18..94ea29d2248 100644 --- a/opal/mca/crs/criu/configure.m4 +++ b/opal/mca/crs/criu/configure.m4 @@ -11,6 +11,8 @@ # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2014 Hochschule Esslingen. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ # @@ -47,10 +49,10 @@ AC_DEFUN([MCA_opal_crs_criu_CONFIG],[ # Determine the search paths for the headers and libraries AS_IF([test $check_crs_criu_good = yes], - [AS_IF([test ! -z "$with_criu" -a "$with_criu" != "yes"], + [AS_IF([test ! 
-z "$with_criu" && test "$with_criu" != "yes"], [check_crs_criu_dir="$with_criu" check_crs_criu_dir_msg="$with_criu (from --with-criu)"]) - AS_IF([test ! -z "$with_criu_libdir" -a "$with_criu_libdir" != "yes"], + AS_IF([test ! -z "$with_criu_libdir" && test "$with_criu_libdir" != "yes"], [check_crs_criu_libdir="$with_criu_libdir" check_crs_criu_libdir_msg="$with_criu_libdir (from --with-criu-libdir)"]) ]) diff --git a/opal/mca/crs/dmtcp/configure.m4 b/opal/mca/crs/dmtcp/configure.m4 index 07f5c54da3a..af61f228a00 100644 --- a/opal/mca/crs/dmtcp/configure.m4 +++ b/opal/mca/crs/dmtcp/configure.m4 @@ -1,8 +1,10 @@ # -*- shell-script -*- # -# Copyright (c) 2010 The Trustees of Indiana University. +# Copyright (c) 2010 The Trustees of Indiana University. # All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -47,7 +49,7 @@ AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ # Check if the user explicitly requested -not- to build the DMTCP component # If so, the we do not compile this component # - AS_IF([test "$with_dmtcp" = "no" -o "$opal_check_crs_dmtcp_good" = "no"], + AS_IF([test "$with_dmtcp" = "no" || test "$opal_check_crs_dmtcp_good" = "no"], [opal_check_crs_dmtcp_good="no"], [opal_check_crs_dmtcp_good="yes"]) @@ -66,10 +68,10 @@ AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ # Determine the search paths for the headers and libraries AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [AS_IF([test ! -z "$with_dmtcp" -a "$with_dmtcp" != "yes"], + [AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "yes"], [opal_check_crs_dmtcp_dir="$with_dmtcp" opal_check_crs_dmtcp_dir_msg="$with_dmtcp (from --with-dmtcp)"]) - AS_IF([test ! -z "$with_dmtcp_libdir" -a "$with_dmtcp_libdir" != "yes"], + AS_IF([test ! 
-z "$with_dmtcp_libdir" && test "$with_dmtcp_libdir" != "yes"], [opal_check_crs_dmtcp_libdir="$with_dmtcp_libdir" opal_check_crs_dmtcp_libdir_msg="$with_dmtcp_libdir (from --with-dmtcp-libdir)"]) ]) @@ -97,7 +99,7 @@ AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ AC_CHECK_PROG([mtcp_restart_command_exists], ["mtcp_restart"], ["yes"], ["no"]) AS_IF([test "$mtcp_restart_command_exists" = "no"], [opal_check_crs_dmtcp_good="no" - AS_IF([test ! -z "$with_dmtcp" -a "$with_dmtcp" != "no"], + AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "no"], [AC_MSG_WARN([mtcp_restart not found in PATH.]) AC_MSG_ERROR([Aborting.])])]) @@ -130,7 +132,7 @@ AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], [$1], - [AS_IF([test ! -z "$with_dmtcp" -a "$with_dmtcp" != "no"], + [AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "no"], [AC_MSG_WARN([DMTCP support requested but not found. Perhaps you need to specify the location of the DMTCP libraries.]) AC_MSG_ERROR([Aborting.])]) $2]) diff --git a/opal/mca/event/external/configure.m4 b/opal/mca/event/external/configure.m4 index 543ded8f0c3..cc789e3726c 100644 --- a/opal/mca/event/external/configure.m4 +++ b/opal/mca/event/external/configure.m4 @@ -2,6 +2,8 @@ # # Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ # @@ -71,7 +73,7 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ # Make sure the user didn't specify --with-libevent=internal and # --with-libevent-libdir=whatever (because you can only specify # --with-libevent-libdir when external libevent is being used). 
- AS_IF([test "$with_libevent" = "internal" -a "$with_libevent_libdir" != ""], + AS_IF([test "$with_libevent" = "internal" && test -n "$with_libevent_libdir"], [AC_MSG_WARN([Both --with-libevent=internal and --with-libevent-libdir=DIR]) AC_MSG_WARN([were specified, which does not make sense.]) AC_MSG_ERROR([Cannot continue])]) @@ -80,8 +82,8 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ # but hopefully slightly more clear...) opal_event_external_want=no AS_IF([test "$with_libevent" = "external"], [opal_event_external_want=yes]) - AS_IF([test "$with_libevent_libdir" != ""], [opal_event_external_want=yes]) - AS_IF([test "$with_libevent" != "" -a "$with_libevent" != "no" -a "$with_libevent" != "internal"], [opal_event_external_want=yes]) + AS_IF([test -n "$with_libevent_libdir"], [opal_event_external_want=yes]) + AS_IF([test -n "$with_libevent" && test "$with_libevent" != "no" && test "$with_libevent" != "internal"], [opal_event_external_want=yes]) # If we want external support, try it AS_IF([test "$opal_event_external_want" = "yes"], @@ -90,14 +92,14 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ [libevent.*]) AC_MSG_CHECKING([for external libevent in]) - AS_IF([test "$with_libevent" != "external" -a "$with_libevent" != "yes"], + AS_IF([test "$with_libevent" != "external" && test "$with_libevent" != "yes"], [opal_event_dir=$with_libevent AC_MSG_RESULT([$opal_event_dir]) OPAL_CHECK_WITHDIR([libevent], [$with_libdir], [include/event.h]) ], [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test ! -z "$with_libevent_libdir" -a "$with_libevent_libdir" != "yes"], + AS_IF([test ! 
-z "$with_libevent_libdir" && test "$with_libevent_libdir" != "yes"], [opal_event_libdir="$with_libevent_libdir"]) opal_event_external_CPPFLAGS_save=$CPPFLAGS diff --git a/opal/mca/event/external/external.h b/opal/mca/event/external/external.h index 792ad2e794c..1cdfbe688bb 100644 --- a/opal/mca/event/external/external.h +++ b/opal/mca/event/external/external.h @@ -2,6 +2,8 @@ * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ * @@ -17,6 +19,8 @@ #ifndef MCA_OPAL_EVENT_EXTERNAL_H #define MCA_OPAL_EVENT_EXTERNAL_H +#include "opal_config.h" + #include "event.h" #include "event2/event.h" #include "event2/thread.h" diff --git a/opal/mca/event/libevent2022/configure.m4 b/opal/mca/event/libevent2022/configure.m4 index 25e06544b3b..77460c40749 100644 --- a/opal/mca/event/libevent2022/configure.m4 +++ b/opal/mca/event/libevent2022/configure.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# # $COPYRIGHT$ # @@ -90,7 +92,7 @@ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ libevent_basedir="opal/mca/event/libevent2022" # If we're not building externally, configure this component - AS_IF([test "$with_libevent" = "internal" -o "$with_libevent" = "" -o "$with_libevent" = "yes"], + AS_IF([test "$with_libevent" = "internal" || test -z "$with_libevent" || test "$with_libevent" = "yes"], [MCA_opal_event_libevent2022_DO_THE_CONFIG], [AC_MSG_WARN([using an external libevent; disqualifiying this component]) $2]) @@ -161,6 +163,7 @@ AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ AC_HELP_STRING([--enable-event-debug], [enable event library debug output])) if test "$enable_event_debug" = "yes"; then event_args="$event_args --enable-debug-mode" + CFLAGS="-DUSE_DEBUG $CFLAGS" fi AC_MSG_RESULT([$event_args]) @@ -186,7 +189,7 @@ AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ # libevent/include/event2/event-config.h!). Otherwise, set it to # 0. libevent_file=$libevent_basedir/libevent/config.h - AS_IF([test "$libevent_happy" = "yes" -a -r $libevent_file], + AS_IF([test "$libevent_happy" = "yes" && test -r $libevent_file], [OPAL_HAVE_WORKING_EVENTOPS=`grep HAVE_WORKING_EVENTOPS $libevent_file | awk '{print [$]3 }'` $1], [$2 diff --git a/opal/mca/event/libevent2022/libevent/event.c b/opal/mca/event/libevent2022/libevent/event.c index dfee932868a..b9f47c42a08 100644 --- a/opal/mca/event/libevent2022/libevent/event.c +++ b/opal/mca/event/libevent2022/libevent/event.c @@ -1566,8 +1566,11 @@ event_base_loop(struct event_base *base, int flags) EVBASE_ACQUIRE_LOCK(base, th_base_lock); if (base->running_loop) { - event_warnx("%s: reentrant invocation. Only one event_base_loop" - " can run on each event_base at once.", __func__); +/***** OMPI change ****/ +#if OPAL_ENABLE_DEBUG + event_warnx("%s: reentrant invocation. 
Only one event_base_loop" + " can run on each event_base at once.", __func__); +#endif EVBASE_RELEASE_LOCK(base, th_base_lock); return -1; } diff --git a/opal/mca/event/libevent2022/libevent/opal_rename.h b/opal/mca/event/libevent2022/libevent/opal_rename.h index be9b64ba248..9a104d091ef 100644 --- a/opal/mca/event/libevent2022/libevent/opal_rename.h +++ b/opal/mca/event/libevent2022/libevent/opal_rename.h @@ -21,367 +21,520 @@ extern "C" { #endif -/* buffer.c */ -#define _evbuffer_chain_pin opal_libevent2022_evbuffer_chain_pin -#define _evbuffer_chain_unpin opal_libevent2022_evbuffer_chain_unpin -#define _evbuffer_decref_and_unlock opal_libevent2022_evbuffer_decref_and_unlock -#define _evbuffer_expand_fast opal_libevent2022_evbuffer_expand_fast -#define _evbuffer_incref opal_libevent2022_evbuffer_incref -#define _evbuffer_incref_and_lock opal_libevent2022_evbuffer_incref_and_lock -#define _evbuffer_read_setup_vecs opal_libevent2022_evbuffer_read_setup_vecs -#define _evbuffer_testing_use_linear_file_access opal_libevent2022_evbuffer_testing_use_linear_file_access -#define _evbuffer_testing_use_mmap opal_libevent2022_evbuffer_testing_use_mmap -#define _evbuffer_testing_use_sendfile opal_libevent2022_evbuffer_testing_use_sendfile -#define evbuffer_add opal_libevent2022_evbuffer_add -#define evbuffer_add_buffer opal_libevent2022_evbuffer_add_buffer -#define evbuffer_add_cb opal_libevent2022_evbuffer_add_cb -#define evbuffer_add_file opal_libevent2022_evbuffer_add_file -#define evbuffer_add_printf opal_libevent2022_evbuffer_add_printf -#define evbuffer_add_reference opal_libevent2022_evbuffer_add_reference -#define evbuffer_add_vprintf opal_libevent2022_evbuffer_add_vprintf -#define evbuffer_cb_clear_flags opal_libevent2022_evbuffer_cb_clear_flags -#define evbuffer_cb_set_flags opal_libevent2022_evbuffer_cb_set_flags -#define evbuffer_clear_flags opal_libevent2022_evbuffer_clear_flags -#define evbuffer_commit_space opal_libevent2022_evbuffer_commit_space -#define 
evbuffer_copyout opal_libevent2022_evbuffer_copyout -#define evbuffer_defer_callbacks opal_libevent2022_evbuffer_defer_callbacks -#define evbuffer_drain opal_libevent2022_evbuffer_drain -#define evbuffer_enable_locking opal_libevent2022_evbuffer_enable_locking -#define evbuffer_expand opal_libevent2022_evbuffer_expand -#define evbuffer_find opal_libevent2022_evbuffer_find -#define evbuffer_free opal_libevent2022_evbuffer_free -#define evbuffer_freeze opal_libevent2022_evbuffer_freeze -#define evbuffer_get_contiguous_space opal_libevent2022_evbuffer_get_contiguous_space -#define evbuffer_get_length opal_libevent2022_evbuffer_get_length -#define evbuffer_invoke_callbacks opal_libevent2022_evbuffer_invoke_callbacks -#define evbuffer_lock opal_libevent2022_evbuffer_lock -#define evbuffer_new opal_libevent2022_evbuffer_new -#define evbuffer_peek opal_libevent2022_evbuffer_peek -#define evbuffer_prepend opal_libevent2022_evbuffer_prepend -#define evbuffer_prepend_buffer opal_libevent2022_evbuffer_prepend_buffer -#define evbuffer_ptr_set opal_libevent2022_evbuffer_ptr_set -#define evbuffer_pullup opal_libevent2022_evbuffer_pullup -#define evbuffer_read opal_libevent2022_evbuffer_read -#define evbuffer_readline opal_libevent2022_evbuffer_readline -#define evbuffer_readln opal_libevent2022_evbuffer_readln -#define evbuffer_remove opal_libevent2022_evbuffer_remove -#define evbuffer_remove_buffer opal_libevent2022_evbuffer_remove_buffer -#define evbuffer_remove_cb opal_libevent2022_evbuffer_remove_cb -#define evbuffer_remove_cb_entry opal_libevent2022_evbuffer_remove_cb_entry -#define evbuffer_reserve_space opal_libevent2022_evbuffer_reserve_space -#define evbuffer_search opal_libevent2022_evbuffer_search -#define evbuffer_search_eol opal_libevent2022_evbuffer_search_eol -#define evbuffer_search_range opal_libevent2022_evbuffer_search_range -#define evbuffer_set_flags opal_libevent2022_evbuffer_set_flags -#define evbuffer_set_parent opal_libevent2022_evbuffer_set_parent 
-#define evbuffer_setcb opal_libevent2022_evbuffer_setcb -#define evbuffer_unfreeze opal_libevent2022_evbuffer_unfreeze -#define evbuffer_unlock opal_libevent2022_evbuffer_unlock -#define evbuffer_write opal_libevent2022_evbuffer_write -#define evbuffer_write_atmost opal_libevent2022_evbuffer_write_atmost -#define _bufferevent_add_event opal_libevent2022__bufferevent_add_event -#define _bufferevent_decref_and_unlock opal_libevent2022__bufferevent_decref_and_unlock -#define _bufferevent_del_generic_timeout_cbs opal_libevent2022__bufferevent_del_generic_timeout_cbs -#define _bufferevent_generic_adj_timeouts opal_libevent2022__bufferevent_generic_adj_timeouts -#define _bufferevent_incref_and_lock opal_libevent2022__bufferevent_incref_and_lock -#define _bufferevent_init_generic_timeout_cbs opal_libevent2022__bufferevent_init_generic_timeout_cbs -#define _bufferevent_run_eventcb opal_libevent2022__bufferevent_run_eventcb -#define _bufferevent_run_readcb opal_libevent2022__bufferevent_run_readcb -#define _bufferevent_run_writecb opal_libevent2022__bufferevent_run_writecb -#define bufferevent_decref opal_libevent2022_bufferevent_decref -#define bufferevent_disable opal_libevent2022_bufferevent_disable -#define bufferevent_disable_hard opal_libevent2022_bufferevent_disable_hard -#define bufferevent_enable opal_libevent2022_bufferevent_enable -#define bufferevent_enable_locking opal_libevent2022_bufferevent_enable_locking -#define bufferevent_flush opal_libevent2022_bufferevent_flush -#define bufferevent_free opal_libevent2022_bufferevent_free -#define bufferevent_get_base opal_libevent2022_bufferevent_get_base -#define bufferevent_get_enabled opal_libevent2022_bufferevent_get_enabled -#define bufferevent_get_input opal_libevent2022_bufferevent_get_input -#define bufferevent_get_output opal_libevent2022_bufferevent_get_output -#define bufferevent_get_underlying opal_libevent2022_bufferevent_get_underlying -#define bufferevent_getfd opal_libevent2022_bufferevent_getfd 
-#define bufferevent_incref opal_libevent2022_bufferevent_incref -#define bufferevent_init_common opal_libevent2022_bufferevent_init_common -#define bufferevent_lock opal_libevent2022_bufferevent_lock -#define bufferevent_read opal_libevent2022_bufferevent_read -#define bufferevent_read_buffer opal_libevent2022_bufferevent_read_buffer -#define bufferevent_set_timeouts opal_libevent2022_bufferevent_set_timeouts -#define bufferevent_setcb opal_libevent2022_bufferevent_setcb -#define bufferevent_setfd opal_libevent2022_bufferevent_setfd -#define bufferevent_settimeout opal_libevent2022_bufferevent_settimeout -#define bufferevent_setwatermark opal_libevent2022_bufferevent_setwatermark -#define bufferevent_suspend_read opal_libevent2022_bufferevent_suspend_read -#define bufferevent_suspend_write opal_libevent2022_bufferevent_suspend_write -#define bufferevent_unlock opal_libevent2022_bufferevent_unlock -#define bufferevent_unsuspend_read opal_libevent2022_bufferevent_unsuspend_read -#define bufferevent_unsuspend_write opal_libevent2022_bufferevent_unsuspend_write -#define bufferevent_write opal_libevent2022_bufferevent_write -#define bufferevent_write_buffer opal_libevent2022_bufferevent_write_buffer -#define bufferevent_filter_new opal_libevent2022_bufferevent_filter_new -#define bufferevent_get_openssl_error opal_libevent2022_bufferevent_get_openssl_error -#define bufferevent_openssl_filter_new opal_libevent2022_bufferevent_openssl_filter_new -#define bufferevent_openssl_get_ssl opal_libevent2022_bufferevent_openssl_get_ssl -#define bufferevent_openssl_socket_new opal_libevent2022_bufferevent_openssl_socket_new -#define bufferevent_ssl_renegotiate opal_libevent2022_bufferevent_ssl_renegotiate -#define bufferevent_pair_get_partner opal_libevent2022_bufferevent_pair_get_partner -#define bufferevent_pair_new opal_libevent2022_bufferevent_pair_new -#define _bufferevent_decrement_read_buckets opal_libevent2022__bufferevent_decrement_read_buckets -#define 
_bufferevent_decrement_write_buckets opal_libevent2022__bufferevent_decrement_write_buckets -#define _bufferevent_get_read_max opal_libevent2022__bufferevent_get_read_max -#define _bufferevent_get_write_max opal_libevent2022__bufferevent_get_write_max -#define bufferevent_add_to_rate_limit_group opal_libevent2022_bufferevent_add_to_rate_limit_group -#define bufferevent_decrement_read_limit opal_libevent2022_bufferevent_decrement_read_limit -#define bufferevent_decrement_write_limit opal_libevent2022_bufferevent_decrement_write_limit -#define bufferevent_get_max_to_read opal_libevent2022_bufferevent_get_max_to_read -#define bufferevent_get_max_to_write opal_libevent2022_bufferevent_get_max_to_write -#define bufferevent_get_read_limit opal_libevent2022_bufferevent_get_read_limit -#define bufferevent_get_write_limit opal_libevent2022_bufferevent_get_write_limit -#define bufferevent_rate_limit_group_decrement_read opal_libevent2022_bufferevent_rate_limit_group_decrement_read -#define bufferevent_rate_limit_group_decrement_write opal_libevent2022_bufferevent_rate_limit_group_decrement_write -#define bufferevent_rate_limit_group_free opal_libevent2022_bufferevent_rate_limit_group_free -#define bufferevent_rate_limit_group_get_read_limit opal_libevent2022_bufferevent_rate_limit_group_get_read_limit -#define bufferevent_rate_limit_group_get_totals opal_libevent2022_bufferevent_rate_limit_group_get_totals -#define bufferevent_rate_limit_group_get_write_limit opal_libevent2022_bufferevent_rate_limit_group_get_write_limit -#define bufferevent_rate_limit_group_new opal_libevent2022_bufferevent_rate_limit_group_new -#define bufferevent_rate_limit_group_reset_totals opal_libevent2022_bufferevent_rate_limit_group_reset_totals -#define bufferevent_rate_limit_group_set_cfg opal_libevent2022_bufferevent_rate_limit_group_set_cfg -#define bufferevent_rate_limit_group_set_min_share opal_libevent2022_bufferevent_rate_limit_group_set_min_share -#define 
bufferevent_remove_from_rate_limit_group opal_libevent2022_bufferevent_remove_from_rate_limit_group -#define bufferevent_remove_from_rate_limit_group_internal opal_libevent2022_bufferevent_remove_from_rate_limit_group_internal -#define bufferevent_set_rate_limit opal_libevent2022_bufferevent_set_rate_limit -#define bufferevent_base_set opal_libevent2022_bufferevent_base_set -#define bufferevent_new opal_libevent2022_bufferevent_new -#define bufferevent_priority_set opal_libevent2022_bufferevent_priority_set -#define bufferevent_socket_connect opal_libevent2022_bufferevent_socket_connect -#define bufferevent_socket_connect_hostname opal_libevent2022_bufferevent_socket_connect_hostname -#define bufferevent_socket_get_dns_error opal_libevent2022_bufferevent_socket_get_dns_error -#define bufferevent_socket_new opal_libevent2022_bufferevent_socket_new - - -/* tokens */ -#define ev_token_bucket_cfg_free opal_libevent2022_ev_token_bucket_cfg_free -#define ev_token_bucket_cfg_new opal_libevent2022_ev_token_bucket_cfg_new -#define ev_token_bucket_get_tick opal_libevent2022_ev_token_bucket_get_tick -#define ev_token_bucket_init opal_libevent2022_ev_token_bucket_init -#define ev_token_bucket_update opal_libevent2022_ev_token_bucket_update - - -/* debug */ -#define _event_debug_map_HT_REP_IS_BAD opal_libevent2022__event_debug_map_HT_REP_IS_BAD -#define event_debug_map_HT_CLEAR opal_libevent2022_event_debug_map_HT_CLEAR -#define event_debug_map_HT_GROW opal_libevent2022_event_debug_map_HT_GROW -#define event_debug_unassign opal_libevent2022_event_debug_unassign -#define _event_debugx opal_libevent2022__event_debugx -#define _event_debug_mode_on opal_libevent2022__event_debug_mode_on - -/* event.c */ -#define event_active opal_libevent2022_event_active -#define event_active_nolock opal_libevent2022_event_active_nolock -#define event_add opal_libevent2022_event_add -#define event_assign opal_libevent2022_event_assign -#define event_base_add_virtual 
opal_libevent2022_event_base_add_virtual -#define event_base_assert_ok opal_libevent2022_event_base_assert_ok -#define event_base_del_virtual opal_libevent2022_event_base_del_virtual -#define event_base_dispatch opal_libevent2022_event_base_dispatch -#define event_base_dump_events opal_libevent2022_event_base_dump_events -#define event_base_free opal_libevent2022_event_base_free -#define event_base_get_deferred_cb_queue opal_libevent2022_event_base_get_deferred_cb_queue -#define event_base_get_features opal_libevent2022_event_base_get_features -#define event_base_get_method opal_libevent2022_event_base_get_method -#define event_base_gettimeofday_cached opal_libevent2022_event_base_gettimeofday_cached -#define event_base_got_break opal_libevent2022_event_base_got_break -#define event_base_got_exit opal_libevent2022_event_base_got_exit -#define event_base_init_common_timeout opal_libevent2022_event_base_init_common_timeout -#define event_base_loop opal_libevent2022_event_base_loop -#define event_base_loopexit opal_libevent2022_event_base_loopexit -#define event_base_new opal_libevent2022_event_base_new -#define event_base_new_with_config opal_libevent2022_event_base_new_with_config -#define event_base_once opal_libevent2022_event_base_once -#define event_base_priority_init opal_libevent2022_event_base_priority_init -#define event_base_set opal_libevent2022_event_base_set -#define event_base_start_iocp opal_libevent2022_event_base_start_iocp -#define event_base_stop_iocp opal_libevent2022_event_base_stop_iocp -#define event_config_avoid_method opal_libevent2022_event_config_avoid_method -#define event_config_free opal_libevent2022_event_config_free -#define event_config_new opal_libevent2022_event_config_new -#define event_config_require_features opal_libevent2022_event_config_require_features -#define event_config_set_flag opal_libevent2022_event_config_set_flag -#define event_config_set_num_cpus_hint opal_libevent2022_event_config_set_num_cpus_hint -#define 
event_deferred_cb_cancel opal_libevent2022_event_deferred_cb_cancel -#define event_deferred_cb_init opal_libevent2022_event_deferred_cb_init -#define event_deferred_cb_queue_init opal_libevent2022_event_deferred_cb_queue_init -#define event_deferred_cb_schedule opal_libevent2022_event_deferred_cb_schedule -#define event_del opal_libevent2022_event_del -#define event_dispatch opal_libevent2022_event_dispatch -#define event_enable_debug_mode opal_libevent2022_event_enable_debug_mode -#define event_enable_debug_output opal_libevent2022_event_enable_debug_output -#define event_free opal_libevent2022_event_free -#define event_get_assignment opal_libevent2022_event_get_assignment -#define event_get_base opal_libevent2022_event_get_base -#define event_get_callback opal_libevent2022_event_get_callback -#define event_get_callback_arg opal_libevent2022_event_get_callback_arg -#define event_get_events opal_libevent2022_event_get_events -#define event_get_fd opal_libevent2022_event_get_fd -#define event_get_method opal_libevent2022_event_get_method -#define event_get_struct_event_size opal_libevent2022_event_get_struct_event_size -#define event_get_supported_methods opal_libevent2022_event_get_supported_methods -#define event_get_version opal_libevent2022_event_get_version -#define event_get_version_number opal_libevent2022_event_get_version_number -#define event_init opal_libevent2022_event_init -#define _event_initialized opal_libevent2022__event_initialized -#define event_initialized opal_libevent2022_event_initialized -#define event_loop opal_libevent2022_event_loop -#define event_loopbreak opal_libevent2022_event_loopbreak -#define event_loopexit opal_libevent2022_event_loopexit -#define event_mm_calloc_ opal_libevent2022_event_mm_calloc_ -#define event_mm_free_ opal_libevent2022_event_mm_free_ -#define event_mm_malloc_ opal_libevent2022_event_mm_malloc_ -#define event_mm_realloc_ opal_libevent2022_event_mm_realloc_ -#define event_mm_strdup_ 
opal_libevent2022_event_mm_strdup_ -#define event_new opal_libevent2022_event_new -#define event_once opal_libevent2022_event_once -#define event_pending opal_libevent2022_event_pending -#define event_priority_init opal_libevent2022_event_priority_init -#define event_priority_set opal_libevent2022_event_priority_set -#define event_reinit opal_libevent2022_event_reinit -#define event_set opal_libevent2022_event_set -#define event_set_mem_functions opal_libevent2022_event_set_mem_functions -#define event_changelist_add opal_libevent2022_event_changelist_add -#define event_changelist_del opal_libevent2022_event_changelist_del -#define event_changelist_freemem opal_libevent2022_event_changelist_freemem -#define event_changelist_init opal_libevent2022_event_changelist_init -#define event_changelist_remove_all opal_libevent2022_event_changelist_remove_all -#define event_err opal_libevent2022_event_err -#define event_errx opal_libevent2022_event_errx -#define event_msgx opal_libevent2022_event_msgx -#define event_set_fatal_callback opal_libevent2022_event_set_fatal_callback -#define event_set_log_callback opal_libevent2022_event_set_log_callback -#define event_sock_err opal_libevent2022_event_sock_err -#define event_sock_warn opal_libevent2022_event_sock_warn -#define event_warn opal_libevent2022_event_warn -#define event_warnx opal_libevent2022_event_warnx - -/* evutil.c*/ -#define EVUTIL_ISALNUM opal_libevent2022_EVUTIL_ISALNUM -#define EVUTIL_ISALPHA opal_libevent2022_EVUTIL_ISALPHA -#define EVUTIL_ISDIGIT opal_libevent2022_EVUTIL_ISDIGIT -#define EVUTIL_ISLOWER opal_libevent2022_EVUTIL_ISLOWER -#define EVUTIL_ISPRINT opal_libevent2022_EVUTIL_ISPRINT -#define EVUTIL_ISSPACE opal_libevent2022_EVUTIL_ISSPACE -#define EVUTIL_ISUPPER opal_libevent2022_EVUTIL_ISUPPER -#define EVUTIL_ISXDIGIT opal_libevent2022_EVUTIL_ISXDIGIT -#define EVUTIL_TOLOWER opal_libevent2022_EVUTIL_TOLOWER -#define EVUTIL_TOUPPER opal_libevent2022_EVUTIL_TOUPPER -#define _evutil_weakrand 
opal_libevent2022__evutil_weakrand -#define evutil_addrinfo_append opal_libevent2022_evutil_addrinfo_append -#define evutil_adjust_hints_for_addrconfig opal_libevent2022_evutil_adjust_hints_for_addrconfig -#define evutil_ascii_strcasecmp opal_libevent2022_evutil_ascii_strcasecmp -#define evutil_ascii_strncasecmp opal_libevent2022_evutil_ascii_strncasecmp -#define evutil_closesocket opal_libevent2022_evutil_closesocket -#define evutil_ersatz_socketpair opal_libevent2022_evutil_ersatz_socketpair -#define evutil_format_sockaddr_port opal_libevent2022_evutil_format_sockaddr_port -#define evutil_freeaddrinfo opal_libevent2022_evutil_freeaddrinfo -#define evutil_gai_strerror opal_libevent2022_evutil_gai_strerror -#define evutil_getaddrinfo opal_libevent2022_evutil_getaddrinfo -#define evutil_getaddrinfo_async opal_libevent2022_evutil_getaddrinfo_async -#define evutil_getaddrinfo_common opal_libevent2022_evutil_getaddrinfo_common -#define evutil_getenv opal_libevent2022_evutil_getenv -#define evutil_hex_char_to_int opal_libevent2022_evutil_hex_char_to_int -#define evutil_inet_ntop opal_libevent2022_evutil_inet_ntop -#define evutil_inet_pton opal_libevent2022_evutil_inet_pton -#define evutil_make_listen_socket_reuseable opal_libevent2022_evutil_make_listen_socket_reuseable -#define evutil_make_socket_closeonexec opal_libevent2022_evutil_make_socket_closeonexec -#define evutil_make_socket_nonblocking opal_libevent2022_evutil_make_socket_nonblocking -#define evutil_new_addrinfo opal_libevent2022_evutil_new_addrinfo -#define evutil_open_closeonexec opal_libevent2022_evutil_open_closeonexec -#define evutil_parse_sockaddr_port opal_libevent2022_evutil_parse_sockaddr_port -#define evutil_read_file opal_libevent2022_evutil_read_file -#define evutil_secure_rng_get_bytes opal_libevent2022_evutil_secure_rng_get_bytes -#define evutil_secure_rng_global_setup_locks_ opal_libevent2022_evutil_secure_rng_global_setup_locks_ -#define evutil_secure_rng_init 
opal_libevent2022_evutil_secure_rng_init -#define evutil_set_evdns_getaddrinfo_fn opal_libevent2022_evutil_set_evdns_getaddrinfo_fn -#define evutil_snprintf opal_libevent2022_evutil_snprintf -#define evutil_sockaddr_cmp opal_libevent2022_evutil_sockaddr_cmp -#define evutil_sockaddr_is_loopback opal_libevent2022_evutil_sockaddr_is_loopback -#define evutil_socket_connect opal_libevent2022_evutil_socket_connect -#define evutil_socket_finished_connecting opal_libevent2022_evutil_socket_finished_connecting -#define evutil_socketpair opal_libevent2022_evutil_socketpair -#define evutil_strtoll opal_libevent2022_evutil_strtoll -#define evutil_tv_to_msec opal_libevent2022_evutil_tv_to_msec -#define evutil_vsnprintf opal_libevent2022_evutil_vsnprintf - - -/* threads */ -#define evthread_make_base_notifiable opal_libevent2022_evthread_make_base_notifiable -#define _evthread_debug_get_real_lock opal_libevent2022__evthread_debug_get_real_lock -#define _evthread_is_debug_lock_held opal_libevent2022__evthread_is_debug_lock_held -#define evthread_enable_lock_debuging opal_libevent2022_evthread_enable_lock_debuging -#define evthread_set_condition_callbacks opal_libevent2022_evthread_set_condition_callbacks -#define evthread_set_id_callback opal_libevent2022_evthread_set_id_callback -#define evthread_set_lock_callbacks opal_libevent2022_evthread_set_lock_callbacks -#define evthread_use_pthreads opal_libevent2022_evthread_use_pthreads - - -/* tags */ -#define evtag_consume opal_libevent2022_evtag_consume -#define evtag_decode_int opal_libevent2022_evtag_decode_int -#define evtag_decode_int64 opal_libevent2022_evtag_decode_int64 -#define evtag_decode_tag opal_libevent2022_evtag_decode_tag -#define evtag_encode_int opal_libevent2022_evtag_encode_int -#define evtag_encode_int64 opal_libevent2022_evtag_encode_int64 -#define evtag_encode_tag opal_libevent2022_evtag_encode_tag -#define evtag_init opal_libevent2022_evtag_init -#define evtag_marshal opal_libevent2022_evtag_marshal -#define 
evtag_marshal_buffer opal_libevent2022_evtag_marshal_buffer -#define evtag_marshal_int opal_libevent2022_evtag_marshal_int -#define evtag_marshal_int64 opal_libevent2022_evtag_marshal_int64 -#define evtag_marshal_string opal_libevent2022_evtag_marshal_string -#define evtag_marshal_timeval opal_libevent2022_evtag_marshal_timeval -#define evtag_payload_length opal_libevent2022_evtag_payload_length -#define evtag_peek opal_libevent2022_evtag_peek -#define evtag_peek_length opal_libevent2022_evtag_peek_length -#define evtag_unmarshal opal_libevent2022_evtag_unmarshal -#define evtag_unmarshal_fixed opal_libevent2022_evtag_unmarshal_fixed -#define evtag_unmarshal_header opal_libevent2022_evtag_unmarshal_header -#define evtag_unmarshal_int opal_libevent2022_evtag_unmarshal_int -#define evtag_unmarshal_int64 opal_libevent2022_evtag_unmarshal_int64 -#define evtag_unmarshal_string opal_libevent2022_evtag_unmarshal_string -#define evtag_unmarshal_timeval opal_libevent2022_evtag_unmarshal_timeval - -/* map */ -#define evmap_check_integrity opal_libevent2022_evmap_check_integrity -#define evmap_io_active opal_libevent2022_evmap_io_active -#define evmap_io_add opal_libevent2022_evmap_io_add -#define evmap_io_clear opal_libevent2022_evmap_io_clear -#define evmap_io_del opal_libevent2022_evmap_io_del -#define evmap_io_get_fdinfo opal_libevent2022_evmap_io_get_fdinfo -#define evmap_io_initmap opal_libevent2022_evmap_io_initmap -#define evmap_signal_active opal_libevent2022_evmap_signal_active -#define evmap_signal_add opal_libevent2022_evmap_signal_add -#define evmap_signal_clear opal_libevent2022_evmap_signal_clear -#define evmap_signal_del opal_libevent2022_evmap_signal_del -#define evmap_signal_initmap opal_libevent2022_evmap_signal_initmap - - -/* connections */ -#define evconnlistener_disable opal_libevent2022_evconnlistener_disable -#define evconnlistener_enable opal_libevent2022_evconnlistener_enable -#define evconnlistener_free opal_libevent2022_evconnlistener_free -#define 
evconnlistener_get_base opal_libevent2022_evconnlistener_get_base -#define evconnlistener_get_fd opal_libevent2022_evconnlistener_get_fd -#define evconnlistener_new opal_libevent2022_evconnlistener_new -#define evconnlistener_new_bind opal_libevent2022_evconnlistener_new_bind -#define evconnlistener_set_cb opal_libevent2022_evconnlistener_set_cb -#define evconnlistener_set_error_cb opal_libevent2022_evconnlistener_set_error_cb - -/* signal */ -#define _evsig_restore_handler opal_libevent2022__evsig_restore_handler -#define _evsig_set_handler opal_libevent2022__evsig_set_handler -#define evsig_dealloc opal_libevent2022_evsig_dealloc -#define evsig_init opal_libevent2022_evsig_init -#define evsig_process opal_libevent2022_evsig_process -#define evsig_set_base opal_libevent2022_evsig_set_base +#define _bufferevent_add_event opal_libevent2022__bufferevent_add_event +#define bufferevent_add_to_rate_limit_group opal_libevent2022_bufferevent_add_to_rate_limit_group +#define bufferevent_base_set opal_libevent2022_bufferevent_base_set +#define bufferevent_decref opal_libevent2022_bufferevent_decref +#define _bufferevent_decref_and_unlock opal_libevent2022__bufferevent_decref_and_unlock +#define _bufferevent_decrement_read_buckets opal_libevent2022__bufferevent_decrement_read_buckets +#define bufferevent_decrement_read_limit opal_libevent2022_bufferevent_decrement_read_limit +#define _bufferevent_decrement_write_buckets opal_libevent2022__bufferevent_decrement_write_buckets +#define bufferevent_decrement_write_limit opal_libevent2022_bufferevent_decrement_write_limit +#define _bufferevent_del_generic_timeout_cbs opal_libevent2022__bufferevent_del_generic_timeout_cbs +#define bufferevent_disable opal_libevent2022_bufferevent_disable +#define bufferevent_disable_hard opal_libevent2022_bufferevent_disable_hard +#define bufferevent_enable opal_libevent2022_bufferevent_enable +#define bufferevent_enable_locking opal_libevent2022_bufferevent_enable_locking +#define 
bufferevent_filter_new opal_libevent2022_bufferevent_filter_new +#define bufferevent_flush opal_libevent2022_bufferevent_flush +#define bufferevent_free opal_libevent2022_bufferevent_free +#define _bufferevent_generic_adj_timeouts opal_libevent2022__bufferevent_generic_adj_timeouts +#define bufferevent_get_base opal_libevent2022_bufferevent_get_base +#define bufferevent_get_enabled opal_libevent2022_bufferevent_get_enabled +#define bufferevent_getfd opal_libevent2022_bufferevent_getfd +#define bufferevent_get_input opal_libevent2022_bufferevent_get_input +#define bufferevent_get_max_to_read opal_libevent2022_bufferevent_get_max_to_read +#define bufferevent_get_max_to_write opal_libevent2022_bufferevent_get_max_to_write +#define bufferevent_get_output opal_libevent2022_bufferevent_get_output +#define bufferevent_get_read_limit opal_libevent2022_bufferevent_get_read_limit +#define _bufferevent_get_read_max opal_libevent2022__bufferevent_get_read_max +#define bufferevent_get_underlying opal_libevent2022_bufferevent_get_underlying +#define bufferevent_get_write_limit opal_libevent2022_bufferevent_get_write_limit +#define _bufferevent_get_write_max opal_libevent2022__bufferevent_get_write_max +#define bufferevent_incref opal_libevent2022_bufferevent_incref +#define _bufferevent_incref_and_lock opal_libevent2022__bufferevent_incref_and_lock +#define bufferevent_init_common opal_libevent2022_bufferevent_init_common +#define _bufferevent_init_generic_timeout_cbs opal_libevent2022__bufferevent_init_generic_timeout_cbs +#define bufferevent_lock opal_libevent2022_bufferevent_lock +#define bufferevent_new opal_libevent2022_bufferevent_new +#define bufferevent_pair_get_partner opal_libevent2022_bufferevent_pair_get_partner +#define bufferevent_pair_new opal_libevent2022_bufferevent_pair_new +#define bufferevent_priority_set opal_libevent2022_bufferevent_priority_set +#define bufferevent_rate_limit_group_decrement_read 
opal_libevent2022_bufferevent_rate_limit_group_decrement_read +#define bufferevent_rate_limit_group_decrement_write opal_libevent2022_bufferevent_rate_limit_group_decrement_write +#define bufferevent_rate_limit_group_free opal_libevent2022_bufferevent_rate_limit_group_free +#define bufferevent_rate_limit_group_get_read_limit opal_libevent2022_bufferevent_rate_limit_group_get_read_limit +#define bufferevent_rate_limit_group_get_totals opal_libevent2022_bufferevent_rate_limit_group_get_totals +#define bufferevent_rate_limit_group_get_write_limit opal_libevent2022_bufferevent_rate_limit_group_get_write_limit +#define bufferevent_rate_limit_group_new opal_libevent2022_bufferevent_rate_limit_group_new +#define bufferevent_rate_limit_group_reset_totals opal_libevent2022_bufferevent_rate_limit_group_reset_totals +#define bufferevent_rate_limit_group_set_cfg opal_libevent2022_bufferevent_rate_limit_group_set_cfg +#define bufferevent_rate_limit_group_set_min_share opal_libevent2022_bufferevent_rate_limit_group_set_min_share +#define bufferevent_read opal_libevent2022_bufferevent_read +#define bufferevent_read_buffer opal_libevent2022_bufferevent_read_buffer +#define bufferevent_remove_from_rate_limit_group opal_libevent2022_bufferevent_remove_from_rate_limit_group +#define bufferevent_remove_from_rate_limit_group_internal opal_libevent2022_bufferevent_remove_from_rate_limit_group_internal +#define _bufferevent_run_eventcb opal_libevent2022__bufferevent_run_eventcb +#define _bufferevent_run_readcb opal_libevent2022__bufferevent_run_readcb +#define _bufferevent_run_writecb opal_libevent2022__bufferevent_run_writecb +#define bufferevent_setcb opal_libevent2022_bufferevent_setcb +#define bufferevent_setfd opal_libevent2022_bufferevent_setfd +#define bufferevent_set_rate_limit opal_libevent2022_bufferevent_set_rate_limit +#define bufferevent_settimeout opal_libevent2022_bufferevent_settimeout +#define bufferevent_set_timeouts opal_libevent2022_bufferevent_set_timeouts +#define 
bufferevent_setwatermark opal_libevent2022_bufferevent_setwatermark +#define bufferevent_socket_connect opal_libevent2022_bufferevent_socket_connect +#define bufferevent_socket_connect_hostname opal_libevent2022_bufferevent_socket_connect_hostname +#define bufferevent_socket_get_dns_error opal_libevent2022_bufferevent_socket_get_dns_error +#define bufferevent_socket_new opal_libevent2022_bufferevent_socket_new +#define bufferevent_suspend_read opal_libevent2022_bufferevent_suspend_read +#define bufferevent_suspend_write opal_libevent2022_bufferevent_suspend_write +#define bufferevent_unlock opal_libevent2022_bufferevent_unlock +#define bufferevent_unsuspend_read opal_libevent2022_bufferevent_unsuspend_read +#define bufferevent_unsuspend_write opal_libevent2022_bufferevent_unsuspend_write +#define bufferevent_write opal_libevent2022_bufferevent_write +#define bufferevent_write_buffer opal_libevent2022_bufferevent_write_buffer +#define evbuffer_add opal_libevent2022_evbuffer_add +#define evbuffer_add_buffer opal_libevent2022_evbuffer_add_buffer +#define evbuffer_add_cb opal_libevent2022_evbuffer_add_cb +#define evbuffer_add_file opal_libevent2022_evbuffer_add_file +#define evbuffer_add_printf opal_libevent2022_evbuffer_add_printf +#define evbuffer_add_reference opal_libevent2022_evbuffer_add_reference +#define evbuffer_add_vprintf opal_libevent2022_evbuffer_add_vprintf +#define evbuffer_cb_clear_flags opal_libevent2022_evbuffer_cb_clear_flags +#define evbuffer_cb_set_flags opal_libevent2022_evbuffer_cb_set_flags +#define _evbuffer_chain_pin opal_libevent2022__evbuffer_chain_pin +#define _evbuffer_chain_unpin opal_libevent2022__evbuffer_chain_unpin +#define evbuffer_clear_flags opal_libevent2022_evbuffer_clear_flags +#define evbuffer_commit_space opal_libevent2022_evbuffer_commit_space +#define evbuffer_copyout opal_libevent2022_evbuffer_copyout +#define _evbuffer_decref_and_unlock opal_libevent2022__evbuffer_decref_and_unlock +#define evbuffer_defer_callbacks 
opal_libevent2022_evbuffer_defer_callbacks +#define evbuffer_drain opal_libevent2022_evbuffer_drain +#define evbuffer_enable_locking opal_libevent2022_evbuffer_enable_locking +#define evbuffer_expand opal_libevent2022_evbuffer_expand +#define _evbuffer_expand_fast opal_libevent2022__evbuffer_expand_fast +#define evbuffer_find opal_libevent2022_evbuffer_find +#define evbuffer_free opal_libevent2022_evbuffer_free +#define evbuffer_freeze opal_libevent2022_evbuffer_freeze +#define evbuffer_get_contiguous_space opal_libevent2022_evbuffer_get_contiguous_space +#define evbuffer_get_length opal_libevent2022_evbuffer_get_length +#define _evbuffer_incref opal_libevent2022__evbuffer_incref +#define _evbuffer_incref_and_lock opal_libevent2022__evbuffer_incref_and_lock +#define evbuffer_invoke_callbacks opal_libevent2022_evbuffer_invoke_callbacks +#define evbuffer_lock opal_libevent2022_evbuffer_lock +#define evbuffer_new opal_libevent2022_evbuffer_new +#define evbuffer_peek opal_libevent2022_evbuffer_peek +#define evbuffer_prepend opal_libevent2022_evbuffer_prepend +#define evbuffer_prepend_buffer opal_libevent2022_evbuffer_prepend_buffer +#define evbuffer_ptr_set opal_libevent2022_evbuffer_ptr_set +#define evbuffer_pullup opal_libevent2022_evbuffer_pullup +#define evbuffer_read opal_libevent2022_evbuffer_read +#define evbuffer_readline opal_libevent2022_evbuffer_readline +#define evbuffer_readln opal_libevent2022_evbuffer_readln +#define _evbuffer_read_setup_vecs opal_libevent2022__evbuffer_read_setup_vecs +#define evbuffer_remove opal_libevent2022_evbuffer_remove +#define evbuffer_remove_buffer opal_libevent2022_evbuffer_remove_buffer +#define evbuffer_remove_cb opal_libevent2022_evbuffer_remove_cb +#define evbuffer_remove_cb_entry opal_libevent2022_evbuffer_remove_cb_entry +#define evbuffer_reserve_space opal_libevent2022_evbuffer_reserve_space +#define evbuffer_search opal_libevent2022_evbuffer_search +#define evbuffer_search_eol opal_libevent2022_evbuffer_search_eol 
+#define evbuffer_search_range opal_libevent2022_evbuffer_search_range +#define evbuffer_setcb opal_libevent2022_evbuffer_setcb +#define evbuffer_set_flags opal_libevent2022_evbuffer_set_flags +#define evbuffer_set_parent opal_libevent2022_evbuffer_set_parent +#define _evbuffer_testing_use_linear_file_access opal_libevent2022__evbuffer_testing_use_linear_file_access +#define _evbuffer_testing_use_mmap opal_libevent2022__evbuffer_testing_use_mmap +#define _evbuffer_testing_use_sendfile opal_libevent2022__evbuffer_testing_use_sendfile +#define evbuffer_unfreeze opal_libevent2022_evbuffer_unfreeze +#define evbuffer_unlock opal_libevent2022_evbuffer_unlock +#define evbuffer_write opal_libevent2022_evbuffer_write +#define evbuffer_write_atmost opal_libevent2022_evbuffer_write_atmost +#define evconnlistener_disable opal_libevent2022_evconnlistener_disable +#define evconnlistener_enable opal_libevent2022_evconnlistener_enable +#define evconnlistener_free opal_libevent2022_evconnlistener_free +#define evconnlistener_get_base opal_libevent2022_evconnlistener_get_base +#define evconnlistener_get_fd opal_libevent2022_evconnlistener_get_fd +#define evconnlistener_new opal_libevent2022_evconnlistener_new +#define evconnlistener_new_bind opal_libevent2022_evconnlistener_new_bind +#define evconnlistener_set_cb opal_libevent2022_evconnlistener_set_cb +#define evconnlistener_set_error_cb opal_libevent2022_evconnlistener_set_error_cb +#define evdns_add_server_port opal_libevent2022_evdns_add_server_port +#define evdns_add_server_port_with_base opal_libevent2022_evdns_add_server_port_with_base +#define evdns_base_clear_nameservers_and_suspend opal_libevent2022_evdns_base_clear_nameservers_and_suspend +#define evdns_base_count_nameservers opal_libevent2022_evdns_base_count_nameservers +#define evdns_base_free opal_libevent2022_evdns_base_free +#define evdns_base_load_hosts opal_libevent2022_evdns_base_load_hosts +#define evdns_base_nameserver_add 
opal_libevent2022_evdns_base_nameserver_add +#define evdns_base_nameserver_ip_add opal_libevent2022_evdns_base_nameserver_ip_add +#define evdns_base_nameserver_sockaddr_add opal_libevent2022_evdns_base_nameserver_sockaddr_add +#define evdns_base_new opal_libevent2022_evdns_base_new +#define evdns_base_resolv_conf_parse opal_libevent2022_evdns_base_resolv_conf_parse +#define evdns_base_resolve_ipv4 opal_libevent2022_evdns_base_resolve_ipv4 +#define evdns_base_resolve_ipv6 opal_libevent2022_evdns_base_resolve_ipv6 +#define evdns_base_resolve_reverse opal_libevent2022_evdns_base_resolve_reverse +#define evdns_base_resolve_reverse_ipv6 opal_libevent2022_evdns_base_resolve_reverse_ipv6 +#define evdns_base_resume opal_libevent2022_evdns_base_resume +#define evdns_base_search_add opal_libevent2022_evdns_base_search_add +#define evdns_base_search_clear opal_libevent2022_evdns_base_search_clear +#define evdns_base_search_ndots_set opal_libevent2022_evdns_base_search_ndots_set +#define evdns_base_set_option opal_libevent2022_evdns_base_set_option +#define evdns_cancel_request opal_libevent2022_evdns_cancel_request +#define evdns_clear_nameservers_and_suspend opal_libevent2022_evdns_clear_nameservers_and_suspend +#define evdns_close_server_port opal_libevent2022_evdns_close_server_port +#define evdns_count_nameservers opal_libevent2022_evdns_count_nameservers +#define evdns_err_to_string opal_libevent2022_evdns_err_to_string +#define evdns_getaddrinfo opal_libevent2022_evdns_getaddrinfo +#define evdns_getaddrinfo_cancel opal_libevent2022_evdns_getaddrinfo_cancel +#define evdns_get_global_base opal_libevent2022_evdns_get_global_base +#define evdns_init opal_libevent2022_evdns_init +#define evdns_nameserver_add opal_libevent2022_evdns_nameserver_add +#define evdns_nameserver_ip_add opal_libevent2022_evdns_nameserver_ip_add +#define evdns_resolv_conf_parse opal_libevent2022_evdns_resolv_conf_parse +#define evdns_resolve_ipv4 opal_libevent2022_evdns_resolve_ipv4 +#define 
evdns_resolve_ipv6 opal_libevent2022_evdns_resolve_ipv6 +#define evdns_resolve_reverse opal_libevent2022_evdns_resolve_reverse +#define evdns_resolve_reverse_ipv6 opal_libevent2022_evdns_resolve_reverse_ipv6 +#define evdns_resume opal_libevent2022_evdns_resume +#define evdns_search_add opal_libevent2022_evdns_search_add +#define evdns_search_clear opal_libevent2022_evdns_search_clear +#define evdns_search_ndots_set opal_libevent2022_evdns_search_ndots_set +#define evdns_server_request_add_aaaa_reply opal_libevent2022_evdns_server_request_add_aaaa_reply +#define evdns_server_request_add_a_reply opal_libevent2022_evdns_server_request_add_a_reply +#define evdns_server_request_add_cname_reply opal_libevent2022_evdns_server_request_add_cname_reply +#define evdns_server_request_add_ptr_reply opal_libevent2022_evdns_server_request_add_ptr_reply +#define evdns_server_request_add_reply opal_libevent2022_evdns_server_request_add_reply +#define evdns_server_request_drop opal_libevent2022_evdns_server_request_drop +#define evdns_server_request_get_requesting_addr opal_libevent2022_evdns_server_request_get_requesting_addr +#define evdns_server_request_respond opal_libevent2022_evdns_server_request_respond +#define evdns_server_request_set_flags opal_libevent2022_evdns_server_request_set_flags +#define evdns_set_log_fn opal_libevent2022_evdns_set_log_fn +#define evdns_set_option opal_libevent2022_evdns_set_option +#define evdns_set_random_bytes_fn opal_libevent2022_evdns_set_random_bytes_fn +#define evdns_set_transaction_id_fn opal_libevent2022_evdns_set_transaction_id_fn +#define evdns_shutdown opal_libevent2022_evdns_shutdown +#define event_active opal_libevent2022_event_active +#define event_active_nolock opal_libevent2022_event_active_nolock +#define event_add opal_libevent2022_event_add +#define event_assign opal_libevent2022_event_assign +#define event_base_add_virtual opal_libevent2022_event_base_add_virtual +#define event_base_assert_ok 
opal_libevent2022_event_base_assert_ok +#define event_base_del_virtual opal_libevent2022_event_base_del_virtual +#define event_base_dispatch opal_libevent2022_event_base_dispatch +#define event_base_dump_events opal_libevent2022_event_base_dump_events +#define event_base_free opal_libevent2022_event_base_free +#define event_base_get_deferred_cb_queue opal_libevent2022_event_base_get_deferred_cb_queue +#define event_base_get_features opal_libevent2022_event_base_get_features +#define event_base_get_method opal_libevent2022_event_base_get_method +#define event_base_gettimeofday_cached opal_libevent2022_event_base_gettimeofday_cached +#define event_base_got_break opal_libevent2022_event_base_got_break +#define event_base_got_exit opal_libevent2022_event_base_got_exit +#define event_base_init_common_timeout opal_libevent2022_event_base_init_common_timeout +#define event_base_loop opal_libevent2022_event_base_loop +#define event_base_loopbreak opal_libevent2022_event_base_loopbreak +#define event_base_loopexit opal_libevent2022_event_base_loopexit +#define event_base_new opal_libevent2022_event_base_new +#define event_base_new_with_config opal_libevent2022_event_base_new_with_config +#define event_base_once opal_libevent2022_event_base_once +#define event_base_priority_init opal_libevent2022_event_base_priority_init +#define event_base_set opal_libevent2022_event_base_set +#define event_base_start_iocp opal_libevent2022_event_base_start_iocp +#define event_base_stop_iocp opal_libevent2022_event_base_stop_iocp +#define event_changelist_add opal_libevent2022_event_changelist_add +#define event_changelist_del opal_libevent2022_event_changelist_del +#define event_changelist_freemem opal_libevent2022_event_changelist_freemem +#define event_changelist_init opal_libevent2022_event_changelist_init +#define event_changelist_remove_all opal_libevent2022_event_changelist_remove_all +#define event_config_avoid_method opal_libevent2022_event_config_avoid_method +#define 
event_config_free opal_libevent2022_event_config_free +#define event_config_new opal_libevent2022_event_config_new +#define event_config_require_features opal_libevent2022_event_config_require_features +#define event_config_set_flag opal_libevent2022_event_config_set_flag +#define event_config_set_num_cpus_hint opal_libevent2022_event_config_set_num_cpus_hint +#define event_debug_map_HT_CLEAR opal_libevent2022_event_debug_map_HT_CLEAR +#define event_debug_map_HT_GROW opal_libevent2022_event_debug_map_HT_GROW +#define _event_debug_map_HT_REP_IS_BAD opal_libevent2022__event_debug_map_HT_REP_IS_BAD +#define event_debug_unassign opal_libevent2022_event_debug_unassign +#define _event_debugx opal_libevent2022__event_debugx +#define event_deferred_cb_cancel opal_libevent2022_event_deferred_cb_cancel +#define event_deferred_cb_init opal_libevent2022_event_deferred_cb_init +#define event_deferred_cb_queue_init opal_libevent2022_event_deferred_cb_queue_init +#define event_deferred_cb_schedule opal_libevent2022_event_deferred_cb_schedule +#define event_del opal_libevent2022_event_del +#define event_dispatch opal_libevent2022_event_dispatch +#define event_enable_debug_mode opal_libevent2022_event_enable_debug_mode +#define event_err opal_libevent2022_event_err +#define event_errx opal_libevent2022_event_errx +#define event_free opal_libevent2022_event_free +#define event_get_assignment opal_libevent2022_event_get_assignment +#define event_get_base opal_libevent2022_event_get_base +#define event_get_callback opal_libevent2022_event_get_callback +#define event_get_callback_arg opal_libevent2022_event_get_callback_arg +#define event_get_events opal_libevent2022_event_get_events +#define event_get_fd opal_libevent2022_event_get_fd +#define event_get_method opal_libevent2022_event_get_method +#define event_get_struct_event_size opal_libevent2022_event_get_struct_event_size +#define event_get_supported_methods opal_libevent2022_event_get_supported_methods +#define event_get_version 
opal_libevent2022_event_get_version +#define event_get_version_number opal_libevent2022_event_get_version_number +#define event_global_setup_locks_ opal_libevent2022_event_global_setup_locks_ +#define event_init opal_libevent2022_event_init +#define event_initialized opal_libevent2022_event_initialized +#define event_loop opal_libevent2022_event_loop +#define event_loopbreak opal_libevent2022_event_loopbreak +#define event_loopexit opal_libevent2022_event_loopexit +#define event_mm_calloc_ opal_libevent2022_event_mm_calloc_ +#define event_mm_free_ opal_libevent2022_event_mm_free_ +#define event_mm_malloc_ opal_libevent2022_event_mm_malloc_ +#define event_mm_realloc_ opal_libevent2022_event_mm_realloc_ +#define event_mm_strdup_ opal_libevent2022_event_mm_strdup_ +#define event_msgx opal_libevent2022_event_msgx +#define event_new opal_libevent2022_event_new +#define event_once opal_libevent2022_event_once +#define event_pending opal_libevent2022_event_pending +#define event_priority_init opal_libevent2022_event_priority_init +#define event_priority_set opal_libevent2022_event_priority_set +#define event_reinit opal_libevent2022_event_reinit +#define event_set opal_libevent2022_event_set +#define event_set_fatal_callback opal_libevent2022_event_set_fatal_callback +#define event_set_log_callback opal_libevent2022_event_set_log_callback +#define event_set_mem_functions opal_libevent2022_event_set_mem_functions +#define event_sock_err opal_libevent2022_event_sock_err +#define event_sock_warn opal_libevent2022_event_sock_warn +#define event_warn opal_libevent2022_event_warn +#define event_warnx opal_libevent2022_event_warnx +#define evhttp_accept_socket opal_libevent2022_evhttp_accept_socket +#define evhttp_accept_socket_with_handle opal_libevent2022_evhttp_accept_socket_with_handle +#define evhttp_add_header opal_libevent2022_evhttp_add_header +#define evhttp_add_server_alias opal_libevent2022_evhttp_add_server_alias +#define evhttp_add_virtual_host 
opal_libevent2022_evhttp_add_virtual_host +#define evhttp_bind_listener opal_libevent2022_evhttp_bind_listener +#define evhttp_bind_socket opal_libevent2022_evhttp_bind_socket +#define evhttp_bind_socket_with_handle opal_libevent2022_evhttp_bind_socket_with_handle +#define evhttp_bound_socket_get_fd opal_libevent2022_evhttp_bound_socket_get_fd +#define evhttp_bound_socket_get_listener opal_libevent2022_evhttp_bound_socket_get_listener +#define evhttp_cancel_request opal_libevent2022_evhttp_cancel_request +#define evhttp_clear_headers opal_libevent2022_evhttp_clear_headers +#define evhttp_connection_base_new opal_libevent2022_evhttp_connection_base_new +#define evhttp_connection_connect opal_libevent2022_evhttp_connection_connect +#define evhttp_connection_fail opal_libevent2022_evhttp_connection_fail +#define evhttp_connection_free opal_libevent2022_evhttp_connection_free +#define evhttp_connection_get_base opal_libevent2022_evhttp_connection_get_base +#define evhttp_connection_get_bufferevent opal_libevent2022_evhttp_connection_get_bufferevent +#define evhttp_connection_get_peer opal_libevent2022_evhttp_connection_get_peer +#define evhttp_connection_new opal_libevent2022_evhttp_connection_new +#define evhttp_connection_reset opal_libevent2022_evhttp_connection_reset +#define evhttp_connection_set_base opal_libevent2022_evhttp_connection_set_base +#define evhttp_connection_set_closecb opal_libevent2022_evhttp_connection_set_closecb +#define evhttp_connection_set_local_address opal_libevent2022_evhttp_connection_set_local_address +#define evhttp_connection_set_local_port opal_libevent2022_evhttp_connection_set_local_port +#define evhttp_connection_set_max_body_size opal_libevent2022_evhttp_connection_set_max_body_size +#define evhttp_connection_set_max_headers_size opal_libevent2022_evhttp_connection_set_max_headers_size +#define evhttp_connection_set_retries opal_libevent2022_evhttp_connection_set_retries +#define evhttp_connection_set_timeout 
opal_libevent2022_evhttp_connection_set_timeout +#define evhttp_decode_uri opal_libevent2022_evhttp_decode_uri +#define evhttp_del_accept_socket opal_libevent2022_evhttp_del_accept_socket +#define evhttp_del_cb opal_libevent2022_evhttp_del_cb +#define evhttp_encode_uri opal_libevent2022_evhttp_encode_uri +#define evhttp_find_header opal_libevent2022_evhttp_find_header +#define evhttp_free opal_libevent2022_evhttp_free +#define evhttp_htmlescape opal_libevent2022_evhttp_htmlescape +#define evhttp_make_request opal_libevent2022_evhttp_make_request +#define evhttp_new opal_libevent2022_evhttp_new +#define evhttp_parse_firstline opal_libevent2022_evhttp_parse_firstline +#define evhttp_parse_headers opal_libevent2022_evhttp_parse_headers +#define evhttp_parse_query opal_libevent2022_evhttp_parse_query +#define evhttp_parse_query_str opal_libevent2022_evhttp_parse_query_str +#define evhttp_remove_header opal_libevent2022_evhttp_remove_header +#define evhttp_remove_server_alias opal_libevent2022_evhttp_remove_server_alias +#define evhttp_remove_virtual_host opal_libevent2022_evhttp_remove_virtual_host +#define evhttp_request_free opal_libevent2022_evhttp_request_free +#define evhttp_request_get_command opal_libevent2022_evhttp_request_get_command +#define evhttp_request_get_connection opal_libevent2022_evhttp_request_get_connection +#define evhttp_request_get_evhttp_uri opal_libevent2022_evhttp_request_get_evhttp_uri +#define evhttp_request_get_host opal_libevent2022_evhttp_request_get_host +#define evhttp_request_get_input_buffer opal_libevent2022_evhttp_request_get_input_buffer +#define evhttp_request_get_input_headers opal_libevent2022_evhttp_request_get_input_headers +#define evhttp_request_get_output_buffer opal_libevent2022_evhttp_request_get_output_buffer +#define evhttp_request_get_output_headers opal_libevent2022_evhttp_request_get_output_headers +#define evhttp_request_get_response_code opal_libevent2022_evhttp_request_get_response_code +#define 
evhttp_request_get_uri opal_libevent2022_evhttp_request_get_uri +#define evhttp_request_is_owned opal_libevent2022_evhttp_request_is_owned +#define evhttp_request_new opal_libevent2022_evhttp_request_new +#define evhttp_request_own opal_libevent2022_evhttp_request_own +#define evhttp_request_set_chunked_cb opal_libevent2022_evhttp_request_set_chunked_cb +#define evhttp_response_code opal_libevent2022_evhttp_response_code +#define evhttp_send_error opal_libevent2022_evhttp_send_error +#define evhttp_send_page opal_libevent2022_evhttp_send_page +#define evhttp_send_reply opal_libevent2022_evhttp_send_reply +#define evhttp_send_reply_chunk opal_libevent2022_evhttp_send_reply_chunk +#define evhttp_send_reply_end opal_libevent2022_evhttp_send_reply_end +#define evhttp_send_reply_start opal_libevent2022_evhttp_send_reply_start +#define evhttp_set_allowed_methods opal_libevent2022_evhttp_set_allowed_methods +#define evhttp_set_cb opal_libevent2022_evhttp_set_cb +#define evhttp_set_gencb opal_libevent2022_evhttp_set_gencb +#define evhttp_set_max_body_size opal_libevent2022_evhttp_set_max_body_size +#define evhttp_set_max_headers_size opal_libevent2022_evhttp_set_max_headers_size +#define evhttp_set_timeout opal_libevent2022_evhttp_set_timeout +#define evhttp_start opal_libevent2022_evhttp_start +#define evhttp_start_read opal_libevent2022_evhttp_start_read +#define evhttp_uridecode opal_libevent2022_evhttp_uridecode +#define evhttp_uriencode opal_libevent2022_evhttp_uriencode +#define evhttp_uri_free opal_libevent2022_evhttp_uri_free +#define evhttp_uri_get_fragment opal_libevent2022_evhttp_uri_get_fragment +#define evhttp_uri_get_host opal_libevent2022_evhttp_uri_get_host +#define evhttp_uri_get_path opal_libevent2022_evhttp_uri_get_path +#define evhttp_uri_get_port opal_libevent2022_evhttp_uri_get_port +#define evhttp_uri_get_query opal_libevent2022_evhttp_uri_get_query +#define evhttp_uri_get_scheme opal_libevent2022_evhttp_uri_get_scheme +#define 
evhttp_uri_get_userinfo opal_libevent2022_evhttp_uri_get_userinfo +#define evhttp_uri_join opal_libevent2022_evhttp_uri_join +#define evhttp_uri_new opal_libevent2022_evhttp_uri_new +#define evhttp_uri_parse opal_libevent2022_evhttp_uri_parse +#define evhttp_uri_parse_with_flags opal_libevent2022_evhttp_uri_parse_with_flags +#define evhttp_uri_set_flags opal_libevent2022_evhttp_uri_set_flags +#define evhttp_uri_set_fragment opal_libevent2022_evhttp_uri_set_fragment +#define evhttp_uri_set_host opal_libevent2022_evhttp_uri_set_host +#define evhttp_uri_set_path opal_libevent2022_evhttp_uri_set_path +#define evhttp_uri_set_port opal_libevent2022_evhttp_uri_set_port +#define evhttp_uri_set_query opal_libevent2022_evhttp_uri_set_query +#define evhttp_uri_set_scheme opal_libevent2022_evhttp_uri_set_scheme +#define evhttp_uri_set_userinfo opal_libevent2022_evhttp_uri_set_userinfo +#define evmap_check_integrity opal_libevent2022_evmap_check_integrity +#define evmap_io_active opal_libevent2022_evmap_io_active +#define evmap_io_add opal_libevent2022_evmap_io_add +#define evmap_io_clear opal_libevent2022_evmap_io_clear +#define evmap_io_del opal_libevent2022_evmap_io_del +#define evmap_io_get_fdinfo opal_libevent2022_evmap_io_get_fdinfo +#define evmap_io_initmap opal_libevent2022_evmap_io_initmap +#define evmap_signal_active opal_libevent2022_evmap_signal_active +#define evmap_signal_add opal_libevent2022_evmap_signal_add +#define evmap_signal_clear opal_libevent2022_evmap_signal_clear +#define evmap_signal_del opal_libevent2022_evmap_signal_del +#define evmap_signal_initmap opal_libevent2022_evmap_signal_initmap +#define evrpc_add_hook opal_libevent2022_evrpc_add_hook +#define evrpc_free opal_libevent2022_evrpc_free +#define evrpc_get_reply opal_libevent2022_evrpc_get_reply +#define evrpc_get_request opal_libevent2022_evrpc_get_request +#define evrpc_hook_add_meta opal_libevent2022_evrpc_hook_add_meta +#define evrpc_hook_find_meta opal_libevent2022_evrpc_hook_find_meta 
+#define evrpc_hook_get_connection opal_libevent2022_evrpc_hook_get_connection +#define evrpc_init opal_libevent2022_evrpc_init +#define evrpc_make_request opal_libevent2022_evrpc_make_request +#define evrpc_make_request_ctx opal_libevent2022_evrpc_make_request_ctx +#define evrpc_pool_add_connection opal_libevent2022_evrpc_pool_add_connection +#define evrpc_pool_free opal_libevent2022_evrpc_pool_free +#define evrpc_pool_new opal_libevent2022_evrpc_pool_new +#define evrpc_pool_remove_connection opal_libevent2022_evrpc_pool_remove_connection +#define evrpc_pool_set_timeout opal_libevent2022_evrpc_pool_set_timeout +#define evrpc_register_generic opal_libevent2022_evrpc_register_generic +#define evrpc_register_rpc opal_libevent2022_evrpc_register_rpc +#define evrpc_remove_hook opal_libevent2022_evrpc_remove_hook +#define evrpc_reqstate_free opal_libevent2022_evrpc_reqstate_free +#define evrpc_request_done opal_libevent2022_evrpc_request_done +#define evrpc_request_get_pool opal_libevent2022_evrpc_request_get_pool +#define evrpc_request_set_cb opal_libevent2022_evrpc_request_set_cb +#define evrpc_request_set_pool opal_libevent2022_evrpc_request_set_pool +#define evrpc_resume_request opal_libevent2022_evrpc_resume_request +#define evrpc_send_request_generic opal_libevent2022_evrpc_send_request_generic +#define evrpc_unregister_rpc opal_libevent2022_evrpc_unregister_rpc +#define evsig_dealloc opal_libevent2022_evsig_dealloc +#define evsig_global_setup_locks_ opal_libevent2022_evsig_global_setup_locks_ +#define evsig_init opal_libevent2022_evsig_init +#define _evsig_restore_handler opal_libevent2022__evsig_restore_handler +#define evsig_set_base opal_libevent2022_evsig_set_base +#define _evsig_set_handler opal_libevent2022__evsig_set_handler +#define evtag_consume opal_libevent2022_evtag_consume +#define evtag_decode_int opal_libevent2022_evtag_decode_int +#define evtag_decode_int64 opal_libevent2022_evtag_decode_int64 +#define evtag_decode_tag 
opal_libevent2022_evtag_decode_tag +#define evtag_encode_int opal_libevent2022_evtag_encode_int +#define evtag_encode_int64 opal_libevent2022_evtag_encode_int64 +#define evtag_encode_tag opal_libevent2022_evtag_encode_tag +#define evtag_init opal_libevent2022_evtag_init +#define evtag_marshal opal_libevent2022_evtag_marshal +#define evtag_marshal_buffer opal_libevent2022_evtag_marshal_buffer +#define evtag_marshal_int opal_libevent2022_evtag_marshal_int +#define evtag_marshal_int64 opal_libevent2022_evtag_marshal_int64 +#define evtag_marshal_string opal_libevent2022_evtag_marshal_string +#define evtag_marshal_timeval opal_libevent2022_evtag_marshal_timeval +#define evtag_payload_length opal_libevent2022_evtag_payload_length +#define evtag_peek opal_libevent2022_evtag_peek +#define evtag_peek_length opal_libevent2022_evtag_peek_length +#define evtag_unmarshal opal_libevent2022_evtag_unmarshal +#define evtag_unmarshal_fixed opal_libevent2022_evtag_unmarshal_fixed +#define evtag_unmarshal_header opal_libevent2022_evtag_unmarshal_header +#define evtag_unmarshal_int opal_libevent2022_evtag_unmarshal_int +#define evtag_unmarshal_int64 opal_libevent2022_evtag_unmarshal_int64 +#define evtag_unmarshal_string opal_libevent2022_evtag_unmarshal_string +#define evtag_unmarshal_timeval opal_libevent2022_evtag_unmarshal_timeval +#define _evthread_debug_get_real_lock opal_libevent2022__evthread_debug_get_real_lock +#define evthread_enable_lock_debuging opal_libevent2022_evthread_enable_lock_debuging +#define _evthread_is_debug_lock_held opal_libevent2022__evthread_is_debug_lock_held +#define evthread_make_base_notifiable opal_libevent2022_evthread_make_base_notifiable +#define evthread_set_condition_callbacks opal_libevent2022_evthread_set_condition_callbacks +#define evthread_set_id_callback opal_libevent2022_evthread_set_id_callback +#define evthread_set_lock_callbacks opal_libevent2022_evthread_set_lock_callbacks +#define evthread_setup_global_lock_ 
opal_libevent2022_evthread_setup_global_lock_ +#define ev_token_bucket_cfg_free opal_libevent2022_ev_token_bucket_cfg_free +#define ev_token_bucket_cfg_new opal_libevent2022_ev_token_bucket_cfg_new +#define ev_token_bucket_get_tick opal_libevent2022_ev_token_bucket_get_tick +#define ev_token_bucket_init opal_libevent2022_ev_token_bucket_init +#define ev_token_bucket_update opal_libevent2022_ev_token_bucket_update +#define evutil_addrinfo_append opal_libevent2022_evutil_addrinfo_append +#define evutil_adjust_hints_for_addrconfig opal_libevent2022_evutil_adjust_hints_for_addrconfig +#define evutil_ascii_strcasecmp opal_libevent2022_evutil_ascii_strcasecmp +#define evutil_ascii_strncasecmp opal_libevent2022_evutil_ascii_strncasecmp +#define evutil_closesocket opal_libevent2022_evutil_closesocket +#define evutil_ersatz_socketpair opal_libevent2022_evutil_ersatz_socketpair +#define evutil_format_sockaddr_port opal_libevent2022_evutil_format_sockaddr_port +#define evutil_freeaddrinfo opal_libevent2022_evutil_freeaddrinfo +#define evutil_gai_strerror opal_libevent2022_evutil_gai_strerror +#define evutil_getaddrinfo opal_libevent2022_evutil_getaddrinfo +#define evutil_getaddrinfo_async opal_libevent2022_evutil_getaddrinfo_async +#define evutil_getaddrinfo_common opal_libevent2022_evutil_getaddrinfo_common +#define evutil_getenv opal_libevent2022_evutil_getenv +#define evutil_hex_char_to_int opal_libevent2022_evutil_hex_char_to_int +#define evutil_inet_ntop opal_libevent2022_evutil_inet_ntop +#define evutil_inet_pton opal_libevent2022_evutil_inet_pton +#define EVUTIL_ISALNUM opal_libevent2022_EVUTIL_ISALNUM +#define EVUTIL_ISALPHA opal_libevent2022_EVUTIL_ISALPHA +#define EVUTIL_ISDIGIT opal_libevent2022_EVUTIL_ISDIGIT +#define EVUTIL_ISLOWER opal_libevent2022_EVUTIL_ISLOWER +#define EVUTIL_ISPRINT opal_libevent2022_EVUTIL_ISPRINT +#define EVUTIL_ISSPACE opal_libevent2022_EVUTIL_ISSPACE +#define EVUTIL_ISUPPER opal_libevent2022_EVUTIL_ISUPPER +#define EVUTIL_ISXDIGIT 
opal_libevent2022_EVUTIL_ISXDIGIT +#define evutil_make_listen_socket_reuseable opal_libevent2022_evutil_make_listen_socket_reuseable +#define evutil_make_socket_closeonexec opal_libevent2022_evutil_make_socket_closeonexec +#define evutil_make_socket_nonblocking opal_libevent2022_evutil_make_socket_nonblocking +#define evutil_memclear_ opal_libevent2022_evutil_memclear_ +#define evutil_new_addrinfo opal_libevent2022_evutil_new_addrinfo +#define evutil_open_closeonexec opal_libevent2022_evutil_open_closeonexec +#define evutil_parse_sockaddr_port opal_libevent2022_evutil_parse_sockaddr_port +#define evutil_read_file opal_libevent2022_evutil_read_file +#define evutil_secure_rng_add_bytes opal_libevent2022_evutil_secure_rng_add_bytes +#define evutil_secure_rng_get_bytes opal_libevent2022_evutil_secure_rng_get_bytes +#define evutil_secure_rng_global_setup_locks_ opal_libevent2022_evutil_secure_rng_global_setup_locks_ +#define evutil_secure_rng_init opal_libevent2022_evutil_secure_rng_init +#define evutil_secure_rng_set_urandom_device_file opal_libevent2022_evutil_secure_rng_set_urandom_device_file +#define evutil_set_evdns_getaddrinfo_fn opal_libevent2022_evutil_set_evdns_getaddrinfo_fn +#define evutil_snprintf opal_libevent2022_evutil_snprintf +#define evutil_sockaddr_cmp opal_libevent2022_evutil_sockaddr_cmp +#define evutil_sockaddr_is_loopback opal_libevent2022_evutil_sockaddr_is_loopback +#define evutil_socket_connect opal_libevent2022_evutil_socket_connect +#define evutil_socket_finished_connecting opal_libevent2022_evutil_socket_finished_connecting +#define evutil_socketpair opal_libevent2022_evutil_socketpair +#define evutil_strtoll opal_libevent2022_evutil_strtoll +#define EVUTIL_TOLOWER opal_libevent2022_EVUTIL_TOLOWER +#define EVUTIL_TOUPPER opal_libevent2022_EVUTIL_TOUPPER +#define evutil_tv_to_msec opal_libevent2022_evutil_tv_to_msec +#define evutil_vsnprintf opal_libevent2022_evutil_vsnprintf +#define _evutil_weakrand opal_libevent2022__evutil_weakrand 
+#define _fini opal_libevent2022__fini +#define _init opal_libevent2022__init +#define evthread_use_pthreads opal_libevent2022_evthread_use_pthreads /* eventop */ #ifdef _EVENT_HAVE_EVENT_PORTS diff --git a/opal/mca/event/libevent2022/libevent2022_component.c b/opal/mca/event/libevent2022/libevent2022_component.c index caf5b82fc40..1151428f915 100644 --- a/opal/mca/event/libevent2022/libevent2022_component.c +++ b/opal/mca/event/libevent2022/libevent2022_component.c @@ -123,7 +123,7 @@ const opal_event_component_t mca_event_libevent2022_component = { static int libevent2022_register (void) { const struct eventop** _eventop = eventops; - char available_eventops[1024] = "none"; + char available_eventops[BUFSIZ] = "none"; char *help_msg = NULL; int ret; diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index cd429eeb006..50e389025fd 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -14,7 +14,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -71,7 +71,7 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, So first we have to see if we can find *any* cores by looking for the 0th core. If we find it, then try to find the Nth core. Otherwise, try to find the Nth PU. 
*/ - if (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) { + if (opal_hwloc_use_hwthreads_as_cpus || (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0))) { obj_type = HWLOC_OBJ_PU; } @@ -492,8 +492,11 @@ static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt) obj->userdata = (void*)data; } if (NULL == opal_hwloc_base_cpu_set) { - if (!hwloc_bitmap_isincluded(obj->cpuset, obj->allowed_cpuset)) { - /* do not count not allowed cores */ + if (!hwloc_bitmap_intersects(obj->cpuset, obj->allowed_cpuset)) { + /* + * do not count not allowed cores (e.g. cores with zero allowed PU) + * if SMT is enabled, do count cores with at least one allowed hwthread + */ return; } data->npus = 1; @@ -541,7 +544,6 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, hwloc_obj_t obj) { opal_hwloc_obj_data_t *data; - int i; unsigned int cnt = 0; hwloc_cpuset_t cpuset; @@ -579,16 +581,10 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, * one bit for each available pu. We could just * subtract the first and last indices, but there * may be "holes" in the bitmap corresponding to - * offline or unallowed cpus - so we have to - * search for them + * offline or unallowed cpus - so we count them with + * the bitmap "weight" (a.k.a. 
population count) function */ - for (i=hwloc_bitmap_first(cpuset), cnt=0; - i <= hwloc_bitmap_last(cpuset); - i++) { - if (hwloc_bitmap_isset(cpuset, i)) { - cnt++; - } - } + cnt = hwloc_bitmap_weight(cpuset); } /* cache the info */ data = (opal_hwloc_obj_data_t*)obj->userdata; // in case it was added diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4 index 07868e8d4e2..9bd2d9e2361 100644 --- a/opal/mca/hwloc/external/configure.m4 +++ b/opal/mca/hwloc/external/configure.m4 @@ -181,6 +181,21 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_ERROR([Cannot continue])]) + AC_MSG_CHECKING([if external hwloc version is lower than 2.0]) + AS_IF([test "$opal_hwloc_dir" != ""], + [opal_hwloc_external_CFLAGS_save=$CFLAGS + CFLAGS="-I$opal_hwloc_dir/include $opal_hwloc_external_CFLAGS_save"]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include <hwloc.h>]], + [[ +#if HWLOC_API_VERSION >= 0x00020000 +#error "hwloc API version is greater or equal than 0x00020000" +#endif + ]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR([OMPI does not currently support hwloc v2 API +Cannot continue])]) AS_IF([test "$opal_hwloc_dir" != ""], [CFLAGS=$opal_hwloc_external_CFLAGS_save]) $1], diff --git a/opal/mca/hwloc/hwloc1111/configure.m4 b/opal/mca/hwloc/hwloc1111/configure.m4 deleted file mode 100644 index 5a61bd7d0d2..00000000000 --- a/opal/mca/hwloc/hwloc1111/configure.m4 +++ /dev/null @@ -1,170 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
-# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# Priority -# -AC_DEFUN([MCA_opal_hwloc_hwloc1111_PRIORITY], [90]) - -# -# Force this component to compile in static-only mode -# -AC_DEFUN([MCA_opal_hwloc_hwloc1111_COMPILE_MODE], [ - AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) - $4="static" - AC_MSG_RESULT([$$4]) -]) - -# Include hwloc m4 files -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc.m4) -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_pkg.m4) -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_attributes.m4) -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_visibility.m4) -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_vendor.m4) -m4_include(opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_components.m4) - -# MCA_hwloc_hwloc1111_POST_CONFIG() -# --------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1111_POST_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1111_basedir]) - - # If we won, then do all the rest of the setup - AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1111_support" = "yes"], - [ - # Set this variable so that the framework m4 knows what - # file to include in opal/mca/hwloc/hwloc.h - opal_hwloc_hwloc1111_basedir=opal/mca/hwloc/hwloc1111 - opal_hwloc_base_include="$opal_hwloc_hwloc1111_basedir/hwloc1111.h" - - # Add some stuff to CPPFLAGS so that the rest of the source - # tree can be built - file=$opal_hwloc_hwloc1111_basedir/hwloc - CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_SRCDIR/$file/include" - AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], - [CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_BUILDDIR/$file/include"]) - unset file - ]) - OPAL_VAR_SCOPE_POP - - # This must be run unconditionally - HWLOC_DO_AM_CONDITIONALS -])dnl - - -# MCA_hwloc_hwloc1111_CONFIG([action-if-found], [action-if-not-found]) -# -------------------------------------------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1111_CONFIG],[ - # Hwloc needs to know if we have 
Verbs support - AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) - - AC_CONFIG_FILES([opal/mca/hwloc/hwloc1111/Makefile]) - - OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1111_save_CPPFLAGS opal_hwloc_hwloc1111_save_LDFLAGS opal_hwloc_hwloc1111_save_LIBS opal_hwloc_hwloc1111_save_cairo opal_hwloc_hwloc1111_save_xml opal_hwloc_hwloc1111_basedir opal_hwloc_hwloc1111_file opal_hwloc_hwloc1111_save_cflags CPPFLAGS_save LIBS_save]) - - # default to this component not providing support - opal_hwloc_hwloc1111_basedir=opal/mca/hwloc/hwloc1111 - opal_hwloc_hwloc1111_support=no - - if test "$with_hwloc" = "internal" -o "$with_hwloc" = "" -o "$with_hwloc" = "yes"; then - opal_hwloc_hwloc1111_save_CPPFLAGS=$CPPFLAGS - opal_hwloc_hwloc1111_save_LDFLAGS=$LDFLAGS - opal_hwloc_hwloc1111_save_LIBS=$LIBS - - # Run the hwloc configuration - set the prefix to minimize - # the chance that someone will use the internal symbols - HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1111_]) - - # save XML or graphical options - opal_hwloc_hwloc1111_save_cairo=$enable_cairo - opal_hwloc_hwloc1111_save_xml=$enable_xml - opal_hwloc_hwloc1111_save_static=$enable_static - opal_hwloc_hwloc1111_save_shared=$enable_shared - opal_hwloc_hwloc1111_save_plugins=$enable_plugins - - # never enable hwloc's graphical option - enable_cairo=no - - # never enable hwloc's plugin system - enable_plugins=no - enable_static=yes - enable_shared=no - - # Override -- disable hwloc's libxml2 support, but enable the - # native hwloc XML support - enable_libxml2=no - enable_xml=yes - - # hwloc checks for compiler visibility, and its needs to do - # this without "picky" flags. 
- opal_hwloc_hwloc1111_save_cflags=$CFLAGS - CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY - HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1111/hwloc], - [AC_MSG_CHECKING([whether hwloc configure succeeded]) - AC_MSG_RESULT([yes]) - HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1111_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1111_basedir/hwloc/VERSION`" - - # Build flags for our Makefile.am - opal_hwloc_hwloc1111_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' - opal_hwloc_hwloc1111_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1111_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' - opal_hwloc_hwloc1111_support=yes - - AC_DEFINE_UNQUOTED([HWLOC_HWLOC1111_HWLOC_VERSION], - ["$HWLOC_VERSION"], - [Version of hwloc]) - - # Do we have verbs support? - CPPFLAGS_save=$CPPFLAGS - AS_IF([test "$opal_want_verbs" = "yes"], - [CPPFLAGS="-I$opal_verbs_dir/include $CPPFLAGS"]) - AC_CHECK_HEADERS([infiniband/verbs.h]) - CPPFLAGS=$CPPFLAGS_save - ], - [AC_MSG_CHECKING([whether hwloc configure succeeded]) - AC_MSG_RESULT([no]) - opal_hwloc_hwloc1111_support=no]) - CFLAGS=$opal_hwloc_hwloc1111_save_cflags - - # Restore some env variables, if necessary - AS_IF([test -n "$opal_hwloc_hwloc1111_save_cairo"], - [enable_cairo=$opal_hwloc_hwloc1111_save_cairo]) - AS_IF([test -n "$opal_hwloc_hwloc1111_save_xml"], - [enable_xml=$opal_hwloc_hwloc1111_save_xml]) - AS_IF([test -n "$opal_hwloc_hwloc1111_save_static"], - [enable_static=$opal_hwloc_hwloc1111_save_static]) - AS_IF([test -n "$opal_hwloc_hwloc1111_save_shared"], - [enable_shared=$opal_hwloc_hwloc1111_save_shared]) - AS_IF([test -n "$opal_hwloc_hwloc1111_save_plugins"], - [enable_plugins=$opal_hwloc_hwloc1111_save_shared]) - - CPPFLAGS=$opal_hwloc_hwloc1111_save_CPPFLAGS - LDFLAGS=$opal_hwloc_hwloc1111_save_LDFLAGS - LIBS=$opal_hwloc_hwloc1111_save_LIBS - - AC_SUBST([opal_hwloc_hwloc1111_CFLAGS]) - AC_SUBST([opal_hwloc_hwloc1111_CPPFLAGS]) - AC_SUBST([opal_hwloc_hwloc1111_LDFLAGS]) - AC_SUBST([opal_hwloc_hwloc1111_LIBS]) - 
- # Finally, add some flags to the wrapper compiler so that our - # headers can be found. - hwloc_hwloc1111_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" - hwloc_hwloc1111_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" - hwloc_hwloc1111_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/'"$opal_hwloc_hwloc1111_basedir/hwloc/include" - fi - - # Done! - AS_IF([test "$opal_hwloc_hwloc1111_support" = "yes"], - [$1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/hwloc/hwloc1111/Makefile.am b/opal/mca/hwloc/hwloc1112/Makefile.am similarity index 82% rename from opal/mca/hwloc/hwloc1111/Makefile.am rename to opal/mca/hwloc/hwloc1112/Makefile.am index c0704225121..5ab0b36f50f 100644 --- a/opal/mca/hwloc/hwloc1111/Makefile.am +++ b/opal/mca/hwloc/hwloc1112/Makefile.am @@ -1,6 +1,8 @@ # # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014-2015 Intel, Inc. All right reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -25,16 +27,16 @@ EXTRA_DIST = \ SUBDIRS = hwloc # Headers and sources -headers = hwloc1111.h -sources = hwloc1111_component.c +headers = hwloc1112.h +sources = hwloc1112_component.c # We only ever build this component statically -noinst_LTLIBRARIES = libmca_hwloc_hwloc1111.la -libmca_hwloc_hwloc1111_la_SOURCES = $(headers) $(sources) -nodist_libmca_hwloc_hwloc1111_la_SOURCES = $(nodist_headers) -libmca_hwloc_hwloc1111_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1111_LDFLAGS) -libmca_hwloc_hwloc1111_la_LIBADD = $(opal_hwloc_hwloc1111_LIBS) -libmca_hwloc_hwloc1111_la_DEPENDENCIES = \ +noinst_LTLIBRARIES = libmca_hwloc_hwloc1112.la +libmca_hwloc_hwloc1112_la_SOURCES = $(headers) $(sources) +nodist_libmca_hwloc_hwloc1112_la_SOURCES = $(nodist_headers) +libmca_hwloc_hwloc1112_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1112_LDFLAGS) +libmca_hwloc_hwloc1112_la_LIBADD = $(opal_hwloc_hwloc1112_LIBS) 
+libmca_hwloc_hwloc1112_la_DEPENDENCIES = \ $(HWLOC_top_builddir)/src/libhwloc_embedded.la # Since the rest of the code base includes the underlying hwloc.h, we diff --git a/opal/mca/hwloc/hwloc1111/README-ompi.txt b/opal/mca/hwloc/hwloc1112/README-ompi.txt similarity index 100% rename from opal/mca/hwloc/hwloc1111/README-ompi.txt rename to opal/mca/hwloc/hwloc1112/README-ompi.txt diff --git a/opal/mca/hwloc/hwloc1112/configure.m4 b/opal/mca/hwloc/hwloc1112/configure.m4 new file mode 100644 index 00000000000..28f40a6d957 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/configure.m4 @@ -0,0 +1,174 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Priority +# +AC_DEFUN([MCA_opal_hwloc_hwloc1112_PRIORITY], [90]) + +# +# Force this component to compile in static-only mode +# +AC_DEFUN([MCA_opal_hwloc_hwloc1112_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# Include hwloc m4 files +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4) + +# MCA_opal_hwloc_hwloc1112_POST_CONFIG() +# -------------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc1112_POST_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1112_basedir]) + + # If we won, then do all the rest of the setup 
+ AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1112_support" = "yes"], + [ + # Set this variable so that the framework m4 knows what + # file to include in opal/mca/hwloc/hwloc.h + opal_hwloc_hwloc1112_basedir=opal/mca/hwloc/hwloc1112 + opal_hwloc_base_include="$opal_hwloc_hwloc1112_basedir/hwloc1112.h" + + # Add some stuff to CPPFLAGS so that the rest of the source + # tree can be built + file=$opal_hwloc_hwloc1112_basedir/hwloc + CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_SRCDIR/$file/include" + AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], + [CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_BUILDDIR/$file/include"]) + unset file + ]) + OPAL_VAR_SCOPE_POP + + # This must be run unconditionally + HWLOC_DO_AM_CONDITIONALS +])dnl + + +# MCA_opal_hwloc_hwloc1112_CONFIG([action-if-found], [action-if-not-found]) +# ------------------------------------------------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc1112_CONFIG],[ + # Hwloc needs to know if we have Verbs support + AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) + + AC_CONFIG_FILES([opal/mca/hwloc/hwloc1112/Makefile]) + + OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1112_save_CPPFLAGS opal_hwloc_hwloc1112_save_LDFLAGS opal_hwloc_hwloc1112_save_LIBS opal_hwloc_hwloc1112_save_cairo opal_hwloc_hwloc1112_save_xml opal_hwloc_hwloc1112_save_static opal_hwloc_hwloc1112_save_shared opal_hwloc_hwloc1112_save_plugins opal_hwloc_hwloc1112_basedir opal_hwloc_hwloc1112_file opal_hwloc_hwloc1112_save_cflags CPPFLAGS_save LIBS_save]) + + # default to this component not providing support + opal_hwloc_hwloc1112_basedir=opal/mca/hwloc/hwloc1112 + opal_hwloc_hwloc1112_support=no + + if test "$with_hwloc" = "internal" || test -z "$with_hwloc" || test "$with_hwloc" = "yes"; then + opal_hwloc_hwloc1112_save_CPPFLAGS=$CPPFLAGS + opal_hwloc_hwloc1112_save_LDFLAGS=$LDFLAGS + opal_hwloc_hwloc1112_save_LIBS=$LIBS + + # Run the hwloc configuration - set the prefix to minimize + # the chance that someone will use the internal symbols + HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1112_]) + + # save XML, graphical, and static/shared/plugin options + 
opal_hwloc_hwloc1112_save_cairo=$enable_cairo + opal_hwloc_hwloc1112_save_xml=$enable_xml + opal_hwloc_hwloc1112_save_static=$enable_static + opal_hwloc_hwloc1112_save_shared=$enable_shared + opal_hwloc_hwloc1112_save_plugins=$enable_plugins + + # never enable hwloc's graphical option + enable_cairo=no + + # never enable hwloc's plugin system + enable_plugins=no + enable_static=yes + enable_shared=no + + # Override -- disable hwloc's libxml2 support, but enable the + # native hwloc XML support + enable_libxml2=no + enable_xml=yes + + # hwloc checks for compiler visibility, and it needs to do + # this without "picky" flags. + opal_hwloc_hwloc1112_save_cflags=$CFLAGS + CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY + HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1112/hwloc], + [AC_MSG_CHECKING([whether hwloc configure succeeded]) + AC_MSG_RESULT([yes]) + HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1112_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1112_basedir/hwloc/VERSION`" + + # Build flags for our Makefile.am + opal_hwloc_hwloc1112_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' + opal_hwloc_hwloc1112_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1112_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' + opal_hwloc_hwloc1112_support=yes + + AC_DEFINE_UNQUOTED([HWLOC_HWLOC1112_HWLOC_VERSION], + ["$HWLOC_VERSION"], + [Version of hwloc]) + + # Do we have verbs support? 
+ CPPFLAGS_save=$CPPFLAGS + AS_IF([test "$opal_want_verbs" = "yes"], + [CPPFLAGS="-I$opal_verbs_dir/include $CPPFLAGS"]) + AC_CHECK_HEADERS([infiniband/verbs.h]) + CPPFLAGS=$CPPFLAGS_save + ], + [AC_MSG_CHECKING([whether hwloc configure succeeded]) + AC_MSG_RESULT([no]) + opal_hwloc_hwloc1112_support=no]) + CFLAGS=$opal_hwloc_hwloc1112_save_cflags + + # Restore some env variables, if necessary + AS_IF([test -n "$opal_hwloc_hwloc1112_save_cairo"], + [enable_cairo=$opal_hwloc_hwloc1112_save_cairo]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_xml"], + [enable_xml=$opal_hwloc_hwloc1112_save_xml]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_static"], + [enable_static=$opal_hwloc_hwloc1112_save_static]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_shared"], + [enable_shared=$opal_hwloc_hwloc1112_save_shared]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_plugins"], + [enable_plugins=$opal_hwloc_hwloc1112_save_plugins]) + + CPPFLAGS=$opal_hwloc_hwloc1112_save_CPPFLAGS + LDFLAGS=$opal_hwloc_hwloc1112_save_LDFLAGS + LIBS=$opal_hwloc_hwloc1112_save_LIBS + + AC_SUBST([opal_hwloc_hwloc1112_CFLAGS]) + AC_SUBST([opal_hwloc_hwloc1112_CPPFLAGS]) + AC_SUBST([opal_hwloc_hwloc1112_LDFLAGS]) + AC_SUBST([opal_hwloc_hwloc1112_LIBS]) + + # Finally, add some flags to the wrapper compiler so that our + # headers can be found. + hwloc_hwloc1112_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" + hwloc_hwloc1112_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" + hwloc_hwloc1112_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/'"$opal_hwloc_hwloc1112_basedir/hwloc/include" + fi + + # Done! 
+ AS_IF([test "$opal_hwloc_hwloc1112_support" = "yes"], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/hwloc/hwloc1111/hwloc/AUTHORS b/opal/mca/hwloc/hwloc1112/hwloc/AUTHORS similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/AUTHORS rename to opal/mca/hwloc/hwloc1112/hwloc/AUTHORS diff --git a/opal/mca/hwloc/hwloc1111/hwloc/COPYING b/opal/mca/hwloc/hwloc1112/hwloc/COPYING similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/COPYING rename to opal/mca/hwloc/hwloc1112/hwloc/COPYING diff --git a/opal/mca/hwloc/hwloc1111/hwloc/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/Makefile.am rename to opal/mca/hwloc/hwloc1112/hwloc/Makefile.am diff --git a/opal/mca/hwloc/hwloc1111/hwloc/NEWS b/opal/mca/hwloc/hwloc1112/hwloc/NEWS similarity index 95% rename from opal/mca/hwloc/hwloc1111/hwloc/NEWS rename to opal/mca/hwloc/hwloc1112/hwloc/NEWS index 9638d63f0c2..28db7162783 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/NEWS +++ b/opal/mca/hwloc/hwloc1112/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2015 Inria. All rights reserved. +Copyright © 2009-2016 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. @@ -17,6 +17,54 @@ bug fixes (and other actions) for each version of hwloc since version in v0.9.1). +Version 1.11.3 +-------------- +* Fix /proc/mounts parsing on Linux by using mntent.h. + Thanks to Nathan Hjelm for reporting the issue. + + +Version 1.11.2 +-------------- +* Improve support for Intel Knights Landing Xeon Phi on Linux: + + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory + (MCDRAM) together through "Cluster" groups so that the local MCDRAM is + easy to find. + - See "How do I find the local MCDRAM NUMA node on Intel Knights + Landing Xeon Phi?" in the documentation. 
+ - For uniformity across all KNL configurations, always have a NUMA node + object even if the host is UMA. + + Fix the detection of the memory-side cache: + - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information + into /var/run/hwloc/ as root during boot, and load this dumped + information from the hwloc library at runtime. + - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights + Landing Xeon Phi?" in the documentation. + Thanks to Grzegorz Andrejczuk for the patches and for the help. +* The x86 and linux backends may now be combined for discovering CPUs + through x86 CPUID and memory from the Linux kernel. + This is useful for working around buggy CPU information reported by Linux + (for instance the AMD Bulldozer/Piledriver bug below). + Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment. +* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer) + in the x86 backend. Thanks to many users who helped. +* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1 + for AMD Opteron 61xx (Magny-Cours) processors. +* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches + it discovers, or to caches discovered by other backends earlier. + Thanks to Guillaume Beauchamp for the patch. +* Fix the management on alloc_membind() allocation failures on AIX, HP-UX + and OSF/Tru64. +* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects + below PUs. +* lstopo improvements in X11 and Windows graphical mode: + + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale, + or fit the entire window. + + Display all keyboard shortcuts in the console. +* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0 + in the environment when --enable-debug was passed to configure. +* Add a FAQ entry "What are these Group objects in my topology?". 
+ + Version 1.11.1 -------------- * Detection fixes diff --git a/opal/mca/hwloc/hwloc1111/hwloc/README b/opal/mca/hwloc/hwloc1112/hwloc/README similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/README rename to opal/mca/hwloc/hwloc1112/hwloc/README index 6332fbce930..592d459f842 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/README +++ b/opal/mca/hwloc/hwloc1112/hwloc/README @@ -25,9 +25,9 @@ using PLPA has already switched to hwloc. hwloc supports the following operating systems: * Linux (including old kernels not having sysfs topology information, with - knowledge of cpusets, offline CPUs, ScaleMP vSMP, NumaScale NumaConnect, - and Kerrighed support) on all supported hardware, including Intel Xeon Phi - (either standalone or as a coprocessor). + knowledge of cpusets, offline CPUs, ScaleMP vSMP and Kerrighed support) on + all supported hardware, including Intel Xeon Phi (KNL and KNC, either + standalone or as a coprocessor) and NumaScale NumaConnect. * Solaris * AIX * Darwin / OS X diff --git a/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt b/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt new file mode 100644 index 00000000000..78bf7af6f05 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt @@ -0,0 +1,3 @@ +Cherry-picked commits after 1.11.2: + +open-mpi/hwloc@d2d07b9a2268699e13e1644b4f2ef7a53ef7396c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/VERSION b/opal/mca/hwloc/hwloc1112/hwloc/VERSION similarity index 96% rename from opal/mca/hwloc/hwloc1111/hwloc/VERSION rename to opal/mca/hwloc/hwloc1112/hwloc/VERSION index b1361597cf2..c809f10a05a 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/VERSION +++ b/opal/mca/hwloc/hwloc1112/hwloc/VERSION @@ -7,7 +7,7 @@ major=1 minor=11 -release=1 +release=2 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. 
It does not have to be @@ -20,7 +20,7 @@ greek= # The date when this release was created -date="Oct 15, 2015" +date="Dec 17, 2015" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -39,4 +39,4 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=11:7:6 +libhwloc_so_version=11:8:6 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc1112/hwloc/config/distscript.sh similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/distscript.sh rename to opal/mca/hwloc/hwloc1112/hwloc/config/distscript.sh diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 index e364e6c0e92..1b4f490591e 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc.m4 +++ b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 @@ -1213,6 +1213,7 @@ AC_DEFUN([HWLOC_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([HWLOC_HAVE_WINDOWS], [test "x$hwloc_windows" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_MINGW32], [test "x$target_os" = "xmingw32"]) + AM_CONDITIONAL([HWLOC_HAVE_X86], [test "x$hwloc_x86_32" = "xyes" -o "x$hwloc_x86_64" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_32], [test "x$hwloc_x86_32" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_64], [test "x$hwloc_x86_64" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_CPUID], [test "x$hwloc_have_x86_cpuid" = "xyes"]) diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_attributes.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4 diff --git 
a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_vendor.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_check_visibility.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_components.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_get_version.sh similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_get_version.sh rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_get_version.sh diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_internal.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_internal.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_internal.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_internal.m4 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_pkg.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/hwloc_pkg.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4 diff --git a/opal/mca/hwloc/hwloc1111/hwloc/config/test-driver b/opal/mca/hwloc/hwloc1112/hwloc/config/test-driver similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/config/test-driver rename to opal/mca/hwloc/hwloc1112/hwloc/config/test-driver diff 
--git a/opal/mca/hwloc/hwloc1111/hwloc/configure.ac b/opal/mca/hwloc/hwloc1112/hwloc/configure.ac similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/configure.ac rename to opal/mca/hwloc/hwloc1112/hwloc/configure.ac diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/hwloc-valgrind.supp b/opal/mca/hwloc/hwloc1112/hwloc/contrib/hwloc-valgrind.supp similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/hwloc-valgrind.supp rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/hwloc-valgrind.supp diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/README b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/README similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/README rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/README diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-annotate.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-annotate.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-assembler.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-assembler.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters 
b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-bind.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-bind.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-bind.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-bind.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-calc.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-calc.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-calc.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-calc.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-diff.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-diff.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj diff --git 
a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-diff.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-diff.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distances.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distances.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distances.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distances.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distrib.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distrib.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-info.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj similarity index 100% rename from 
opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-info.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-info.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-info.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-patch.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-patch.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-patch.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc-patch.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc.sln b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc.sln similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc.sln rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc.sln diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc_config.h b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc_config.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/hwloc_config.h rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc_config.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/libhwloc.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj similarity index 100% rename from 
opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/libhwloc.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/libhwloc.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/libhwloc.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-no-graphics.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-no-graphics.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-win.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-win.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-win.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo-win.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo.vcxproj 
b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo.vcxproj rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj.filters similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/lstopo.vcxproj.filters rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj.filters diff --git a/opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/private_config.h b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/private_config.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/contrib/windows/private_config.h rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/private_config.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/doc/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/doc/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/doc/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/doc/README.txt diff --git a/opal/mca/hwloc/hwloc1111/hwloc/hwloc.pc.in b/opal/mca/hwloc/hwloc1112/hwloc/hwloc.pc.in similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/hwloc.pc.in rename to opal/mca/hwloc/hwloc1112/hwloc/hwloc.pc.in diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/include/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/Makefile.am rename to opal/mca/hwloc/hwloc1112/hwloc/include/Makefile.am diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h index 6220dcb7439..deb5141fc7e 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc.h +++ 
b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h @@ -211,6 +211,7 @@ typedef enum { * expose their arbitrary processors aggregation this * way. And hwloc may insert such objects to group * NUMA nodes according to their distances. + * See also \ref faq_groups. * * These objects are ignored when they do not bring * any structure. diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/autogen/config.h.in b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/autogen/config.h.in similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/autogen/config.h.in rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/autogen/config.h.in diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/bitmap.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/bitmap.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/bitmap.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/bitmap.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/cuda.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cuda.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/cuda.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cuda.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/cudart.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cudart.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/cudart.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cudart.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/deprecated.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/deprecated.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/deprecated.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/deprecated.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/diff.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/diff.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/diff.h rename to 
opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/diff.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/gl.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/gl.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/gl.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/gl.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/glibc-sched.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/glibc-sched.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/glibc-sched.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/glibc-sched.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/helper.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/helper.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/helper.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/helper.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/inlines.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/inlines.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/inlines.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/inlines.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/intel-mic.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/intel-mic.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/intel-mic.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/intel-mic.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/linux-libnuma.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux-libnuma.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/linux-libnuma.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux-libnuma.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/linux.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/linux.h rename to 
opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/myriexpress.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/myriexpress.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/myriexpress.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/myriexpress.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/nvml.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/nvml.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/nvml.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/nvml.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/opencl.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/opencl.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/opencl.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/opencl.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/openfabrics-verbs.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/openfabrics-verbs.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/openfabrics-verbs.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/openfabrics-verbs.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/plugins.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/plugins.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/plugins.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/plugins.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/rename.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/rename.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h index 496504a9f06..27a6f9d12c6 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/include/hwloc/rename.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. 
All rights reserved. - * Copyright © 2010-2014 Inria. All rights reserved. + * Copyright © 2010-2015 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -510,6 +510,7 @@ extern "C" { /* private/debug.h */ +#define hwloc_debug_enabled HWLOC_NAME(debug_enabled) #define hwloc_debug HWLOC_NAME(debug) /* private/misc.h */ diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/autogen/config.h.in b/opal/mca/hwloc/hwloc1112/hwloc/include/private/autogen/config.h.in similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/autogen/config.h.in rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/autogen/config.h.in diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/components.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/components.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/components.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/components.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/cpuid-x86.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/cpuid-x86.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/cpuid-x86.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/cpuid-x86.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/debug.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h similarity index 66% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/debug.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h index 4de91bf8ae8..2038a4cfe5e 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/include/private/debug.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2012 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009, 2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -18,36 +18,57 @@ #include #endif +#ifdef HWLOC_DEBUG +static __hwloc_inline int hwloc_debug_enabled(void) +{ + static int checked = 0; + static int enabled = 1; + if (!checked) { + const char *env = getenv("HWLOC_DEBUG_VERBOSE"); + if (env) + enabled = atoi(env); + if (enabled) + fprintf(stderr, "hwloc verbose debug enabled, may be disabled with HWLOC_DEBUG_VERBOSE=0 in the environment.\n"); + checked = 1; + } + return enabled; +} +#endif + static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) { #ifdef HWLOC_DEBUG + if (hwloc_debug_enabled()) { va_list ap; - va_start(ap, s); vfprintf(stderr, s, ap); va_end(ap); + } #endif } #ifdef HWLOC_DEBUG #define hwloc_debug_bitmap(fmt, bitmap) do { \ +if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ fprintf(stderr, fmt, s); \ free(s); \ -} while (0) +} } while (0) #define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \ +if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ fprintf(stderr, fmt, arg1, s); \ free(s); \ -} while (0) +} } while (0) #define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \ +if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ fprintf(stderr, fmt, arg1, arg2, s); \ free(s); \ -} while (0) +} } while (0) #else #define hwloc_debug_bitmap(s, bitmap) do { } while(0) #define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0) diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/misc.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/misc.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/misc.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/private.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/private.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/private.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/private.h 
diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/solaris-chiptype.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/solaris-chiptype.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/solaris-chiptype.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/solaris-chiptype.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/include/private/xml.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/xml.h similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/include/private/xml.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/xml.h diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/src/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/Makefile.am rename to opal/mca/hwloc/hwloc1112/hwloc/src/Makefile.am diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/base64.c b/opal/mca/hwloc/hwloc1112/hwloc/src/base64.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/base64.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/base64.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/bind.c b/opal/mca/hwloc/hwloc1112/hwloc/src/bind.c similarity index 94% rename from opal/mca/hwloc/hwloc1111/hwloc/src/bind.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/bind.c index a0cbfd8a3a6..afef5e8f0db 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/bind.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/bind.c @@ -74,9 +74,13 @@ hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set, int flags if (topology->binding_hooks.set_thisthread_cpubind) return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); } else { - if (topology->binding_hooks.set_thisproc_cpubind) - return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); - else if (topology->binding_hooks.set_thisthread_cpubind) + if (topology->binding_hooks.set_thisproc_cpubind) { + int err = topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); + 
if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_cpubind) return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); } @@ -94,9 +98,13 @@ hwloc_get_cpubind(hwloc_topology_t topology, hwloc_bitmap_t set, int flags) if (topology->binding_hooks.get_thisthread_cpubind) return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); } else { - if (topology->binding_hooks.get_thisproc_cpubind) - return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); - else if (topology->binding_hooks.get_thisthread_cpubind) + if (topology->binding_hooks.get_thisproc_cpubind) { + int err = topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_cpubind) return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); } @@ -164,9 +172,13 @@ hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t set, int f if (topology->binding_hooks.get_thisthread_last_cpu_location) return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); } else { - if (topology->binding_hooks.get_thisproc_last_cpu_location) - return topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); - else if (topology->binding_hooks.get_thisthread_last_cpu_location) + if (topology->binding_hooks.get_thisproc_last_cpu_location) { + int err = topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_last_cpu_location) return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); } @@ -272,9 +284,13 @@ hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodes if (topology->binding_hooks.set_thisthread_membind) return 
topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); } else { - if (topology->binding_hooks.set_thisproc_membind) - return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); - else if (topology->binding_hooks.set_thisthread_membind) + if (topology->binding_hooks.set_thisproc_membind) { + int err = topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_membind) return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); } @@ -307,9 +323,13 @@ hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hw if (topology->binding_hooks.get_thisthread_membind) return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); } else { - if (topology->binding_hooks.get_thisproc_membind) - return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); - else if (topology->binding_hooks.get_thisthread_membind) + if (topology->binding_hooks.get_thisproc_membind) { + int err = topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_membind) return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); } diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/bitmap.c b/opal/mca/hwloc/hwloc1112/hwloc/src/bitmap.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/bitmap.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/bitmap.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/components.c b/opal/mca/hwloc/hwloc1112/hwloc/src/components.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/components.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/components.c diff --git 
a/opal/mca/hwloc/hwloc1111/hwloc/src/diff.c b/opal/mca/hwloc/hwloc1112/hwloc/src/diff.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/diff.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/diff.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/distances.c b/opal/mca/hwloc/hwloc1112/hwloc/src/distances.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/distances.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/distances.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/dolib.c b/opal/mca/hwloc/hwloc1112/hwloc/src/dolib.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/dolib.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/dolib.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/hwloc.dtd b/opal/mca/hwloc/hwloc1112/hwloc/src/hwloc.dtd similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/hwloc.dtd rename to opal/mca/hwloc/hwloc1112/hwloc/src/hwloc.dtd diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/misc.c b/opal/mca/hwloc/hwloc1112/hwloc/src/misc.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/misc.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/misc.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/pci-common.c b/opal/mca/hwloc/hwloc1112/hwloc/src/pci-common.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/pci-common.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/pci-common.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-aix.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-aix.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c index 1b98ba6b92b..37812f3ab81 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-aix.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c @@ -585,7 +585,7 @@ hwloc_aix_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodes ret = ra_mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 
-1, 0, R_RSET, rsid, aix_policy); rs_free(rsid.at_rset); - return ret; + return ret == (void*)-1 ? NULL : ret; } #endif /* P_DEFAULT */ diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-bgq.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-bgq.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-bgq.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-bgq.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-cuda.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-cuda.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-cuda.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-cuda.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-custom.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-custom.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-custom.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-custom.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-darwin.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-darwin.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-darwin.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-darwin.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-fake.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-fake.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-fake.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-fake.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-freebsd.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-freebsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-freebsd.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-freebsd.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-gl.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-gl.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-gl.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-gl.c diff 
--git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-hardwired.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hardwired.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-hardwired.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-hardwired.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-hpux.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c similarity index 98% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-hpux.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c index 44a4a4c41aa..44258cc2a2a 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-hpux.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c @@ -142,6 +142,7 @@ static void* hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { int mmap_flags; + void *p; /* Can not give a set of nodes. */ if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) { @@ -165,7 +166,8 @@ hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_node return NULL; } - return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0); + p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0); + return p == MAP_FAILED ? NULL : p; } #endif /* MAP_MEM_FIRST_TOUCH */ diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-linux.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c similarity index 93% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-linux.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c index b1199d35197..c76f97c2506 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-linux.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. 
* Copyright © 2009-2013, 2015 Université Bordeaux * Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright © 2015 Intel, Inc. All rights reserved. @@ -36,18 +36,21 @@ #include #include #include +#include #if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND #define migratepages migrate_pages /* workaround broken migratepages prototype in numaif.h before libnuma 2.0.2 */ #include #endif struct hwloc_linux_backend_data_s { + char *root_path; /* NULL if unused */ int root_fd; /* The file descriptor for the file system root, used when browsing, e.g., Linux' sysfs and procfs. */ int is_real_fsroot; /* Boolean saying whether root_fd points to the real filesystem root of the system */ #ifdef HAVE_LIBUDEV_H struct udev *udev; /* Global udev context */ #endif - + char *dumped_hwdata_dirname; + int is_knl; struct utsname utsname; /* fields contain \0 when unknown */ int deprecated_classlinks_model; /* -2 if never tried, -1 if unknown, 0 if new (device contains class/name), 1 if old (device contains class:name) */ @@ -1651,96 +1654,40 @@ hwloc_parse_cpumap(const char *mappath, int fsroot_fd) return set; } -static char * -hwloc_strdup_mntpath(const char *escapedpath, size_t length) -{ - char *path = malloc(length+1); - const char *src = escapedpath, *tmp; - char *dst = path; - - while ((tmp = strchr(src, '\\')) != NULL) { - strncpy(dst, src, tmp-src); - dst += tmp-src; - if (!strncmp(tmp+1, "040", 3)) - *dst = ' '; - else if (!strncmp(tmp+1, "011", 3)) - *dst = ' '; - else if (!strncmp(tmp+1, "012", 3)) - *dst = '\n'; - else - *dst = '\\'; - dst++; - src = tmp+4; - } - - strcpy(dst, src); - - return path; -} - static void -hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, int fsroot_fd) +hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const char *root_path) { -#define PROC_MOUNT_LINE_LEN 512 - char line[PROC_MOUNT_LINE_LEN]; + char *mount_path; + struct mntent *mntent; FILE *fd; + int err; 
*cgroup_mntpnt = NULL; *cpuset_mntpnt = NULL; - /* ideally we should use setmntent, getmntent, hasmntopt and endmntent, - * but they do not support fsroot_fd. - */ - - fd = hwloc_fopen("/proc/mounts", "r", fsroot_fd); + if (root_path) { + /* setmntent() doesn't support openat(), so use the root_path directly */ + err = asprintf(&mount_path, "%s/proc/mounts", root_path); + if (err < 0) + return; + fd = setmntent(mount_path, "r"); + free(mount_path); + } else { + fd = setmntent("/proc/mounts", "r"); + } if (!fd) return; - while (fgets(line, sizeof(line), fd)) { - char *path; - char *type; - char *tmp; - - /* remove the ending " 0 0\n" that the kernel always adds */ - tmp = line + strlen(line) - 5; - if (tmp < line || strcmp(tmp, " 0 0\n")) - fprintf(stderr, "Unexpected end of /proc/mounts line `%s'\n", line); - else - *tmp = '\0'; - - /* path is after first field and a space */ - tmp = strchr(line, ' '); - if (!tmp) - continue; - path = tmp+1; - - /* type is after path, which may not contain spaces since the kernel escaped them to \040 - * (see the manpage of getmntent) */ - tmp = strchr(path, ' '); - if (!tmp) - continue; - type = tmp+1; - /* mark the end of path to ease upcoming strdup */ - *tmp = '\0'; - - if (!strncmp(type, "cpuset ", 7)) { - /* found a cpuset mntpnt */ - hwloc_debug("Found cpuset mount point on %s\n", path); - *cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path); + while ((mntent = getmntent(fd)) != NULL) { + if (!strcmp(mntent->mnt_type, "cpuset")) { + hwloc_debug("Found cpuset mount point on %s\n", mntent->mnt_dir); + *cpuset_mntpnt = strdup(mntent->mnt_dir); break; - - } else if (!strncmp(type, "cgroup ", 7)) { + } else if (!strcmp(mntent->mnt_type, "cgroup")) { /* found a cgroup mntpnt */ - char *opt, *opts; + char *opt, *opts = mntent->mnt_opts; int cpuset_opt = 0; int noprefix_opt = 0; - - /* find options */ - tmp = strchr(type, ' '); - if (!tmp) - continue; - opts = tmp+1; - /* look at options */ while ((opt = strsep(&opts, ",")) != 
NULL) { if (!strcmp(opt, "cpuset")) @@ -1750,19 +1697,18 @@ hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, int f } if (!cpuset_opt) continue; - if (noprefix_opt) { - hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", path); - *cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path); + hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", mntent->mnt_dir); + *cpuset_mntpnt = strdup(mntent->mnt_dir); } else { - hwloc_debug("Found cgroup/cpuset mount point on %s\n", path); - *cgroup_mntpnt = hwloc_strdup_mntpath(path, type-path); + hwloc_debug("Found cgroup/cpuset mount point on %s\n", mntent->mnt_dir); + *cgroup_mntpnt = strdup(mntent->mnt_dir); } break; } } - fclose(fd); + endmntent(fd); } /* @@ -2801,6 +2747,96 @@ look_powerpc_device_tree(struct hwloc_topology *topology, free(cpus.p); } +/* Try to add memory-side caches for KNL. + * Returns 0 on success and -1 otherwise */ +static int hwloc_linux_try_add_knl_mcdram_caches(hwloc_topology_t topology, struct hwloc_linux_backend_data_s *data, hwloc_obj_t *nodes, unsigned nbnodes) +{ + char *knl_cache_file; + long long int cache_size = -1; + int associativity = -1; + int inclusiveness = -1; + int line_size = -1; + unsigned i; + FILE *f; + char buffer[512] = {0}; + char *data_beg = NULL; + char *data_end = NULL; + + if (asprintf(&knl_cache_file, "%s/knl_memoryside_cache", data->dumped_hwdata_dirname) < 0) + return -1; + + hwloc_debug("Reading knl cache data from: %s\n", knl_cache_file); + f = hwloc_fopen(knl_cache_file, "r", data->root_fd); + if (!f) { + hwloc_debug("Unable to open KNL data file `%s' (%s)\n", knl_cache_file, strerror(errno)); + free(knl_cache_file); + return -1; + } + free(knl_cache_file); + + data_beg = &buffer[0]; + data_end = data_beg + fread(buffer, 1, sizeof(buffer), f); + + /* file must start with version information, only 1 accepted for now */ + if (strncmp("version: 1\n", data_beg, strlen("version: 1\n"))) { + fprintf(stderr, "Invalid 
knl_memoryside_cache header, expected \"version: 1\".\n"); + fclose(f); + return -1; + } + data_beg += strlen("version: 1\n"); + + while (data_beg < data_end) { + char *line_end = strstr(data_beg, "\n"); + if (!line_end) + break; + if (!strncmp("cache_size:", data_beg, strlen("cache_size"))) { + sscanf(data_beg, "cache_size: %lld", &cache_size); + hwloc_debug("read cache_size=%lld\n", cache_size); + } else if (!strncmp("line_size:", data_beg, strlen("line_size:"))) { + sscanf(data_beg, "line_size: %d", &line_size); + hwloc_debug("read line_size=%d\n", line_size); + } else if (!strncmp("inclusiveness:", data_beg, strlen("inclusiveness:"))) { + sscanf(data_beg, "inclusiveness: %d", &inclusiveness); + hwloc_debug("read inclusiveness=%d\n", inclusiveness); + } else if (!strncmp("associativity:", data_beg, strlen("associativity:"))) { + sscanf(data_beg, "associativity: %d\n", &associativity); + hwloc_debug("read associativity=%d\n", associativity); + } + data_beg += line_end - data_beg +1; + } + + fclose(f); + + if (line_size == -1 || cache_size == -1 || associativity == -1 || inclusiveness == -1) { + hwloc_debug("Incorrect file format line_size=%d cache_size=%lld associativity=%d inclusiveness=%d\n", + line_size, cache_size, associativity, inclusiveness); + return -1; + } + + for(i=0; icpuset)) + /* one L3 per DDR, none for MCDRAM nodes */ + continue; + + cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + if (!cache) + return -1; + + cache->attr->cache.depth = 3; + cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + cache->attr->cache.associativity = associativity; + hwloc_obj_add_info(cache, "Inclusive", inclusiveness ? 
"1" : "0"); + cache->attr->cache.size = cache_size; + cache->attr->cache.linesize = line_size; + cache->cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); + hwloc_obj_add_info(cache, "Type", "MemorySideCache"); + hwloc_insert_object_by_cpuset(topology, cache); + } + return 0; +} + /************************************** @@ -2838,18 +2874,17 @@ look_sysfsnode(struct hwloc_topology *topology, else return -1; - if (nbnodes <= 1) - { - hwloc_bitmap_free(nodeset); - return 0; - } + if (!nbnodes || (nbnodes == 1 && !data->is_knl)) { /* always keep NUMA for KNL, or configs might look too different */ + hwloc_bitmap_free(nodeset); + return 0; + } /* For convenience, put these declarations inside a block. */ { hwloc_obj_t * nodes = calloc(nbnodes, sizeof(hwloc_obj_t)); unsigned *indexes = calloc(nbnodes, sizeof(unsigned)); - float * distances; + float * distances = NULL; int failednodes = 0; unsigned index_; @@ -2886,6 +2921,7 @@ look_sysfsnode(struct hwloc_topology *topology, char nodepath[SYSFS_NUMA_NODE_PATH_LEN]; hwloc_bitmap_t cpuset; hwloc_obj_t node, res_obj; + int annotate; osnode = indexes[index_]; @@ -2897,32 +2933,43 @@ look_sysfsnode(struct hwloc_topology *topology, continue; } - node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, osnode); - node->cpuset = cpuset; - node->nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(node->nodeset, osnode); - + node = hwloc_get_numanode_obj_by_os_index(topology, osnode); + annotate = (node != NULL); + if (!annotate) { + /* create a new node */ + node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, osnode); + node->cpuset = cpuset; + node->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(node->nodeset, osnode); + } hwloc_sysfs_node_meminfo_info(topology, data, path, osnode, &node->memory); hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", osnode, node->cpuset); - res_obj = hwloc_insert_object_by_cpuset(topology, node); - if (node == res_obj) { + + if (annotate) { nodes[index_] = node; } else { - /* We got merged somehow, 
could be a buggy BIOS reporting wrong NUMA node cpuset. - * This object disappeared, we'll ignore distances */ - failednodes++; + res_obj = hwloc_insert_object_by_cpuset(topology, node); + if (node == res_obj) { + nodes[index_] = node; + } else { + /* We got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset. + * This object disappeared, we'll ignore distances */ + failednodes++; + } } } + if (!failednodes && data->is_knl) + hwloc_linux_try_add_knl_mcdram_caches(topology, data, nodes, nbnodes); + if (failednodes) { /* failed to read/create some nodes, don't bother reading/fixing * a distance matrix that would likely be wrong anyway. */ nbnodes -= failednodes; - distances = NULL; - } else { + } else if (nbnodes > 1) { distances = calloc(nbnodes*nbnodes, sizeof(float)); } @@ -2944,6 +2991,46 @@ look_sysfsnode(struct hwloc_topology *topology, hwloc_parse_node_distance(nodepath, nbnodes, distances+index_*nbnodes, data->root_fd); } + if (data->is_knl) { + char *env = getenv("HWLOC_KNL_NUMA_QUIRK"); + if (!(env && !atoi(env)) && nbnodes>=2) { /* SNC2 or SNC4, with 0 or 2/4 MCDRAM, and 0-4 DDR nodes */ + unsigned i, j, closest; + for(i=0; icpuset)) + /* nodes with CPU, that's DDR, skip it */ + continue; + hwloc_obj_add_info(nodes[i], "Type", "MCDRAM"); + + /* DDR is the closest node with CPUs */ + closest = (unsigned)-1; + for(j=0; jcpuset)) + /* nodes without CPU, that's another MCDRAM, skip it */ + continue; + if (closest == (unsigned)-1 || distances[i*nbnodes+j]cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); + cluster->nodeset = hwloc_bitmap_dup(nodes[i]->nodeset); + hwloc_bitmap_or(cluster->cpuset, cluster->cpuset, nodes[closest]->cpuset); + hwloc_bitmap_or(cluster->nodeset, cluster->nodeset, nodes[closest]->nodeset); + hwloc_obj_add_info(cluster, "Type", "Cluster"); + hwloc_insert_object_by_cpuset(topology, cluster); + } + } + /* drop the distance matrix, it contradicts the above NUMA layout groups */ + free(distances); + free(nodes); + 
free(indexes); + goto out; + } + } + hwloc_distances_set(topology, HWLOC_OBJ_NUMANODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */); } @@ -3224,6 +3311,11 @@ look_sysfscpu(struct hwloc_topology *topology, kB = atol(str2); /* in kB */ fclose(fd); } + /* KNL reports L3 with size=0 and full cpuset in cpuid. + * Let hwloc_linux_try_add_knl_mcdram_cache() detect it better. + */ + if (!kB && depth == 2 && data->is_knl) + continue; /* get the line size */ sprintf(mappath, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); @@ -3459,6 +3551,7 @@ hwloc_linux_parse_cpuinfo_generic(const char *prefix, const char *value, return 0; } +/* Lprocs_p set to NULL unless returns > 0 */ static int hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data, const char *path, @@ -3606,6 +3699,7 @@ hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data, fclose(fd); free(str); free(Lprocs); + *Lprocs_p = NULL; return -1; } @@ -3625,18 +3719,13 @@ hwloc_linux_free_cpuinfo(struct hwloc_linux_cpuinfo_proc * Lprocs, unsigned nump static int look_cpuinfo(struct hwloc_topology *topology, - struct hwloc_linux_backend_data_s *data, - const char *path, hwloc_bitmap_t online_cpuset) + struct hwloc_linux_cpuinfo_proc * Lprocs, + unsigned numprocs, hwloc_bitmap_t online_cpuset) { - struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; - struct hwloc_obj_info_s *global_infos = NULL; - unsigned global_infos_count = 0; /* P for physical/OS index, L for logical (e.g. 
in we order we get them, not in the final hwloc logical order) */ unsigned *Lcore_to_Pcore; unsigned *Lcore_to_Ppkg; /* needed because Lcore is equivalent to Pcore+Ppkg, not to Pcore alone */ unsigned *Lpkg_to_Ppkg; - int _numprocs; - unsigned numprocs; unsigned numpkgs=0; unsigned numcores=0; unsigned long Lproc; @@ -3645,20 +3734,6 @@ look_cpuinfo(struct hwloc_topology *topology, unsigned i,j; hwloc_bitmap_t cpuset; - /* parse the entire cpuinfo first, fill the Lprocs array and numprocs */ - _numprocs = hwloc_linux_parse_cpuinfo(data, path, &Lprocs, &global_infos, &global_infos_count); - - - /* setup root info */ - hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, - &global_infos, &global_infos_count); - - - if (_numprocs <= 0) - /* found no processor */ - return -1; - numprocs = _numprocs; - /* initialize misc arrays, there can be at most numprocs entries */ Lcore_to_Pcore = malloc(numprocs * sizeof(*Lcore_to_Pcore)); Lcore_to_Ppkg = malloc(numprocs * sizeof(*Lcore_to_Ppkg)); @@ -3713,7 +3788,7 @@ look_cpuinfo(struct hwloc_topology *topology, * provide bogus information. We should rather drop it. */ missingpkg=0; for(j=0; jlevels[0][0]->cpuset) - /* somebody discovered things */ - return 0; + already_pus = (topology->levels[0][0]->complete_cpuset != NULL + && !hwloc_bitmap_iszero(topology->levels[0][0]->complete_cpuset)); + /* if there are PUs, still look at memory information + * since x86 misses NUMA node information (unless the processor supports topoext) + * memory size. 
+ */ + /* allocate root sets in case not done yet */ + hwloc_alloc_obj_cpusets(topology->levels[0][0]); + + /********************************* + * Platform information for later + */ hwloc_gather_system_info(topology, data); - hwloc_alloc_obj_cpusets(topology->levels[0][0]); + /********************** + * /proc/cpuinfo + */ + numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, &global_infos, &global_infos_count); - /* Gather the list of admin-disabled cpus and mems */ - hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_fd); + /* detect models for quirks */ + if (numprocs > 0) { + /* KNL */ + if (!strncmp(data->utsname.machine, "x86", 3)) { /* supports 32bits? */ + unsigned i; + const char *cpuvendor = NULL, *cpufamilynumber = NULL, *cpumodelnumber = NULL; + for(i=0; iis_knl = 1; + } + } + + /********************** + * Gather the list of admin-disabled cpus and mems + */ + hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_path); if (cgroup_mntpnt || cpuset_mntpnt) { cpuset_name = hwloc_read_linux_cpuset_name(data->root_fd, topology->pid); if (cpuset_name) { @@ -3982,6 +4094,10 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) hwloc_obj_t machine; hwloc_bitmap_t machine_online_set; + if (already_pus) + /* we don't support extending kerrighed topologies */ + return 0; + /* replace top-level object type with SYSTEM and add some MACHINE underneath */ topology->levels[0][0]->type = HWLOC_OBJ_SYSTEM; @@ -3990,13 +4106,19 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) /* No cpuset support for now. */ /* No sys support for now. 
*/ while ((dirent = readdir(nodes_dir)) != NULL) { + struct hwloc_linux_cpuinfo_proc * machine_Lprocs = NULL; + struct hwloc_obj_info_s *machine_global_infos = NULL; + unsigned machine_global_infos_count = 0; + int machine_numprocs = 0; unsigned long node; if (strncmp(dirent->d_name, "node", 4)) continue; machine_online_set = hwloc_bitmap_alloc(); node = strtoul(dirent->d_name+4, NULL, 0); snprintf(path, sizeof(path), "/proc/nodes/node%lu/cpuinfo", node); - err = look_cpuinfo(topology, data, path, machine_online_set); + machine_numprocs = hwloc_linux_parse_cpuinfo(data, path, &machine_Lprocs, &machine_global_infos, &machine_global_infos_count); + err = look_cpuinfo(topology, machine_Lprocs, machine_numprocs, machine_online_set); + hwloc_linux_free_cpuinfo(machine_Lprocs, machine_numprocs, machine_global_infos, machine_global_infos_count); if (err < 0) { hwloc_bitmap_free(machine_online_set); continue; @@ -4018,6 +4140,10 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) } closedir(nodes_dir); } else { + /********************* + * Memory information + */ + /* Get the machine memory attributes */ hwloc_get_procfs_meminfo_info(topology, data, &topology->levels[0][0]->memory); @@ -4034,11 +4160,23 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) topology->levels[0][0]->memory.page_types[i].count = 0; } + /********************** + * CPU information + */ + + /* Don't rediscover CPU resources if already done */ + if (already_pus) + goto done; + /* Gather the list of cpus now */ err = hwloc_linux_try_hardwired_cpuinfo(backend); if (!err) goto done; + /* setup root info */ + hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, + &global_infos, &global_infos_count); + if (getenv("HWLOC_LINUX_USE_CPUINFO") || (hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0 && hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, data->root_fd) < 0 @@ -4046,27 +4184,28 @@ 
hwloc_look_linuxfs(struct hwloc_backend *backend) && hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0)) { /* revert to reading cpuinfo only if /sys/.../topology unavailable (before 2.6.16) * or not containing anything interesting */ - err = look_cpuinfo(topology, data, "/proc/cpuinfo", topology->levels[0][0]->online_cpuset); + if (numprocs > 0) + err = look_cpuinfo(topology, Lprocs, numprocs, topology->levels[0][0]->online_cpuset); + else + err = -1; if (err < 0) hwloc_linux_fallback_pu_level(topology); + look_powerpc_device_tree(topology, data); } else { - struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; - struct hwloc_obj_info_s *global_infos = NULL; - unsigned global_infos_count = 0; - int numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, &global_infos, &global_infos_count); - if (numprocs <= 0) - Lprocs = NULL; + /* sysfs */ if (look_sysfscpu(topology, data, "/sys/bus/cpu/devices", Lprocs, numprocs) < 0) if (look_sysfscpu(topology, data, "/sys/devices/system/cpu", Lprocs, numprocs) < 0) /* sysfs but we failed to read cpu topology, fallback */ hwloc_linux_fallback_pu_level(topology); - hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, - &global_infos, &global_infos_count); - hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count); } done: + + /********************** + * Misc + */ + /* Gather DMI info */ hwloc__get_dmi_id_info(data, topology->levels[0][0]); if (hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)) @@ -4084,6 +4223,7 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) /* data->utsname was filled with real uname or \0, we can safely pass it */ hwloc_add_uname_info(topology, &data->utsname); + hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count); return 1; } @@ -4464,20 +4604,30 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, if 
(!dev) return; prop = udev_device_get_property_value(dev, "ID_VENDOR"); - if (prop) - strcpy(vendor, prop); + if (prop) { + strncpy(vendor, prop, sizeof(vendor)); + vendor[sizeof(vendor)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_MODEL"); - if (prop) - strcpy(model, prop); + if (prop) { + strncpy(model, prop, sizeof(model)); + model[sizeof(model)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_REVISION"); - if (prop) - strcpy(revision, prop); + if (prop) { + strncpy(revision, prop, sizeof(revision)); + revision[sizeof(revision)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_SERIAL_SHORT"); - if (prop) - strcpy(serial, prop); + if (prop) { + strncpy(serial, prop, sizeof(serial)); + serial[sizeof(serial)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_TYPE"); - if (prop) - strcpy(blocktype, prop); + if (prop) { + strncpy(blocktype, prop, sizeof(blocktype)); + blocktype[sizeof(blocktype)-1] = '\0'; + } udev_device_unref(dev); } else @@ -4952,6 +5102,8 @@ hwloc_linux_backend_disable(struct hwloc_backend *backend) { struct hwloc_linux_backend_data_s *data = backend->private_data; #ifdef HAVE_OPENAT + if (data->root_path) + free(data->root_path); close(data->root_fd); #endif #ifdef HAVE_LIBUDEV_H @@ -4983,13 +5135,16 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, } backend->private_data = data; + backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS; backend->discover = hwloc_look_linuxfs; backend->get_obj_cpuset = hwloc_linux_backend_get_obj_cpuset; backend->notify_new_object = hwloc_linux_backend_notify_new_object; backend->disable = hwloc_linux_backend_disable; /* default values */ + data->is_knl = 0; data->is_real_fsroot = 1; + data->root_path = NULL; if (!fsroot_path) fsroot_path = "/"; @@ -5001,6 +5156,7 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, if (strcmp(fsroot_path, "/")) { backend->is_thissystem = 0; data->is_real_fsroot = 0; + 
data->root_path = strdup(fsroot_path); } /* Since this fd stays open after hwloc returns, mark it as @@ -5028,6 +5184,10 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, } #endif + data->dumped_hwdata_dirname = getenv("HWLOC_DUMPED_HWDATA_DIR"); + if (!data->dumped_hwdata_dirname) + data->dumped_hwdata_dirname = "/var/run/hwloc/"; + data->deprecated_classlinks_model = -2; /* never tried */ data->mic_need_directlookup = -1; /* not initialized */ data->mic_directlookup_id_max = -1; /* not initialized */ @@ -5035,6 +5195,10 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, return backend; out_with_data: +#ifdef HAVE_OPENAT + if (data->root_path) + free(data->root_path); +#endif free(data); out_with_backend: free(backend); diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-netbsd.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-netbsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-netbsd.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-netbsd.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-noos.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-noos.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-noos.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-noos.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-nvml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-nvml.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-nvml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-nvml.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-opencl.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-opencl.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-opencl.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-opencl.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-osf.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c similarity index 99% rename from 
opal/mca/hwloc/hwloc1111/hwloc/src/topology-osf.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c index 57158883d67..b403d1343fc 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-osf.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -234,7 +234,7 @@ hwloc_osf_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodes ptr = nmmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0, &mattr); radsetdestroy(&mattr.mattr_radset); - return ptr; + return ptr == MAP_FAILED ? NULL : ptr; } static int diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-pci.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-pci.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-pci.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-pci.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-solaris-chiptype.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris-chiptype.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-solaris-chiptype.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris-chiptype.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-solaris.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-solaris.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-synthetic.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-synthetic.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-synthetic.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-synthetic.c diff --git 
a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-windows.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-windows.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-windows.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-windows.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c similarity index 88% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-x86.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c index 624b7bf50f9..ab6de7c9343 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c @@ -26,6 +26,7 @@ struct hwloc_x86_backend_data_s { unsigned nbprocs; hwloc_bitmap_t apicid_set; int apicid_unique; + int is_knl; }; #define has_topoext(features) ((features)[6] & (1 << 22)) @@ -35,9 +36,11 @@ struct cacheinfo { unsigned type; unsigned level; unsigned nbthreads_sharing; + unsigned cacheid; unsigned linesize; unsigned linepart; + int inclusive; int ways; unsigned sets; unsigned long size; @@ -99,6 +102,8 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, uns cache->nbthreads_sharing = infos->max_log_proc; cache->linesize = cpuid & 0xff; cache->linepart = 0; + cache->inclusive = 0; /* old AMD (K8-K10) supposed to have exclusive caches */ + if (level == 1) { cache->ways = (cpuid >> 16) & 0xff; if (cache->ways == 0xff) @@ -112,20 +117,6 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, uns cache->size = size; cache->sets = 0; - if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9 - && level == 3 - && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) { - /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores. - * The L3 (and its associativity) is actually split into two halves). 
- */ - if (cache->nbthreads_sharing == 16) - cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */ - cache->nbthreads_sharing /= 2; - cache->size /= 2; - if (cache->ways != -1) - cache->ways /= 2; - } - hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); } @@ -183,6 +174,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } infos->cpustepping = eax & 0xf; + if (cpuid_type == intel && infos->cpufamilynumber == 0x6 && infos->cpumodelnumber == 0x57) + data->is_knl = 1; + /* Get cpu vendor string from cpuid 0x00 */ memset(regs, 0, sizeof(regs)); regs[0] = 0; @@ -297,6 +291,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns cache->ways = ways; cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); @@ -331,6 +326,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns * (not supported on AMD) */ if (cpuid_type != amd && highest_cpuid >= 0x04) { + unsigned level; for (cachenum = 0; ; cachenum++) { unsigned type; eax = 0x04; @@ -343,6 +339,10 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns if (type == 0) break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; infos->numcaches++; if (!cachenum) { @@ -369,9 +369,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns if (type == 0) break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 
3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; cache->type = type; - cache->level = (eax >> 5) & 0x7; + cache->level = level; cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; cache->linesize = linesize = (ebx & 0xfff) + 1; @@ -384,6 +388,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns cache->ways = ways; cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); @@ -441,6 +446,48 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } } + /* Now that we have all info, compute cacheids and apply quirks */ + for (cachenum = 0; cachenum < infos->numcaches; cachenum++) { + struct cacheinfo *cache = &infos->cache[cachenum]; + + /* default cacheid value */ + cache->cacheid = infos->apicid / cache->nbthreads_sharing; + + /* AMD quirk */ + if (cpuid_type == amd + && infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9 + && cache->level == 3 + && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) { + /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores. + * The L3 (and its associativity) is actually split into two halves). + */ + if (cache->nbthreads_sharing == 16) + cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */ + cache->nbthreads_sharing /= 2; + cache->size /= 2; + if (cache->ways != -1) + cache->ways /= 2; + /* AMD Magny-Cours 12-cores processor reserve APIC ids as AAAAAABBBBBB.... + * among first L3 (A), second L3 (B), and unexisting cores (.). 
+ * On multi-socket servers, L3 in non-first sockets may have APIC id ranges + * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6). + * That means, we can't just compare apicid/nbthreads_sharing to identify siblings. + */ + cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */ + + } else if (cpuid_type == amd + && infos->cpufamilynumber == 0x15 + && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */) + && cache->level == 3 && cache->nbthreads_sharing == 6) { + /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above, + * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here. + */ + cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / infos->max_log_proc); /* add 2 cache per previous package */ + } + } + if (hwloc_bitmap_isset(data->apicid_set, infos->apicid)) data->apicid_unique = 0; else @@ -631,6 +678,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid); unit->cpuset = unit_cpuset; + hwloc_obj_add_info(unit, "Type", "ComputeUnit"); hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n", unitid, unit_cpuset); hwloc_insert_object_by_cpuset(topology, unit); @@ -728,18 +776,14 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int for (j = 0; j < infos[i].numcaches; j++) if (infos[i].cache[j].level > level) level = infos[i].cache[j].level; - - /* Look for known types */ - if (fulldiscovery) while (level > 0) { + while (level > 0) { for (type = 1; type <= 3; type++) { /* Look for caches of that type at level level */ { hwloc_bitmap_t caches_cpuset = hwloc_bitmap_dup(complete_cpuset); 
- hwloc_bitmap_t cache_cpuset; hwloc_obj_t cache; while ((i = hwloc_bitmap_first(caches_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; for (l = 0; l < infos[i].numcaches; l++) { if (infos[i].cache[l].level == level && infos[i].cache[l].type == type) @@ -751,17 +795,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int continue; } - /* Found a matching cache, now look for others sharing it */ - { - /* AMD Magny-Cours 12-cores processor reserve APIC ids as AAAAAABBBBBB.... - * among first L3 (A), second L3 (B), and unexisting cores (.). - * On multi-socket servers, L3 in non-first sockets may have APIC id ranges - * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6). - * That means, we can't just compare apicid/nbthreads_sharing to identify siblings. - * Hence we use apicid%max_log_proc instead of apicid to restore the alignment. - * Works because we also compare packageid to identify siblings. - */ - unsigned cacheid = (infos[i].apicid % infos[i].max_log_proc) / infos[i].cache[l].nbthreads_sharing; + if (fulldiscovery) { + /* Add caches */ + hwloc_bitmap_t cache_cpuset; + unsigned packageid = infos[i].packageid; + unsigned cacheid = infos[i].cache[l].cacheid; + /* Found a matching cache, now look for others sharing it */ cache_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { @@ -775,7 +814,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_clr(caches_cpuset, j); continue; } - if (infos[j].packageid == packageid && (infos[j].apicid % infos[j].max_log_proc) / infos[j].cache[l2].nbthreads_sharing == cacheid) { + if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) { hwloc_bitmap_set(cache_cpuset, j); hwloc_bitmap_clr(caches_cpuset, j); } @@ -797,9 +836,31 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int break; } cache->cpuset = cache_cpuset; + hwloc_obj_add_info(cache, 
"Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n", level, cacheid, cache_cpuset); hwloc_insert_object_by_cpuset(topology, cache); + + } else { + /* Annotate existing caches */ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + hwloc_obj_t cache = NULL; + int depth; + hwloc_bitmap_set(set, i); + depth = hwloc_get_cache_type_depth(topology, level, + type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED); + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, NULL); + hwloc_bitmap_free(set); + if (cache) { + /* Found cache above that PU, annotate if no such attribute yet */ + if (!hwloc_obj_get_info_by_name(cache, "Inclusive")) + hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); + hwloc_bitmap_andnot(caches_cpuset, caches_cpuset, cache->cpuset); + } else { + /* No cache above that PU?! */ + hwloc_bitmap_clr(caches_cpuset, i); + } } } hwloc_bitmap_free(caches_cpuset); @@ -981,6 +1042,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (highest_cpuid >= 0x7) { eax = 0x7; + ecx = 0; hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); features[9] = ebx; } @@ -1108,6 +1170,7 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, backend->disable = hwloc_x86_backend_disable; /* default values */ + data->is_knl = 0; data->apicid_set = hwloc_bitmap_alloc(); data->apicid_unique = 1; diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml-libxml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c similarity index 99% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml-libxml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c index ce3250c2850..46fe4aec292 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml-libxml.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c @@ -257,7 +257,8 @@ 
hwloc_libxml_import_diff(struct hwloc__xml_import_state_s *state, const char *xm if (state->global->next_attr(state, &attrname, &attrvalue) < 0) break; if (!strcmp(attrname, "refname")) { - free(refname); + if (refname) + free(refname); refname = strdup(attrvalue); } else goto out_with_doc; @@ -266,13 +267,15 @@ hwloc_libxml_import_diff(struct hwloc__xml_import_state_s *state, const char *xm ret = hwloc__xml_import_diff(state, firstdiffp); if (refnamep && !ret) *refnamep = refname; - else + else if (refname) free(refname); xmlFreeDoc(doc); return ret; out_with_doc: + if (refname) + free(refname); xmlFreeDoc(doc); out: return -1; /* failed */ diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml-nolibxml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-nolibxml.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml-nolibxml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-nolibxml.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology-xml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/topology.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology.c similarity index 96% rename from opal/mca/hwloc/hwloc1111/hwloc/src/topology.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology.c index a25baf8cd56..028c226bdb0 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology.c @@ -349,9 +349,8 @@ void hwloc_obj_add_info_nodup(hwloc_obj_t obj, const char *name, const char *val /* Get pointer to next childect. */ \ child = *pchild) -/* Free an object and all its content. 
*/ -void -hwloc_free_unlinked_object(hwloc_obj_t obj) +static void +hwloc__free_object_contents(hwloc_obj_t obj) { switch (obj->type) { default: @@ -370,9 +369,34 @@ hwloc_free_unlinked_object(hwloc_obj_t obj) hwloc_bitmap_free(obj->nodeset); hwloc_bitmap_free(obj->complete_nodeset); hwloc_bitmap_free(obj->allowed_nodeset); +} + +/* Free an object and all its content. */ +void +hwloc_free_unlinked_object(hwloc_obj_t obj) +{ + hwloc__free_object_contents(obj); free(obj); } +/* Replace old with contents of new object, and make new freeable by the caller. + * Only updates next_sibling/first_child pointers, + * so may only be used during early discovery. + */ +static void +hwloc_replace_linked_object(hwloc_obj_t old, hwloc_obj_t new) +{ + /* drop old fields */ + hwloc__free_object_contents(old); + /* copy old tree pointers to new */ + new->next_sibling = old->next_sibling; + new->first_child = old->first_child; + /* copy new contents to old now that tree pointers are OK */ + memcpy(old, new, sizeof(*old)); + /* clear new to that we may free it */ + memset(new, 0,sizeof(*new)); +} + /* insert the (non-empty) list of sibling starting at firstnew as new children of newparent, * and return the address of the pointer to the next one */ @@ -753,21 +777,56 @@ static int hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) { hwloc_bitmap_t set1, set2; + int res = HWLOC_OBJ_DIFFERENT; - /* compare cpusets if possible, or fallback to nodeset, or return */ - if (obj1->cpuset && !hwloc_bitmap_iszero(obj1->cpuset) - && obj2->cpuset && !hwloc_bitmap_iszero(obj2->cpuset)) { + /* compare cpusets first */ + if (obj1->complete_cpuset && obj2->complete_cpuset) { + set1 = obj1->complete_cpuset; + set2 = obj2->complete_cpuset; + } else { set1 = obj1->cpuset; set2 = obj2->cpuset; - } else if (obj1->nodeset && !hwloc_bitmap_iszero(obj1->nodeset) - && obj2->nodeset && !hwloc_bitmap_iszero(obj2->nodeset)) { + } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { 
+ res = hwloc_bitmap_compare_inclusion(set1, set2); + if (res == HWLOC_OBJ_INTERSECTS) + return HWLOC_OBJ_INTERSECTS; + } + + /* then compare nodesets, and combine the results */ + if (obj1->complete_nodeset && obj2->complete_nodeset) { + set1 = obj1->complete_nodeset; + set2 = obj2->complete_nodeset; + } else { set1 = obj1->nodeset; set2 = obj2->nodeset; - } else { - return HWLOC_OBJ_DIFFERENT; } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { + int noderes = hwloc_bitmap_compare_inclusion(set1, set2); + /* deal with conflicting cpusets/nodesets inclusions */ + if (noderes == HWLOC_OBJ_INCLUDED) { + if (res == HWLOC_OBJ_CONTAINS) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_INCLUDED; - return hwloc_bitmap_compare_inclusion(set1, set2); + } else if (noderes == HWLOC_OBJ_CONTAINS) { + if (res == HWLOC_OBJ_INCLUDED) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_CONTAINS; + + } else if (noderes == HWLOC_OBJ_INTERSECTS) { + return HWLOC_OBJ_INTERSECTS; + + } else { + /* nodesets are different, keep the cpuset order */ + /* FIXME: with upcoming multiple levels of NUMA, we may have to report INCLUDED or CONTAINED here */ + + } + } + + return res; } /* Compare object cpusets based on complete_cpuset if defined (always correctly ordered), @@ -847,9 +906,7 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) &new->infos, &new->infos_count); } - if (new->name) { - if (old->name) - free(old->name); + if (new->name && !old->name) { old->name = new->name; new->name = NULL; } @@ -858,21 +915,17 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) switch(new->type) { case HWLOC_OBJ_NUMANODE: - /* Do not check these, it may change between calls */ - merge_sizes(new, old, memory.local_memory); - merge_sizes(new, old, memory.total_memory); - /* if both newects have a page_types array, just keep the biggest one for now */ - if 
(new->memory.page_types_len && old->memory.page_types_len) - hwloc_debug("%s", "merging page_types by keeping the biggest one only\n"); - if (new->memory.page_types_len < old->memory.page_types_len) { - free(new->memory.page_types); - } else { - free(old->memory.page_types); + if (new->memory.local_memory && !old->memory.local_memory) { + /* no memory in old, use new memory */ + old->memory.local_memory = new->memory.local_memory; + if (old->memory.page_types) + free(old->memory.page_types); old->memory.page_types_len = new->memory.page_types_len; old->memory.page_types = new->memory.page_types; new->memory.page_types = NULL; new->memory.page_types_len = 0; } + /* old->memory.total_memory will be updated by propagate_total_memory() */ break; case HWLOC_OBJ_CACHE: merge_sizes(new, old, attr->cache.size); @@ -922,8 +975,18 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur assert(topology->ignored_types[HWLOC_OBJ_GROUP] != HWLOC_IGNORE_TYPE_NEVER); /* Remove the Group now. The normal ignore code path wouldn't tell us whether the Group was removed or not. * - * Keep EQUAL so that the Group gets merged. + * The Group doesn't contain anything to keep, just let the caller free it. */ + return child; + + } else if (child->type == HWLOC_OBJ_GROUP) { + + /* Replace the Group with the new object contents + * and let the caller free the new object + */ + hwloc_replace_linked_object(child, obj); + return child; + } else { /* otherwise compare actual types to decide of the inclusion */ res = hwloc_type_cmp(obj, child); @@ -950,7 +1013,9 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur } return NULL; } - /* Can be two objects with same type. Or one Group and anything else. */ + /* Two objects with same type. + * Groups are handled above. 
+ */ if (obj->type == child->type && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE) && obj->os_index != child->os_index) { @@ -2129,6 +2194,12 @@ hwloc_level_filter_objects(hwloc_topology_t topology, /* count interesting objects and allocate the new array */ for(i=0, nnew=0; itype_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN; /* initialize root type depth */ topology->type_depth[topology->levels[0][0]->type] = 0; @@ -2205,17 +2276,14 @@ hwloc_connect_levels(hwloc_topology_t topology) topology->bridge_level = NULL; topology->bridge_nbobjects = 0; topology->first_bridge = topology->last_bridge = NULL; - topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; free(topology->pcidev_level); topology->pcidev_level = NULL; topology->pcidev_nbobjects = 0; topology->first_pcidev = topology->last_pcidev = NULL; - topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; free(topology->osdev_level); topology->osdev_level = NULL; topology->osdev_nbobjects = 0; topology->first_osdev = topology->last_osdev = NULL; - topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; /* Start with children of the whole system. 
*/ n_objs = topology->levels[0][0]->arity; @@ -2343,13 +2411,20 @@ hwloc_connect_levels(hwloc_topology_t topology) void hwloc_alloc_obj_cpusets(hwloc_obj_t obj) { - obj->cpuset = hwloc_bitmap_alloc_full(); - obj->complete_cpuset = hwloc_bitmap_alloc(); - obj->online_cpuset = hwloc_bitmap_alloc_full(); - obj->allowed_cpuset = hwloc_bitmap_alloc_full(); - obj->nodeset = hwloc_bitmap_alloc(); - obj->complete_nodeset = hwloc_bitmap_alloc(); - obj->allowed_nodeset = hwloc_bitmap_alloc_full(); + if (!obj->cpuset) + obj->cpuset = hwloc_bitmap_alloc_full(); + if (!obj->complete_cpuset) + obj->complete_cpuset = hwloc_bitmap_alloc(); + if (!obj->online_cpuset) + obj->online_cpuset = hwloc_bitmap_alloc_full(); + if (!obj->allowed_cpuset) + obj->allowed_cpuset = hwloc_bitmap_alloc_full(); + if (!obj->nodeset) + obj->nodeset = hwloc_bitmap_alloc(); + if (!obj->complete_nodeset) + obj->complete_nodeset = hwloc_bitmap_alloc(); + if (!obj->allowed_nodeset) + obj->allowed_nodeset = hwloc_bitmap_alloc_full(); } /* Main discovery loop */ @@ -2595,6 +2670,7 @@ void hwloc_topology_setup_defaults(struct hwloc_topology *topology) { struct hwloc_obj *root_obj; + unsigned l; /* reset support */ memset(&topology->binding_hooks, 0, sizeof(topology->binding_hooks)); @@ -2615,6 +2691,12 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) topology->first_bridge = topology->last_bridge = NULL; topology->first_pcidev = topology->last_pcidev = NULL; topology->first_osdev = topology->last_osdev = NULL; + /* sane values to type_depth */ + for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_MISC; l++) + topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN; + topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; + topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; + topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; /* Create the actual machine object, but don't touch its attributes yet * since the OS backend may still change the object into 
something else diff --git a/opal/mca/hwloc/hwloc1111/hwloc/src/traversal.c b/opal/mca/hwloc/hwloc1112/hwloc/src/traversal.c similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/src/traversal.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/traversal.c diff --git a/opal/mca/hwloc/hwloc1111/hwloc/tests/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/tests/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/tests/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/tests/README.txt diff --git a/opal/mca/hwloc/hwloc1111/hwloc/utils/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/utils/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1111/hwloc/utils/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/utils/README.txt diff --git a/opal/mca/hwloc/hwloc1111/hwloc1111.h b/opal/mca/hwloc/hwloc1112/hwloc1112.h similarity index 83% rename from opal/mca/hwloc/hwloc1111/hwloc1111.h rename to opal/mca/hwloc/hwloc1112/hwloc1112.h index 7b5c2f41240..8b20bf623b9 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc1111.h +++ b/opal/mca/hwloc/hwloc1112/hwloc1112.h @@ -1,6 +1,9 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * * $COPYRIGHT$ * @@ -13,8 +16,8 @@ * this header represents the public interface to this static component. 
*/ -#ifndef MCA_OPAL_HWLOC_HWLOC1111_H -#define MCA_OPAL_HWLOC_HWLOC1111_H +#ifndef MCA_OPAL_HWLOC_HWLOC1112_H +#define MCA_OPAL_HWLOC_HWLOC1112_H BEGIN_C_DECLS @@ -42,4 +45,4 @@ BEGIN_C_DECLS END_C_DECLS -#endif /* MCA_OPAL_HWLOC_HWLOC1111_H */ +#endif /* MCA_OPAL_HWLOC_HWLOC1112_H */ diff --git a/opal/mca/hwloc/hwloc1111/hwloc1111_component.c b/opal/mca/hwloc/hwloc1112/hwloc1112_component.c similarity index 81% rename from opal/mca/hwloc/hwloc1111/hwloc1111_component.c rename to opal/mca/hwloc/hwloc1112/hwloc1112_component.c index f40ff00258d..81177662763 100644 --- a/opal/mca/hwloc/hwloc1111/hwloc1111_component.c +++ b/opal/mca/hwloc/hwloc1112/hwloc1112_component.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ @@ -22,20 +22,20 @@ #include "opal/constants.h" #include "opal/mca/hwloc/hwloc.h" -#include "hwloc1111.h" +#include "hwloc1112.h" /* * Public string showing the sysinfo ompi_linux component version number */ -const char *opal_hwloc_hwloc1111_component_version_string = - "OPAL hwloc1111 hwloc MCA component version " OPAL_VERSION; +const char *opal_hwloc_hwloc1112_component_version_string = + "OPAL hwloc1112 hwloc MCA component version " OPAL_VERSION; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -const opal_hwloc_component_t mca_hwloc_hwloc1111_component = { +const opal_hwloc_component_t mca_hwloc_hwloc1112_component = { /* First, the mca_component_t struct containing meta information about the component itself */ @@ -44,7 +44,7 @@ const opal_hwloc_component_t mca_hwloc_hwloc1111_component = { OPAL_HWLOC_BASE_VERSION_2_0_0, /* Component name and version */ - .mca_component_name = "hwloc1111", + .mca_component_name = 
"hwloc1112", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION), }, diff --git a/opal/mca/hwloc/hwloc1111/owner.txt b/opal/mca/hwloc/hwloc1112/owner.txt similarity index 100% rename from opal/mca/hwloc/hwloc1111/owner.txt rename to opal/mca/hwloc/hwloc1112/owner.txt diff --git a/opal/mca/if/bsdx_ipv4/configure.m4 b/opal/mca/if/bsdx_ipv4/configure.m4 index 8a1c97b91f6..d572cc44d70 100644 --- a/opal/mca/if/bsdx_ipv4/configure.m4 +++ b/opal/mca/if/bsdx_ipv4/configure.m4 @@ -1,6 +1,8 @@ # -*- shell-script -*- # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -30,7 +32,10 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv4_CONFIG], [ AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([NetBSD, FreeBSD, OpenBSD, or DragonFly]) - AS_IF([test "$opal_found_netbsd" = "yes" -o "$opal_found_freebsd" = "yes" -o "$opal_found_openbsd" = "yes" -o "$opal_found_dragonfly" = "yes"], + AS_IF([test "$opal_found_netbsd" = "yes" || \ + test "$opal_found_freebsd" = "yes" || \ + test "$opal_found_openbsd" = "yes" || \ + test "$opal_found_dragonfly" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/bsdx_ipv6/configure.m4 b/opal/mca/if/bsdx_ipv6/configure.m4 index 361e9a32ae4..4b2122f7012 100644 --- a/opal/mca/if/bsdx_ipv6/configure.m4 +++ b/opal/mca/if/bsdx_ipv6/configure.m4 @@ -1,6 +1,8 @@ # -*- shell-script -*- # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -30,7 +32,12 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv6_CONFIG], [ AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([some flavor of BSD]) - AS_IF([test "$opal_found_netbsd" = "yes" -o "$opal_found_freebsd" = "yes" -o "$opal_found_openbsd" = "yes" -o "$opal_found_386bsd" = "yes" -o "$opal_found_bsdi" = "yes" -o "$opal_found_apple" = "yes"], + AS_IF([test "$opal_found_netbsd" = "yes" || \ + test "$opal_found_freebsd" = "yes" || \ + test "$opal_found_openbsd" = "yes" || \ + test "$opal_found_386bsd" = "yes" || \ + test "$opal_found_bsdi" = "yes" || + test "$opal_found_apple" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/linux_ipv6/configure.m4 b/opal/mca/if/linux_ipv6/configure.m4 index fc9dc8b27d4..583d59e93fe 100644 --- a/opal/mca/if/linux_ipv6/configure.m4 +++ b/opal/mca/if/linux_ipv6/configure.m4 @@ -1,6 +1,8 @@ # -*- shell-script -*- # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -25,7 +27,7 @@ AC_DEFUN([MCA_opal_if_linux_ipv6_CONFIG], [ AC_MSG_CHECKING([if we are on Linux with TCP]) # If we have struct sockaddr and we're on Linux, then we're # happy. - AS_IF([test "$opal_found_sockaddr" = "yes" -a "$opal_found_linux" = "yes"], + AS_IF([test "$opal_found_sockaddr" = "yes" && test "$opal_found_linux" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/posix_ipv4/configure.m4 b/opal/mca/if/posix_ipv4/configure.m4 index 33faf57fc40..73548efb985 100644 --- a/opal/mca/if/posix_ipv4/configure.m4 +++ b/opal/mca/if/posix_ipv4/configure.m4 @@ -1,6 +1,8 @@ # -*- shell-script -*- # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). 
All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -33,7 +35,7 @@ AC_DEFUN([MCA_opal_if_posix_ipv4_CONFIG], [ AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([not NetBSD, FreeBSD, OpenBSD, or DragonFly]) - AS_IF([test "$opal_found_netbsd" = "no" -a "$opal_found_freebsd" = "no" -a "$opal_found_openbsd" = "no" -a "$opal_found_dragonfly" = "no"], + AS_IF([test "$opal_found_netbsd" = "no" && test "$opal_found_freebsd" = "no" && test "$opal_found_openbsd" = "no" && test "$opal_found_dragonfly" = "no"], [AC_MSG_RESULT([yes]) opal_if_posix_ipv4_happy=yes], [AC_MSG_RESULT([no])] diff --git a/opal/mca/memchecker/configure.m4 b/opal/mca/memchecker/configure.m4 index 3462e1a5088..3ccd3786adb 100644 --- a/opal/mca/memchecker/configure.m4 +++ b/opal/mca/memchecker/configure.m4 @@ -3,6 +3,8 @@ dnl dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -43,10 +45,10 @@ AC_DEFUN([MCA_opal_memchecker_CONFIG],[ # first, compile all the components MCA_CONFIGURE_FRAMEWORK($1, $2, 1) - AS_IF([test "$MCA_opal_memchecker_STATIC_COMPONENTS" != "" -o "$MCA_opal_memchecker_DSO_COMPONENTS" != ""], + AS_IF([test "$MCA_opal_memchecker_STATIC_COMPONENTS" != "" || test "$MCA_opal_memchecker_DSO_COMPONENTS" != ""], [memchecker_base_found=1], [memchecker_base_found=0]) - AS_IF([test $WANT_MEMCHECKER -eq 1 -a $memchecker_base_found -eq 0], + AS_IF([test $WANT_MEMCHECKER -eq 1 && test $memchecker_base_found -eq 0], [AC_MSG_WARN([Memchecker support requested, but no memchecker]) AC_MSG_WARN([components configured successfully. 
Did you]) AC_MSG_WARN([forget --with-valgrind?]) diff --git a/opal/mca/memchecker/valgrind/configure.m4 b/opal/mca/memchecker/valgrind/configure.m4 index 874ac7250a0..28ddccbc533 100644 --- a/opal/mca/memchecker/valgrind/configure.m4 +++ b/opal/mca/memchecker/valgrind/configure.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -35,7 +37,7 @@ AC_DEFUN([MCA_opal_memchecker_valgrind_CONFIG],[ opal_memchecker_valgrind_save_CPPFLAGS="$CPPFLAGS" opal_memchecker_valgrind_happy=no AS_IF([test "$with_valgrind" != "no"], - [AS_IF([test ! -z "$with_valgrind" -a "$with_valgrind" != "yes"], + [AS_IF([test -n "$with_valgrind" && test "$with_valgrind" != "yes"], [opal_memchecker_valgrind_CPPFLAGS="-I$with_valgrind/include" # We need this -I to stay in CPPFLAGS when we're done CPPFLAGS="$CPPFLAGS -I$with_valgrind/include" @@ -60,7 +62,7 @@ AC_DEFUN([MCA_opal_memchecker_valgrind_CONFIG],[ CPPFLAGS="$opal_memchecker_valgrind_save_CPPFLAGS" # If we specifically requested this component and can't build it, error - AS_IF([test "$with_valgrind" != "no" -a "$with_valgrind" != "" -a "$opal_memchecker_valgrind_happy" != "yes"], + AS_IF([test "$with_valgrind" != "no" && test -n "$with_valgrind" && test "$opal_memchecker_valgrind_happy" != "yes"], [AC_MSG_ERROR([Cannot continue])]) AS_IF([test "$opal_memchecker_valgrind_happy" = "yes"], diff --git a/opal/mca/memory/linux/configure.m4 b/opal/mca/memory/linux/configure.m4 index 1ecfbc9fb3a..e44786e1eb3 100644 --- a/opal/mca/memory/linux/configure.m4 +++ b/opal/mca/memory/linux/configure.m4 @@ -11,6 +11,8 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -55,12 +57,48 @@ AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ memory_linux_ummu_happy=yes memory_linux_requested=1], [memory_linux_requested=0 - AS_IF([test "$with_memory_manager" = "" -o "$with_memory_manager" = "yes"], + AS_IF([test -z "$with_memory_manager" || test "$with_memory_manager" = "yes"], [memory_linux_ptmalloc2_happy=yes memory_linux_ummu_happy=yes], [memory_linux_ptmalloc2_happy=no memory_linux_ummu_happy=no])]) + + ###################################################################### + # if memory hook available + ###################################################################### + memory_hook_found=1 + AS_IF([test "$memory_hook_found" -eq 1], + [memory_hook_found=0 AC_CHECK_HEADER([malloc.h], + [AC_CHECK_FUNC([__malloc_initialize_hook], + [AC_CHECK_FUNC([__malloc_hook], + [AC_CHECK_FUNC([__realloc_hook], + [AC_CHECK_FUNC([__free_hook], + [memory_hook_found=1])])])])])]) + AC_MSG_CHECKING([whether the system can use malloc hooks]) + AS_IF([test "$memory_hook_found" = "0"], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([yes])]) + AC_DEFINE_UNQUOTED([MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT], [$memory_hook_found], + [Whether the system has Memory Allocation Hooks]) + + AC_ARG_ENABLE(memory-linux-malloc-alignment, + AC_HELP_STRING([--enable-memory-linux-malloc-alignment], [Enable support for allocated memory alignment. 
Default: enabled if supported, disabled otherwise.])) + + malloc_align_enabled=0 + AS_IF([test "$enable_memory_linux_malloc_alignment" != "no"], + [malloc_align_enabled=$memory_hook_found]) + + AS_IF([test "$enable_memory_linux_malloc_alignment" = "yes" && test "$malloc_align_enabled" = "0"], + [AC_MSG_ERROR([memory linux malloc alignment is requested but __malloc_hook is not available])]) + AC_MSG_CHECKING([whether the memory linux will use malloc alignment]) + AS_IF([test "$malloc_align_enabled" = "0"], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([yes])]) + + AC_DEFINE_UNQUOTED(MEMORY_LINUX_MALLOC_ALIGN_ENABLED, [$malloc_align_enabled], + [Whether the memory linux malloc alignment is enabled]) + ###################################################################### # ptmalloc2 ###################################################################### @@ -82,7 +120,7 @@ AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ [# check for v9.0 <= 20051201 icc_major_ver="`$CC --version | head -n 1 | awk '{ print [$]3 }'`" icc_minor_ver="`$CC --version | head -n 1 | awk '{ print [$]4 }'`" - AS_IF([test "$icc_major_ver" = "9.0" -a "`expr $icc_minor_ver \<= 20051201`" = "1"], + AS_IF([test "$icc_major_ver" = "9.0" && test "`expr $icc_minor_ver \<= 20051201`" = "1"], [memory_linux_ptmalloc2_happy=no AC_MSG_WARN([*** Detected Intel C compiler v9.0 <= 20051201 on ia64]) AC_MSG_WARN([*** This compiler/platform combination has known problems with ptmalloc2]) @@ -139,7 +177,7 @@ AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ AC_CHECK_FUNCS([dlsym]) LIBS="$memory_linux_LIBS_SAVE"]) - AS_IF([test "$memory_linux_mmap" = "0" -a "$memory_linux_munmap" = "0"], + AS_IF([test "$memory_linux_mmap" = "0" && test "$memory_linux_munmap" = "0"], [memory_linux_ptmalloc2_happy=no])]) # If all is good, save the extra libs for the wrapper @@ -180,14 +218,14 @@ AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ # post processing ###################################################################### - AS_IF([test 
"$memory_malloc_hooks_requested" = 1 -a \ - "$memory_linux_ptmalloc2_happy" = no -a \ - "$memory_linux_ummu_happy" = no], + AS_IF([test "$memory_malloc_hooks_requested" = 1 && \ + test "$memory_linux_ptmalloc2_happy" = no && \ + test "$memory_linux_ummu_happy" = no], [AC_MSG_ERROR([linux memory management requested but neither ptmalloc2 nor ummunotify are available. Aborting.])]) AC_SUBST([memory_linux_LIBS]) - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes -o \ - "$memory_linux_ummu_happy" = yes], + AS_IF([test "$memory_linux_ptmalloc2_happy" = yes || \ + test "$memory_linux_ummu_happy" = yes], [memory_base_found=1 $1], [memory_base_found=0 diff --git a/opal/mca/memory/linux/help-opal-memory-linux.txt b/opal/mca/memory/linux/help-opal-memory-linux.txt index b3327c7356a..4e16124dc0c 100644 --- a/opal/mca/memory/linux/help-opal-memory-linux.txt +++ b/opal/mca/memory/linux/help-opal-memory-linux.txt @@ -27,3 +27,10 @@ alternate memory hook manager *may* be used instead (if available). Local host: %s UMMU device: %s Error: %s (%d) +# +[invalid mca param value] +WARNING: An invalid MCA parameter value was found for memory/linux +component. 
+ + Problem: %s + Resolution: %s diff --git a/opal/mca/memory/linux/hooks.c b/opal/mca/memory/linux/hooks.c index 02fffb6b4ca..910d8e6e202 100644 --- a/opal/mca/memory/linux/hooks.c +++ b/opal/mca/memory/linux/hooks.c @@ -33,6 +33,7 @@ #include "opal/mca/mca.h" #include "opal/mca/memory/memory.h" #include "opal/constants.h" +#include "opal/memoryhooks/memory.h" #include "opal/mca/memory/linux/memory_linux.h" @@ -734,7 +735,10 @@ static check_result_t check(const char *name) } } -/* OMPI's init function */ + +/* This function is called on loading libmpi in case system has Memory Allocation Hooks + * (see ompi/runtime/ompi_mpi_init.c for details) + */ void opal_memory_linux_malloc_init_hook(void) { check_result_t r1, lp, lpp; diff --git a/opal/mca/memory/linux/memory_linux.h b/opal/mca/memory/linux/memory_linux.h index 24cb303c1f8..22685c14780 100644 --- a/opal/mca/memory/linux/memory_linux.h +++ b/opal/mca/memory/linux/memory_linux.h @@ -31,6 +31,11 @@ typedef struct opal_memory_linux_component_t { int ummunotify_fd; #endif +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + int use_memalign; + size_t memalign_threshold; +#endif + #if MEMORY_LINUX_PTMALLOC2 /* Ptmalloc2-specific data. Note that these variables are all marked as volatile. 
* This is needed because of what may be a buggy optimization in the GCC 4.9.2 @@ -64,13 +69,20 @@ int opal_memory_linux_ummunotify_close(void); /* memory_linux_ptmalloc2.c */ int opal_memory_linux_ptmalloc2_open(void); int opal_memory_linux_ptmalloc2_close(void); -OPAL_DECLSPEC void opal_memory_linux_malloc_init_hook(void); /* memory_linux_munmap.c */ OPAL_DECLSPEC int opal_memory_linux_free_ptmalloc2_munmap(void *start, size_t length, int from_alloc); OPAL_DECLSPEC int munmap(void* addr, size_t len); #endif /* !MEMORY_LINUX_PTMALLOC2 */ +#if MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT +OPAL_DECLSPEC void opal_memory_linux_malloc_init_hook(void); +#endif /* MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT */ + +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED +OPAL_DECLSPEC void opal_memory_linux_malloc_set_alignment(int use_memalign, size_t memalign_threshold); +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ + END_C_DECLS #endif diff --git a/opal/mca/memory/linux/memory_linux_component.c b/opal/mca/memory/linux/memory_linux_component.c index ad14eb98511..d5590e4b8be 100644 --- a/opal/mca/memory/linux/memory_linux_component.c +++ b/opal/mca/memory/linux/memory_linux_component.c @@ -39,12 +39,17 @@ #include "opal_config.h" +#if HAVE_MALLOC_H +#include +#endif + #include "opal/constants.h" #include "opal/mca/base/mca_base_var.h" #include "opal/mca/memory/memory.h" #include "opal/mca/memory/base/empty.h" #include "opal/memoryhooks/memory.h" #include "opal/util/output.h" +#include "opal/util/show_help.h" #include "opal/mca/memory/linux/memory_linux.h" #undef opal_memory_changed @@ -100,6 +105,26 @@ opal_memory_linux_component_t mca_memory_linux_component = { static bool ptmalloc2_available = MEMORY_LINUX_PTMALLOC2; static bool ummunotify_available = MEMORY_LINUX_UMMUNOTIFY; +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + +static void *(*prev_malloc_hook)(size_t, const void *); + +/* This is a memory allocator hook. The purpose of this is to make + * every malloc aligned. 
+ * There two basic cases here: + * + * 1. Memory manager for Open MPI is enabled. Then memalign below will + * be overridden by __memalign_hook which is set to + * opal_memory_linux_memalign_hook. Thus, _malloc_hook is going to + * use opal_memory_linux_memalign_hook. + * + * 2. No memory manager support. The memalign below is just regular glibc + * memalign which will be called through __malloc_hook instead of malloc. + */ +static void *_opal_memory_linux_malloc_align_hook(size_t sz, const void* caller); +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ + + /* * Register MCA params */ @@ -162,6 +187,53 @@ static int linux_register(void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY, &opal_memory_linux_disable); + if (0 > ret) { + return ret; + } + +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + mca_memory_linux_component.use_memalign = -1; + ret = mca_base_component_var_register(&mca_memory_linux_component.super.memoryc_version, + "memalign", + "[64 | 32 | 0] - Enable memory alignment for all malloc calls (default: disabled).", + MCA_BASE_VAR_TYPE_INT, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_memory_linux_component.use_memalign); + if (0 > ret) { + return ret; + } + + mca_memory_linux_component.memalign_threshold = 12288; + ret = mca_base_component_var_register(&mca_memory_linux_component.super.memoryc_version, + "memalign_threshold", + "Allocating memory more than memory_linux_memalign_threshold" + "bytes will automatically be aligned to the value of memory_linux_memalign bytes." 
+ "(default: 12288)", + MCA_BASE_VAR_TYPE_SIZE_T, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_memory_linux_component.memalign_threshold); + if (0 > ret) { + return ret; + } + + if (mca_memory_linux_component.use_memalign != -1 + && mca_memory_linux_component.use_memalign != 32 + && mca_memory_linux_component.use_memalign != 64 + && mca_memory_linux_component.use_memalign != 0){ + opal_show_help("help-opal-memory-linux.txt", "invalid mca param value", + true, "Wrong memalign parameter value. Allowed values: 64, 32, 0.", + "memory_linux_memalign is reset to 32"); + mca_memory_linux_component.use_memalign = 32; + } +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ return (0 > ret) ? ret : OPAL_SUCCESS; } @@ -188,7 +260,7 @@ static int linux_open(void) if (mca_memory_linux_component.verbose_level >= 10) { opal_output(0, "memory:linux: ummunotify successfully initialized; we'll use that"); } - return OPAL_SUCCESS; + goto done; } if (mca_memory_linux_component.verbose_level >= 10) { opal_output(0, "memory:linux: ummunotify failed to initialize"); @@ -206,7 +278,7 @@ static int linux_open(void) if (mca_memory_linux_component.verbose_level >= 10) { opal_output(0, "memory:linux: ptmalloc2 successfully initialized; we'll use that"); } - return OPAL_SUCCESS; + goto done; } if (mca_memory_linux_component.verbose_level >= 10) { opal_output(0, "memory:linux: ptmalloc2 failed to initialize"); @@ -222,12 +294,36 @@ static int linux_open(void) opal_output(0, "memory:linux: no memory hooks available in this process"); } return OPAL_ERR_NOT_AVAILABLE; + +done: + +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + /* save original call */ + prev_malloc_hook = NULL; + + if (mca_memory_linux_component.use_memalign > 0 && + (opal_mem_hooks_support_level() & + (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { + prev_malloc_hook = __malloc_hook; + __malloc_hook = _opal_memory_linux_malloc_align_hook; + } +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ + + 
return OPAL_SUCCESS; } static int linux_close(void) { int v = mca_memory_linux_component.verbose_level; +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED + /* restore original call */ + if (prev_malloc_hook) { + __malloc_hook = prev_malloc_hook; + prev_malloc_hook = NULL; + } +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ + #if MEMORY_LINUX_UMMUNOTIFY if (ummunotify_opened) { if (v >= 10) { @@ -249,3 +345,32 @@ static int linux_close(void) return OPAL_SUCCESS; } + +#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED +void opal_memory_linux_malloc_set_alignment(int use_memalign, size_t memalign_threshold) +{ + /* ignore cases when this capability is enabled explicitly using + * mca variables + */ + if ((NULL == prev_malloc_hook) && (-1 == mca_memory_linux_component.use_memalign)) { + if (use_memalign == 0 || use_memalign == 32 || use_memalign == 64) { + mca_memory_linux_component.use_memalign = use_memalign; + mca_memory_linux_component.memalign_threshold = memalign_threshold; + if ((opal_mem_hooks_support_level() & + (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { + prev_malloc_hook = __malloc_hook; + __malloc_hook = _opal_memory_linux_malloc_align_hook; + } + } + } +} + +static void *_opal_memory_linux_malloc_align_hook(size_t sz, const void* caller) +{ + if (sz < mca_memory_linux_component.memalign_threshold) { + return prev_malloc_hook(sz, caller); + } else { + return memalign(mca_memory_linux_component.use_memalign, sz); + } +} +#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ diff --git a/opal/mca/memory/malloc_solaris/configure.m4 b/opal/mca/memory/malloc_solaris/configure.m4 index 03f970b485e..42d42333374 100644 --- a/opal/mca/memory/malloc_solaris/configure.m4 +++ b/opal/mca/memory/malloc_solaris/configure.m4 @@ -12,6 +12,8 @@ # All rights reserved. # Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. 
+# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -93,8 +95,8 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ [Whether to use the legacy Solaris munmap prototype or not]) ]) - AS_IF([test "$memory_malloc_solaris_happy" = "no" -a \ - "$memory_malloc_solaris_should_use" = "1"], + AS_IF([test "$memory_malloc_solaris_happy" = "no" && \ + test "$memory_malloc_solaris_should_use" = "1"], [AC_MSG_ERROR([malloc_solaris memory management requested but not available. Aborting.])]) AC_SUBST(memory_malloc_solaris_LIBS) diff --git a/opal/mca/mpool/gpusm/configure.m4 b/opal/mca/mpool/gpusm/configure.m4 index fd2871a853d..52f0b765c25 100644 --- a/opal/mca/mpool/gpusm/configure.m4 +++ b/opal/mca/mpool/gpusm/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,7 +18,7 @@ AC_DEFUN([MCA_opal_mpool_gpusm_CONFIG],[ AC_CONFIG_FILES([opal/mca/mpool/gpusm/Makefile]) # Use CUDA_SUPPORT which was filled in by the opal configure code. - AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], + AS_IF([test "x$CUDA_SUPPORT" = "x1"], [$1], [$2]) diff --git a/opal/mca/mpool/rgpusm/configure.m4 b/opal/mca/mpool/rgpusm/configure.m4 index 3d887ec0bb2..8518cfc3b7c 100644 --- a/opal/mca/mpool/rgpusm/configure.m4 +++ b/opal/mca/mpool/rgpusm/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,7 +18,7 @@ AC_DEFUN([MCA_opal_mpool_rgpusm_CONFIG],[ AC_CONFIG_FILES([opal/mca/mpool/rgpusm/Makefile]) # Use CUDA_SUPPORT which was filled in by the opal configure code. 
- AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], + AS_IF([test "x$CUDA_SUPPORT" = "x1"], [$1], [$2]) diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 6aaf308c4ac..1551f8eb4d2 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -32,11 +32,17 @@ OPAL_DECLSPEC int opal_pmix_base_select(void); OPAL_DECLSPEC extern bool opal_pmix_base_allow_delayed_server; -OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err); -OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void); +OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_list_t *info, + opal_pmix_notification_fn_t errhandler, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); +OPAL_DECLSPEC void opal_pmix_base_deregister_handler(int errhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); OPAL_DECLSPEC void opal_pmix_base_errhandler(int status, opal_list_t *procs, - opal_list_t *info); + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, void *cbdata); OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info, opal_pmix_pdata_t *pdat, int timeout); diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index a0779474109..9ffba7b33be 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -21,6 +21,9 @@ #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #include "opal_stdint.h" #include "opal/class/opal_pointer_array.h" @@ -35,26 +38,40 @@ #define OPAL_PMI_PAD 10 -/******** ERRHANDLER SUPPORT ********/ -static opal_pmix_errhandler_fn_t errhandler = NULL; +/******** ERRHANDLER SUPPORT FOR COMPONENTS THAT + ******** DO NOT NATIVELY SUPPORT IT + ********/ +static opal_pmix_notification_fn_t errhandler = NULL; -void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err) +void opal_pmix_base_register_handler(opal_list_t *info, + opal_pmix_notification_fn_t err, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) { errhandler = err; + if (NULL 
!= cbfunc) { + cbfunc(OPAL_SUCCESS, 0, cbdata); + } } void opal_pmix_base_errhandler(int status, opal_list_t *procs, - opal_list_t *info) + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, void *cbdata) { if (NULL != errhandler) { - errhandler(status); + errhandler(status, procs, info, cbfunc, cbdata); } } -void opal_pmix_base_deregister_handler(void) +void opal_pmix_base_deregister_handler(int errid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { errhandler = NULL; + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); + } } struct lookup_caddy_t { diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index e1ab7666e10..e75e378d4da 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,6 +13,7 @@ #include "opal/constants.h" #include "opal/mca/mca.h" +#include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" @@ -31,12 +32,21 @@ /* Note that this initializer is important -- do not remove it! See https://github.com/open-mpi/ompi/issues/375 for details. 
*/ opal_pmix_base_module_t opal_pmix = { 0 }; -bool opal_pmix_collect_all_data = false; +bool opal_pmix_collect_all_data = true; bool opal_pmix_base_allow_delayed_server = false; int opal_pmix_verbose_output = -1; +bool opal_pmix_base_async_modex = false; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { + opal_pmix_base_async_modex = false; + (void) mca_base_var_register("opal", "pmix", "base", "async_modex", "Use asynchronous modex mode", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_base_async_modex); + opal_pmix_collect_all_data = true; + (void) mca_base_var_register("opal", "pmix", "base", "collect_data", "Collect all data during modex", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_collect_all_data); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index d4ce7acc09f..72ff4ceff53 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -6,7 +6,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -21,6 +21,7 @@ #include "opal_stdint.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/util/argv.h" #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/proc.h" @@ -535,7 +536,6 @@ static int cray_fence(opal_list_t *procs, int collect_data) opal_hwloc_locality_t locality; opal_list_t vals; char *cpuset = NULL; - opal_process_name_t pname; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:cray executing fence cache_global %p cache_local %p", @@ -543,10 +543,6 @@ static int cray_fence(opal_list_t *procs, int collect_data) (void *)mca_pmix_cray_component.cache_global, (void *)mca_pmix_cray_component.cache_local); - /* get the modex data from each local process and set the - * localities to avoid having the MPI layer fetch data - * for every process in the job */ - pname.jobid = OPAL_PROC_MY_NAME.jobid; /* * "unload" the cache_local/cache_global buffers, first copy @@ -669,8 +665,14 @@ static int cray_fence(opal_list_t *procs, int collect_data) } OPAL_LIST_DESTRUCT(&vals); - /* we only need to set locality for each local rank as "not found" - * equates to "non-local" */ + /* Get the modex data from each local process and set the + * localities to avoid having the MPI layer fetch data + * for every process in the job. 
+ * + * we only need to set locality for each local rank as "not found" + * equates to "non-local" + */ + for (i=0; i < pmix_nlranks; i++) { id.vpid = pmix_lranks[i]; id.jobid = pmix_jobid; @@ -715,7 +717,7 @@ static int cray_fence(opal_list_t *procs, int collect_data) kvn.key = strdup(OPAL_PMIX_LOCALITY); kvn.type = OPAL_UINT16; kvn.data.uint16 = locality; - opal_pmix_base_store(&pname, &kvn); + opal_pmix_base_store(&id, &kvn); OBJ_DESTRUCT(&kvn); } @@ -821,7 +823,7 @@ static int cray_store_local(const opal_process_name_t *proc, static const char *cray_get_nspace(opal_jobid_t jobid) { - return NULL; + return "N/A"; } static void cray_register_jobid(opal_jobid_t jobid, const char *nspace) diff --git a/opal/mca/pmix/external/Makefile.am b/opal/mca/pmix/external/Makefile.am new file mode 100644 index 00000000000..770850c6e3f --- /dev/null +++ b/opal/mca/pmix/external/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + pmix_ext.h \ + pmix_ext_component.c \ + pmix_ext.c \ + pmix_ext_client.c \ + pmix_ext_server_south.c \ + pmix_ext_server_north.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_opal_pmix_external_DSO +component_noinst = +component_install = mca_pmix_external.la +else +component_noinst = libmca_pmix_external.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_external_la_SOURCES = $(sources) +mca_pmix_external_la_CFLAGS = +mca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) +mca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) +mca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) \ + $(OPAL_TOP_BUILDDIR)/opal/mca/event/lib@OPAL_LIB_PREFIX@mca_event.la \ + $(OPAL_TOP_BUILDDIR)/opal/mca/hwloc/lib@OPAL_LIB_PREFIX@mca_hwloc.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_external_la_SOURCES =$(sources) +libmca_pmix_external_la_CFLAGS = +libmca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) +libmca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) +libmca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) diff --git a/opal/mca/pmix/external/configure.m4 b/opal/mca/pmix/external/configure.m4 new file mode 100644 index 00000000000..779be3431c7 --- /dev/null +++ b/opal/mca/pmix/external/configure.m4 @@ -0,0 +1,45 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
+# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_pmix_external_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_external_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/external/Makefile]) + + AC_REQUIRE([OPAL_CHECK_PMIX]) + + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AS_IF([test "$opal_event_external_want" != "yes" || test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([AND EXTERNAL HWLOC LIBRARIES. THESE LIBRARIES MUST POINT]) + AC_MSG_WARN([TO THE SAME ONES USED TO BUILD PMIX OR ELSE UNPREDICTABLE]) + AC_MSG_WARN([BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + external_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/$opal_pmix_external_basedir/pmix -I${includedir}/openmpi/$opal_pmix_external_basedir/pmix/include' + $1], + [$2]) +])dnl diff --git a/opal/mca/pmix/external/pmix_ext.c b/opal/mca/pmix/external/pmix_ext.c new file mode 100644 index 00000000000..49c466b6694 --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext.c @@ -0,0 +1,541 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" + +#include "pmix_ext.h" +#include "opal/mca/pmix/base/base.h" + +#include "pmix_common.h" + +/**** C.O.M.M.O.N I.N.T.E.R.F.A.C.E.S ****/ + +/* These are functions used by both client and server to + * access common functions in the embedded PMIx library */ + +static const char *pmix1_get_nspace(opal_jobid_t jobid); +static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); + +const opal_pmix_base_module_t opal_pmix_external_module = { + /* client APIs */ + .init = pmix1_client_init, + .finalize = pmix1_client_finalize, + .initialized = pmix1_initialized, + .abort = pmix1_abort, + .commit = pmix1_commit, + .fence = pmix1_fence, + .fence_nb = pmix1_fencenb, + .put = pmix1_put, + .get = pmix1_get, + .get_nb = pmix1_getnb, + .publish = pmix1_publish, + .publish_nb = pmix1_publishnb, + .lookup = pmix1_lookup, + .lookup_nb = pmix1_lookupnb, + .unpublish = pmix1_unpublish, + .unpublish_nb = pmix1_unpublishnb, + .spawn = pmix1_spawn, + .spawn_nb = pmix1_spawnnb, + .connect = pmix1_connect, + .connect_nb = pmix1_connectnb, + .disconnect = pmix1_disconnect, + .disconnect_nb = pmix1_disconnectnb, + .resolve_peers = pmix1_resolve_peers, + .resolve_nodes = pmix1_resolve_nodes, + /* server APIs */ + .server_init = pmix1_server_init, + .server_finalize = pmix1_server_finalize, + .generate_regex = pmix1_server_gen_regex, + .generate_ppn = pmix1_server_gen_ppn, + .server_register_nspace =
pmix1_server_register_nspace, + .server_deregister_nspace = pmix1_server_deregister_nspace, + .server_register_client = pmix1_server_register_client, + .server_deregister_client = pmix1_server_deregister_client, + .server_setup_fork = pmix1_server_setup_fork, + .server_dmodex_request = pmix1_server_dmodex, + .server_notify_error = pmix1_server_notify_error, + /* utility APIs */ + .get_version = PMIx_Get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = pmix1_store_local, + .get_nspace = pmix1_get_nspace, + .register_jobid = pmix1_register_jobid +}; + +static const char *pmix1_get_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + +static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, add this to our jobid tracker */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return; + } + } + jptr = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); + jptr->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &jptr->super); +} + +pmix_status_t pmix1_convert_opalrc(int rc) +{ + switch (rc) { + case OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + case OPAL_ERR_COMM_FAILURE: + return PMIX_ERR_COMM_FAILURE; + case OPAL_ERR_NOT_IMPLEMENTED: + return PMIX_ERR_NOT_IMPLEMENTED; + case OPAL_ERR_NOT_SUPPORTED: + return PMIX_ERR_NOT_SUPPORTED; + case OPAL_ERR_NOT_FOUND: + return PMIX_ERR_NOT_FOUND; + case OPAL_ERR_SERVER_NOT_AVAIL: + return PMIX_ERR_SERVER_NOT_AVAIL; + + case OPAL_ERR_BAD_PARAM: + return PMIX_ERR_BAD_PARAM; + case 
OPAL_ERR_OUT_OF_RESOURCE: + return PMIX_ERR_NOMEM; + + case OPAL_ERR_DATA_VALUE_NOT_FOUND: + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + case OPAL_ERR_IN_ERRNO: + return PMIX_ERR_IN_ERRNO; + case OPAL_ERR_UNREACH: + return PMIX_ERR_UNREACH; + case OPAL_ERR_TIMEOUT: + return PMIX_ERR_TIMEOUT; + case OPAL_ERR_PERM: + return PMIX_ERR_NO_PERMISSIONS; + case OPAL_ERR_PACK_MISMATCH: + return PMIX_ERR_PACK_MISMATCH; + case OPAL_ERR_PACK_FAILURE: + return PMIX_ERR_PACK_FAILURE; + + case OPAL_ERR_UNPACK_FAILURE: + return PMIX_ERR_UNPACK_FAILURE; + case OPAL_ERR_UNPACK_INADEQUATE_SPACE: + return PMIX_ERR_UNPACK_INADEQUATE_SPACE; + case OPAL_ERR_TYPE_MISMATCH: + return PMIX_ERR_TYPE_MISMATCH; + case OPAL_ERR_PROC_ENTRY_NOT_FOUND: + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + case OPAL_ERR_UNKNOWN_DATA_TYPE: + return PMIX_ERR_UNKNOWN_DATA_TYPE; + case OPAL_ERR_WOULD_BLOCK: + return PMIX_ERR_WOULD_BLOCK; + case OPAL_EXISTS: + return PMIX_EXISTS; + + case OPAL_ERR_SILENT: + return PMIX_ERR_SILENT; + case OPAL_ERROR: + return PMIX_ERROR; + case OPAL_SUCCESS: + return PMIX_SUCCESS; + default: + return PMIX_ERROR; + } +} + +int pmix1_convert_rc(pmix_status_t rc) +{ + switch (rc) { + case PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + case PMIX_ERR_COMM_FAILURE: + return OPAL_ERR_COMM_FAILURE; + case PMIX_ERR_NOT_IMPLEMENTED: + return OPAL_ERR_NOT_IMPLEMENTED; + case PMIX_ERR_NOT_SUPPORTED: + return OPAL_ERR_NOT_SUPPORTED; + case PMIX_ERR_NOT_FOUND: + return OPAL_ERR_NOT_FOUND; + case PMIX_ERR_SERVER_NOT_AVAIL: + return OPAL_ERR_SERVER_NOT_AVAIL; + + case PMIX_ERR_INVALID_NAMESPACE: + case PMIX_ERR_INVALID_SIZE: + case PMIX_ERR_INVALID_KEYVALP: + case PMIX_ERR_INVALID_NUM_PARSED: + case PMIX_ERR_INVALID_ARGS: + case PMIX_ERR_INVALID_NUM_ARGS: + case PMIX_ERR_INVALID_LENGTH: + case PMIX_ERR_INVALID_VAL_LENGTH: + case PMIX_ERR_INVALID_VAL: + case PMIX_ERR_INVALID_KEY_LENGTH: + case PMIX_ERR_INVALID_KEY: + case PMIX_ERR_INVALID_ARG: + return 
OPAL_ERR_BAD_PARAM; + case PMIX_ERR_NOMEM: + return OPAL_ERR_OUT_OF_RESOURCE; + case PMIX_ERR_INIT: + return OPAL_ERROR; + + case PMIX_ERR_DATA_VALUE_NOT_FOUND: + return OPAL_ERR_DATA_VALUE_NOT_FOUND; + case PMIX_ERR_OUT_OF_RESOURCE: + return OPAL_ERR_OUT_OF_RESOURCE; + case PMIX_ERR_RESOURCE_BUSY: + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + case PMIX_ERR_BAD_PARAM: + return OPAL_ERR_BAD_PARAM; + case PMIX_ERR_IN_ERRNO: + return OPAL_ERR_IN_ERRNO; + case PMIX_ERR_UNREACH: + return OPAL_ERR_UNREACH; + case PMIX_ERR_TIMEOUT: + return OPAL_ERR_TIMEOUT; + case PMIX_ERR_NO_PERMISSIONS: + return OPAL_ERR_PERM; + case PMIX_ERR_PACK_MISMATCH: + return OPAL_ERR_PACK_MISMATCH; + case PMIX_ERR_PACK_FAILURE: + return OPAL_ERR_PACK_FAILURE; + + case PMIX_ERR_UNPACK_FAILURE: + return OPAL_ERR_UNPACK_FAILURE; + case PMIX_ERR_UNPACK_INADEQUATE_SPACE: + return OPAL_ERR_UNPACK_INADEQUATE_SPACE; + case PMIX_ERR_TYPE_MISMATCH: + return OPAL_ERR_TYPE_MISMATCH; + case PMIX_ERR_PROC_ENTRY_NOT_FOUND: + return OPAL_ERR_PROC_ENTRY_NOT_FOUND; + case PMIX_ERR_UNKNOWN_DATA_TYPE: + return OPAL_ERR_UNKNOWN_DATA_TYPE; + case PMIX_ERR_WOULD_BLOCK: + return OPAL_ERR_WOULD_BLOCK; + case PMIX_ERR_READY_FOR_HANDSHAKE: + case PMIX_ERR_HANDSHAKE_FAILED: + case PMIX_ERR_INVALID_CRED: + return OPAL_ERR_COMM_FAILURE; + case PMIX_EXISTS: + return OPAL_EXISTS; + + case PMIX_ERR_SILENT: + return OPAL_ERR_SILENT; + case PMIX_ERROR: + return OPAL_ERROR; + case PMIX_SUCCESS: + return OPAL_SUCCESS; + default: + return OPAL_ERROR; + } +} + +void pmix1_value_load(pmix_value_t *v, + opal_value_t *kv) +{ + switch(kv->type) { + case OPAL_UNDEF: + v->type = PMIX_UNDEF; + opal_output(0, "TYPE WAS UNDEF"); + break; + case OPAL_BOOL: + v->type = PMIX_BOOL; + memcpy(&(v->data.flag), &kv->data.flag, 1); + break; + case OPAL_BYTE: + v->type = PMIX_BYTE; + memcpy(&(v->data.byte), &kv->data.byte, 1); + break; + case OPAL_STRING: + v->type = PMIX_STRING; + if (NULL != kv->data.string) { + v->data.string = strdup(kv->data.string); 
+ } else { + v->data.string = NULL; + } + break; + case OPAL_SIZE: + v->type = PMIX_SIZE; + v->data.size = (size_t)kv->data.size; + break; + case OPAL_PID: + v->type = PMIX_PID; + memcpy(&(v->data.pid), &kv->data.pid, sizeof(pid_t)); + break; + case OPAL_INT: + v->type = PMIX_INT; + memcpy(&(v->data.integer), &kv->data.integer, sizeof(int)); + break; + case OPAL_INT8: + v->type = PMIX_INT8; + memcpy(&(v->data.int8), &kv->data.int8, 1); + break; + case OPAL_INT16: + v->type = PMIX_INT16; + memcpy(&(v->data.int16), &kv->data.int16, 2); + break; + case OPAL_INT32: + v->type = PMIX_INT32; + memcpy(&(v->data.int32), &kv->data.int32, 4); + break; + case OPAL_INT64: + v->type = PMIX_INT64; + memcpy(&(v->data.int64), &kv->data.int64, 8); + break; + case OPAL_UINT: + v->type = PMIX_UINT; + memcpy(&(v->data.uint), &kv->data.uint, sizeof(int)); + break; + case OPAL_UINT8: + v->type = PMIX_UINT8; + memcpy(&(v->data.uint8), &kv->data.uint8, 1); + break; + case OPAL_UINT16: + v->type = PMIX_UINT16; + memcpy(&(v->data.uint16), &kv->data.uint16, 2); + break; + case OPAL_UINT32: + v->type = PMIX_UINT32; + memcpy(&(v->data.uint32), &kv->data.uint32, 4); + break; + case OPAL_UINT64: + v->type = PMIX_UINT64; + memcpy(&(v->data.uint64), &kv->data.uint64, 8); + break; + case OPAL_FLOAT: + v->type = PMIX_FLOAT; + memcpy(&(v->data.fval), &kv->data.fval, sizeof(float)); + break; + case OPAL_DOUBLE: + v->type = PMIX_DOUBLE; + memcpy(&(v->data.dval), &kv->data.dval, sizeof(double)); + break; + case OPAL_TIMEVAL: + v->type = PMIX_TIMEVAL; + memcpy(&(v->data.tv), &kv->data.tv, sizeof(struct timeval)); + break; + case OPAL_BYTE_OBJECT: + v->type = PMIX_BYTE_OBJECT; + if (NULL != kv->data.bo.bytes) { + v->data.bo.bytes = (char*)malloc(kv->data.bo.size); + memcpy(v->data.bo.bytes, kv->data.bo.bytes, kv->data.bo.size); + v->data.bo.size = (size_t)kv->data.bo.size; + } else { + v->data.bo.bytes = NULL; + v->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + break; + } +} + +int 
pmix1_value_unload(opal_value_t *kv, + const pmix_value_t *v) +{ + int rc=OPAL_SUCCESS; + + + switch(v->type) { + case PMIX_UNDEF: + rc = OPAL_ERR_UNKNOWN_DATA_TYPE; + break; + case PMIX_BOOL: + kv->type = OPAL_BOOL; + memcpy(&kv->data.flag, &(v->data.flag), 1); + break; + case PMIX_BYTE: + kv->type = OPAL_BYTE; + memcpy(&kv->data.byte, &(v->data.byte), 1); + break; + case PMIX_STRING: + kv->type = OPAL_STRING; + if (NULL != v->data.string) { + kv->data.string = strdup(v->data.string); + } + break; + case PMIX_SIZE: + kv->type = OPAL_SIZE; + kv->data.size = (int)v->data.size; + break; + case PMIX_PID: + kv->type = OPAL_PID; + memcpy(&kv->data.pid, &(v->data.pid), sizeof(pid_t)); + break; + case PMIX_INT: + kv->type = OPAL_INT; + memcpy(&kv->data.integer, &(v->data.integer), sizeof(int)); + break; + case PMIX_INT8: + kv->type = OPAL_INT8; + memcpy(&kv->data.int8, &(v->data.int8), 1); + break; + case PMIX_INT16: + kv->type = OPAL_INT16; + memcpy(&kv->data.int16, &(v->data.int16), 2); + break; + case PMIX_INT32: + kv->type = OPAL_INT32; + memcpy(&kv->data.int32, &(v->data.int32), 4); + break; + case PMIX_INT64: + kv->type = OPAL_INT64; + memcpy(&kv->data, &(v->data.int64), 8); + break; + case PMIX_UINT: + kv->type = OPAL_UINT; + memcpy(&kv->data, &(v->data.uint), sizeof(int)); + break; + case PMIX_UINT8: + kv->type = OPAL_UINT8; + memcpy(&kv->data, &(v->data.uint8), 1); + break; + case PMIX_UINT16: + kv->type = OPAL_UINT16; + memcpy(&kv->data, &(v->data.uint16), 2); + break; + case PMIX_UINT32: + kv->type = OPAL_UINT32; + memcpy(&kv->data, &(v->data.uint32), 4); + break; + case PMIX_UINT64: + kv->type = OPAL_UINT64; + memcpy(&kv->data, &(v->data.uint64), 8); + break; + case PMIX_FLOAT: + kv->type = OPAL_FLOAT; + memcpy(&kv->data, &(v->data.fval), sizeof(float)); + break; + case PMIX_DOUBLE: + kv->type = OPAL_DOUBLE; + memcpy(&kv->data, &(v->data.dval), sizeof(double)); + break; + case PMIX_TIMEVAL: + kv->type = OPAL_TIMEVAL; + memcpy(&kv->data, &(v->data.tv), 
sizeof(struct timeval)); + break; + case PMIX_BYTE_OBJECT: + kv->type = OPAL_BYTE_OBJECT; + if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) { + kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size); + memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size); + kv->data.bo.size = (int)v->data.bo.size; + } else { + kv->data.bo.bytes = NULL; + kv->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + rc = OPAL_ERROR; + break; + } + return rc; +} + + +/**** INSTANTIATE INTERNAL CLASSES ****/ +OBJ_CLASS_INSTANCE(opal_pmix1_jobid_trkr_t, + opal_list_item_t, + NULL, NULL); + +static void opcon(pmix1_opcaddy_t *p) +{ + memset(&p->p, 0, sizeof(pmix_proc_t)); + p->procs = NULL; + p->nprocs = 0; + p->error_procs = NULL; + p->nerror_procs = 0; + p->info = NULL; + p->ninfo = 0; + p->apps = NULL; + p->sz = 0; + p->opcbfunc = NULL; + p->mdxcbfunc = NULL; + p->valcbfunc = NULL; + p->lkcbfunc = NULL; + p->spcbfunc = NULL; + p->cbdata = NULL; +} +static void opdes(pmix1_opcaddy_t *p) +{ + if (NULL != p->procs) { + PMIX_PROC_FREE(p->procs, p->nprocs); + } + if (NULL != p->error_procs) { + PMIX_PROC_FREE(p->error_procs, p->nerror_procs); + } + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->sz); + } + if (NULL != p->apps) { + PMIX_APP_FREE(p->apps, p->sz); + } +} +OBJ_CLASS_INSTANCE(pmix1_opcaddy_t, + opal_object_t, + opcon, opdes); + +static void ocadcon(pmix1_opalcaddy_t *p) +{ + OBJ_CONSTRUCT(&p->procs, opal_list_t); + OBJ_CONSTRUCT(&p->info, opal_list_t); + OBJ_CONSTRUCT(&p->apps, opal_list_t); + p->opcbfunc = NULL; + p->dmdxfunc = NULL; + p->mdxcbfunc = NULL; + p->lkupcbfunc = NULL; + p->spwncbfunc = NULL; + p->cbdata = NULL; + p->odmdxfunc = NULL; + p->ocbdata = NULL; +} +static void ocaddes(pmix1_opalcaddy_t *p) +{ + OPAL_LIST_DESTRUCT(&p->procs); + OPAL_LIST_DESTRUCT(&p->info); + OPAL_LIST_DESTRUCT(&p->apps); +} +OBJ_CLASS_INSTANCE(pmix1_opalcaddy_t, + opal_object_t, + ocadcon, ocaddes); diff --git a/opal/mca/pmix/external/pmix_ext.h 
b/opal/mca/pmix/external/pmix_ext.h new file mode 100644 index 00000000000..2d658fb07ed --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PMIX_EXTERNAL_H +#define MCA_PMIX_EXTERNAL_H + +#include "opal_config.h" + +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +#ifdef HAVE_SYS_UN_H +#include <sys/un.h> +#endif + +#include "opal/mca/mca.h" +#include "opal/mca/event/event.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/pmix.h" +#include "pmix_server.h" +#include "pmix_server.h" +#include "pmix/pmix_common.h" + +BEGIN_C_DECLS + +typedef struct { + opal_pmix_base_component_t super; + opal_list_t jobids; + bool native_launch; +} mca_pmix_external_component_t; + +OPAL_DECLSPEC extern mca_pmix_external_component_t mca_pmix_external_component; + +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_external_module; + +/**** INTERNAL OBJECTS ****/ +typedef struct { + opal_list_item_t super; + opal_jobid_t jobid; + char nspace[PMIX_MAX_NSLEN + 1]; +} opal_pmix1_jobid_trkr_t; +OBJ_CLASS_DECLARATION(opal_pmix1_jobid_trkr_t); + +typedef struct { + opal_object_t super; + pmix_proc_t p; + pmix_proc_t *procs; + size_t nprocs; + pmix_proc_t *error_procs; + size_t nerror_procs; + pmix_info_t *info; + size_t ninfo; + pmix_app_t *apps; + size_t sz; + opal_pmix_op_cbfunc_t opcbfunc; + opal_pmix_modex_cbfunc_t mdxcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; + opal_pmix_spawn_cbfunc_t spcbfunc; + void *cbdata; +} pmix1_opcaddy_t; +OBJ_CLASS_DECLARATION(pmix1_opcaddy_t); + +typedef struct { + opal_object_t super; + opal_list_t procs; + opal_list_t info; + opal_list_t apps; + pmix_op_cbfunc_t opcbfunc; + pmix_dmodex_response_fn_t dmdxfunc; + pmix_modex_cbfunc_t mdxcbfunc; +
pmix_lookup_cbfunc_t lkupcbfunc; + pmix_spawn_cbfunc_t spwncbfunc; + void *cbdata; + opal_pmix_release_cbfunc_t odmdxfunc; + void *ocbdata; +} pmix1_opalcaddy_t; +OBJ_CLASS_DECLARATION(pmix1_opalcaddy_t); + + +/**** CLIENT FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_client_init(void); +OPAL_MODULE_DECLSPEC int pmix1_client_finalize(void); +OPAL_MODULE_DECLSPEC int pmix1_initialized(void); +OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_commit(void); +OPAL_MODULE_DECLSPEC int pmix1_fence(opal_list_t *procs, int collect_data); +OPAL_MODULE_DECLSPEC int pmix1_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_put(opal_pmix_scope_t scope, + opal_value_t *val); +OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val); +OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_list_t *data, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_lookupnb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_unpublish(char **keys, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); +OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_connect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int 
pmix1_connectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_disconnect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_disconnectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist); + +/**** COMMON FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc, + opal_value_t *val); + +/**** SERVER SOUTHBOUND FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void); +OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex); +OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn); +OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_nspace(opal_jobid_t jobid); +OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_client(const opal_process_name_t *proc); +OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env); +OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/**** COMPONENT UTILITY FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC pmix_status_t pmix1_convert_opalrc(int rc); 
+OPAL_MODULE_DECLSPEC int pmix1_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC void pmix1_value_load(pmix_value_t *v, + opal_value_t *kv); +OPAL_MODULE_DECLSPEC int pmix1_value_unload(opal_value_t *kv, + const pmix_value_t *v); + +END_C_DECLS + +#endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/external/pmix_ext_client.c b/opal/mca/pmix/external/pmix_ext_client.c new file mode 100644 index 00000000000..fb4fd6fcb79 --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_client.c @@ -0,0 +1,1314 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "opal/hash_string.h" +#include "opal/util/argv.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext.h" +#include "pmix.h" +#include "src/buffer_ops/buffer_ops.h" + +static pmix_proc_t my_proc; +static char *dbgvalue=NULL; +static int errhdler_ref = 0; + +static void completion_handler (void * cbdata) { + int * cond = (int *)cbdata; + *cond = 0; +} + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0); + +static void myerr(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo) +{ + int rc; + opal_list_t plist, ilist; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + volatile int cond = 1; + + /* convert the incoming status */ + rc = pmix1_convert_rc(status); + + /* convert the array of procs */ + OBJ_CONSTRUCT(&plist, opal_list_t); + for (n=0; n <
nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + nm->name.vpid = procs[n].rank; + opal_list_append(&plist, &nm->super); + } + + /* convert the array of info */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(info[n].key); + pmix1_value_unload(iptr, &info[n].value); + opal_list_append(&ilist, &iptr->super); + } + + /* call the base errhandler */ + opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&cond); + PMIX_WAIT_FOR_COMPLETION(cond); + + OPAL_LIST_DESTRUCT(&plist); + OPAL_LIST_DESTRUCT(&ilist); +} + +static void errreg_cbfunc (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + +int pmix1_client_init(void) +{ + opal_process_name_t pname; + pmix_status_t rc; + int dbg; + opal_pmix1_jobid_trkr_t *job; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client init"); + + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + rc = PMIx_Init(&my_proc); + if (PMIX_SUCCESS != rc) { + return pmix1_convert_rc(rc); + } + + /* store our jobid and rank */ + if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + mca_pmix_external_component.native_launch = true; + opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(my_proc.nspace, pname.jobid); + } + /* insert this into our list of jobids - it will be the + * first, and so we'll check it first */ + job
= OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); + job->jobid = pname.jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + + pname.vpid = my_proc.rank; + opal_proc_set_name(&pname); + + /* register the errhandler */ + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL ); + return OPAL_SUCCESS; + +} + +int pmix1_client_finalize(void) +{ + pmix_status_t rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client finalize"); + + /* deregister the errhandler */ + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); + + rc = PMIx_Finalize(); + + return pmix1_convert_rc(rc); +} + +int pmix1_initialized(void) +{ + pmix_status_t rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client initialized"); + + rc = PMIx_Initialized(); + return pmix1_convert_rc(rc); +} + +int pmix1_abort(int flag, const char *msg, + opal_list_t *procs) +{ + pmix_status_t rc; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client abort"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + + /* call the library abort */ + rc = PMIx_Abort(flag, msg, parray, cnt); + + /* release the array */ + PMIX_PROC_FREE(parray, cnt); + + return 
pmix1_convert_rc(rc); +} + +int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val) +{ + pmix_value_t kv; + pmix_status_t rc; + pmix_proc_t p; + opal_pmix1_jobid_trkr_t *job, *jptr; + + if (NULL != proc) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + } else { + /* use our name */ + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = OPAL_PROC_MY_NAME.vpid; + } + + PMIX_VALUE_CONSTRUCT(&kv); + pmix1_value_load(&kv, val); + + rc = PMIx_Store_internal(&p, val->key, &kv); + PMIX_VALUE_DESTRUCT(&kv); + + return pmix1_convert_rc(rc); +} + +int pmix1_commit(void) +{ + pmix_status_t rc; + + rc = PMIx_Commit(); + return pmix1_convert_rc(rc); +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix1_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + +int pmix1_fence(opal_list_t *procs, int collect_data) +{ + pmix_status_t rc; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix_info_t info, *iptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client fence"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if 
(jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + if (collect_data) { + PMIX_INFO_CONSTRUCT(&info); + (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); + info.value.type = PMIX_BOOL; + info.value.data.flag = true; + iptr = &info; + n = 1; + } else { + iptr = NULL; + n = 0; + } + + /* call the library function */ + rc = PMIx_Fence(parray, cnt, iptr, n); + + /* release the array */ + PMIX_PROC_FREE(parray, cnt); + if (NULL != iptr) { + PMIX_INFO_DESTRUCT(&info); + } + + return pmix1_convert_rc(rc); + +} + +int pmix1_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t rc; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix1_opcaddy_t *op; + pmix_info_t info, *iptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client fence_nb"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + + if (collect_data) { + PMIX_INFO_CONSTRUCT(&info); + (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); + iptr = &info; + n = 1; + } else { + iptr = NULL; + n = 0; + } + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + 
op->cbdata = cbdata;
+    op->procs = parray;
+    op->nprocs = cnt;
+
+    /* call the library function */
+    rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op);
+    if (PMIX_SUCCESS != rc) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(rc);
+
+}
+
+/* Load val into a pmix_value_t and push it with PMIx_Put under
+ * val->key. Returns the converted PMIx_Put status. */
+int pmix1_put(opal_pmix_scope_t scope,
+              opal_value_t *val)
+{
+    pmix_value_t kv;
+    pmix_status_t rc;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client put");
+
+    PMIX_VALUE_CONSTRUCT(&kv);
+    pmix1_value_load(&kv, val);
+
+    rc = PMIx_Put(scope, val->key, &kv);
+    PMIX_VALUE_DESTRUCT(&kv);
+    return pmix1_convert_rc(rc);
+}
+
+/* Blocking retrieval of key for proc (or for ourselves when proc is
+ * NULL). The optional info list is converted to a pmix_info_t array
+ * and passed down. On success *val is a newly allocated opal_value_t,
+ * or NULL if the library returned no value.
+ * Returns OPAL_ERR_NOT_FOUND if proc's jobid has no known nspace,
+ * otherwise the converted PMIx_Get / unload status. */
+int pmix1_get(const opal_process_name_t *proc, const char *key,
+              opal_list_t *info, opal_value_t **val)
+{
+    int ret;
+    pmix_value_t *kv;
+    pmix_status_t rc;
+    pmix_proc_t p, *pptr;
+    size_t ninfo, n;
+    pmix_info_t *pinfo;
+    opal_value_t *ival;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "%s PMIx_client get on proc %s key %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
+                        (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key);
+
+    /* prep default response */
+    *val = NULL;
+    if (NULL != proc) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == proc->jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
+        p.rank = proc->vpid;
+        pptr = &p;
+    } else {
+        /* if they are asking for our jobid, then return it */
+        if (0 == strcmp(key, OPAL_PMIX_JOBID)) {
+            (*val) = OBJ_NEW(opal_value_t);
+            (*val)->type = OPAL_UINT32;
+            (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid;
+            return OPAL_SUCCESS;
+        } else if (0 == strcmp(key, OPAL_PMIX_RANK)) {
+            (*val) = OBJ_NEW(opal_value_t);
+            (*val)->type = OPAL_INT;
+            (*val)->data.integer = my_proc.rank;
+            return OPAL_SUCCESS;
+        }
+        pptr = NULL;
+    }
+
+    if (NULL != info) {
+        ninfo = opal_list_get_size(info);
+        if (0 < ninfo) {
+            PMIX_INFO_CREATE(pinfo, ninfo);
+            n=0;
+            OPAL_LIST_FOREACH(ival, info, opal_value_t) {
+                (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&pinfo[n].value, ival);
+                /* advance to the next slot - without this every entry
+                 * overwrote pinfo[0] */
+                ++n;
+            }
+        } else {
+            pinfo = NULL;
+        }
+    } else {
+        pinfo = NULL;
+        ninfo = 0;
+    }
+
+    /* pass the request down */
+    rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv);
+    if (PMIX_SUCCESS == rc) {
+        if (NULL == kv) {
+            ret = OPAL_SUCCESS;
+        } else {
+            *val = OBJ_NEW(opal_value_t);
+            ret = pmix1_value_unload(*val, kv);
+            PMIX_VALUE_FREE(kv, 1);
+        }
+    } else {
+        ret = pmix1_convert_rc(rc);
+    }
+    PMIX_INFO_FREE(pinfo, ninfo);
+    return ret;
+}
+
+/* Completion callback for PMIx_Get_nb: converts the PMIx status and
+ * value to their OPAL forms and hands them to the caller's valcbfunc */
+static void val_cbfunc(pmix_status_t status,
+                       pmix_value_t *kv, void *cbdata)
+{
+    pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
+    int rc;
+    opal_value_t val, *v=NULL;
+
+    /* status is a PMIx code going up to an OPAL callback, so use the
+     * PMIx->OPAL conversion like the other client callbacks do */
+    rc = pmix1_convert_rc(status);
+    if (PMIX_SUCCESS == status && NULL != kv) {
+        /* NOTE(review): val is a bare stack object that is never
+         * OBJ_CONSTRUCTed/OBJ_DESTRUCTed - confirm that is intended */
+        rc = pmix1_value_unload(&val, kv);
+        v = &val;
+    }
+
+    if (NULL != op->valcbfunc) {
+
op->valcbfunc(rc, v, op->cbdata);
+    }
+    OBJ_RELEASE(op);
+}
+
+/* Non-blocking retrieval of key for proc (or for our own nspace with a
+ * wildcard rank when proc is NULL). The result is delivered to cbfunc
+ * via val_cbfunc.
+ * Returns OPAL_ERR_NOT_FOUND if proc's jobid has no known nspace,
+ * otherwise the converted PMIx_Get_nb status. */
+int pmix1_getnb(const opal_process_name_t *proc, const char *key,
+                opal_list_t *info,
+                opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opcaddy_t *op;
+    pmix_status_t rc;
+    size_t n;
+    opal_value_t *ival;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "%s PMIx_client get_nb on proc %s key %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
+                        (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key);
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->valcbfunc = cbfunc;
+    op->cbdata = cbdata;
+
+    if (NULL != proc) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == proc->jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            /* the callback will never fire, so drop the caddy here */
+            OBJ_RELEASE(op);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN);
+        op->p.rank = proc->vpid;
+    } else {
+        (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
+        op->p.rank = PMIX_RANK_WILDCARD;
+    }
+
+    if (NULL != info) {
+        op->sz = opal_list_get_size(info);
+        if (0 < op->sz) {
+            PMIX_INFO_CREATE(op->info, op->sz);
+            n=0;
+            OPAL_LIST_FOREACH(ival, info, opal_value_t) {
+                (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&op->info[n].value, ival);
+                /* advance to the next slot - without this every entry
+                 * overwrote op->info[0] */
+                ++n;
+            }
+        }
+    }
+
+    /* call the library function */
+    rc = PMIx_Get_nb(&op->p, key, op->info, op->sz, val_cbfunc, op);
+    if (PMIX_SUCCESS != rc) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(rc);
+}
+
+int pmix1_publish(opal_list_t *info)
+{
+    pmix_info_t *pinfo;
+    pmix_status_t ret;
+    opal_value_t *iptr;
+    size_t sz, n;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client publish");
+
+    if (NULL == info) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    sz = opal_list_get_size(info);
+    if (0 < sz) {
+        PMIX_INFO_CREATE(pinfo,
sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + } + + ret = PMIx_Publish(pinfo, sz); + + return pmix1_convert_rc(ret); +} + +int pmix1_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + opal_value_t *iptr; + size_t n; + pmix1_opcaddy_t *op; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client publish_nb"); + + if (NULL == info) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, iptr); + ++n; + } + } + + ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); + + return pmix1_convert_rc(ret); +} + +int pmix1_lookup(opal_list_t *data, opal_list_t *info) +{ + pmix_pdata_t *pdata; + pmix_info_t *pinfo; + size_t sz, ninfo, n; + int rc; + pmix_status_t ret; + opal_pmix_pdata_t *d; + opal_value_t *iptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client lookup"); + + if (NULL == data) { + return OPAL_ERR_BAD_PARAM; + } + + sz = opal_list_get_size(data); + PMIX_PDATA_CREATE(pdata, sz); + n=0; + OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { + (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + } + + if (NULL != info) { + ninfo = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + ret = 
PMIx_Lookup(pdata, sz, pinfo, ninfo);
+    PMIX_INFO_FREE(pinfo, ninfo);
+
+    if (PMIX_SUCCESS == ret) {
+        /* transfer the data back */
+        n=0;
+        OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) {
+            if (mca_pmix_external_component.native_launch) {
+                /* if we were launched by the OMPI RTE, then
+                 * the jobid is in a special format - so get it */
+                opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace);
+            } else {
+                /* we were launched by someone else, so make the
+                 * jobid just be the hash of the nspace */
+                OPAL_HASH_STR(pdata[n].proc.nspace, d->proc.jobid);
+            }
+            /* if we don't already have it, add this to our jobid tracker */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == d->proc.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
+                (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN);
+                job->jobid = d->proc.jobid;
+                opal_list_append(&mca_pmix_external_component.jobids, &job->super);
+            }
+            if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) {
+                d->proc.vpid = OPAL_VPID_WILDCARD;
+            } else {
+                d->proc.vpid = pdata[n].proc.rank;
+            }
+            rc = pmix1_value_unload(&d->value, &pdata[n].value);
+            if (OPAL_SUCCESS != rc) {
+                OPAL_ERROR_LOG(rc);
+                PMIX_PDATA_FREE(pdata, sz);
+                return OPAL_ERR_BAD_PARAM;
+            }
+            ++n;
+        }
+    }
+    /* the results (if any) now live in the caller's list, so the
+     * working array must be released on this path too - previously it
+     * was only freed when an unload failed */
+    PMIX_PDATA_FREE(pdata, sz);
+
+    return pmix1_convert_rc(ret);
+}
+
+/* Completion callback for PMIx_Lookup_nb: converts the returned
+ * pmix_pdata_t array into a list of opal_pmix_pdata_t and passes it to
+ * the caller's lkcbfunc */
+static void lk_cbfunc(pmix_status_t status,
+                      pmix_pdata_t data[], size_t ndata,
+                      void *cbdata)
+{
+    pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
+    opal_pmix_pdata_t *d;
+    opal_list_t results, *r = NULL;
+    int rc;
+    size_t n;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    if (NULL == op->lkcbfunc) {
+        OBJ_RELEASE(op);
+        return;
+    }
+
+    rc = pmix1_convert_rc(status);
+    if (OPAL_SUCCESS == rc) {
+        OBJ_CONSTRUCT(&results, opal_list_t);
+        for (n=0; n < ndata; n++) {
+            d = OBJ_NEW(opal_pmix_pdata_t);
+            opal_list_append(&results, &d->super);
+            if
(mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(data[n].proc.nspace, d->proc.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == d->proc.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); + job->jobid = d->proc.jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + if (PMIX_RANK_WILDCARD == data[n].proc.rank) { + d->proc.vpid = OPAL_VPID_WILDCARD; + } else { + d->proc.vpid = data[n].proc.rank; + } + d->value.key = strdup(data[n].key); + rc = pmix1_value_unload(&d->value, &data[n].value); + if (OPAL_SUCCESS != rc) { + rc = OPAL_ERR_BAD_PARAM; + OPAL_ERROR_LOG(rc); + goto release; + } + } + r = &results; + } + release: + /* execute the callback */ + op->lkcbfunc(rc, r, op->cbdata); + + if (NULL != r) { + OPAL_LIST_DESTRUCT(&results); + } + OBJ_RELEASE(op); +} + +int pmix1_lookupnb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix1_opcaddy_t *op; + opal_value_t *iptr; + size_t n; + + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client lookup_nb"); + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->lkcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != info) { + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + 
pmix1_value_load(&op->info[n].value, iptr);
+                ++n;
+            }
+        }
+    }
+
+    ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op);
+
+    return pmix1_convert_rc(ret);
+}
+
+/* Blocking unpublish of the given keys; the optional info list is
+ * converted to a pmix_info_t array and passed down as directives.
+ * Returns the converted PMIx_Unpublish status. */
+int pmix1_unpublish(char **keys, opal_list_t *info)
+{
+    pmix_status_t ret;
+    size_t ninfo, n;
+    pmix_info_t *pinfo;
+    opal_value_t *iptr;
+
+    if (NULL != info) {
+        ninfo = opal_list_get_size(info);
+        PMIX_INFO_CREATE(pinfo, ninfo);
+        n=0;
+        OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
+            /* key and value must land in the same slot, and n may only
+             * advance once per entry - the old pinfo[n++].key followed
+             * by ++n split them up and ran past the end of the array */
+            (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN);
+            pmix1_value_load(&pinfo[n].value, iptr);
+            ++n;
+        }
+    } else {
+        pinfo = NULL;
+        ninfo = 0;
+    }
+
+    ret = PMIx_Unpublish(keys, pinfo, ninfo);
+    PMIX_INFO_FREE(pinfo, ninfo);
+
+    return pmix1_convert_rc(ret);
+}
+
+/* Non-blocking unpublish of the given keys; cbfunc/cbdata are carried
+ * in a caddy and invoked from opcbfunc on completion.
+ * Returns the converted PMIx_Unpublish_nb status. */
+int pmix1_unpublishnb(char **keys, opal_list_t *info,
+                      opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_status_t ret;
+    pmix1_opcaddy_t *op;
+    opal_value_t *iptr;
+    size_t n;
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+
+    if (NULL != info) {
+        op->sz = opal_list_get_size(info);
+        if (0 < op->sz) {
+            PMIX_INFO_CREATE(op->info, op->sz);
+            n=0;
+            OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
+                (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&op->info[n].value, iptr);
+                ++n;
+            }
+        }
+    }
+
+    ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op);
+
+    return pmix1_convert_rc(ret);
+}
+
+int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
+{
+    pmix_status_t ret;
+    pmix_info_t *pinfo = NULL;
+    pmix_app_t *papps;
+    size_t napps, n, m, ninfo = 0;
+    char nspace[PMIX_MAX_NSLEN+1];
+    opal_value_t *info;
+    opal_pmix_app_t *app;
+    opal_pmix1_jobid_trkr_t *job;
+
+    if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) {
+        PMIX_INFO_CREATE(pinfo, ninfo);
+        n=0;
+        OPAL_LIST_FOREACH(info, job_info, opal_value_t) {
+            (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN);
+            pmix1_value_load(&pinfo[n].value, info);
+
++n; + } + } + + napps = opal_list_get_size(apps); + PMIX_APP_CREATE(papps, napps); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + papps[n].cmd = strdup(app->cmd); + papps[n].argc = app->argc; + papps[n].argv = opal_argv_copy(app->argv); + papps[n].env = opal_argv_copy(app->env); + papps[n].maxprocs = app->maxprocs; + if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&papps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == ret) { + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, *jobid); + } + /* add this to our jobid tracker */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = *jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + PMIX_APP_FREE(papps, napps); + + return pmix1_convert_rc(ret); +} + +static void spcbfunc(pmix_status_t status, + char *nspace, void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + int rc; + opal_jobid_t jobid=OPAL_JOBID_INVALID; + opal_pmix1_jobid_trkr_t *job; + + rc = pmix1_convert_rc(status); + if (PMIX_SUCCESS == status) { + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, jobid); + } + /* 
add this to our jobid tracker */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + + op->spcbfunc(rc, jobid, op->cbdata); + OBJ_RELEASE(op); +} + +int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix1_opcaddy_t *op; + size_t n, m; + opal_value_t *info; + opal_pmix_app_t *app; + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->spcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != job_info && 0 < (op->ninfo = opal_list_get_size(job_info))) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(info, job_info, opal_value_t) { + (void)strncpy(op->info[n].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, info); + ++n; + } + } + + op->sz = opal_list_get_size(apps); + PMIX_APP_CREATE(op->apps, op->sz); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + op->apps[n].cmd = strdup(app->cmd); + op->apps[n].argc = app->argc; + op->apps[n].argv = opal_argv_copy(app->argv); + op->apps[n].env = opal_argv_copy(app->env); + op->apps[n].maxprocs = app->maxprocs; + if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(op->apps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->apps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn_nb(op->info, op->ninfo, op->apps, op->sz, spcbfunc, op); + + return pmix1_convert_rc(ret); +} + +int pmix1_connect(opal_list_t *procs) +{ + pmix_status_t ret; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return 
OPAL_ERR_BAD_PARAM;
+    }
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(parray, cnt);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == ptr->name.jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            /* don't leak the partially filled array */
+            PMIX_PROC_FREE(parray, cnt);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            parray[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            parray[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Connect(parray, cnt, NULL, 0);
+    PMIX_PROC_FREE(parray, cnt);
+
+    return pmix1_convert_rc(ret);
+}
+
+/* Non-blocking connect across the given procs; cbfunc/cbdata are
+ * carried in a caddy and invoked from opcbfunc on completion.
+ * Returns OPAL_ERR_BAD_PARAM for a NULL or empty list, otherwise the
+ * converted PMIx_Connect_nb status. */
+int pmix1_connectnb(opal_list_t *procs,
+                    opal_pmix_op_cbfunc_t cbfunc,
+                    void *cbdata)
+{
+    pmix_status_t ret;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    pmix1_opcaddy_t *op;
+    opal_pmix1_jobid_trkr_t *job;
+
+    /* protect against bozo error */
+    if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+    op->nprocs = cnt;
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(op->procs, op->nprocs);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        /* NOTE(review): unlike pmix1_connect, an unknown jobid is not
+         * reported here - the entry's nspace is simply left unset */
+        OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (job->jobid == ptr->name.jobid) {
+                (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+                break;
+            }
+        }
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            op->procs[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            op->procs[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc,
op); + + return pmix1_convert_rc(ret); +} + +int pmix1_disconnect(opal_list_t *procs) +{ + pmix_status_t ret; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix1_jobid_trkr_t *job; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (job->jobid == ptr->name.jobid) { + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + break; + } + } + if (OPAL_VPID_WILDCARD == ptr->name.vpid) { + parray[n].rank = PMIX_RANK_WILDCARD; + } else { + parray[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Disconnect(parray, cnt, NULL, 0); + PMIX_PROC_FREE(parray, cnt); + + return pmix1_convert_rc(ret); +} + +int pmix1_disconnectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t ret; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix1_opcaddy_t *op; + opal_pmix1_jobid_trkr_t *job; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + op->nprocs = cnt; + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(op->procs, op->nprocs); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (job->jobid == ptr->name.jobid) { + (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); + break; + } + } + if 
(OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            op->procs[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            op->procs[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op);
+
+    return pmix1_convert_rc(ret);
+}
+
+
+/* Ask PMIx which procs of the given job (or of any job, for
+ * OPAL_JOBID_WILDCARD) are on nodename and append one opal_namelist_t
+ * per proc to procs. Namespaces seen for the first time are added to
+ * the component's jobid tracker.
+ * Returns OPAL_ERR_NOT_FOUND if jobid has no known nspace, otherwise
+ * the converted PMIx_Resolve_peers status. */
+int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
+                        opal_list_t *procs)
+{
+    char *nspace;
+    pmix_proc_t *array=NULL;
+    size_t nprocs, n;
+    opal_namelist_t *nm;
+    int rc;
+    pmix_status_t ret;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    if (OPAL_JOBID_WILDCARD == jobid) {
+        nspace = NULL;
+    } else {
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            return OPAL_ERR_NOT_FOUND;
+        }
+        nspace = job->nspace;
+    }
+
+    ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs);
+    rc = pmix1_convert_rc(ret);
+
+    if (NULL != array && 0 < nprocs) {
+        for (n=0; n < nprocs; n++) {
+            nm = OBJ_NEW(opal_namelist_t);
+            opal_list_append(procs, &nm->super);
+            if (mca_pmix_external_component.native_launch) {
+                /* if we were launched by the OMPI RTE, then
+                 * the jobid is in a special format - so get it */
+                opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace);
+            } else {
+                /* we were launched by someone else, so make the
+                 * jobid just be the hash of the nspace */
+                OPAL_HASH_STR(array[n].nspace, nm->name.jobid);
+            }
+            /* if we don't already have it, add this to our jobid tracker */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == nm->name.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                /* record the peer's own nspace/jobid pair. The request's
+                 * nspace is NULL (and jobid a wildcard) for wildcard
+                 * queries, so using those here would dereference NULL
+                 * and store an entry that never matches the search key */
+                job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
+                (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN);
+                job->jobid = nm->name.jobid;
+                opal_list_append(&mca_pmix_external_component.jobids, &job->super);
+            }
+            nm->name.vpid = array[n].rank;
+        }
+    }
+    PMIX_PROC_FREE(array, nprocs);
+
+    return rc;
+}
+
+int
pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist) +{ + pmix_status_t ret; + char *nspace=NULL; + opal_pmix1_jobid_trkr_t *job, *jptr; + + if (OPAL_JOBID_WILDCARD != jobid) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + nspace = job->nspace; + } + + ret = PMIx_Resolve_nodes(nspace, nodelist); + + return pmix1_convert_rc(ret);; +} diff --git a/opal/mca/pmix/external/pmix_ext_component.c b/opal/mca/pmix/external/pmix_ext_component.c new file mode 100644 index 00000000000..c250fe0703f --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_component.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/class/opal_list.h" +#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix.h" +#include "pmix_ext.h" + +/* + * Public string showing the pmix external component version number + */ +const char *opal_pmix_external_component_version_string = + "OPAL external pmix MCA component version " OPAL_VERSION; + +/* + * Local function + */ +static int external_open(void); +static int external_close(void); +static int external_component_query(mca_base_module_t **module, int *priority); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +mca_pmix_external_component_t mca_pmix_external_component = { + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .base_version = { + /* Indicate that we are a pmix v1.1.0 component (which also + implies a specific MCA version) */ + + OPAL_PMIX_BASE_VERSION_2_0_0, + + /* Component name and version */ + + .mca_component_name = "external", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + + .mca_open_component = external_open, + .mca_close_component = external_close, + .mca_query_component = external_component_query, + }, + /* Next the MCA v1.0.0 component meta data */ + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } + }, + .native_launch = false +}; + +static int external_open(void) +{ + OBJ_CONSTRUCT(&mca_pmix_external_component.jobids, opal_list_t); + return OPAL_SUCCESS; +} + +static int external_close(void) +{ + OPAL_LIST_DESTRUCT(&mca_pmix_external_component.jobids); + return OPAL_SUCCESS; +} + + +static int external_component_query(mca_base_module_t **module, int *priority) +{ + char *t, *id; + + /* see if a PMIx server is present */ + if (NULL != (t = getenv("PMIX_SERVER_URI")) || 
+        NULL != (id = getenv("PMIX_ID"))) {
+        /* if PMIx is present, then we are a client and need to use it */
+        *priority = 100;
+    } else {
+        /* we could be a server, so we still need to be considered */
+        *priority = 5;
+    }
+    *module = (mca_base_module_t *)&opal_pmix_external_module;
+    return OPAL_SUCCESS;
+}
diff --git a/opal/mca/pmix/external/pmix_ext_server_north.c b/opal/mca/pmix/external/pmix_ext_server_north.c
new file mode 100644
index 00000000000..8b75dcf7013
--- /dev/null
+++ b/opal/mca/pmix/external/pmix_ext_server_north.c
@@ -0,0 +1,780 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2016 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2014-2015 Mellanox Technologies, Inc.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+#include "opal/types.h"
+
+/* the header names below were lost in extraction; they are restored from
+ * the HAVE_*_H guards that wrap them (strdup/strncpy are used below) */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "opal/dss/dss.h"
+#include "opal/mca/event/event.h"
+#include "opal/mca/hwloc/base/base.h"
+#include "opal/runtime/opal.h"
+#include "opal/runtime/opal_progress_threads.h"
+#include "opal/util/argv.h"
+#include "opal/util/error.h"
+#include "opal/util/output.h"
+#include "opal/util/proc.h"
+#include "opal/util/show_help.h"
+#include "opal/mca/pmix/base/base.h"
+#include "pmix_ext.h"
+
+#include "pmix.h"
+#include "pmix_server.h"
+
+/**** N.O.R.T.H.B.O.U.N.D I.N.T.E.R.F.A.C.E.S ****/
+
+/* These are the interfaces used by the embedded PMIx server
+ * to call up into ORTE for service requests */
+
+static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object);
+static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object,
+                                                pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object,
+                                     int status, const char msg[],
+                                     pmix_proc_t procs[], size_t nprocs,
+                                     pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       char *data, size_t ndata,
+                                       pmix_modex_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc,
+                                          const pmix_info_t info[], size_t ninfo,
+                                          pmix_modex_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_publish_fn(const pmix_proc_t *proc,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys,
+                                      const pmix_info_t info[], size_t ninfo,
+                                      pmix_lookup_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys,
+                                         const pmix_info_t info[], size_t ninfo,
+                                         pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_spawn_fn(const pmix_proc_t *proc,
+                                     const pmix_info_t job_info[], size_t ninfo,
+                                     const pmix_app_t apps[], size_t napps,
+                                     pmix_spawn_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                          const pmix_info_t info[], size_t ninfo,
+                                          pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo,
+                                            pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo,
+                                              pmix_op_cbfunc_t cbfunc, void *cbdata);
+static pmix_status_t server_listener_fn(int listening_sd,
+                                        pmix_connection_cbfunc_t cbfunc);
+
+/* Callback table for the PMIx server.  The initializer is positional, so
+ * the entries must stay in the field order of pmix_server_module_t. */
+pmix_server_module_t mymodule = {
+    server_client_connected_fn,
+    server_client_finalized_fn,
+    server_abort_fn,
+    server_fencenb_fn,
+    server_dmodex_req_fn,
+    server_publish_fn,
+    server_lookup_fn,
+    server_unpublish_fn,
+    server_spawn_fn,
+    server_connect_fn,
+    server_disconnect_fn,
+    server_register_events,
+    server_deregister_events,
+    server_listener_fn
+};
+
+/* Host (OPAL-side) server module the callbacks below forward to; every
+ * callback in this part of the file checks it for NULL before use. */
+opal_pmix_server_module_t *host_module = NULL;
+
+
+/* Completion callback handed to the host module for op-style requests.
+ * Converts the OPAL status to a PMIx status, forwards it to the PMIx
+ * callback stored in the caddy (if one was given) and releases the caddy.
+ * cbdata must be the pmix1_opalcaddy_t that was passed to the host. */
+static void opal_opcbfunc(int status, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+
+    if (NULL != opalcaddy->opcbfunc) {
+        opalcaddy->opcbfunc(pmix1_convert_opalrc(status), opalcaddy->cbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/* PMIx -> host: a client has connected.
+ * Translates the PMIx nspace/rank into an opal_process_name_t and calls
+ * the host's client_connected entry.  Returns PMIX_SUCCESS when the host
+ * provides no such entry, otherwise the converted host status. */
+static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *server_object)
+{
+    int rc;
+    opal_process_name_t proc;
+
+    if (NULL == host_module || NULL == host_module->client_connected) {
+        return PMIX_SUCCESS;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    proc.vpid = p->rank;
+
+    /* pass it up */
+    rc = host_module->client_connected(&proc, server_object);
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: a client has finalized.
+ * Same name translation as server_client_connected_fn; cbfunc/cbdata are
+ * parked in a caddy so opal_opcbfunc can relay the host's completion.
+ * The caddy is released here only if the host call itself fails;
+ * otherwise opal_opcbfunc releases it. */
+static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* server_object,
+                                                pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+
+    if (NULL == host_module || NULL == host_module->client_finalized) {
+        return PMIX_SUCCESS;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    proc.vpid = p->rank;
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* pass it up */
+    rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: a client requested an abort.
+ * p/status/msg identify the requester and its reason; procs[0..nprocs)
+ * are converted into an opal_namelist_t list owned by the caddy
+ * (PMIX_RANK_WILDCARD maps to OPAL_VPID_WILDCARD).  Returns
+ * PMIX_ERR_NOT_SUPPORTED when the host has no abort entry, otherwise the
+ * converted host status.  The caddy is released here on any failure and
+ * by opal_opcbfunc on completion. */
+static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object,
+                                     int status, const char msg[],
+                                     pmix_proc_t procs[], size_t nprocs,
+                                     pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    size_t n;
+    opal_namelist_t *nm;
+    opal_process_name_t proc;
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+
+    if (NULL == host_module || NULL == host_module->abort) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    proc.vpid = p->rank;
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_proc_t to the list of procs */
+    for (n=0; n < nprocs; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+        if (PMIX_RANK_WILDCARD == procs[n].rank) {
+            nm->name.vpid = OPAL_VPID_WILDCARD;
+        } else {
+            nm->name.vpid = procs[n].rank;
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->abort(&proc, server_object, status, msg,
+                            &opalcaddy->procs, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+    return pmix1_convert_opalrc(rc);
+}
+
+/* Release callback given to the PMIx modex callback by opmdx_response:
+ * runs the host's own release function (if any) on the host's release
+ * data, then drops the caddy. */
+static void _data_release(void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+
+    if (NULL != opalcaddy->odmdxfunc) {
+        opalcaddy->odmdxfunc(opalcaddy->ocbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/* Host -> PMIx: modex data (fence / direct modex) is available.
+ * status is an OPAL status, so it is converted with pmix1_convert_opalrc
+ * like every other OPAL->PMIx conversion in this file.  The host's
+ * release callback is stashed in the caddy so _data_release can invoke it
+ * once PMIx is done with the buffer. */
+static void opmdx_response(int status, const char *data, size_t sz, void *cbdata,
+                           opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata)
+{
+    pmix_status_t rc;
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+
+    /* OPAL status in, PMIx status out (was pmix1_convert_rc, which is the
+     * PMIx->OPAL direction) */
+    rc = pmix1_convert_opalrc(status);
+    if (NULL != opalcaddy->mdxcbfunc) {
+        opalcaddy->odmdxfunc = relcbfunc;
+        opalcaddy->ocbdata = relcbdata;
+        opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata,
+                             _data_release, opalcaddy);
+    } else {
+        /* NOTE(review): relcbfunc is not invoked on this path - confirm the
+         * host does not rely on it to free data */
+        OBJ_RELEASE(opalcaddy);
+    }
+}
+
+/* PMIx -> host: non-blocking fence.
+ * Converts procs[] into a name list and info[] into an opal_value_t list
+ * (both owned by the caddy) and forwards them with the caller's data
+ * blob; opmdx_response relays the result.  Returns
+ * PMIX_ERR_NOT_SUPPORTED when the host has no fence_nb entry. */
+static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       char *data, size_t ndata,
+                                       pmix_modex_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy;
+    size_t n;
+    opal_namelist_t *nm;
+    opal_value_t *iptr;
+    int rc;
+
+    if (NULL == host_module || NULL == host_module->fence_nb) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->mdxcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_proc_t to the list of procs */
+    for (n=0; n < nprocs; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+        if (PMIX_RANK_WILDCARD == procs[n].rank) {
+            nm->name.vpid = OPAL_VPID_WILDCARD;
+        } else {
+            nm->name.vpid = procs[n].rank;
+        }
+    }
+
+    /* convert the array of pmix_info_t to the list of info */
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &iptr->super);
+        iptr->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->fence_nb(&opalcaddy->procs, &opalcaddy->info,
+                               data, ndata, opmdx_response, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: direct modex request for process p.
+ * A host return of OPAL_ERR_IN_PROCESS is reported to PMIx as success and
+ * the caddy is kept alive for the later opmdx_response; any other
+ * non-success status releases the caddy here. */
+static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p,
+                                          const pmix_info_t info[], size_t ninfo,
+                                          pmix_modex_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+    opal_value_t *iptr;
+    size_t n;
+
+    if (NULL == host_module || NULL == host_module->direct_modex) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    if (PMIX_RANK_WILDCARD == p->rank) {
+        proc.vpid = OPAL_VPID_WILDCARD;
+    } else {
+        proc.vpid = p->rank;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->mdxcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_info_t to the list of info */
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &iptr->super);
+        iptr->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->direct_modex(&proc, &opalcaddy->info, opmdx_response, opalcaddy);
+    if (OPAL_SUCCESS != rc && OPAL_ERR_IN_PROCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+    if (OPAL_ERR_IN_PROCESS == rc) {
+        rc = OPAL_SUCCESS;
+    }
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: publish info[] on behalf of process p.
+ * Completion is relayed through opal_opcbfunc. */
+static pmix_status_t server_publish_fn(const pmix_proc_t *p,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    size_t n;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+    opal_value_t *oinfo;
+
+    if (NULL == host_module || NULL == host_module->publish) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    if (PMIX_RANK_WILDCARD == p->rank) {
+        proc.vpid = OPAL_VPID_WILDCARD;
+    } else {
+        proc.vpid = p->rank;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the info array */
+    for (n=0; n < ninfo; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        oinfo->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+/* Host -> PMIx: lookup results are available.
+ * Converts each opal_pmix_pdata_t in data into one pmix_pdata_t array
+ * slot and hands the array to the PMIx lookup callback, then releases
+ * the caddy. */
+static void opal_lkupcbfunc(int status,
+                            opal_list_t *data,
+                            void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+    pmix_status_t rc;
+    pmix_pdata_t *d=NULL;
+    size_t nd=0, n;
+    opal_pmix_pdata_t *p;
+
+    if (NULL != opalcaddy->lkupcbfunc) {
+        rc = pmix1_convert_opalrc(status);
+        /* convert any returned data */
+        if (NULL != data) {
+            nd = opal_list_get_size(data);
+            PMIX_PDATA_CREATE(d, nd);
+            n=0;
+            OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) {
+                /* convert the jobid */
+                (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid);
+                d[n].proc.rank = p->proc.vpid;
+                (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&d[n].value, &p->value);
+                /* advance to the next output slot; without this every
+                 * item overwrote d[0] and the rest stayed unfilled */
+                ++n;
+            }
+        }
+        /* NOTE(review): d is not freed after the callback - confirm whether
+         * the PMIx lookup callback takes ownership of the array */
+        opalcaddy->lkupcbfunc(rc, d, nd, opalcaddy->cbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/* PMIx -> host: look up keys on behalf of process p.
+ * keys is passed through unchanged; results come back through
+ * opal_lkupcbfunc. */
+static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys,
+                                      const pmix_info_t info[], size_t ninfo,
+                                      pmix_lookup_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+    opal_value_t *iptr;
+    size_t n;
+
+    if (NULL == host_module || NULL == host_module->lookup) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    if (PMIX_RANK_WILDCARD == p->rank) {
+        proc.vpid = OPAL_VPID_WILDCARD;
+    } else {
+        proc.vpid = p->rank;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->lkupcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_info_t to the list of info */
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &iptr->super);
+        iptr->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+
+/* PMIx -> host: unpublish keys on behalf of process p.
+ * Unlike publish/lookup, a missing host entry is reported as
+ * PMIX_SUCCESS here. */
+static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys,
+                                         const pmix_info_t info[], size_t ninfo,
+                                         pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+    opal_value_t *iptr;
+    size_t n;
+
+    if (NULL == host_module || NULL == host_module->unpublish) {
+        return PMIX_SUCCESS;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    if (PMIX_RANK_WILDCARD == p->rank) {
+        proc.vpid = OPAL_VPID_WILDCARD;
+    } else {
+        proc.vpid = p->rank;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_info_t to the list of info */
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &iptr->super);
+        iptr->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+/* Host -> PMIx: a spawn request completed.
+ * Renders the new jobid as an nspace string and forwards it, with the
+ * converted status, to the PMIx spawn callback; then releases the caddy. */
+static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+    pmix_status_t rc;
+    char nspace[PMIX_MAX_NSLEN];
+
+    if (NULL != opalcaddy->spwncbfunc) {
+        rc = pmix1_convert_opalrc(status);
+        /* convert the jobid */
+        (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid);
+        opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/* PMIx -> host: spawn the given apps on behalf of process p.
+ * job_info[] becomes the caddy's info list; each pmix_app_t is deep-copied
+ * (cmd, argv, env, per-app info) into an opal_pmix_app_t on the caddy's
+ * apps list.  The result is relayed through opal_spncbfunc. */
+static pmix_status_t server_spawn_fn(const pmix_proc_t *p,
+                                     const pmix_info_t job_info[], size_t ninfo,
+                                     const pmix_app_t apps[], size_t napps,
+                                     pmix_spawn_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+    opal_pmix_app_t *app;
+    opal_value_t *oinfo;
+    size_t k, n;
+    int rc;
+
+    if (NULL == host_module || NULL == host_module->spawn) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    if (PMIX_RANK_WILDCARD == p->rank) {
+        proc.vpid = OPAL_VPID_WILDCARD;
+    } else {
+        proc.vpid = p->rank;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->spwncbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the job info */
+    for (k=0; k < ninfo; k++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        oinfo->key = strdup(job_info[k].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &job_info[k].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* convert the apps */
+    for (n=0; n < napps; n++) {
+        app = OBJ_NEW(opal_pmix_app_t);
+        opal_list_append(&opalcaddy->apps, &app->super);
+        if (NULL != apps[n].cmd) {
+            app->cmd = strdup(apps[n].cmd);
+        }
+        app->argc = apps[n].argc;
+        if (NULL != apps[n].argv) {
+            app->argv = opal_argv_copy(apps[n].argv);
+        }
+        if (NULL != apps[n].env) {
+            app->env = opal_argv_copy(apps[n].env);
+        }
+        app->maxprocs = apps[n].maxprocs;
+        for (k=0; k < apps[n].ninfo; k++) {
+            oinfo = OBJ_NEW(opal_value_t);
+            opal_list_append(&app->info, &oinfo->super);
+            oinfo->key = strdup(apps[n].info[k].key);
+            if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &apps[n].info[k].value))) {
+                OBJ_RELEASE(opalcaddy);
+                return pmix1_convert_opalrc(rc);
+            }
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->spawn(&proc, &opalcaddy->info, &opalcaddy->apps, opal_spncbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OPAL_ERROR_LOG(rc);
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+
+/* PMIx -> host: connect the given procs.
+ * procs[] and info[] are converted onto the caddy's lists; completion is
+ * relayed through opal_opcbfunc. */
+static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_namelist_t *nm;
+    size_t n;
+    opal_value_t *oinfo;
+
+    if (NULL == host_module || NULL == host_module->connect) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_proc_t to the list of procs */
+    for (n=0; n < nprocs; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+        if (PMIX_RANK_WILDCARD == procs[n].rank) {
+            nm->name.vpid = OPAL_VPID_WILDCARD;
+        } else {
+            nm->name.vpid = procs[n].rank;
+        }
+    }
+
+    /* convert the info */
+    for (n=0; n < ninfo; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        oinfo->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->connect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+
+/* PMIx -> host: disconnect the given procs.
+ * Same conversions and caddy handling as server_connect_fn, forwarded to
+ * the host's disconnect entry. */
+static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                          const pmix_info_t info[], size_t ninfo,
+                                          pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_namelist_t *nm;
+    size_t n;
+    opal_value_t *oinfo;
+
+    if (NULL == host_module || NULL == host_module->disconnect) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_proc_t to the list of procs */
+    for (n=0; n < nprocs; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+        if (PMIX_RANK_WILDCARD == procs[n].rank) {
+            nm->name.vpid = OPAL_VPID_WILDCARD;
+        } else {
+            nm->name.vpid = procs[n].rank;
+        }
+    }
+
+    /* convert the info */
+    for (n=0; n < ninfo; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        oinfo->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->disconnect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: register for events described by info[].
+ * Returns PMIX_ERR_NOT_SUPPORTED when no host module (or no
+ * register_events entry) is available, matching the other handlers;
+ * otherwise converts info[] and relays completion through opal_opcbfunc. */
+static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo,
+                                            pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy;
+    size_t n;
+    opal_value_t *oinfo;
+    int rc;
+
+    /* guard before allocating anything: host_module is NULL until the
+     * server has been initialised and the entry itself is optional */
+    if (NULL == host_module || NULL == host_module->register_events) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->opcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the info */
+    for (n=0; n < ninfo; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        oinfo->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = host_module->register_events(&opalcaddy->info, opal_opcbfunc, opalcaddy);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_RELEASE(opalcaddy);
+    }
+
+    return pmix1_convert_opalrc(rc);
+}
+
+/* PMIx -> host: deregister events.  Not implemented: always returns
+ * PMIX_ERR_NOT_IMPLEMENTED and never invokes cbfunc. */
+static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo,
+                                              pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    return PMIX_ERR_NOT_IMPLEMENTED;
+}
+/* PMIx -> host: hand the listening socket to the host's listener entry.
+ * Returns PMIX_ERR_NOT_SUPPORTED when the host provides none. */
+static pmix_status_t server_listener_fn(int listening_sd,
+                                        pmix_connection_cbfunc_t cbfunc)
+{
+    int rc;
+
+    if (NULL == host_module || NULL == host_module->listener) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    rc = host_module->listener(listening_sd, cbfunc);
+    return pmix1_convert_opalrc(rc);
+}
diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_south.c b/opal/mca/pmix/external/pmix_ext_server_south.c
similarity index 71%
rename from opal/mca/pmix/pmix1xx/pmix1_server_south.c
rename to opal/mca/pmix/external/pmix_ext_server_south.c
index ae42de0082d..2feed763098 100644
--- a/opal/mca/pmix/pmix1xx/pmix1_server_south.c
+++ b/opal/mca/pmix/external/pmix_ext_server_south.c
@@ -1,8 +1,9 @@
 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
- * Copyright (c) 2014-2015 Research Organization for Information Science
+ * Copyright (c) 2014-2016 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
  * Copyright (c) 2014 Mellanox Technologies, Inc.
  * All rights reserved.
* $COPYRIGHT$ @@ -34,7 +35,7 @@ #include "opal/util/proc.h" #include "opal/util/show_help.h" #include "opal/mca/pmix/base/base.h" -#include "pmix1.h" +#include "pmix_ext.h" #include "pmix.h" #include "pmix_server.h" @@ -49,6 +50,18 @@ extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; static int errhdler_ref = 0; +static void completion_handler (void * cbdata) { + int * cond = (int *)cbdata; + *cond = 0; +} + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0); + static void myerr(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo) @@ -57,6 +70,7 @@ static void myerr(pmix_status_t status, opal_list_t plist, ilist; opal_namelist_t *nm; opal_value_t *iptr; + volatile int cond = 1; size_t n; /* convert the incoming status */ @@ -77,11 +91,13 @@ static void myerr(pmix_status_t status, iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(info[n].key); pmix1_value_unload(iptr, &info[n].value); - opal_list_append(&plist, &nm->super); + opal_list_append(&plist, &iptr->super); } /* call the base errhandler */ - opal_pmix_base_errhandler(rc, &plist, &ilist); + opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&cond); + PMIX_WAIT_FOR_COMPLETION(cond); + OPAL_LIST_DESTRUCT(&plist); OPAL_LIST_DESTRUCT(&ilist); } @@ -96,19 +112,41 @@ static void errreg_cbfunc(pmix_status_t status, status, errhandler_ref); } -int pmix1_server_init(opal_pmix_server_module_t *module) +int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info) { pmix_status_t rc; int dbg; + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); putenv(dbgvalue); } - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + 
PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { + PMIX_INFO_FREE(pinfo, sz); return pmix1_convert_rc(rc); } + PMIX_INFO_FREE(pinfo, sz); + /* record the host module */ host_module = module; @@ -156,10 +194,10 @@ static void opcbfunc(pmix_status_t status, void *cbdata) } int pmix1_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { opal_value_t *kv, *k2; pmix_info_t *pinfo, *pmap; @@ -168,9 +206,16 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, pmix_status_t rc; pmix1_opcaddy_t *op; opal_list_t *pmapinfo; + opal_pmix1_jobid_trkr_t *job; /* convert the jobid */ - (void)snprintf(nspace, PMIX_MAX_NSLEN, opal_convert_jobid_to_string(jobid)); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + + /* store this job in our list of known nspaces */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -218,12 +263,28 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, return pmix1_convert_rc(rc); } +void pmix1_server_deregister_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + /* found it - tell the server to deregister */ + PMIx_server_deregister_nspace(jptr->nspace); + /* now get rid of it from our list */ + 
opal_list_remove_item(&mca_pmix_external_component.jobids, &jptr->super); + OBJ_RELEASE(jptr); + return; + } + } +} int pmix1_server_register_client(const opal_process_name_t *proc, - uid_t uid, gid_t gid, - void *server_object, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { pmix_status_t rc; pmix1_opcaddy_t *op; @@ -234,7 +295,7 @@ int pmix1_server_register_client(const opal_process_name_t *proc, op->cbdata = cbdata; /* convert the jobid */ - (void)strncpy(op->p.nspace, opal_convert_jobid_to_string(proc->jobid), PMIX_MAX_NSLEN); + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); op->p.rank = proc->vpid; rc = PMIx_server_register_client(&op->p, uid, gid, server_object, @@ -245,6 +306,23 @@ int pmix1_server_register_client(const opal_process_name_t *proc, return pmix1_convert_rc(rc); } +void pmix1_server_deregister_client(const opal_process_name_t *proc) +{ + opal_pmix1_jobid_trkr_t *jptr; + pmix_proc_t p; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + /* found it - tell the server to deregister */ + (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + PMIx_server_deregister_client(&p); + return; + } + } +} + int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env) { @@ -252,7 +330,7 @@ int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env) pmix_proc_t p; /* convert the jobid */ - (void)strncpy(p.nspace, opal_convert_jobid_to_string(proc->jobid), PMIX_MAX_NSLEN); + (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = proc->vpid; rc = PMIx_server_setup_fork(&p, env); @@ -275,7 +353,7 @@ static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbd } int pmix1_server_dmodex(const opal_process_name_t *proc, - 
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) { pmix1_opcaddy_t *op; pmix_status_t rc; @@ -286,7 +364,7 @@ int pmix1_server_dmodex(const opal_process_name_t *proc, op->cbdata = cbdata; /* convert the jobid */ - (void)strncpy(op->p.nspace, opal_convert_jobid_to_string(proc->jobid), PMIX_MAX_NSLEN); + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); op->p.rank = proc->vpid; /* find the internally-cached data for this proc */ @@ -317,7 +395,7 @@ int pmix1_server_notify_error(int status, PMIX_PROC_CREATE(ps, psz); n = 0; OPAL_LIST_FOREACH(nm, procs, opal_namelist_t) { - (void)snprintf(ps[n].nspace, PMIX_MAX_NSLEN, opal_convert_jobid_to_string(nm->name.jobid)); + (void)opal_snprintf_jobid(ps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); ps[n].rank = (int)nm->name.vpid; ++n; } @@ -330,7 +408,7 @@ int pmix1_server_notify_error(int status, PMIX_PROC_CREATE(eps, esz); n = 0; OPAL_LIST_FOREACH(nm, error_procs, opal_namelist_t) { - (void)snprintf(eps[n].nspace, PMIX_MAX_NSLEN, opal_convert_jobid_to_string(nm->name.jobid)); + (void)opal_snprintf_jobid(eps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); eps[n].rank = (int)nm->name.vpid; ++n; } diff --git a/opal/mca/pmix/isolated/Makefile.am b/opal/mca/pmix/isolated/Makefile.am new file mode 100644 index 00000000000..1ecab6374d5 --- /dev/null +++ b/opal/mca/pmix/isolated/Makefile.am @@ -0,0 +1,34 @@ +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + pmix_isolated.h \ + pmix_isolated_component.c \ + pmix_isolated.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+
+# DSO build: install mca_pmix_isolated.la into $(opallibdir);
+# static build: build libmca_pmix_isolated.la as a noinst library.
+if MCA_BUILD_opal_pmix_isolated_DSO
+component_noinst =
+component_install = mca_pmix_isolated.la
+else
+component_noinst = libmca_pmix_isolated.la
+component_install =
+endif
+
+mcacomponentdir = $(opallibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_pmix_isolated_la_SOURCES = $(sources)
+mca_pmix_isolated_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_pmix_isolated_la_SOURCES =$(sources)
+libmca_pmix_isolated_la_LDFLAGS = -module -avoid-version
diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c
new file mode 100644
index 00000000000..e338bcae82e
--- /dev/null
+++ b/opal/mca/pmix/isolated/pmix_isolated.c
@@ -0,0 +1,438 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All
+ *                         rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+#include "opal/types.h"
+
+/* the header names below were lost in extraction; they are restored from
+ * the HAVE_*_H guards that wrap them (strdup is used by isolated_init) */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "opal/dss/dss.h"
+#include "opal/mca/event/event.h"
+#include "opal/mca/hwloc/base/base.h"
+#include "opal/runtime/opal.h"
+#include "opal/runtime/opal_progress_threads.h"
+#include "opal/util/argv.h"
+#include "opal/util/error.h"
+#include "opal/util/output.h"
+#include "opal/util/proc.h"
+#include "opal/util/show_help.h"
+
+#include "pmix_isolated.h"
+#include "opal/mca/pmix/base/base.h"
+#include "opal/mca/pmix/base/pmix_base_hash.h"
+
+
+/* forward declarations of the module entry points wired into
+ * opal_pmix_isolated_module below */
+static int isolated_init(void);
+static int isolated_fini(void);
+static int isolated_initialized(void);
+static int isolated_abort(int flat, const char *msg,
+                          opal_list_t *procs);
+static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid);
+static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps,
+
opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); +static int isolated_job_connect(opal_list_t *procs); +static int isolated_job_disconnect(opal_list_t *procs); +static int isolated_job_disconnect_nb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +static int isolated_resolve_peers(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs); +static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist); +static int isolated_put(opal_pmix_scope_t scope, opal_value_t *kv); +static int isolated_fence(opal_list_t *procs, int collect_data); +static int isolated_fence_nb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static int isolated_commit(void); +static int isolated_get(const opal_process_name_t *id, + const char *key, opal_list_t *info, + opal_value_t **kv); +static int isolated_get_nb(const opal_process_name_t *id, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +static int isolated_publish(opal_list_t *info); +static int isolated_publish_nb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static int isolated_lookup(opal_list_t *data, opal_list_t *info); +static int isolated_lookup_nb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static int isolated_unpublish(char **keys, opal_list_t *info); +static int isolated_unpublish_nb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static const char *isolated_get_version(void); +static int isolated_store_local(const opal_process_name_t *proc, + opal_value_t *val); +static const char *isolated_get_nspace(opal_jobid_t jobid); +static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace); + +const opal_pmix_base_module_t opal_pmix_isolated_module = { + .init = isolated_init, + .finalize = isolated_fini, + .initialized = isolated_initialized, + .abort = isolated_abort, + .commit = isolated_commit, + .fence = 
isolated_fence, + .fence_nb = isolated_fence_nb, + .put = isolated_put, + .get = isolated_get, + .get_nb = isolated_get_nb, + .publish = isolated_publish, + .publish_nb = isolated_publish_nb, + .lookup = isolated_lookup, + .lookup_nb = isolated_lookup_nb, + .unpublish = isolated_unpublish, + .unpublish_nb = isolated_unpublish_nb, + .spawn = isolated_spawn, + .spawn_nb = isolated_spawn_nb, + .connect = isolated_job_connect, + .disconnect = isolated_job_disconnect, + .disconnect_nb = isolated_job_disconnect_nb, + .resolve_peers = isolated_resolve_peers, + .resolve_nodes = isolated_resolve_nodes, + .get_version = isolated_get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = isolated_store_local, + .get_nspace = isolated_get_nspace, + .register_jobid = isolated_register_jobid +}; + +static int pmix_init_count = 0; +static opal_process_name_t pmix_pname; + +static int isolated_init(void) +{ + int rc; + opal_value_t kv; + + ++pmix_init_count; + + /* store our name in the opal_proc_t so that + * debug messages will make sense - an upper + * layer will eventually overwrite it, but that + * won't do any harm */ + pmix_pname.jobid = 1; + pmix_pname.vpid = 0; + opal_proc_set_name(&pmix_pname); + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:isolated: assigned tmp name %d %d", + OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid); + + // setup hash table + opal_pmix_base_hash_init(); + + /* save the job size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOB_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = 1; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the appnum */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_APPNUM); + kv.type = OPAL_UINT32; + kv.data.uint32 = 0; + if 
(OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_UNIV_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = 1; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOBID); + kv.type = OPAL_UINT32; + kv.data.uint32 = 1; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = 1; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_PEERS); + kv.type = OPAL_STRING; + kv.data.string = strdup("0"); + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local leader */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCALLDR); + kv.type = OPAL_UINT64; + kv.data.uint64 = 0; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); /* as for every other entry: release kv before it is re-constructed below */ + /* save our local rank */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = 0; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); /* as for every other entry: release kv before it is re-constructed below */ + /* and our node rank */ + OBJ_CONSTRUCT(&kv,
opal_value_t); + kv.key = strdup(OPAL_PMIX_NODE_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = 0; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + return OPAL_SUCCESS; + +err_exit: + return rc; +} + +static int isolated_fini(void) +{ + if (0 == pmix_init_count) { + return OPAL_SUCCESS; + } + + if (0 != --pmix_init_count) { + return OPAL_SUCCESS; + } + opal_pmix_base_hash_finalize(); + return OPAL_SUCCESS; +} + +static int isolated_initialized(void) +{ + if (0 < pmix_init_count) { + return 1; + } + return 0; +} + +static int isolated_abort(int flag, const char *msg, + opal_list_t *procs) +{ + return OPAL_SUCCESS; +} + +static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_job_connect(opal_list_t *procs) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_job_disconnect(opal_list_t *procs) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_job_disconnect_nb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_resolve_peers(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs) +{ + return OPAL_ERR_NOT_IMPLEMENTED; +} + +static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist) +{ + return OPAL_ERR_NOT_IMPLEMENTED; +} + +static int isolated_put(opal_pmix_scope_t scope, + opal_value_t *kv) +{ + int rc; + + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated_put key %s scope %d\n", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + + if (!pmix_init_count) { + return OPAL_ERROR; + } + + rc = opal_pmix_base_store(&pmix_pname, 
kv); + + return rc; +} + +static int isolated_commit(void) +{ + return OPAL_SUCCESS; +} + +static int isolated_fence(opal_list_t *procs, int collect_data) +{ + return OPAL_SUCCESS; +} + +static int isolated_fence_nb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_IMPLEMENTED; +} + +static int isolated_get(const opal_process_name_t *id, + const char *key, opal_list_t *info, + opal_value_t **kv) +{ + int rc; + opal_list_t vals; + + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated getting value for proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(*id), key); + + OBJ_CONSTRUCT(&vals, opal_list_t); + rc = opal_pmix_base_fetch(id, key, &vals); + if (OPAL_SUCCESS == rc) { + /* hand the first value to the caller, then fall through so vals is always destructed */ + *kv = (opal_value_t*)opal_list_remove_first(&vals); + } else { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated fetch from dstore failed: %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); + } + OPAL_LIST_DESTRUCT(&vals); + + return rc; +} +static int isolated_get_nb(const opal_process_name_t *id, const char *key, + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_IMPLEMENTED; +} + +static int isolated_publish(opal_list_t *info) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_publish_nb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_lookup(opal_list_t *data, opal_list_t *info) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_lookup_nb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_unpublish(char **keys, opal_list_t *info) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int isolated_unpublish_nb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return
OPAL_ERR_NOT_SUPPORTED; +} + +static const char *isolated_get_version(void) +{ + return "N/A"; +} + +static int isolated_store_local(const opal_process_name_t *proc, + opal_value_t *val) +{ + opal_pmix_base_store(proc, val); + + return OPAL_SUCCESS; +} + +static const char *isolated_get_nspace(opal_jobid_t jobid) +{ + return "N/A"; +} + +static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace) +{ + return; +} + diff --git a/opal/mca/pmix/isolated/pmix_isolated.h b/opal/mca/pmix/isolated/pmix_isolated.h new file mode 100644 index 00000000000..f66c657d1d0 --- /dev/null +++ b/opal/mca/pmix/isolated/pmix_isolated.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PMIX_ISOLATED_H +#define MCA_PMIX_ISOLATED_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/pmix/pmix.h" + + +BEGIN_C_DECLS + +OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_isolated_component; + +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_isolated_module; + + +END_C_DECLS + +#endif /* MCA_PMIX_ISOLATED_H */ diff --git a/opal/mca/pmix/isolated/pmix_isolated_component.c b/opal/mca/pmix/isolated/pmix_isolated_component.c new file mode 100644 index 00000000000..73c32602b09 --- /dev/null +++ b/opal/mca/pmix/isolated/pmix_isolated_component.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/class/opal_list.h" +#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix.h" +#include "pmix_isolated.h" + +/* + * Public string showing the pmix isolated component version number + */ +const char *opal_pmix_isolated_component_version_string = + "OPAL isolated pmix MCA component version " OPAL_VERSION; + +/* + * Local functions + */ +static int isolated_open(void); +static int isolated_close(void); +static int isolated_component_query(mca_base_module_t **module, int *priority); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +opal_pmix_base_component_t mca_pmix_isolated_component = { + .base_version = { + /* Indicate that we are a pmix v2.0.0 component (which also + implies a specific MCA version) */ + + OPAL_PMIX_BASE_VERSION_2_0_0, + + /* Component name and version */ + + .mca_component_name = "isolated", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open, close, and query functions */ + + .mca_open_component = isolated_open, + .mca_close_component = isolated_close, + .mca_query_component = isolated_component_query, + }, + /* Next the MCA v1.0.0 component meta data */ + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + +static int isolated_open(void) +{ + return OPAL_SUCCESS; +} + +static int isolated_close(void) +{ + return OPAL_SUCCESS; +} + + +static int isolated_component_query(mca_base_module_t **module, int *priority) +{ + /* if we are in a Singularity container, then we cannot spawn an + * HNP and are truly on our own and cannot call comm_spawn or + * any of its friends */ + if (NULL != getenv("SINGULARITY_CONTAINER")) { + *priority = 100; + *module = (mca_base_module_t *)&opal_pmix_isolated_module; + return OPAL_SUCCESS; + } + /* otherwise, ignore us */ + *priority = 0; +
*module = NULL; + return OPAL_ERR_TAKE_NEXT_OPTION; +} diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index f265e0157f9..faf65c6578a 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -36,6 +36,8 @@ BEGIN_C_DECLS /* provide access to the framework verbose output without * exposing the entire base */ extern int opal_pmix_verbose_output; +extern bool opal_pmix_collect_all_data; +extern bool opal_pmix_base_async_modex; extern int opal_pmix_base_exchange(opal_value_t *info, opal_pmix_pdata_t *pdat, int timeout); @@ -254,10 +256,13 @@ extern int opal_pmix_base_exchange(opal_value_t *info, * that takes into account directives and availability of * non-blocking operations */ -#define OPAL_MODEX(p, s) \ - do { \ - opal_pmix.commit(); \ - opal_pmix.fence((p), (s)); \ +#define OPAL_MODEX() \ + do { \ + opal_pmix.commit(); \ + if (!opal_pmix_base_async_modex) { \ + opal_pmix.fence(NULL, \ + opal_pmix_collect_all_data); \ + } \ } while(0); /** @@ -275,10 +280,6 @@ extern int opal_pmix_base_exchange(opal_value_t *info, } while(0); -/* callback handler for errors */ -typedef void (*opal_pmix_errhandler_fn_t)(int error); - - /************************************************************ * CLIENT APIs * ************************************************************/ @@ -533,8 +534,11 @@ typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char * SERVER APIs * ************************************************************/ -/* Initialize the server support library */ -typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module); +/* Initialize the server support library - must pass the callback + * module for the server to use, plus any attributes we want to + * pass down to it */ +typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module, + opal_list_t *info); /* Finalize the server support library */ typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void); @@ -601,6 +605,13 
@@ typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jo opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister an nspace. Instruct the PMIx server to purge + * all info relating to the provided jobid so that memory + * can be freed. Note that the server will automatically + * purge all info relating to any clients it has from + * this nspace */ +typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid); + /* Register a client process with the PMIx server library. The * expected user ID and group ID of the child process helps the * server library to properly authenticate clients as they connect @@ -620,6 +631,15 @@ typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_proc opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister a client. Instruct the PMIx server to purge + * all info relating to the provided client so that memory + * can be freed. As per above note, the server will automatically + * free all client-related data when the nspace is deregistered, + * so there is no need to call this function during normal + * finalize operations. Instead, this is provided for use + * during exception operations */ +typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc); + /* Setup the environment of a child process to be forked * by the host so it can correctly interact with the PMIx * server. The PMIx client needs some setup information @@ -666,17 +686,6 @@ typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_proce * The info array contains any further info the RM can and/or chooses * to provide. * - * If the payload and size parameters are non-NULL, then the function - * will assume that the caller intends to send the message itself. 
In - * this situation, the convenience library will simply pack the message - * for transmission, and return the payload and size in the provided - * variables (external comm should have been indicated during server_init). - * The caller will be responsible for thread protection. - * - * Otherwise, the convenience library will transmit the message to - * the identified target processes, and the function call will be - * internally thread protected. - * * The callback function will be called upon completion of the * notify_error function's actions. Note that any messages will * have been queued, but may not have been transmitted by this @@ -696,11 +705,92 @@ typedef int (*opal_pmix_base_module_server_notify_error_fn_t)(int status, /* get the version of the embedded library */ typedef const char* (*opal_pmix_base_module_get_version_fn_t)(void); -/* register an errhandler to report loss of connection to the server */ -typedef void (*opal_pmix_base_module_register_fn_t)(opal_pmix_errhandler_fn_t errhandler); +/* Register an errhandler to report errors. Three types of errors + * can be reported: + * + * (a) those that occur within the client library, but are not + * reportable via the API itself (e.g., loss of connection to + * the server). These errors typically occur during behind-the-scenes + * non-blocking operations. + * + * (b) job-related errors such as the failure of another process in + * the job or in any connected job, impending failure of hardware + * within the job's usage footprint, etc. + * + * (c) system notifications that are made available by the local + * administrators + * + * By default, only errors that directly affect the process and/or + * any process to which it is connected (via the PMIx_Connect call) + * will be reported. Options to modify that behavior can be provided + * in the info array + * + * Both the client application and the resource manager can register + * err handlers for specific errors. 
PMIx client/server calls the registered + * err handler upon receiving error notify notification (via PMIx_Notify_error) + * from the other end (Resource Manager/Client application). + * + * Multiple err handlers can be registered for different errors. PMIX returns + * an integer reference to each register handler in the callback fn. The caller + * must retain the reference in order to deregister the errhandler. + * Modification of the notification behavior can be accomplished by + * deregistering the current errhandler, and then registering it + * using a new set of info values. + * + * See pmix_types.h for a description of the notification function */ +typedef void (*opal_pmix_base_module_register_fn_t)(opal_list_t *info, + opal_pmix_notification_fn_t errhandler, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); + +/* deregister the errhandler + * errhandler_ref is the reference returned by PMIx for the errhandler + * to pmix_errhandler_reg_cbfunc_t */ +typedef void (*opal_pmix_base_module_deregister_fn_t)(int errhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); -/* deregister the errhandler */ -typedef void (*opal_pmix_base_module_deregister_fn_t)(void); +/* Report an error to a process for notification via any + * registered errhandler. The errhandler registration can be + * called by both the server and the client application. On the + * server side, the errhandler is used to report errors detected + * by PMIx to the host server for handling. On the client side, + * the errhandler is used to notify the process of errors + * reported by the server - e.g., the failure of another process. + * + * This function allows the host server to direct the server + * convenience library to notify all indicated local procs of + * an error. The error can be local, or anywhere in the cluster. + * The status indicates the error being reported. 
+ * + * The client application can also call this function to notify the + * resource manager of an error it encountered. It can request the host + * server to notify the indicated processes about the error. + * + * The first array of procs informs the server library as to which + * processes should be alerted - e.g., the processes that are in + * a directly-affected job or are connected to one that is affected. + * Passing a NULL for this array will indicate that all local procs + * are to be notified. + * + * The second array identifies the processes that will be impacted + * by the error. This could consist of a single process, or a number + * of processes. + * + * The info array contains any further info the RM can and/or chooses + * to provide. + * + * The callback function will be called upon completion of the + * notify_error function's actions. Note that any messages will + * have been queued, but may not have been transmitted by this + * time. Note that the caller is required to maintain the input + * data until the callback function has been executed! 
+*/ +typedef int (*opal_pmix_base_module_notify_error_fn_t)(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); /* store data internally, but don't push it out to be shared - this is * intended solely for storage of info on other procs that comes thru @@ -720,47 +810,50 @@ typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, co */ typedef struct { /* client APIs */ - opal_pmix_base_module_init_fn_t init; - opal_pmix_base_module_fini_fn_t finalize; - opal_pmix_base_module_initialized_fn_t initialized; - opal_pmix_base_module_abort_fn_t abort; - opal_pmix_base_module_commit_fn_t commit; - opal_pmix_base_module_fence_fn_t fence; - opal_pmix_base_module_fence_nb_fn_t fence_nb; - opal_pmix_base_module_put_fn_t put; - opal_pmix_base_module_get_fn_t get; - opal_pmix_base_module_get_nb_fn_t get_nb; - opal_pmix_base_module_publish_fn_t publish; - opal_pmix_base_module_publish_nb_fn_t publish_nb; - opal_pmix_base_module_lookup_fn_t lookup; - opal_pmix_base_module_lookup_nb_fn_t lookup_nb; - opal_pmix_base_module_unpublish_fn_t unpublish; - opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; - opal_pmix_base_module_spawn_fn_t spawn; - opal_pmix_base_module_spawn_nb_fn_t spawn_nb; - opal_pmix_base_module_connect_fn_t connect; - opal_pmix_base_module_connect_nb_fn_t connect_nb; - opal_pmix_base_module_disconnect_fn_t disconnect; - opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; - opal_pmix_base_module_resolve_peers_fn_t resolve_peers; - opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; + opal_pmix_base_module_init_fn_t init; + opal_pmix_base_module_fini_fn_t finalize; + opal_pmix_base_module_initialized_fn_t initialized; + opal_pmix_base_module_abort_fn_t abort; + opal_pmix_base_module_commit_fn_t commit; + opal_pmix_base_module_fence_fn_t fence; + opal_pmix_base_module_fence_nb_fn_t fence_nb; + opal_pmix_base_module_put_fn_t put; + 
opal_pmix_base_module_get_fn_t get; + opal_pmix_base_module_get_nb_fn_t get_nb; + opal_pmix_base_module_publish_fn_t publish; + opal_pmix_base_module_publish_nb_fn_t publish_nb; + opal_pmix_base_module_lookup_fn_t lookup; + opal_pmix_base_module_lookup_nb_fn_t lookup_nb; + opal_pmix_base_module_unpublish_fn_t unpublish; + opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; + opal_pmix_base_module_spawn_fn_t spawn; + opal_pmix_base_module_spawn_nb_fn_t spawn_nb; + opal_pmix_base_module_connect_fn_t connect; + opal_pmix_base_module_connect_nb_fn_t connect_nb; + opal_pmix_base_module_disconnect_fn_t disconnect; + opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; + opal_pmix_base_module_resolve_peers_fn_t resolve_peers; + opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; /* server APIs */ - opal_pmix_base_module_server_init_fn_t server_init; - opal_pmix_base_module_server_finalize_fn_t server_finalize; - opal_pmix_base_module_generate_regex_fn_t generate_regex; - opal_pmix_base_module_generate_ppn_fn_t generate_ppn; - opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; - opal_pmix_base_module_server_register_client_fn_t server_register_client; - opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; - opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; - opal_pmix_base_module_server_notify_error_fn_t server_notify_error; + opal_pmix_base_module_server_init_fn_t server_init; + opal_pmix_base_module_server_finalize_fn_t server_finalize; + opal_pmix_base_module_generate_regex_fn_t generate_regex; + opal_pmix_base_module_generate_ppn_fn_t generate_ppn; + opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; + opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace; + opal_pmix_base_module_server_register_client_fn_t server_register_client; + opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client; + opal_pmix_base_module_server_setup_fork_fn_t 
server_setup_fork; + opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; + opal_pmix_base_module_server_notify_error_fn_t server_notify_error; /* Utility APIs */ - opal_pmix_base_module_get_version_fn_t get_version; - opal_pmix_base_module_register_fn_t register_errhandler; - opal_pmix_base_module_deregister_fn_t deregister_errhandler; - opal_pmix_base_module_store_fn_t store_local; - opal_pmix_base_module_get_nspace_fn_t get_nspace; - opal_pmix_base_module_register_jobid_fn_t register_jobid; + opal_pmix_base_module_get_version_fn_t get_version; + opal_pmix_base_module_register_fn_t register_errhandler; + opal_pmix_base_module_deregister_fn_t deregister_errhandler; + opal_pmix_base_module_notify_error_fn_t notify_error; + opal_pmix_base_module_store_fn_t store_local; + opal_pmix_base_module_get_nspace_fn_t get_nspace; + opal_pmix_base_module_register_jobid_fn_t register_jobid; } opal_pmix_base_module_t; typedef struct { diff --git a/opal/mca/pmix/pmix112/Makefile.am b/opal/mca/pmix/pmix112/Makefile.am new file mode 100644 index 00000000000..a1e8ca037f6 --- /dev/null +++ b/opal/mca/pmix/pmix112/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = autogen.subdirs + +SUBDIRS = pmix + +sources = \ + pmix1.h \ + pmix_pmix1_component.c \ + pmix_pmix1.c \ + pmix1_client.c \ + pmix1_server_south.c \ + pmix1_server_north.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_opal_pmix_pmix112_DSO +component_noinst = +component_install = mca_pmix_pmix112.la +else +component_noinst = libmca_pmix_pmix112.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_pmix112_la_SOURCES = $(sources) +mca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS) +mca_pmix_pmix112_la_CPPFLAGS = \ + -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) +mca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) +mca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +mca_pmix_pmix112_la_DEPENDENCIES = $(mca_pmix_pmix112_la_LIBADD) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_pmix112_la_SOURCES =$(sources) +libmca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS) +libmca_pmix_pmix112_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) +libmca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) +libmca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +libmca_pmix_pmix112_la_DEPENDENCIES = $(libmca_pmix_pmix112_la_LIBADD) diff --git a/opal/mca/pmix/pmix1xx/autogen.subdirs b/opal/mca/pmix/pmix112/autogen.subdirs similarity index 100% rename from opal/mca/pmix/pmix1xx/autogen.subdirs rename to opal/mca/pmix/pmix112/autogen.subdirs diff --git a/opal/mca/pmix/pmix112/configure.m4 b/opal/mca/pmix/pmix112/configure.m4 new file mode 100644 index 00000000000..1d920b30d62 --- /dev/null +++ b/opal/mca/pmix/pmix112/configure.m4 @@ -0,0 +1,75 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_pmix_pmix112_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/pmix112/Makefile]) + + OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_pmix112_save_CPPFLAGS opal_pmix_pmix112_save_LDFLAGS opal_pmix_pmix112_save_LIBS opal_pmix_pmix112_basedir opal_pmix_pmix112_save_CFLAGS]) + + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AC_MSG_WARN([using an external pmix; disqualifiying this component]) + opal_pmix_pmix112_happy=0], + [PMIX_VERSION= + opal_pmix_pmix112_basedir=opal/mca/pmix/pmix112 + + opal_pmix_pmix112_save_CFLAGS=$CFLAGS + opal_pmix_pmix112_save_CPPFLAGS=$CPPFLAGS + opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS + opal_pmix_pmix112_save_LIBS=$LIBS + + opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + AS_IF([test "$enable_debug" = "yes"], + [opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], + [opal_pmix_pmix112_args="--disable-debug $opal_pmix_pmix112_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) + CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" + +
OPAL_CONFIG_SUBDIR([$opal_pmix_pmix112_basedir/pmix], + [$opal_pmix_pmix112_args $opal_subdir_args 'CFLAGS=$CFLAGS' 'CPPFLAGS=$CPPFLAGS'], + [opal_pmix_pmix112_happy=1], [opal_pmix_pmix112_happy=0]) + + AS_IF([test $opal_pmix_pmix112_happy -eq 1], + [PMIX_VERSION="internal v`$srcdir/$opal_pmix_pmix112_basedir/pmix/config/pmix_get_version.sh $srcdir/$opal_pmix_pmix112_basedir/pmix/VERSION`" + # Build flags for our Makefile.am + opal_pmix_pmix112_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_pmix_pmix112_basedir"'/pmix/libpmix.la' + opal_pmix_pmix112_CPPFLAGS='-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include/pmix -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix -I$(OPAL_TOP_SRCDIR)/opal/mca/pmix/pmix112/pmix' + AC_SUBST([opal_pmix_pmix112_LIBS]) + AC_SUBST([opal_pmix_pmix112_CPPFLAGS])]) + + CFLAGS=$opal_pmix_pmix112_save_CFLAGS + CPPFLAGS=$opal_pmix_pmix112_save_CPPFLAGS + LDFLAGS=$opal_pmix_pmix112_save_LDFLAGS + LIBS=$opal_pmix_pmix112_save_LIBS + ]) + + AS_IF([test $opal_pmix_pmix112_happy -eq 1], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix1xx/pmix/INSTALL b/opal/mca/pmix/pmix112/pmix/INSTALL similarity index 89% rename from opal/mca/pmix/pmix1xx/pmix/INSTALL rename to opal/mca/pmix/pmix112/pmix/INSTALL index 74db17e5ff7..005301463ff 100644 --- a/opal/mca/pmix/pmix1xx/pmix/INSTALL +++ b/opal/mca/pmix/pmix112/pmix/INSTALL @@ -4,12 +4,12 @@ Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana Copyright (c) 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. 
-Copyright (c) 2013 Intel, Inc. All rights reserved. +Copyright (c) 2013-2015 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://www.open-mpi.org/projects/pmix + http://pmix.github.io/pmix/master Developer Builds @@ -34,10 +34,10 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. You must then run: -shell$ ./autogen.pl +shell$ ./autogen.sh You will need very recent versions of GNU Autoconf, Automake, and -Libtool. If autogen.pl fails, read the HACKING file. If anything +Libtool. If autogen.sh fails, read the HACKING file. If anything else fails, read the HACKING file. Finally, we suggest reading the HACKING file. @@ -61,8 +61,8 @@ all" as a user with write permissions in the build tree, and a separate "make install" as a user with write permissions to the install tree. -Compiling support for various networks or other specific hardware may -require additional command ling flags when running configure. See the +Compiling support for specific compilers and environments may +require additional command line flags when running configure. See the README file for more details. Note that VPATH builds are fully supported. 
For example: diff --git a/opal/mca/pmix/pmix1xx/pmix/LICENSE b/opal/mca/pmix/pmix112/pmix/LICENSE similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/LICENSE rename to opal/mca/pmix/pmix112/pmix/LICENSE diff --git a/opal/mca/pmix/pmix1xx/pmix/Makefile.am b/opal/mca/pmix/pmix112/pmix/Makefile.am similarity index 80% rename from opal/mca/pmix/pmix1xx/pmix/Makefile.am rename to opal/mca/pmix/pmix112/pmix/Makefile.am index 3cc7e270c86..bd678a59df0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/Makefile.am @@ -37,6 +37,11 @@ endif man_MANS = \ man/man3/pmix_init.3 \ + man/man3/pmix_finalize.3 \ + man/man3/pmix_initialized.3 \ + man/man3/pmix_abort.3 \ + man/man3/pmix_put.3 \ + man/man3/pmix_commit.3 \ man/man7/pmix.7 \ man/man7/pmix_constants.7 @@ -52,13 +57,21 @@ include src/server/Makefile.am include src/sec/Makefile.am include src/common/Makefile.am +if PMIX_EMBEDDED_MODE +noinst_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = +else lib_LTLIBRARIES = libpmix.la - libpmix_la_SOURCES = $(headers) $(sources) libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) +endif + if ! PMIX_EMBEDDED_MODE -SUBDIRS = . test examples +SUBDIRS = . 
test +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) endif nroff: @@ -69,10 +82,13 @@ nroff: EXTRA_DIST += README INSTALL VERSION LICENSE autogen.sh \ config/pmix_get_version.sh $(man_MANS) \ + contrib/platform/optimized \ test/test_common.h test/cli_stages.h \ test/server_callbacks.h test/test_fence.h \ test/test_publish.h test/test_resolve_peers.h \ - test/test_spawn.h test/utils.h test/test_cd.h + test/test_spawn.h test/utils.h test/test_cd.h \ + examples/client.c examples/dmodex.c examples/dynamic.c \ + examples/fault.c examples/pub.c dist-hook: diff --git a/opal/mca/pmix/pmix112/pmix/NEWS b/opal/mca/pmix/pmix112/pmix/NEWS new file mode 100644 index 00000000000..2016a2073e2 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/NEWS @@ -0,0 +1,61 @@ +Copyright (c) 2015 Intel, Inc. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of PMIx since +version 1.0. + +As more fully described in the "Software Version Number" section in +the README file, PMIx typically maintains two separate version +series simultaneously - the current release and one that is locked +to only bug fixes. Since these series are semi-independent of each +other, a single NEWS-worthy item might apply to different series. For +example, a bug might be fixed in the master, and then moved to the +current release as well as the "stable" bug fix release branch. + + +Master (not on release branches yet) +------------------------------------ + + +1.1.2 +----- +- Provide a check for hwloc support - if not found, then + don't pass any topology info down to the client as it + won't know how to unpack it anyway. 
+- Fix a few places where thread safety wasn't provided +- Fix several issues identified by Paul Hargrove: + * PMIx_Init(NULL) is supported + * Incomplete PMIx_constants man page had some lingering cruft + * Missing prototype for pmix_value_load +- Fix race condition in PMIx_Get/PMIx_Get_nb +- Fix double-free error in pmix_server_commit. +- Fix PMIX_LOAD_BUFFER to be safe. + + +1.1.1 +----- +- Fix an issue where the example and test programs + were incorrectly being installed. Thanks to Orion + Poplawski for reporting it + + +1.1.0 +----- +- major update of APIs to reflect comments received from 1.0.0 + non-production release +- fixed thread-safety issues +- fixed a range of pack/unpack issues +- added unit tests for all APIs + + +1.0.0 +------ +Initial public release of draft APIs for comment - not production +intended diff --git a/opal/mca/pmix/pmix1xx/pmix/README b/opal/mca/pmix/pmix112/pmix/README similarity index 65% rename from opal/mca/pmix/pmix1xx/pmix/README rename to opal/mca/pmix/pmix112/pmix/README index ef92c4d141d..55b7c61f5e3 100644 --- a/opal/mca/pmix/pmix1xx/pmix/README +++ b/opal/mca/pmix/pmix112/pmix/README @@ -15,7 +15,7 @@ Copyright (c) 2007 Myricom, Inc. All rights reserved. Copyright (c) 2008 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. -Copyright (c) 2013 Intel, Inc. All rights reserved +Copyright (c) 2013-2015 Intel, Inc. All rights reserved $COPYRIGHT$ Additional copyrights may follow @@ -28,64 +28,51 @@ When submitting questions and problems, be sure to include as much extra information as possible. 
This web page details all the information that we request in order to provide assistance: - http://www.open-mpi.org/community/help/ + http://pmix.github.io/master/community/help/ The best way to report bugs, send comments, or ask questions is to -sign up on the user's and/or developer's mailing list (for user-level -and developer-level questions; when in doubt, send to the user's -list): +sign up on the PMIx mailing list, which is hosted by GoogleGroups: - pmix-users@open-mpi.org - pmix-devel@open-mpi.org + pmix@googlegroups.com -Because of spam, only subscribers are allowed to post to these lists +Because of spam, only subscribers are allowed to post to this list (ensure that you subscribe with and post from exactly the same e-mail address -- joe@example.com is considered different than -joe@mycomputer.example.com!). Visit these pages to subscribe to the -lists: +joe@mycomputer.example.com!). You can subscribe to the list here: - http://www.open-mpi.org/mailman/listinfo.cgi/pmix-users - http://www.open-mpi.org/mailman/listinfo.cgi/pmix-devel + https://groups.google.com/d/forum/pmix Thanks for your time. =========================================================================== -Much, much more information is also available in the PMIx FAQ: +More information is available in the PMIx FAQ: - http://www.open-mpi.org/faq/ + http://pmix.github.io/master/faq/ + +We are in early days, so please be patient - info will grow as questions +are addressed. =========================================================================== The following abbreviated list of release notes applies to this code -base as of this writing (11 November 2013): +base as of this writing (12 November 2015): General notes ------------- - The majority of PMIx's documentation is here in this file, the included man pages, and on the web site FAQ - (http://www.open-mpi.org/projects/pmix). This will eventually be supplemented - with cohesive installation and user documentation files. 
- -- Note that PMIx documentation uses the word "component" - frequently; the word "plugin" is probably more familiar to most - users. As such, end users can probably completely substitute the - word "plugin" wherever you see "component" in our documentation. - For what it's worth, we use the word "component" for historical - reasons, mainly because it is part of our acronyms and internal API - functionc calls. + (http://pmix.github.io/master/faq). This will eventually be + supplemented with cohesive installation and user documentation files. - Systems that have been tested are: - Linux (various flavors/distros), 32 bit, with gcc - - Linux (various flavors/distros), 64 bit (x86), with gcc, Absoft, - Intel, and Portland (*) - - OS X (10.5, 10.6, 10.7), 32 and 64 bit (x86_64), with gcc and - Absoft compilers (*) + - Linux (various flavors/distros), 64 bit (x86), with gcc, Intel, + and Portland (*) + - OS X (10.7 and above), 32 and 64 bit (x86_64), with gcc (*) - (*) Be sure to read the Compiler Notes, below. - -Compiler Notes +(*) Compiler Notes -------------- - The Portland Group compilers prior to version 7.0 require the @@ -130,79 +117,32 @@ INSTALLATION OPTIONS files in /include, its libraries in /lib, etc. --disable-shared - By default, libmpi is built as a shared library, and all components - are built as dynamic shared objects (DSOs). This switch disables + By default, libpmix is built as a shared library. This switch disables this default; it is really only useful when used with --enable-static. Specifically, this option does *not* imply --enable-static; enabling static libraries and disabling shared libraries are two independent options. --enable-static - Build libmpi as a static library, and statically link in all - components. Note that this option does *not* imply + Build libpmix as a static library. Note that this option does *not* imply --disable-shared; enabling static libraries and disabling shared libraries are two independent options. 
---enable-dlopen - Build all of PMIx's components as standalone Dynamic Shared - Objects (DSO's) that are loaded at run-time. The opposite of this - option, --disable-dlopen, causes two things: - - 1. All of PMIx's components will be built as part of PMIx's - normal libraries (e.g., libmpi). - 2. PMIx will not attempt to open any DSO's at run-time. - - Note that this option does *not* imply that PMIx's libraries will be - built as static objects (e.g., libmpi.a). It only specifies the - location of PMIx's components: standalone DSOs or folded into the - PMIx libraries. You can control whether PMIx's libraries - are build as static or dynamic via --enable|disable-static and - --enable|disable-shared. - --with-platform=FILE Load configure options for the build from FILE. Options on the command line that are not in FILE are also used. Options on the command line and in FILE are replaced by what is in FILE. -MISCELLANEOUS SUPPORT LIBRARIES - ---with-libltdl[=VALUE] - This option specifies where to find the GNU Libtool libltdl support - library. The following VALUEs are permitted: - - internal: Use PMIx's internal copy of libltdl. - external: Use an external libltdl installation (rely on default - compiler and linker paths to find it) - : Same as "internal". - : Specify the location of a specific libltdl - installation to use - - By default (or if --with-libltdl is specified with no VALUE), PMIx - will build and use the copy of libltdl that it has in its source - tree. However, if the VALUE is "external", PMIx will look for - the relevant libltdl header file and library in default compiler / - linker locations. Or, VALUE can be a directory tree where the - libltdl header file and library can be found. This option allows - operating systems to include PMIx and use their default libltdl - installation instead of PMIx's bundled libltdl. - - Note that this option is ignored if --disable-dlopen is specified. 
- ---with-threads=value - Since thread support is only partially tested, it is disabled by - default. To enable threading, use "--with-threads=posix". This is - most useful when combined with --enable-mpi-thread-multiple. - Once PMIx has been built and installed, it is safe to run "make clean" and/or remove the entire build tree. VPATH and parallel builds are fully supported. -Generally speaking, the only thing that users need to do to use Open -MPI /lib is in their LD_LIBRARY_PATH. Users may need to ensure to set -LD_LIBRARY_PATH in their shell setup files (e.g., .bashrc, .cshrc) -so that non-interactive rsh/ssh-based logins will be able to find the -PMIx library. +Generally speaking, the only thing that users need to do to use PMIx +is ensure that <prefix>/lib is in their LD_LIBRARY_PATH. Users may +need to set LD_LIBRARY_PATH in their shell setup files (e.g., +.bashrc, .cshrc) so that non-interactive rsh/ssh-based logins will +be able to find the PMIx library. =========================================================================== @@ -236,22 +176,10 @@ major, minor, release, and an optional quantifier. functionality. The minor number is always included in the version number: - o Even minor release numbers are part of "super-stable" - release series (e.g., v1.4.0). Releases in super stable series - are well-tested, time-tested, and mature. Such releases are - recommended for production sites. Changes between subsequent - releases in super stable series are expected to be fairly small. - o Odd minor release numbers are part of "feature" release - series (e.g., 1.3.7). Releases in feature releases are - well-tested, but they are not necessarily time-tested or as - mature as super stable releases. Changes between subsequent - releases in feature series may be large. - * Release: The release number is the third integer in the version string (e.g., v1.2.3).
Changes in the release number typically indicate a bug fix in the code base and/or end-user - functionality. If the release number is 0, it is omitted from the - version number (e.g., v1.2 has a release number of 0). + functionality. * Quantifier: PMIx version numbers sometimes have an arbitrary string affixed to the end of the version number. Common strings @@ -267,7 +195,7 @@ major, minor, release, and an optional quantifier. indicating the number of the release candidate (e.g., v1.2.3rc4 indicates the 4th release candidate of version 1.2.3). -Althought the major, minor, and release values (and optional +Although the major, minor, and release values (and optional quantifiers) are reported in PMIx nightly snapshot tarballs, the filenames of these snapshot tarballs follow a slightly different convention. @@ -339,14 +267,14 @@ Application Binary Interface (ABI) Compatibility PMIx provides forward ABI compatibility in all versions of a given feature release series and its corresponding -super stable series. For example, on a single platform, an MPI +super stable series. For example, on a single platform, a PMIx application linked against PMIx v1.3.2 shared libraries can be updated to point to the shared libraries in any successive v1.3.x or v1.4 release and still work properly (e.g., via the LD_LIBRARY_PATH environment variable or other operating system mechanism). PMIx reserves the right to break ABI compatibility at new feature -release series. For example, the same MPI application from above +release series. For example, the same PMIx application from above (linked against PMIx v1.3.2 shared libraries) will *not* work with PMIx v1.5 shared libraries.
@@ -358,7 +286,7 @@ Common Questions Many common questions about building and using PMIx are answered on the FAQ: - http://www.open-mpi.org/faq/ + http://pmix.github.io/master/faq/ =========================================================================== @@ -372,24 +300,16 @@ When submitting questions and problems, be sure to include as much extra information as possible. This web page details all the information that we request in order to provide assistance: - http://www.open-mpi.org/community/help/ + http://pmix.github.io/master/community/help/ -User-level questions and comments should generally be sent to the -user's mailing list (users@open-mpi.org). Because of spam, only +Questions and comments should generally be sent to the PMIx mailing +list (pmix@googlegroups.com). Because of spam, only subscribers are allowed to post to this list (ensure that you subscribe with and post from *exactly* the same e-mail address -- joe@example.com is considered different than joe@mycomputer.example.com!). Visit this page to subscribe to the user's list: - http://www.open-mpi.org/mailman/listinfo.cgi/pmix-users - -Developer-level bug reports, questions, and comments should generally -be sent to the developer's mailing list (devel@open-mpi.org). Please -do not post the same question to both lists. As with the user's list, -only subscribers are allowed to post to the developer's list. Visit -the following web page to subscribe: - - http://www.open-mpi.org/mailman/listinfo.cgi/pmix-devel + https://groups.google.com/d/forum/pmix Make today an PMIx day! diff --git a/opal/mca/pmix/pmix112/pmix/VERSION b/opal/mca/pmix/pmix112/pmix/VERSION new file mode 100644 index 00000000000..d9879fc96ea --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/VERSION @@ -0,0 +1,78 @@ +# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. 
+# Copyright (c) 2013 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + +# This is the VERSION file for PMIx, describing the precise +# version of PMIx in this distribution. The various components of +# the version number below are combined to form a single version +# number string. + +# major, minor, and release are generally combined in the form +# <major>.<minor>.<release>. + +major=1 +minor=1 +release=2 + +# greek is used for alpha or beta release tags. If it is non-empty, +# it will be appended to the version number. It does not have to be +# numeric. Common examples include a1 (alpha release 1) or b1 (beta release 1). +# The only requirement is that it must be entirely printable ASCII +# characters and have no white space. + +greek= + +# If repo_rev is empty, then the repository version number will be +# obtained during "make dist" via the "git describe --tags --always" +# command, or with the date (if "git describe" fails) in the form of +# "date<date>". + +repo_rev=git17ae5a4 + +# If tarball_version is not empty, it is used as the version string in +# the tarball filename, regardless of all other versions listed in +# this file. For example, if tarball_version is empty, the tarball +# filename will be of the form +# pmix-<major>.<minor>.<release><greek>.tar.*. However, if +# tarball_version is not empty, the tarball filename will be of the +# form pmix-<tarball_version>.tar.*. + +tarball_version= + +# The date when this release was created + +date="Dec 12, 2015" + +# The shared library version of each of PMIx's public libraries. +# These versions are maintained in accordance with the "Library +# Interface Versions" chapter from the GNU Libtool documentation: +# +# - If the library source code has changed at all since the last +# update, then increment revision (`c:r:a' becomes `c:r+1:a'). +# +# - If any interfaces have been added, removed, or changed since +# the last update, increment current, and set revision to 0.
+# +# - If any interfaces have been added since the last public release, +# then increment age. +# +# - If any interfaces have been removed since the last public release, +# then set age to 0. +# +# All changes in these version numbers are dictated by the PMIx +# release managers (not individual developers). Notes: + +# 1. Since these version numbers are associated with *releases*, the +# version numbers maintained on the PMIx Github trunk (and developer +# branches) is always 0:0:0 for all libraries. + +# 2. The version number of libpmix refers to the public pmix interfaces. +# It does not refer to any internal interfaces. + +# Version numbers are described in the Libtool current:revision:age +# format. + +libpmix_so_version=2:1:0 diff --git a/opal/mca/pmix/pmix1xx/pmix/autogen.sh b/opal/mca/pmix/pmix112/pmix/autogen.sh similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/autogen.sh rename to opal/mca/pmix/pmix112/pmix/autogen.sh diff --git a/opal/mca/pmix/pmix1xx/pmix/config/Makefile.am b/opal/mca/pmix/pmix112/pmix/config/Makefile.am similarity index 90% rename from opal/mca/pmix/pmix1xx/pmix/config/Makefile.am rename to opal/mca/pmix/pmix112/pmix/config/Makefile.am index 7c81a547406..e78b92de62a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/config/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/config/Makefile.am @@ -1,5 +1,7 @@ # PMIx copyrights: # Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# ######################### # @@ -31,6 +33,7 @@ EXTRA_DIST += \ config/pmix_check_attributes.m4 \ config/pmix_check_broken_qsort.m4 \ config/pmix_check_compiler_version.m4 \ + config/pmix_check_icc.m4 \ config/pmix_check_ident.m4 \ config/pmix_check_munge.m4 \ config/pmix_check_package.m4 \ diff --git a/opal/mca/pmix/pmix1xx/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix112/pmix/config/c_get_alignment.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/c_get_alignment.m4 rename to opal/mca/pmix/pmix112/pmix/config/c_get_alignment.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/distscript.sh b/opal/mca/pmix/pmix112/pmix/config/distscript.sh similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/distscript.sh rename to opal/mca/pmix/pmix112/pmix/config/distscript.sh diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix.m4 index b415b718705..4c2e757f4b0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 @@ -105,7 +105,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # replaced, not the entire file. AC_CONFIG_HEADERS(pmix_config_prefix[include/private/autogen/config.h]) AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h]) - AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/pmix_common.h]) # What prefix are we using? 
AC_MSG_CHECKING([for pmix symbol prefix]) diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_attributes.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_attributes.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_broken_qsort.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_broken_qsort.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 new file mode 100644 index 00000000000..e8a06b25148 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 @@ -0,0 +1,62 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+AC_DEFUN([PMIX_CHECK_ICC_VARARGS],[
+dnl PMIX_CHECK_ICC_VARARGS: build and run a small varargs program and abort configure if it does not work.
+dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since
+dnl va_start was miscompiled...
+dnl A failed run, a missing conftestval file, or cross-compiling sets pmix_ac_icc_varargs=1, which is fatal below.
+AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments])
+AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+void func (int c, char * f, ...)
+{
+    va_list arglist;
+    va_start (arglist, f);
+    /* vprintf (f, arglist); */
+    va_end (arglist);
+}
+
+int main ()
+{
+    FILE *f;
+    func (4711, "Help %d [%s]\n", 10, "ten");
+    f=fopen ("conftestval", "w");
+    if (!f) exit (1);
+    return 0;
+}
+
+],[AS_IF([test -f conftestval], [pmix_ac_icc_varargs=0], [pmix_ac_icc_varargs=1])],[pmix_ac_icc_varargs=1],[pmix_ac_icc_varargs=1])
+
+if test "$pmix_ac_icc_varargs" = "1"; then
+    AC_MSG_WARN([*** Problem running configure test!])
+    AC_MSG_WARN([*** Your icc-8.1 compiler seems to miscompile va_start!])
+    AC_MSG_WARN([*** Please upgrade compiler to at least version 8.1.027])
+    AC_MSG_ERROR([*** Cannot continue.])
+fi
+
+AC_MSG_RESULT([yes])
+
+rm -rf conftest*])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_ident.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_ident.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_munge.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_munge.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_package.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_package.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4
b/opal/mca/pmix/pmix112/pmix/config/pmix_check_sasl.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_sasl.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_vendor.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_vendor.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_visibility.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_check_visibility.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_ensure_contains_optflags.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_ensure_contains_optflags.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh b/opal/mca/pmix/pmix112/pmix/config/pmix_get_version.sh similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh rename to opal/mca/pmix/pmix112/pmix/config/pmix_get_version.sh diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_load_platform.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_load_platform.m4 new file mode 100644 index 00000000000..53d4afbf7d0 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_load_platform.m4 @@ -0,0 +1,81 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of 
Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+dnl Copyright (c) 2015 Research Organization for Information Science
+dnl                         and Technology (RIST). All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+# PMIX_LOAD_PLATFORM()
+# --------------------
+AC_DEFUN([PMIX_LOAD_PLATFORM], [
+    dnl Handle --with-platform=FILE: resolve FILE, log it, then source it into the running configure shell.
+    AC_ARG_WITH([platform],
+        [AC_HELP_STRING([--with-platform=FILE],
+                        [Load options for build from FILE. Options on the
+                         command line not in FILE are used. Options on the
+                         command line and in FILE are replaced by what is
+                         in FILE.])])
+
+    if test "$with_platform" = "yes" ; then
+        AC_MSG_ERROR([--with-platform argument must include FILE option])
+    elif test "$with_platform" = "no" ; then
+        AC_MSG_ERROR([--without-platform is not a valid argument])
+    elif test "$with_platform" != "" ; then
+        # unless the path is absolute or starts with "..", try contrib/platform in the source tree first
+        if test ! "`echo $with_platform | cut -c1`" = "/" && test ! "`echo $with_platform | cut -c1-2`" = ".." ; then
+            if test -r "${srcdir}/contrib/platform/$with_platform" ; then
+                with_platform="${srcdir}/contrib/platform/$with_platform"
+            fi
+        fi
+
+        # make sure file exists
+        if test ! -r "$with_platform" ; then
+            AC_MSG_ERROR([platform file $with_platform not found])
+        fi
+
+        # record the file name and its contents in the configure log before sourcing it
+        PMIX_LOG_MSG([Loading environment file $with_platform, with contents below])
+        PMIX_LOG_FILE([$with_platform])
+
+        # setup by getting full pathname for the platform directories
+        platform_base="`dirname $with_platform`"
+        platform_file="`basename $with_platform`"
+        # get full pathname of where we are so we can return
+        platform_savedir="`pwd`"
+        # go to where the platform file is located
+        cd "$platform_base"
+        # get the full path to this location
+        platform_file_dir=`pwd`
+
+        . ./"$platform_file"
+
+        # see if they left us a name
+        if test "$PMIX_PLATFORM_LOADED" != "" ; then
+            platform_loaded="$PMIX_PLATFORM_LOADED"
+        else
+            platform_loaded="$with_platform"
+        fi
+        echo "Loaded platform arguments for $platform_loaded"
+        PMIX_LOG_MSG([Loaded platform arguments for $platform_loaded])
+
+        # NOTE(review): no default mca param file lookup follows here; platform_file_dir is not read again in this macro - confirm whether sourced platform files rely on it
+
+        # return to where we started
+        cd "$platform_savedir"
+    fi
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_search_libs.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_search_libs.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_search_libs.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_search_libs.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_cc.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_cc.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_hwloc.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_setup_hwloc.m4 similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_hwloc.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_setup_hwloc.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_setup_libevent.m4 similarity index 100% rename from
opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_libevent.m4 rename to opal/mca/pmix/pmix112/pmix/config/pmix_setup_libevent.m4 diff --git a/opal/mca/pmix/pmix1xx/pmix/configure.ac b/opal/mca/pmix/pmix112/pmix/configure.ac similarity index 97% rename from opal/mca/pmix/pmix1xx/pmix/configure.ac rename to opal/mca/pmix/pmix112/pmix/configure.ac index dc86d4d8ca5..78e1cae0613 100644 --- a/opal/mca/pmix/pmix1xx/pmix/configure.ac +++ b/opal/mca/pmix/pmix112/pmix/configure.ac @@ -41,6 +41,10 @@ AC_CONFIG_AUX_DIR(./config) # -I in ACLOCAL_AMFLAGS in the top-level Makefile.am. AC_CONFIG_MACRO_DIR(./config) +# Get our platform support file. This has to be done very, very early +# because it twiddles random bits of autoconf +PMIX_LOAD_PLATFORM + # setup configure options (e.g., show_title and friends) PMIX_CONFIGURE_SETUP pmix_show_title "Configuring PMIx" @@ -203,8 +207,7 @@ AC_SUBST([CONFIGURE_DEPENDENCIES], ['$(top_srcdir)/VERSION']) AC_SUBST([libpmix_so_version]) AC_CONFIG_FILES(pmix_config_prefix[test/Makefile] - pmix_config_prefix[test/simple/Makefile] - pmix_config_prefix[examples/Makefile]) + pmix_config_prefix[test/simple/Makefile]) pmix_show_title "Configuration complete" diff --git a/opal/mca/pmix/pmix112/pmix/contrib/platform/optimized b/opal/mca/pmix/pmix112/pmix/contrib/platform/optimized new file mode 100644 index 00000000000..e2c6fd0d205 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/contrib/platform/optimized @@ -0,0 +1,3 @@ +enable_mem_debug=no +enable_mem_profile=no +enable_debug=no diff --git a/opal/mca/pmix/pmix1xx/pmix/contrib/pmix-valgrind.supp b/opal/mca/pmix/pmix112/pmix/contrib/pmix-valgrind.supp similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/contrib/pmix-valgrind.supp rename to opal/mca/pmix/pmix112/pmix/contrib/pmix-valgrind.supp diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/client.c b/opal/mca/pmix/pmix112/pmix/examples/client.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/examples/client.c rename to 
opal/mca/pmix/pmix112/pmix/examples/client.c index 7357167640a..de7bece3e70 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/client.c +++ b/opal/mca/pmix/pmix112/pmix/examples/client.c @@ -23,8 +23,6 @@ * */ -#include - #define _GNU_SOURCE #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/dmodex.c b/opal/mca/pmix/pmix112/pmix/examples/dmodex.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/examples/dmodex.c rename to opal/mca/pmix/pmix112/pmix/examples/dmodex.c index fd97909698f..677e375d099 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/dmodex.c +++ b/opal/mca/pmix/pmix112/pmix/examples/dmodex.c @@ -23,7 +23,6 @@ * */ -#include #include #define _GNU_SOURCE diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/dynamic.c b/opal/mca/pmix/pmix112/pmix/examples/dynamic.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/examples/dynamic.c rename to opal/mca/pmix/pmix112/pmix/examples/dynamic.c index e8ee0f3e485..39a900402af 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/dynamic.c +++ b/opal/mca/pmix/pmix112/pmix/examples/dynamic.c @@ -23,7 +23,6 @@ * */ -#include #include #define _GNU_SOURCE diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/fault.c b/opal/mca/pmix/pmix112/pmix/examples/fault.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/examples/fault.c rename to opal/mca/pmix/pmix112/pmix/examples/fault.c index 4735118366e..11bc611df5b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/fault.c +++ b/opal/mca/pmix/pmix112/pmix/examples/fault.c @@ -23,7 +23,6 @@ * */ -#include #include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/pub.c b/opal/mca/pmix/pmix112/pmix/examples/pub.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/examples/pub.c rename to opal/mca/pmix/pmix112/pmix/examples/pub.c index e369eb3e09f..482a363bbf2 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/pub.c +++ b/opal/mca/pmix/pmix112/pmix/examples/pub.c @@ -23,7 +23,6 @@ * */ -#include #include #include #include 
diff --git a/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am b/opal/mca/pmix/pmix112/pmix/include/Makefile.am similarity index 55% rename from opal/mca/pmix/pmix1xx/pmix/include/Makefile.am rename to opal/mca/pmix/pmix112/pmix/include/Makefile.am index 360a1a62a33..296a0ce6755 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/include/Makefile.am @@ -1,5 +1,7 @@ # # Copyright 2015 Intel, Inc. All rights reserved +# Copyright 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ # @@ -18,11 +20,18 @@ include_HEADERS = \ include/pmix_server.h \ include/pmi.h \ include/pmi2.h + +headers += \ + include/private/align.h \ + include/private/hash_string.h \ + include/private/pmix_socket_errno.h \ + include/private/pmix_stdint.h \ + include/private/prefetch.h \ + include/private/types.h include_pmixdir = $(includedir)/pmix include_pmix_HEADERS = \ - include/pmix/rename.h -nodist_include_pmix_HEADERS = \ - include/pmix/pmix_common.h + include/pmix/rename.h \ + include/pmix/pmix_common.h include_pmix_autogendir = $(includedir)/pmix/autogen include_pmix_autogen_HEADERS = \ @@ -30,13 +39,8 @@ include_pmix_autogen_HEADERS = \ include/pmix/autogen/pmix_config_bottom.h nodist_include_pmix_autogen_HEADERS = \ include/pmix/autogen/config.h - -noinst_HEADERS = \ - include/private/align.h \ - include/private/hash_string.h \ - include/private/pmix_socket_errno.h \ - include/private/pmix_stdint.h \ - include/private/prefetch.h \ - include/private/types.h +include_private_autogendir = $(includedir)/private/autogen +nodist_include_private_autogen_HEADERS = \ + include/private/autogen/config.h endif ! 
PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmi.h b/opal/mca/pmix/pmix112/pmix/include/pmi.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/pmi.h rename to opal/mca/pmix/pmix112/pmix/include/pmi.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmi2.h b/opal/mca/pmix/pmix112/pmix/include/pmi2.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/pmi2.h rename to opal/mca/pmix/pmix112/pmix/include/pmi2.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix.h b/opal/mca/pmix/pmix112/pmix/include/pmix.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix.h rename to opal/mca/pmix/pmix112/pmix/include/pmix.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in b/opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in similarity index 92% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in rename to opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in index a68887ff2cc..b6f5637640d 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in +++ b/opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in @@ -69,7 +69,7 @@ /* Maybe before gcc 2.95 too */ #ifdef PMIX_HAVE_ATTRIBUTE_UNUSED -#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED +#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) #else @@ -82,7 +82,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_MALLOC -#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC +#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) #else @@ -95,7 +95,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_CONST -#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST +#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST #elif defined(__GNUC__) # define 
__PMIX_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) #else @@ -108,7 +108,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_PURE -#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE +#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) #else @@ -121,7 +121,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_DEPRECATED -#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED +#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3) #else @@ -178,6 +178,12 @@ /* The pmix symbol prefix in all caps */ #undef PMIX_SYM_PREFIX_CAPS +/* ensure we have the version info available for external users */ +#undef PMIX_MAJOR_VERSION +#undef PMIX_MINOR_VERSION +#undef PMIX_RELEASE_VERSION + + #undef BEGIN_C_DECLS #undef END_C_DECLS #if defined(c_plusplus) || defined(__cplusplus) diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_bottom.h b/opal/mca/pmix/pmix112/pmix/include/pmix/autogen/pmix_config_bottom.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_bottom.h rename to opal/mca/pmix/pmix112/pmix/include/pmix/autogen/pmix_config_bottom.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_top.h b/opal/mca/pmix/pmix112/pmix/include/pmix/autogen/pmix_config_top.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_top.h rename to opal/mca/pmix/pmix112/pmix/include/pmix/autogen/pmix_config_top.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in b/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h similarity index 94% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in rename to opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h index 0216e34aa24..407f4b4f930 100644 --- 
a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in +++ b/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h @@ -1,3 +1,4 @@ +/* include/pmix/pmix_common.h. Generated from pmix_common.h.in by configure. */ /* * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * @@ -55,25 +56,6 @@ #include /* for struct timeval */ #endif -#ifndef PMIX_CONFIG_H - -/* ensure we have the version info available for external users */ -#undef PMIX_MAJOR_VERSION -#undef PMIX_MINOR_VERSION -#undef PMIX_RELEASE_VERSION - -#endif - -#undef BEGIN_C_DECLS -#undef END_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else -#define BEGIN_C_DECLS /* empty */ -#define END_C_DECLS /* empty */ -#endif - BEGIN_C_DECLS /**** PMIX CONSTANTS ****/ @@ -183,7 +165,7 @@ BEGIN_C_DECLS /**** PMIX ERROR CONSTANTS ****/ /* PMIx errors are always negative, with 0 reserved for success */ -#define PMIX_ERROR_MIN -41 // set equal to number of non-zero entries in enum +#define PMIX_ERROR_MIN -42 // set equal to number of non-zero entries in enum typedef enum { PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER = PMIX_ERROR_MIN, @@ -230,6 +212,7 @@ typedef enum { PMIX_ERR_INVALID_CRED, PMIX_EXISTS, + PMIX_ERR_SILENT, PMIX_ERROR, PMIX_SUCCESS } pmix_status_t; @@ -417,8 +400,29 @@ typedef struct { /* release the memory in the value struct data field */ #define PMIX_VALUE_DESTRUCT(m) \ do { \ - if (PMIX_STRING == (m)->type && NULL != (m)->data.string) { \ - free((m)->data.string); \ + if (PMIX_STRING == (m)->type) { \ + if (NULL != (m)->data.string) { \ + free((m)->data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == (m)->type) { \ + if (NULL != (m)->data.bo.bytes) { \ + free((m)->data.bo.bytes); \ + } \ + } else if (PMIX_INFO_ARRAY == (m)->type) { \ + size_t _n; \ + pmix_info_t *_p = (pmix_info_t*)((m)->data.array.array); \ + for (_n=0; _n < (m)->data.array.size; _n++) { \ + if (PMIX_STRING == _p[_n].value.type) { \ + if (NULL != 
_p[_n].value.data.string) { \ + free(_p[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.bo.bytes) { \ + free(_p[_n].value.data.bo.bytes); \ + } \ + } \ + } \ + free(_p); \ } \ } while(0); @@ -433,6 +437,16 @@ typedef struct { } \ } while(0); +/* expose a function that is resolved in the + * PMIx library, but part of a header that + * includes internal functions - so we don't + * want to expose the entire header here + */ +extern void pmix_value_load(pmix_value_t *v, void *data, + pmix_data_type_t type); + + + /**** PMIX INFO STRUCT ****/ typedef struct { diff --git a/opal/mca/pmix/pmix112/pmix/include/pmix/rename.h b/opal/mca/pmix/pmix112/pmix/include/pmix/rename.h new file mode 100644 index 00000000000..7143865813d --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/include/pmix/rename.h @@ -0,0 +1,424 @@ +/* + * Copyright 2015 Intel, Inc. All rights reserved + */ + +#ifndef PMIX_RENAME_H +#define PMIX_RENAME_H + +#include + + +BEGIN_C_DECLS + + +/* Only enact these defines if we're actually renaming the symbols + (i.e., avoid trying to have no-op defines if we're *not* + renaming). + + Note that we don't symbol shift if we aren't in embedded mode + as that wouldn't make any sense. If we are in embedded mode, + then we aren't building the PMI-1/2 APIs as that also doesn't + make any sense +*/ + +#if PMIX_SYM_TRANSFORM + +/* Use a preprocessor two-step in order to get the prefixing right. + Make 2 macros: PMIX_NAME and PMIX_NAME_CAPS for renaming + things. */ + +#define PMIX_MUNGE_NAME(a, b) PMIX_MUNGE_NAME2(a, b) +#define PMIX_MUNGE_NAME2(a, b) a ## b +#define PMIX_NAME(name) PMIX_MUNGE_NAME(PMIX_SYM_PREFIX, pmix_ ## name) +#define PMIX_NAME_CAPS(name) PMIX_MUNGE_NAME(PMIX_SYM_PREFIX_CAPS, PMIx_ ## name) + +/* Now define all the "real" names to be the prefixed names. 
This + allows us to use the real names throughout the code base (i.e., + "pmix_"); the preprocessor will adjust to have the prefixed + name under the covers. */ + +/* PMIx APIs */ +#define PMI2_Abort PMIX_NAME_CAPS(PMI2_Abort) +#define PMI2_Finalize PMIX_NAME_CAPS(PMI2_Finalize) +#define PMI2_Info_GetJobAttr PMIX_NAME_CAPS(PMI2_Info_GetJobAttr) +#define PMI2_Info_GetJobAttrIntArray PMIX_NAME_CAPS(PMI2_Info_GetJobAttrIntArray) +#define PMI2_Info_GetNodeAttr PMIX_NAME_CAPS(PMI2_Info_GetNodeAttr) +#define PMI2_Info_GetSize PMIX_NAME_CAPS(PMI2_Info_GetSize) +#define PMI2_Info_PutNodeAttr PMIX_NAME_CAPS(PMI2_Info_PutNodeAttr) +#define PMI2_Init PMIX_NAME_CAPS(PMI2_Init) +#define PMI2_Initialized PMIX_NAME_CAPS(PMI2_Initialized) +#define PMI2_Job_Connect PMIX_NAME_CAPS(PMI2_Job_Connect) +#define PMI2_Job_Disconnect PMIX_NAME_CAPS(PMI2_Job_Disconnect) +#define PMI2_Job_GetId PMIX_NAME_CAPS(PMI2_Job_GetId) +#define PMI2_Job_GetRank PMIX_NAME_CAPS(PMI2_Job_GetRank) +#define PMI2_Job_Spawn PMIX_NAME_CAPS(PMI2_Job_Spawn) +#define PMI2_KVS_Fence PMIX_NAME_CAPS(PMI2_KVS_Fence) +#define PMI2_KVS_Get PMIX_NAME_CAPS(PMI2_KVS_Get) +#define PMI2_KVS_Put PMIX_NAME_CAPS(PMI2_KVS_Put) +#define PMI2_Nameserv_lookup PMIX_NAME_CAPS(PMI2_Nameserv_lookup) +#define PMI2_Nameserv_publish PMIX_NAME_CAPS(PMI2_Nameserv_publish) +#define PMI2_Nameserv_unpublish PMIX_NAME_CAPS(PMI2_Nameserv_unpublish) +#define PMI_Abort PMIX_NAME_CAPS(PMI_Abort) +#define PMI_Args_to_keyval PMIX_NAME_CAPS(PMI_Args_to_keyval) +#define PMI_Barrier PMIX_NAME_CAPS(PMI_Barrier) +#define PMI_Finalize PMIX_NAME_CAPS(PMI_Finalize) +#define PMI_Free_keyvals PMIX_NAME_CAPS(PMI_Free_keyvals) +#define PMI_Get_appnum PMIX_NAME_CAPS(PMI_Get_appnum) +#define PMI_Get_clique_ranks PMIX_NAME_CAPS(PMI_Get_clique_ranks) +#define PMI_Get_clique_size PMIX_NAME_CAPS(PMI_Get_clique_size) +#define PMI_Get_id PMIX_NAME_CAPS(PMI_Get_id) +#define PMI_Get_id_length_max PMIX_NAME_CAPS(PMI_Get_id_length_max) +#define PMI_Get_kvs_domain_id 
PMIX_NAME_CAPS(PMI_Get_kvs_domain_id) +#define PMI_Get_options PMIX_NAME_CAPS(PMI_Get_options) +#define PMI_Get_rank PMIX_NAME_CAPS(PMI_Get_rank) +#define PMI_Get_size PMIX_NAME_CAPS(PMI_Get_size) +#define PMI_Get_universe_size PMIX_NAME_CAPS(PMI_Get_universe_size) +#define PMI_Init PMIX_NAME_CAPS(PMI_Init) +#define PMI_Initialized PMIX_NAME_CAPS(PMI_Initialized) +#define PMI_KVS_Commit PMIX_NAME_CAPS(PMI_KVS_Commit) +#define PMI_KVS_Create PMIX_NAME_CAPS(PMI_KVS_Create) +#define PMI_KVS_Destroy PMIX_NAME_CAPS(PMI_KVS_Destroy) +#define PMI_KVS_Get PMIX_NAME_CAPS(PMI_KVS_Get) +#define PMI_KVS_Get_key_length_max PMIX_NAME_CAPS(PMI_KVS_Get_key_length_max) +#define PMI_KVS_Get_my_name PMIX_NAME_CAPS(PMI_KVS_Get_my_name) +#define PMI_KVS_Get_name_length_max PMIX_NAME_CAPS(PMI_KVS_Get_name_length_max) +#define PMI_KVS_Get_value_length_max PMIX_NAME_CAPS(PMI_KVS_Get_value_length_max) +#define PMI_KVS_Iter_first PMIX_NAME_CAPS(PMI_KVS_Iter_first) +#define PMI_KVS_Iter_next PMIX_NAME_CAPS(PMI_KVS_Iter_next) +#define PMI_KVS_Put PMIX_NAME_CAPS(PMI_KVS_Put) +#define PMI_Lookup_name PMIX_NAME_CAPS(PMI_Lookup_name) +#define PMI_Parse_option PMIX_NAME_CAPS(PMI_Parse_option) +#define PMI_Publish_name PMIX_NAME_CAPS(PMI_Publish_name) +#define PMI_Spawn_multiple PMIX_NAME_CAPS(PMI_Spawn_multiple) +#define PMI_Unpublish_name PMIX_NAME_CAPS(PMI_Unpublish_name) +#define PMIx_Abort PMIX_NAME_CAPS(Abort) +#define PMIx_Commit PMIX_NAME_CAPS(Commit) +#define PMIx_Connect PMIX_NAME_CAPS(Connect) +#define PMIx_Connect_nb PMIX_NAME_CAPS(Connect_nb) +#define PMIx_Deregister_errhandler PMIX_NAME_CAPS(Deregister_errhandler) +#define PMIx_Disconnect PMIX_NAME_CAPS(Disconnect) +#define PMIx_Disconnect_nb PMIX_NAME_CAPS(Disconnect_nb) +#define PMIx_Error_string PMIX_NAME_CAPS(Error_string) +#define PMIx_Fence PMIX_NAME_CAPS(Fence) +#define PMIx_Fence_nb PMIX_NAME_CAPS(Fence_nb) +#define PMIx_Finalize PMIX_NAME_CAPS(Finalize) +#define PMIx_Get PMIX_NAME_CAPS(Get) +#define PMIx_Get_nb 
PMIX_NAME_CAPS(Get_nb) +#define PMIx_Get_version PMIX_NAME_CAPS(Get_version) +#define PMIx_Init PMIX_NAME_CAPS(Init) +#define PMIx_Initialized PMIX_NAME_CAPS(Initialized) +#define PMIx_Lookup PMIX_NAME_CAPS(Lookup) +#define PMIx_Lookup_nb PMIX_NAME_CAPS(Lookup_nb) +#define PMIx_Notify_error PMIX_NAME_CAPS(Notify_error) +#define PMIx_Publish PMIX_NAME_CAPS(Publish) +#define PMIx_Publish_nb PMIX_NAME_CAPS(Publish_nb) +#define PMIx_Put PMIX_NAME_CAPS(Put) +#define PMIx_Register_errhandler PMIX_NAME_CAPS(Register_errhandler) +#define PMIx_Resolve_nodes PMIX_NAME_CAPS(Resolve_nodes) +#define PMIx_Resolve_peers PMIX_NAME_CAPS(Resolve_peers) +#define PMIx_Spawn PMIX_NAME_CAPS(Spawn) +#define PMIx_Spawn_nb PMIX_NAME_CAPS(Spawn_nb) +#define PMIx_Store_internal PMIX_NAME_CAPS(Store_internal) +#define PMIx_Unpublish PMIX_NAME_CAPS(Unpublish) +#define PMIx_Unpublish_nb PMIX_NAME_CAPS(Unpublish_nb) +#define PMIx_generate_ppn PMIX_NAME_CAPS(generate_ppn) +#define PMIx_generate_regex PMIX_NAME_CAPS(generate_regex) +#define PMIx_server_deregister_client PMIX_NAME_CAPS(server_deregister_client) +#define PMIx_server_deregister_nspace PMIX_NAME_CAPS(server_deregister_nspace) +#define PMIx_server_dmodex_request PMIX_NAME_CAPS(server_dmodex_request) +#define PMIx_server_finalize PMIX_NAME_CAPS(server_finalize) +#define PMIx_server_init PMIX_NAME_CAPS(server_init) +#define PMIx_server_register_client PMIX_NAME_CAPS(server_register_client) +#define PMIx_server_register_nspace PMIX_NAME_CAPS(server_register_nspace) +#define PMIx_server_setup_fork PMIX_NAME_CAPS(server_setup_fork) + +/* internal functions */ +#define pmix_argv_append PMIX_NAME(argv_append) +#define pmix_argv_append_nosize PMIX_NAME(argv_append_nosize) +#define pmix_argv_append_unique_nosize PMIX_NAME(argv_append_unique_nosize) +#define pmix_argv_copy PMIX_NAME(argv_copy) +#define pmix_argv_count PMIX_NAME(argv_count) +#define pmix_argv_delete PMIX_NAME(argv_delete) +#define pmix_argv_free PMIX_NAME(argv_free) +#define 
pmix_argv_insert PMIX_NAME(argv_insert) +#define pmix_argv_insert_element PMIX_NAME(argv_insert_element) +#define pmix_argv_join PMIX_NAME(argv_join) +#define pmix_argv_join_range PMIX_NAME(argv_join_range) +#define pmix_argv_len PMIX_NAME(argv_len) +#define pmix_argv_prepend_nosize PMIX_NAME(argv_prepend_nosize) +#define pmix_argv_split PMIX_NAME(argv_split) +#define pmix_argv_split_with_empty PMIX_NAME(argv_split_with_empty) +#define pmix_asprintf PMIX_NAME(asprintf) +#define pmix_basename PMIX_NAME(basename) +#define pmix_bcopy_csum_partial PMIX_NAME(bcopy_csum_partial) +#define pmix_bcopy_uicrc_partial PMIX_NAME(bcopy_uicrc_partial) +#define pmix_bcopy_uicsum_partial PMIX_NAME(bcopy_uicsum_partial) +#define pmix_bfrop PMIX_NAME(bfrop) +#define pmix_bfrop_buffer_extend PMIX_NAME(bfrop_buffer_extend) +#define pmix_bfrop_close PMIX_NAME(bfrop_close) +#define pmix_bfrop_copy PMIX_NAME(bfrop_copy) +#define pmix_bfrop_copy_app PMIX_NAME(bfrop_copy_app) +#define pmix_bfrop_copy_array PMIX_NAME(bfrop_copy_array) +#define pmix_bfrop_copy_bo PMIX_NAME(bfrop_copy_bo) +#define pmix_bfrop_copy_buf PMIX_NAME(bfrop_copy_buf) +#define pmix_bfrop_copy_info PMIX_NAME(bfrop_copy_info) +#define pmix_bfrop_copy_kval PMIX_NAME(bfrop_copy_kval) +#define pmix_bfrop_copy_modex PMIX_NAME(bfrop_copy_modex) +#define pmix_bfrop_copy_payload PMIX_NAME(bfrop_copy_payload) +#define pmix_bfrop_copy_pdata PMIX_NAME(bfrop_copy_pdata) +#define pmix_bfrop_copy_persist PMIX_NAME(bfrop_copy_persist) +#define pmix_bfrop_copy_proc PMIX_NAME(bfrop_copy_proc) +#define pmix_bfrop_copy_string PMIX_NAME(bfrop_copy_string) +#define pmix_bfrop_copy_topo PMIX_NAME(bfrop_copy_topo) +#define pmix_bfrop_copy_value PMIX_NAME(bfrop_copy_value) +#define pmix_bfrop_get_data_type PMIX_NAME(bfrop_get_data_type) +#define pmix_bfrop_initial_size PMIX_NAME(pmix_bfrop_initial_size) +#define pmix_bfrop_initialized PMIX_NAME(bfrop_initialized) +#define pmix_bfrop_num_reg_types PMIX_NAME(pmix_bfrop_num_reg_types) +#define 
pmix_bfrop_open PMIX_NAME(bfrop_open) +#define pmix_bfrop_pack PMIX_NAME(bfrop_pack) +#define pmix_bfrop_pack_app PMIX_NAME(bfrop_pack_app) +#define pmix_bfrop_pack_array PMIX_NAME(bfrop_pack_array) +#define pmix_bfrop_pack_bo PMIX_NAME(bfrop_pack_bo) +#define pmix_bfrop_pack_bool PMIX_NAME(bfrop_pack_bool) +#define pmix_bfrop_pack_buf PMIX_NAME(bfrop_pack_buf) +#define pmix_bfrop_pack_buffer PMIX_NAME(bfrop_pack_buffer) +#define pmix_bfrop_pack_byte PMIX_NAME(bfrop_pack_byte) +#define pmix_bfrop_pack_datatype PMIX_NAME(bfrop_pack_datatype) +#define pmix_bfrop_pack_double PMIX_NAME(bfrop_pack_double) +#define pmix_bfrop_pack_float PMIX_NAME(bfrop_pack_float) +#define pmix_bfrop_pack_info PMIX_NAME(bfrop_pack_info) +#define pmix_bfrop_pack_int PMIX_NAME(bfrop_pack_int) +#define pmix_bfrop_pack_int16 PMIX_NAME(bfrop_pack_int16) +#define pmix_bfrop_pack_int32 PMIX_NAME(bfrop_pack_int32) +#define pmix_bfrop_pack_int64 PMIX_NAME(bfrop_pack_int64) +#define pmix_bfrop_pack_kval PMIX_NAME(bfrop_pack_kval) +#define pmix_bfrop_pack_modex PMIX_NAME(bfrop_pack_modex) +#define pmix_bfrop_pack_pdata PMIX_NAME(bfrop_pack_pdata) +#define pmix_bfrop_pack_persist PMIX_NAME(bfrop_pack_persist) +#define pmix_bfrop_pack_pid PMIX_NAME(bfrop_pack_pid) +#define pmix_bfrop_pack_proc PMIX_NAME(bfrop_pack_proc) +#define pmix_bfrop_pack_sizet PMIX_NAME(bfrop_pack_sizet) +#define pmix_bfrop_pack_string PMIX_NAME(bfrop_pack_string) +#define pmix_bfrop_pack_time PMIX_NAME(bfrop_pack_time) +#define pmix_bfrop_pack_timeval PMIX_NAME(bfrop_pack_timeval) +#define pmix_bfrop_pack_topo PMIX_NAME(bfrop_pack_topo) +#define pmix_bfrop_pack_value PMIX_NAME(bfrop_pack_value) +#define pmix_bfrop_print PMIX_NAME(bfrop_print) +#define pmix_bfrop_print_app PMIX_NAME(bfrop_print_app) +#define pmix_bfrop_print_array PMIX_NAME(bfrop_print_array) +#define pmix_bfrop_print_bo PMIX_NAME(bfrop_print_bo) +#define pmix_bfrop_print_bool PMIX_NAME(bfrop_print_bool) +#define pmix_bfrop_print_buf PMIX_NAME(bfrop_print_buf) 
+#define pmix_bfrop_print_byte PMIX_NAME(bfrop_print_byte) +#define pmix_bfrop_print_double PMIX_NAME(bfrop_print_double) +#define pmix_bfrop_print_float PMIX_NAME(bfrop_print_float) +#define pmix_bfrop_print_info PMIX_NAME(bfrop_print_info) +#define pmix_bfrop_print_int PMIX_NAME(bfrop_print_int) +#define pmix_bfrop_print_int16 PMIX_NAME(bfrop_print_int16) +#define pmix_bfrop_print_int32 PMIX_NAME(bfrop_print_int32) +#define pmix_bfrop_print_int64 PMIX_NAME(bfrop_print_int64) +#define pmix_bfrop_print_int8 PMIX_NAME(bfrop_print_int8) +#define pmix_bfrop_print_kval PMIX_NAME(bfrop_print_kval) +#define pmix_bfrop_print_modex PMIX_NAME(bfrop_print_modex) +#define pmix_bfrop_print_pdata PMIX_NAME(bfrop_print_pdata) +#define pmix_bfrop_print_persist PMIX_NAME(bfrop_print_persist) +#define pmix_bfrop_print_pid PMIX_NAME(bfrop_print_pid) +#define pmix_bfrop_print_proc PMIX_NAME(bfrop_print_proc) +#define pmix_bfrop_print_size PMIX_NAME(bfrop_print_size) +#define pmix_bfrop_print_string PMIX_NAME(bfrop_print_string) +#define pmix_bfrop_print_time PMIX_NAME(bfrop_print_time) +#define pmix_bfrop_print_timeval PMIX_NAME(bfrop_print_timeval) +#define pmix_bfrop_print_topo PMIX_NAME(bfrop_print_topo) +#define pmix_bfrop_print_uint PMIX_NAME(bfrop_print_uint) +#define pmix_bfrop_print_uint16 PMIX_NAME(bfrop_print_uint16) +#define pmix_bfrop_print_uint32 PMIX_NAME(bfrop_print_uint32) +#define pmix_bfrop_print_uint64 PMIX_NAME(bfrop_print_uint64) +#define pmix_bfrop_print_uint8 PMIX_NAME(bfrop_print_uint8) +#define pmix_bfrop_print_value PMIX_NAME(bfrop_print_value) +#define pmix_bfrop_std_copy PMIX_NAME(bfrop_std_copy) +#define pmix_bfrop_store_data_type PMIX_NAME(bfrop_store_data_type) +#define pmix_bfrop_threshold_size PMIX_NAME(pmix_bfrop_threshold_size) +#define pmix_bfrop_too_small PMIX_NAME(bfrop_too_small) +#define pmix_bfrop_types PMIX_NAME(bfrop_types) +#define pmix_bfrop_type_info_t_class PMIX_NAME(bfrop_type_info_t_class) +#define pmix_bfrop_unpack 
PMIX_NAME(bfrop_unpack) +#define pmix_bfrop_unpack_app PMIX_NAME(bfrop_unpack_app) +#define pmix_bfrop_unpack_array PMIX_NAME(bfrop_unpack_array) +#define pmix_bfrop_unpack_bo PMIX_NAME(bfrop_unpack_bo) +#define pmix_bfrop_unpack_bool PMIX_NAME(bfrop_unpack_bool) +#define pmix_bfrop_unpack_buf PMIX_NAME(bfrop_unpack_buf) +#define pmix_bfrop_unpack_buffer PMIX_NAME(bfrop_unpack_buffer) +#define pmix_bfrop_unpack_byte PMIX_NAME(bfrop_unpack_byte) +#define pmix_bfrop_unpack_datatype PMIX_NAME(bfrop_unpack_datatype) +#define pmix_bfrop_unpack_double PMIX_NAME(bfrop_unpack_double) +#define pmix_bfrop_unpack_float PMIX_NAME(bfrop_unpack_float) +#define pmix_bfrop_unpack_info PMIX_NAME(bfrop_unpack_info) +#define pmix_bfrop_unpack_int PMIX_NAME(bfrop_unpack_int) +#define pmix_bfrop_unpack_int16 PMIX_NAME(bfrop_unpack_int16) +#define pmix_bfrop_unpack_int32 PMIX_NAME(bfrop_unpack_int32) +#define pmix_bfrop_unpack_int64 PMIX_NAME(bfrop_unpack_int64) +#define pmix_bfrop_unpack_kval PMIX_NAME(bfrop_unpack_kval) +#define pmix_bfrop_unpack_modex PMIX_NAME(bfrop_unpack_modex) +#define pmix_bfrop_unpack_pdata PMIX_NAME(bfrop_unpack_pdata) +#define pmix_bfrop_unpack_persist PMIX_NAME(bfrop_unpack_persist) +#define pmix_bfrop_unpack_pid PMIX_NAME(bfrop_unpack_pid) +#define pmix_bfrop_unpack_proc PMIX_NAME(bfrop_unpack_proc) +#define pmix_bfrop_unpack_sizet PMIX_NAME(bfrop_unpack_sizet) +#define pmix_bfrop_unpack_string PMIX_NAME(bfrop_unpack_string) +#define pmix_bfrop_unpack_time PMIX_NAME(bfrop_unpack_time) +#define pmix_bfrop_unpack_timeval PMIX_NAME(bfrop_unpack_timeval) +#define pmix_bfrop_unpack_topo PMIX_NAME(bfrop_unpack_topo) +#define pmix_bfrop_unpack_value PMIX_NAME(bfrop_unpack_value) +#define pmix_buffer_t_class PMIX_NAME(buffer_t_class) +#define pmix_cb_t_class PMIX_NAME(cb_t_class) +#define pmix_class_finalize PMIX_NAME(class_finalize) +#define pmix_class_initialize PMIX_NAME(class_initialize) +#define pmix_client_globals PMIX_NAME(pmix_client_globals) +#define 
pmix_client_process_nspace_blob PMIX_NAME(client_process_nspace_blob) +#define pmix_csum_partial PMIX_NAME(csum_partial) +#define pmix_dirname PMIX_NAME(dirname) +#define pmix_dmdx_local_t_class PMIX_NAME(dmdx_local_t_class) +#define pmix_dmdx_remote_t_class PMIX_NAME(dmdx_remote_t_class) +#define pmix_dmdx_reply_caddy_t_class PMIX_NAME(dmdx_reply_caddy_t_class) +#define pmix_dmdx_request_t_class PMIX_NAME(dmdx_request_t_class) +#define pmix_environ_merge PMIX_NAME(environ_merge) +#define pmix_errhandler_invoke PMIX_NAME(errhandler_invoke) +#define pmix_fd_read PMIX_NAME(fd_read) +#define pmix_fd_set_cloexec PMIX_NAME(fd_set_cloexec) +#define pmix_fd_write PMIX_NAME(fd_write) +#define pmix_globals PMIX_NAME(globals) +#define pmix_globals_finalize PMIX_NAME(globals_finalize) +#define pmix_globals_init PMIX_NAME(globals_init) +#define pmix_hash_fetch PMIX_NAME(hash_fetch) +#define pmix_hash_remove_data PMIX_NAME(hash_remove_data) +#define pmix_hash_store PMIX_NAME(hash_store) +#define pmix_hash_table_get_first_key_uint32 PMIX_NAME(hash_table_get_first_key_uint32) +#define pmix_hash_table_get_first_key_uint64 PMIX_NAME(hash_table_get_first_key_uint64) +#define pmix_hash_table_get_next_key_uint32 PMIX_NAME(hash_table_get_next_key_uint32) +#define pmix_hash_table_get_next_key_uint64 PMIX_NAME(hash_table_get_next_key_uint64) +#define pmix_hash_table_get_value_ptr PMIX_NAME(hash_table_get_value_ptr) +#define pmix_hash_table_get_value_uint32 PMIX_NAME(hash_table_get_value_uint32) +#define pmix_hash_table_get_value_uint64 PMIX_NAME(hash_table_get_value_uint64) +#define pmix_hash_table_init PMIX_NAME(hash_table_init) +#define pmix_hash_table_remove_all PMIX_NAME(hash_table_remove_all) +#define pmix_hash_table_remove_value_ptr PMIX_NAME(hash_table_remove_value_ptr) +#define pmix_hash_table_remove_value_uint32 PMIX_NAME(hash_table_remove_value_uint32) +#define pmix_hash_table_remove_value_uint64 PMIX_NAME(hash_table_remove_value_uint64) +#define pmix_hash_table_set_value_ptr 
PMIX_NAME(hash_table_set_value_ptr) +#define pmix_hash_table_set_value_uint32 PMIX_NAME(hash_table_set_value_uint32) +#define pmix_hash_table_set_value_uint64 PMIX_NAME(hash_table_set_value_uint64) +#define pmix_hash_table_t_class PMIX_NAME(hash_table_t_class) +#define pmix_home_directory PMIX_NAME(home_directory) +#define pmix_host_server PMIX_NAME(pmix_host_server) +#define pmix_initialize_crc_table PMIX_NAME(initialize_crc_table) +#define pmix_kval_t_class PMIX_NAME(kval_t_class) +#define pmix_list_insert PMIX_NAME(list_insert) +#define pmix_list_item_t_class PMIX_NAME(list_item_t_class) +#define pmix_list_join PMIX_NAME(list_join) +#define pmix_list_sort PMIX_NAME(list_sort) +#define pmix_list_splice PMIX_NAME(list_splice) +#define pmix_list_t_class PMIX_NAME(list_t_class) +#define pmix_munge_module PMIX_NAME(munge_module) +#define pmix_native_module PMIX_NAME(native_module) +#define pmix_notify_caddy_t_class PMIX_NAME(notify_caddy_t_class) +#define pmix_nrec_t_class PMIX_NAME(nrec_t_class) +#define pmix_nspace_t_class PMIX_NAME(nspace_t_class) +#define pmix_object_t_class PMIX_NAME(object_t_class) +#define pmix_os_path PMIX_NAME(os_path) +#define pmix_output PMIX_NAME(output) +#define pmix_output_close PMIX_NAME(output_close) +#define pmix_output_finalize PMIX_NAME(output_finalize) +#define pmix_output_get_verbosity PMIX_NAME(output_get_verbosity) +#define pmix_output_init PMIX_NAME(output_init) +#define pmix_output_open PMIX_NAME(output_open) +#define pmix_output_redirected_syslog_pri PMIX_NAME(pmix_output_redirected_syslog_pri) +#define pmix_output_redirected_to_syslog PMIX_NAME(output_redirected_to_syslog) +#define pmix_output_reopen PMIX_NAME(output_reopen) +#define pmix_output_reopen_all PMIX_NAME(output_reopen_all) +#define pmix_output_set_output_file_info PMIX_NAME(output_set_output_file_info) +#define pmix_output_set_verbosity PMIX_NAME(output_set_verbosity) +#define pmix_output_stream_t_class PMIX_NAME(output_stream_t_class) +#define 
pmix_output_string PMIX_NAME(output_string) +#define pmix_output_switch PMIX_NAME(output_switch) +#define pmix_output_verbose PMIX_NAME(output_verbose) +#define pmix_output_vstring PMIX_NAME(output_vstring) +#define pmix_output_vverbose PMIX_NAME(output_vverbose) +#define pmix_pack_proc_map PMIX_NAME(pack_proc_map) +#define pmix_peer_t_class PMIX_NAME(peer_t_class) +#define pmix_pending_connection_t_class PMIX_NAME(pending_connection_t_class) +#define pmix_pending_nspace_requests PMIX_NAME(pending_nspace_requests) +#define pmix_pending_resolve PMIX_NAME(pending_resolve) +#define pmix_pointer_array_add PMIX_NAME(pointer_array_add) +#define pmix_pointer_array_init PMIX_NAME(pointer_array_init) +#define pmix_pointer_array_set_item PMIX_NAME(pointer_array_set_item) +#define pmix_pointer_array_set_size PMIX_NAME(pointer_array_set_size) +#define pmix_pointer_array_t_class PMIX_NAME(pointer_array_t_class) +#define pmix_pointer_array_test_and_set_item PMIX_NAME(pointer_array_test_and_set_item) +#define pmix_rank_info_t_class PMIX_NAME(rank_info_t_class) +#define pmix_regex_parse_nodes PMIX_NAME(regex_parse_nodes) +#define pmix_regex_parse_procs PMIX_NAME(regex_parse_procs) +#define pmix_regex_range_t_class PMIX_NAME(regex_range_t_class) +#define pmix_regex_value_t_class PMIX_NAME(regex_value_t_class) +#define pmix_sec PMIX_NAME(pmix_sec) +#define pmix_sec_finalize PMIX_NAME(sec_finalize) +#define pmix_sec_init PMIX_NAME(sec_init) +#define pmix_server_abort PMIX_NAME(server_abort) +#define pmix_server_caddy_t_class PMIX_NAME(server_caddy_t_class) +#define pmix_server_commit PMIX_NAME(server_commit) +#define pmix_server_connect PMIX_NAME(server_connect) +#define pmix_server_deregister_errhandler PMIX_NAME(server_deregister_errhandler) +#define pmix_server_fence PMIX_NAME(server_fence) +#define pmix_server_get PMIX_NAME(server_get) +#define pmix_server_globals PMIX_NAME(pmix_server_globals) +#define pmix_server_lookup PMIX_NAME(server_lookup) +#define pmix_server_notify_error 
PMIX_NAME(server_notify_error) +#define pmix_server_nspace_t_class PMIX_NAME(server_nspace_t_class) +#define pmix_server_publish PMIX_NAME(server_publish) +#define pmix_server_register_errhandler PMIX_NAME(server_register_errhandler) +#define pmix_server_spawn PMIX_NAME(server_spawn) +#define pmix_server_trkr_t_class PMIX_NAME(server_trkr_t_class) +#define pmix_server_unpublish PMIX_NAME(server_unpublish) +#define pmix_setenv PMIX_NAME(setenv) +#define pmix_setup_caddy_t_class PMIX_NAME(setup_caddy_t_class) +#define pmix_shift_caddy_t_class PMIX_NAME(shift_caddy_t_class) +#define pmix_snd_caddy_t_class PMIX_NAME(snd_caddy_t_class) +#define pmix_snprintf PMIX_NAME(snprintf) +#define pmix_start_listening PMIX_NAME(start_listening) +#define pmix_start_progress_thread PMIX_NAME(start_progress_thread) +#define pmix_stop_listening PMIX_NAME(stop_listening) +#define pmix_stop_progress_thread PMIX_NAME(stop_progress_thread) +#define pmix_timer_t_class PMIX_NAME(timer_t_class) +#define pmix_tmp_directory PMIX_NAME(tmp_directory) +#define pmix_trkr_caddy_t_class PMIX_NAME(trkr_caddy_t_class) +#define pmix_uicrc_partial PMIX_NAME(uicrc_partial) +#define pmix_uicsum_partial PMIX_NAME(uicsum_partial) +#define pmix_unsetenv PMIX_NAME(unsetenv) +#define pmix_usock_finalize PMIX_NAME(usock_finalize) +#define pmix_usock_globals PMIX_NAME(pmix_usock_globals) +#define pmix_usock_init PMIX_NAME(usock_init) +#define pmix_usock_posted_recv_t_class PMIX_NAME(usock_posted_recv_t_class) +#define pmix_usock_process_msg PMIX_NAME(usock_process_msg) +#define pmix_usock_queue_t_class PMIX_NAME(usock_queue_t_class) +#define pmix_usock_recv_blocking PMIX_NAME(usock_recv_blocking) +#define pmix_usock_recv_handler PMIX_NAME(usock_recv_handler) +#define pmix_usock_recv_t_class PMIX_NAME(usock_recv_t_class) +#define pmix_usock_send_blocking PMIX_NAME(usock_send_blocking) +#define pmix_usock_send_handler PMIX_NAME(usock_send_handler) +#define pmix_usock_send_recv PMIX_NAME(usock_send_recv) +#define 
pmix_usock_send_t_class PMIX_NAME(usock_send_t_class) +#define pmix_usock_set_blocking PMIX_NAME(usock_set_blocking) +#define pmix_usock_set_nonblocking PMIX_NAME(usock_set_nonblocking) +#define pmix_usock_sr_t_class PMIX_NAME(usock_sr_t_class) +#define pmix_value_load PMIX_NAME(value_load) +#define pmix_value_unload PMIX_NAME(value_unload) +#define pmix_value_xfer PMIX_NAME(value_xfer) +#define pmix_vasprintf PMIX_NAME(vasprintf) +#define pmix_vsnprintf PMIX_NAME(vsnprintf) + + +#endif /* PMIX_SYM_TRANSFORM */ + +END_C_DECLS + +#endif /* PMIX_RENAME_H */ diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h b/opal/mca/pmix/pmix112/pmix/include/pmix_server.h similarity index 95% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h rename to opal/mca/pmix/pmix112/pmix/include/pmix_server.h index 6c8a1d61b33..984e00709bd 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix112/pmix/include/pmix_server.h @@ -308,8 +308,13 @@ typedef struct pmix_server_module_1_0_0_t { /* Initialize the server support library, and provide a * pointer to a pmix_server_module_t structure - * containing the caller's callback functions */ -pmix_status_t PMIx_server_init(pmix_server_module_t *module); + * containing the caller's callback functions. The + * array of pmix_info_t structs is used to pass + * additional info that may be required by the server + * when initializing - e.g., a user/group ID to set + * on the rendezvous file for the Unix Domain Socket */ +pmix_status_t PMIx_server_init(pmix_server_module_t *module, + pmix_info_t info[], size_t ninfo); /* Finalize the server support library. If internal comm is * in-use, the server will shut it down at this time. 
All @@ -376,6 +381,13 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister an nspace and purge all objects relating to + * it, including any client info from that nspace. This is + * intended to support persistent PMIx servers by providing + * an opportunity for the host RM to tell the PMIx server + * library to release all memory for a completed job */ +void PMIx_server_deregister_nspace(const char nspace[]); + /* Register a client process with the PMIx server library. The * expected user ID and group ID of the child process helps the * server library to properly authenticate clients as they connect @@ -394,6 +406,12 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister a client and purge all data relating to it. The + * deregister_nspace API will automatically delete all client + * info for that nspace - this API is therefore intended solely + * for use in exception cases */ +void PMIx_server_deregister_client(const pmix_proc_t *proc); + /* Setup the environment of a child process to be forked * by the host so it can correctly interact with the PMIx * server. 
The PMIx client needs some setup information diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/align.h b/opal/mca/pmix/pmix112/pmix/include/private/align.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/align.h rename to opal/mca/pmix/pmix112/pmix/include/private/align.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/autogen/README.txt b/opal/mca/pmix/pmix112/pmix/include/private/autogen/README.txt similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/autogen/README.txt rename to opal/mca/pmix/pmix112/pmix/include/private/autogen/README.txt diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/hash_string.h b/opal/mca/pmix/pmix112/pmix/include/private/hash_string.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/hash_string.h rename to opal/mca/pmix/pmix112/pmix/include/private/hash_string.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/pmix_socket_errno.h b/opal/mca/pmix/pmix112/pmix/include/private/pmix_socket_errno.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/pmix_socket_errno.h rename to opal/mca/pmix/pmix112/pmix/include/private/pmix_socket_errno.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/pmix_stdint.h b/opal/mca/pmix/pmix112/pmix/include/private/pmix_stdint.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/pmix_stdint.h rename to opal/mca/pmix/pmix112/pmix/include/private/pmix_stdint.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/prefetch.h b/opal/mca/pmix/pmix112/pmix/include/private/prefetch.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/prefetch.h rename to opal/mca/pmix/pmix112/pmix/include/private/prefetch.h diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/types.h b/opal/mca/pmix/pmix112/pmix/include/private/types.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/include/private/types.h rename to 
opal/mca/pmix/pmix112/pmix/include/private/types.h diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_abort.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_abort.3 new file mode 100644 index 00000000000..e18b0d8dfa4 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_abort.3 @@ -0,0 +1,62 @@ +.TH "pmix_abort" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Abort \- Abort the specified processes +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Abort(int\ status,\ const\ char\ msg[], +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ pmix_proc_t\ procs[],\ size_t\ nprocs); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]status\f[] : Status value to be returned. +A value of zero is permitted by PMIx, but may not be returned by some +resource managers. +.PP +\f[I]msg\f[] : A string message to be displayed +.PP +\f[I]procs\f[] : An array of pmix_proc_t structures defining the +processes to be aborted. +A \f[I]NULL\f[] for the proc array indicates that all processes in the +caller\[aq]s nspace are to be aborted. +A wildcard value for the rank in any structure indicates that all +processes in that nspace are to be aborted. +.PP +\f[I]nprocs\f[] : Number of pmix_proc_t structures in the \f[I]procs\f[] +array +.SH DESCRIPTION +.PP +Request that the provided array of procs be aborted, returning the +provided \f[I]status\f[] and printing the provided message. +A \f[I]NULL\f[] for the proc array indicates that all processes in the +caller\[aq]s nspace are to be aborted. +.PP +The response to this request is somewhat dependent on the specific +resource manager and its configuration (e.g., some resource managers +will not abort the application if the provided \f[I]status\f[] is zero +unless specifically configured to do so), and thus lies outside the +control of PMIx itself. +However, the client will inform the RM of the request that the +application be aborted, regardless of the value of the provided +\f[I]status\f[]. 
+.PP +Passing a \f[I]NULL\f[] msg parameter is allowed. +Note that race conditions caused by multiple processes calling +PMIx_Abort are left to the server implementation to resolve with regard +to which status is returned and what messages (if any) are printed. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_commit.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_commit.3 new file mode 100644 index 00000000000..ad3e3f8613e --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_commit.3 @@ -0,0 +1,35 @@ +.TH "pmix_commit" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Commit \- Push all previously \f[I]PMIx_Put\f[] values to the local +PMIx server. +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ <pmix.h> + +pmix_status_t\ PMIx_Commit(void); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]none\f[] +.SH DESCRIPTION +.PP +This is an asynchronous operation \- the library will immediately return +to the caller while the data is transmitted to the local server in the +background. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Put\f[](3) +.SH AUTHORS +PMIx.
diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_finalize.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_finalize.3 new file mode 100644 index 00000000000..46593089fde --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_finalize.3 @@ -0,0 +1,31 @@ +.TH "pmix_finalize" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Finalize \- Finalize the PMIx Client +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ <pmix.h> + +pmix_status_t\ PMIx_Finalize(void); +\f[] +.fi +.SH ARGUMENTS +.SH DESCRIPTION +.PP +Finalize the PMIx client, closing the connection with the local PMIx +server. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Init\f[](3) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_init.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_init.3 new file mode 100644 index 00000000000..3a8486c68c8 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_init.3 @@ -0,0 +1,49 @@ +.TH "pmix_init" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Init \- Initialize the PMIx Client +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ <pmix.h> + +pmix_status_t\ PMIx_Init(pmix_proc_t\ *proc); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]proc\f[] : Pointer to a pmix_proc_t object in which the +client\[aq]s namespace and rank are to be returned. +.SH DESCRIPTION +.PP +Initialize the PMIx client, returning the process identifier assigned to +this client\[aq]s application in the provided pmix_proc_t struct. +Passing a value of \f[I]NULL\f[] for this parameter is allowed if +the user wishes solely to initialize the PMIx system and does not +require return of the identifier at that time.
+.PP +When called, the PMIx client will check for the required connection +information of the local PMIx server and will establish the connection. +If the information is not found, or the server connection fails, then an +appropriate error constant will be returned. +.PP +If successful, the function will return PMIX_SUCCESS and will fill the +provided structure with the server\-assigned namespace and rank of the +process within the application. +.PP +Note that the PMIx client library is reference counted, and so multiple +calls to PMIx_Init are allowed. +Thus, one way to obtain the namespace and rank of the process is to +simply call PMIx_Init with a non\-NULL parameter. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_initialized.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_initialized.3 new file mode 100644 index 00000000000..ec35e2e0079 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_initialized.3 @@ -0,0 +1,30 @@ +.TH "pmix_initialized" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Initialized \- Check if \f[I]PMIx_Init\f[] has been called +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ <pmix.h> + +int\ PMIx_Initialized(void); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]none\f[] +.SH DESCRIPTION +.PP +Check to see if the PMIx Client library has been initialized. +.SH RETURN VALUE +.PP +Returns \f[I]true\f[] if the PMIx Client has been initialized, and +\f[I]false\f[] if not. +.SH ERRORS +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Init\f[](3) +.SH AUTHORS +PMIx.
diff --git a/opal/mca/pmix/pmix112/pmix/man/man3/pmix_put.3 b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_put.3 new file mode 100644 index 00000000000..35e41d61a09 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man3/pmix_put.3 @@ -0,0 +1,60 @@ +.TH "pmix_put" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Put \- Push a value into the client\[aq]s namespace +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ <pmix.h> + +pmix_status_t\ PMIx_Put(pmix_scope_t\ scope,\ const\ char\ key[],\ pmix_value_t\ *val); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]scope\f[] : Defines a scope for data "put" by PMIx per the +following: +.IP +.nf +\f[C] +*\ PMIX_LOCAL\ \-\ the\ data\ is\ intended\ only\ for\ other\ application +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processes\ on\ the\ same\ node.\ Data\ marked\ in\ this\ way +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ will\ not\ be\ included\ in\ data\ packages\ sent\ to\ remote\ requestors +*\ PMIX_REMOTE\ \-\ the\ data\ is\ intended\ solely\ for\ application\ processes\ on +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ remote\ nodes.\ Data\ marked\ in\ this\ way\ will\ not\ be\ shared\ with +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ other\ processes\ on\ the\ same\ node +*\ PMIX_GLOBAL\ \-\ the\ data\ is\ to\ be\ shared\ with\ all\ other\ requesting\ processes, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ regardless\ of\ location +\f[] +.fi +.PP +\f[I]key\f[] : String key identifying the information. +This can be either one of the PMIx defined attributes, or a +user\-defined value. +.PP +\f[I]val\f[] : Pointer to a pmix_value_t structure containing the data to +be pushed along with the type of the provided data. +.SH DESCRIPTION +.PP +Push a value into the client\[aq]s namespace. +The client library will cache the information locally until +\f[I]PMIx_Commit\f[] is called. +The provided scope value is passed to the local PMIx server, which will +distribute the data as directed. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success.
+On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.PP +See \[aq]pmix_common.h\[aq] for definition of the pmix_value_t +structure. +.SH SEE ALSO +.PP +\f[C]PMIx_Constants\f[](7), \f[C]PMIx_Structures\f[](7) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix112/pmix/man/man7/pmix.7 b/opal/mca/pmix/pmix112/pmix/man/man7/pmix.7 new file mode 100644 index 00000000000..d2e545af94f --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/man/man7/pmix.7 @@ -0,0 +1,35 @@ +.TH "pmix" "7" "2015\-10\-29" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +Process Management Interface \- Exascale +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ +\f[] +.fi +.SH OVERVIEW +.PP +The Process Management Interface (PMI) has been used for quite some time +as a means of exchanging wireup information needed for interprocess +communication. +Two versions (PMI\-1 and PMI\-2) have been released as part of the MPICH +effort. +While PMI\-2 demonstrates better scaling properties than its PMI\-1 +predecessor, attaining rapid launch and wireup of the roughly 1M +processes executing across 100k nodes expected for exascale operations +remains challenging. +.PP +PMI Exascale (PMIx) represents an attempt to resolve these questions by +providing an extended version of the PMI standard specifically designed +to support clusters up to and including exascale sizes. +The overall objective of the project is not to branch the existing +pseudo\-standard definitions \- in fact, PMIx fully supports both of the +existing PMI\-1 and PMI\-2 APIs \- but rather to (a) augment and extend +those APIs to eliminate some current restrictions that impact +scalability, and (b) provide a reference implementation of the +PMI\-server that demonstrates the desired level of scalability. +.SH SEE ALSO +.SH AUTHORS +PMIx. 
diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 b/opal/mca/pmix/pmix112/pmix/man/man7/pmix_constants.7 similarity index 92% rename from opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 rename to opal/mca/pmix/pmix112/pmix/man/man7/pmix_constants.7 index 3de75d16b8c..4561121ac52 100644 --- a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 +++ b/opal/mca/pmix/pmix112/pmix/man/man7/pmix_constants.7 @@ -1,4 +1,4 @@ -.TH "pmix_constants" "7" "2015\-09\-16" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.TH "pmix_constants" "7" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP PMIx Constants @@ -87,8 +87,6 @@ Unlike other data transfer interfaces, atomic operations are aware of the data formatting at the target memory region. .SH SEE ALSO .PP -\f[C]pmix\f[](7), \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), -\f[C]fi_domain\f[](3), \f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), -\f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3), \f[C]fi_mr\f[](3) +\f[C]pmix\f[](7) .SH AUTHORS PMIx. 
diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/buffer_ops.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/buffer_ops.h index a0ea21d8ad6..0d05fa7241c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h +++ b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/buffer_ops.h @@ -38,8 +38,6 @@ BEGIN_C_DECLS -PMIX_DECLSPEC extern const char pmix_version_string[]; - /* internally used object for transferring data * to/from the server and for storing in the * hash tables */ @@ -68,6 +66,8 @@ PMIX_DECLSPEC pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, (b)->bytes_allocated = (s); \ (b)->pack_ptr = ((char*)(b)->base_ptr) + (s); \ (b)->unpack_ptr = (b)->base_ptr; \ + (d) = NULL; \ + (s) = 0; \ } while(0); #define PMIX_UNLOAD_BUFFER(b, d, s) \ diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/copy.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/copy.c rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/copy.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal.h b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/internal.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal.h rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/internal.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal_functions.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/internal_functions.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal_functions.c rename to 
opal/mca/pmix/pmix112/pmix/src/buffer_ops/internal_functions.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/open_close.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/open_close.c rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/open_close.c index bd0a263032a..3b408a88e99 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/open_close.c +++ b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/open_close.c @@ -34,13 +34,6 @@ #include "src/util/argv.h" #include "src/buffer_ops/internal.h" -#if PMIX_CC_USE_PRAGMA_IDENT -#pragma ident PMIX_VERSION -#elif PMIX_CC_USE_IDENT -#ident PMIX_VERSION -#endif -const char pmix_version_string[] = PMIX_VERSION; - /** * globals diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/pack.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/pack.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/print.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/print.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/print.c rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/print.c index bda2232ee9e..e63756376cc 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/print.c +++ b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/print.c @@ -663,7 +663,7 @@ int pmix_bfrop_print_pdata(char **output, char *prefix, pmix_bfrop_print_proc(&tmp1, NULL, &src->proc, PMIX_PROC); pmix_bfrop_print_value(&tmp2, NULL, &src->value, PMIX_VALUE); - asprintf(output, "%sPROC: %s KEY: %s %s", prefix, tmp1, src->key, + asprintf(output, "%s %s KEY: %s %s", prefix, tmp1, src->key, (NULL == tmp2) ? 
"NULL" : tmp2); if (NULL != tmp1) { free(tmp1); @@ -689,6 +689,13 @@ int pmix_bfrop_print_app(char **output, char *prefix, int pmix_bfrop_print_proc(char **output, char *prefix, pmix_proc_t *src, pmix_data_type_t type) { + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + asprintf(output, "%sPROC: %s:%d", prefx, src->nspace, src->rank); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/types.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/types.h index 62c91368461..d9734969bf7 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h +++ b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/types.h @@ -34,7 +34,7 @@ #include "src/class/pmix_object.h" #include "src/class/pmix_pointer_array.h" #include "src/class/pmix_list.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix112/pmix/src/buffer_ops/unpack.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c rename to opal/mca/pmix/pmix112/pmix/src/buffer_ops/unpack.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/class/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/class/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.c b/opal/mca/pmix/pmix112/pmix/src/class/pmix_hash_table.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.c rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_hash_table.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h b/opal/mca/pmix/pmix112/pmix/src/class/pmix_hash_table.h similarity index 99% rename from 
opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_hash_table.h index 55168ab4092..1c0a2b32d08 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h +++ b/opal/mca/pmix/pmix112/pmix/src/class/pmix_hash_table.h @@ -40,7 +40,7 @@ #include "src/class/pmix_list.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.c b/opal/mca/pmix/pmix112/pmix/src/class/pmix_list.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.c rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_list.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.h b/opal/mca/pmix/pmix112/pmix/src/class/pmix_list.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.h rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_list.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.c b/opal/mca/pmix/pmix112/pmix/src/class/pmix_object.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.c rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_object.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix112/pmix/src/class/pmix_object.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.h rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_object.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.c b/opal/mca/pmix/pmix112/pmix/src/class/pmix_pointer_array.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.c rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_pointer_array.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h b/opal/mca/pmix/pmix112/pmix/src/class/pmix_pointer_array.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h rename to opal/mca/pmix/pmix112/pmix/src/class/pmix_pointer_array.h index 
680d52cdfd9..0e299b007ee 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h +++ b/opal/mca/pmix/pmix112/pmix/src/class/pmix_pointer_array.h @@ -30,7 +30,7 @@ #endif #include "src/class/pmix_object.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/client/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/client/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmi1.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmi2.c index 7c9df035af9..b8394f20371 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c @@ -507,7 +507,7 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) PMI2_CHECK(); - if (NULL == jobid || NULL == conn) { + if (NULL == conn) { return PMI2_ERR_INVALID_ARGS; } @@ -524,10 +524,6 @@ int PMI2_Job_Disconnect(const char jobid[]) PMI2_CHECK(); - if (NULL == jobid) { - return PMI2_ERR_INVALID_ARGS; - } - (void)strncpy(proc.nspace, (jobid ? 
jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Disconnect(&proc, 1, NULL, 0); diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c similarity index 88% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c index 8d67b70df66..b1379bbf197 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -45,6 +45,14 @@ #endif #include PMIX_EVENT_HEADER +#if PMIX_CC_USE_PRAGMA_IDENT +#pragma ident PMIX_VERSION +#elif PMIX_CC_USE_IDENT +#ident PMIX_VERSION +#endif +static const char pmix_version_string[] = PMIX_VERSION; + + #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" #include "src/util/argv.h" @@ -161,10 +169,14 @@ static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, /* unpack the nspace - we don't really need it, but have to * unpack it to maintain sequence */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) { + nspace = NULL; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) { PMIX_ERROR_LOG(rc); return; } + if (NULL != nspace) { + free(nspace); + } /* decode it */ pmix_client_process_nspace_blob(pmix_globals.myid.nspace, buf); cb->status = PMIX_SUCCESS; @@ -228,16 +240,14 @@ int PMIx_Init(pmix_proc_t *proc) pmix_nspace_t *nsptr; pmix_cb_t cb; - if (NULL == proc) { - return PMIX_ERR_BAD_PARAM; - } - if (0 < pmix_globals.init_cntr) { /* since we have been called before, 
the nspace and * rank should be known. So return them here if * requested */ - (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - proc->rank = pmix_globals.myid.rank; + if (NULL != proc) { + (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + proc->rank = pmix_globals.myid.rank; + } return PMIX_SUCCESS; } @@ -281,7 +291,9 @@ int PMIx_Init(pmix_proc_t *proc) pmix_class_finalize(); return PMIX_ERR_INVALID_NAMESPACE; } - (void)strncpy(proc->nspace, evar, PMIX_MAX_NSLEN); + if (NULL != proc) { + (void)strncpy(proc->nspace, evar, PMIX_MAX_NSLEN); + } (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); nsptr = PMIX_NEW(pmix_nspace_t); (void)strncpy(nsptr->nspace, evar, PMIX_MAX_NSLEN); @@ -336,7 +348,9 @@ int PMIx_Init(pmix_proc_t *proc) return PMIX_ERR_DATA_VALUE_NOT_FOUND; } pmix_globals.myid.rank = strtol(evar, NULL, 10); - proc->rank = pmix_globals.myid.rank; + if (NULL != proc) { + proc->rank = pmix_globals.myid.rank; + } pmix_globals.pindex = -1; /* setup the support */ @@ -530,37 +544,28 @@ int PMIx_Abort(int flag, const char msg[], return PMIX_SUCCESS; } -pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) +static void _putfn(int sd, short args, void *cbdata) { + pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_kval_t *kv; pmix_nspace_t *ns; - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: executing put for key %s type %d", - key, val->type); - - if (pmix_globals.init_cntr <= 0) { - return PMIX_ERR_INIT; - } - /* setup to xfer the data */ kv = PMIX_NEW(pmix_kval_t); - kv->key = strdup((char*)key); + kv->key = strdup(cb->key); // need to copy as the input belongs to the user kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - rc = pmix_value_xfer(kv->value, val); + rc = pmix_value_xfer(kv->value, cb->value); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kv); - return rc; + goto done; } /* put it in our own modex hash table in case something 
* internal to us wants it - our nsrecord is always * first on the list */ if (NULL == (ns = (pmix_nspace_t*)pmix_list_get_first(&pmix_globals.nspaces))) { /* shouldn't be possible */ - PMIX_RELEASE(kv); - return PMIX_ERR_INIT; + goto done; } if (PMIX_SUCCESS != (rc = pmix_hash_store(&ns->modex, pmix_globals.myid.rank, kv))) { PMIX_ERROR_LOG(rc); @@ -568,56 +573,81 @@ pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) /* pack the cache that matches the scope - global scope needs * to go into both local and remote caches */ - if (PMIX_LOCAL == scope || PMIX_GLOBAL == scope) { + if (PMIX_LOCAL == cb->scope || PMIX_GLOBAL == cb->scope) { if (NULL == pmix_globals.cache_local) { pmix_globals.cache_local = PMIX_NEW(pmix_buffer_t); } pmix_output_verbose(2, pmix_globals.debug_output, "pmix: put %s data for key %s in local cache", - key, (PMIX_GLOBAL == scope) ? "global" : "local"); + cb->key, (PMIX_GLOBAL == cb->scope) ? "global" : "local"); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(pmix_globals.cache_local, kv, 1, PMIX_KVAL))) { PMIX_ERROR_LOG(rc); } } - if (PMIX_REMOTE == scope || PMIX_GLOBAL == scope) { + if (PMIX_REMOTE == cb->scope || PMIX_GLOBAL == cb->scope) { if (NULL == pmix_globals.cache_remote) { pmix_globals.cache_remote = PMIX_NEW(pmix_buffer_t); } pmix_output_verbose(2, pmix_globals.debug_output, "pmix: put %s data for key %s in remote cache", - key, (PMIX_GLOBAL == scope) ? "global" : "remote"); + cb->key, (PMIX_GLOBAL == cb->scope) ? 
"global" : "remote"); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(pmix_globals.cache_remote, kv, 1, PMIX_KVAL))) { PMIX_ERROR_LOG(rc); } } + done: PMIX_RELEASE(kv); // maintain accounting + cb->pstatus = rc; + cb->active = false; +} + +pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) +{ + pmix_cb_t *cb; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: executing put for key %s type %d", + key, val->type); + + if (pmix_globals.init_cntr <= 0) { + return PMIX_ERR_INIT; + } + + /* create a callback object */ + cb = PMIX_NEW(pmix_cb_t); + cb->active = true; + cb->scope = scope; + cb->key = (char*)key; + cb->value = val; + + /* pass this into the event library for thread protection */ + PMIX_THREAD_SHIFT(cb, _putfn); + + /* wait for the result */ + PMIX_WAIT_FOR_COMPLETION(cb->active); + rc = cb->pstatus; + PMIX_RELEASE(cb); return rc; } -pmix_status_t PMIx_Commit(void) +static void _commitfn(int sd, short args, void *cbdata) { + pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_scope_t scope; pmix_buffer_t *msgout; pmix_cmd_t cmd=PMIX_COMMIT_CMD; - /* if we are a server, or we aren't connected, don't attempt to send */ - if (pmix_globals.server) { - return PMIX_SUCCESS; // not an error - } - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msgout = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - return rc; + goto done; } /* if we haven't already done it, ensure we have committed our values */ @@ -626,12 +656,12 @@ pmix_status_t PMIx_Commit(void) if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &scope, 1, PMIX_SCOPE))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - return rc; + goto done; } if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &pmix_globals.cache_local, 1, PMIX_BUFFER))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - return rc; + goto done; } 
PMIX_RELEASE(pmix_globals.cache_local); } @@ -640,12 +670,12 @@ pmix_status_t PMIx_Commit(void) if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &scope, 1, PMIX_SCOPE))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - return rc; + goto done; } if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &pmix_globals.cache_remote, 1, PMIX_BUFFER))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - return rc; + goto done; } PMIX_RELEASE(pmix_globals.cache_remote); } @@ -654,28 +684,56 @@ pmix_status_t PMIx_Commit(void) * send, even if we have nothing to contribute, so the server knows * that we contributed whatever we had */ PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msgout, NULL, NULL); - return PMIX_SUCCESS; + + done: + cb->pstatus = rc; + cb->active = false; } -pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, - pmix_proc_t **procs, size_t *nprocs) +pmix_status_t PMIx_Commit(void) { + pmix_cb_t *cb; + pmix_status_t rc; + + /* if we are a server, or we aren't connected, don't attempt to send */ + if (pmix_globals.server) { + return PMIX_SUCCESS; // not an error + } + if (!pmix_globals.connected) { + return PMIX_ERR_UNREACH; + } + + /* create a callback object */ + cb = PMIX_NEW(pmix_cb_t); + cb->active = true; + + /* pass this into the event library for thread protection */ + PMIX_THREAD_SHIFT(cb, _commitfn); + + /* wait for the result */ + PMIX_WAIT_FOR_COMPLETION(cb->active); + rc = cb->pstatus; + PMIX_RELEASE(cb); + + return rc; +} + +static void _peersfn(int sd, short args, void *cbdata) +{ + pmix_cb_t *cb = (pmix_cb_t*)cbdata; + pmix_status_t rc; char **nsprocs=NULL, **nsps=NULL, **tmp; pmix_nspace_t *nsptr; pmix_nrec_t *nptr; size_t i; - /* set the default */ - *procs = NULL; - *nprocs = 0; - /* cycle across our known nspaces */ tmp = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (NULL == nspace || 0 == strcmp(nsptr->nspace, nspace)) { + if (0 == strncmp(nsptr->nspace, cb->nspace, PMIX_MAX_NSLEN)) { /* cycle 
across the nodes in this nspace */ PMIX_LIST_FOREACH(nptr, &nsptr->nodes, pmix_nrec_t) { - if (0 == strcmp(nodename, nptr->name)) { + if (0 == strcmp(cb->key, nptr->name)) { /* add the contribution from this node */ tmp = pmix_argv_split(nptr->procs, ','); for (i=0; NULL != tmp[i]; i++) { @@ -689,41 +747,71 @@ pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, } } if (0 == (i = pmix_argv_count(nsps))) { - /* if we don't already have a record for this nspace, - * see if we have the data in our local cache */ - - return PMIX_ERR_NOT_FOUND; + /* we don't know this nspace */ + rc = PMIX_ERR_NOT_FOUND; + goto done; } /* create the required storage */ - i = pmix_argv_count(nsps); - PMIX_PROC_CREATE(*procs, i); - *nprocs = pmix_argv_count(nsps); + PMIX_PROC_CREATE(cb->procs, i); + cb->nvals = pmix_argv_count(nsps); /* transfer the data */ for (i=0; NULL != nsps[i]; i++) { - (void)strncpy((*procs)[i].nspace, nsps[i], PMIX_MAX_NSLEN); - (*procs)[i].rank = strtol(nsprocs[i], NULL, 10); + (void)strncpy(cb->procs[i].nspace, nsps[i], PMIX_MAX_NSLEN); + cb->procs[i].rank = strtol(nsprocs[i], NULL, 10); } pmix_argv_free(nsps); pmix_argv_free(nsprocs); + rc = PMIX_SUCCESS; - return PMIX_SUCCESS; + done: + cb->pstatus = rc; + cb->active = false; } -pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) +pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, + pmix_proc_t **procs, size_t *nprocs) { + pmix_cb_t *cb; + pmix_status_t rc; + + /* create a callback object */ + cb = PMIX_NEW(pmix_cb_t); + cb->active = true; + cb->key = (char*)nodename; + if (NULL != nspace) { + (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); + } + + /* pass this into the event library for thread protection */ + PMIX_THREAD_SHIFT(cb, _peersfn); + + /* wait for the result */ + PMIX_WAIT_FOR_COMPLETION(cb->active); + rc = cb->pstatus; + /* transfer the result */ + *procs = cb->procs; + *nprocs = cb->nvals; + + /* cleanup */ + PMIX_RELEASE(cb); + + 
return rc; +} + +static void _nodesfn(int sd, short args, void *cbdata) +{ + pmix_cb_t *cb = (pmix_cb_t*)cbdata; + pmix_status_t rc; char **tmp; pmix_nspace_t *nsptr; pmix_nrec_t *nptr; - /* set the default */ - *nodelist = NULL; - /* cycle across our known nspaces */ tmp = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (NULL == nspace || 0 == strcmp(nsptr->nspace, nspace)) { + if (0 == strncmp(nsptr->nspace, cb->nspace, PMIX_MAX_NSLEN)) { /* cycle across the nodes in this nspace */ PMIX_LIST_FOREACH(nptr, &nsptr->nodes, pmix_nrec_t) { pmix_argv_append_unique_nosize(&tmp, nptr->name, false); @@ -731,11 +819,39 @@ pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) } } if (NULL == tmp) { - return PMIX_ERR_NOT_FOUND; + rc = PMIX_ERR_NOT_FOUND; + } else { + cb->key = pmix_argv_join(tmp, ','); + pmix_argv_free(tmp); + rc = PMIX_SUCCESS; } - *nodelist = pmix_argv_join(tmp, ','); - pmix_argv_free(tmp); - return PMIX_SUCCESS; + + cb->pstatus = rc; + cb->active = false; +} + +pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) +{ + pmix_cb_t *cb; + pmix_status_t rc; + + /* create a callback object */ + cb = PMIX_NEW(pmix_cb_t); + cb->active = true; + if (NULL != nspace) { + (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); + } + + /* pass this into the event library for thread protection */ + PMIX_THREAD_SHIFT(cb, _nodesfn); + + /* wait for the result */ + PMIX_WAIT_FOR_COMPLETION(cb->active); + rc = cb->pstatus; + *nodelist = cb->key; + PMIX_RELEASE(cb); + + return rc; } @@ -919,9 +1035,6 @@ void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr) bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - /* protect the data */ - kptr->value->data.bo.bytes = NULL; - kptr->value->data.bo.size = 0; PMIX_RELEASE(kptr); /* start by unpacking the rank */ cnt = 1; @@ -958,9 +1071,6 @@ void pmix_client_process_nspace_blob(const char 
*nspace, pmix_buffer_t *bptr) bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - /* protect the data */ - kptr->value->data.bo.bytes = NULL; - kptr->value->data.bo.size = 0; PMIX_RELEASE(kptr); /* start by unpacking the number of nodes */ cnt = 1; diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_connect.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_fence.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client_fence.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c similarity index 77% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c index d41be9cbe31..a9b27b0b94a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c @@ -59,9 +59,12 @@ static pmix_buffer_t* pack_get(char *nspace, int rank, const pmix_info_t info[], size_t ninfo, pmix_cmd_t cmd); + +static void _getnbfn(int sd, short args, void *cbdata); + static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata); -static void getnb_shortcut(int fd, short flags, void *cbdata); + static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata); int PMIx_Get(const pmix_proc_t *proc, const char key[], @@ -90,10 +93,8 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[], * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); cb->active = true; - if 
(PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, value_cbfunc, cb))) { PMIX_RELEASE(cb); - *val = NULL; return rc; } @@ -113,13 +114,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, const pmix_info_t info[], size_t ninfo, pmix_value_cbfunc_t cbfunc, void *cbdata) { - pmix_value_t *val; - pmix_buffer_t *msg; pmix_cb_t *cb; - pmix_status_t rc; - char *nm; - pmix_nspace_t *ns, *nptr; - size_t n; if (NULL == proc) { return PMIX_ERR_BAD_PARAM; @@ -139,184 +134,18 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, return PMIX_ERR_BAD_PARAM; } - /* if the nspace is empty, then the caller is referencing - * our own nspace */ - if (0 == strlen(proc->nspace)) { - nm = pmix_globals.myid.nspace; - } else { - nm = (char*)proc->nspace; - } - - /* find the nspace object */ - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nm, ns->nspace)) { - nptr = ns; - break; - } - } - if (NULL == nptr) { - /* we are asking for info about a new nspace - give us - * a chance to learn about it from the server. If the - * server has never heard of it, the server will return - * an error */ - nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, nm, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); - /* there is no point in looking for data in this nspace - * object, so let's just go generate the request */ - goto request; - } - - /* the requested data could be in the job-data table, so let's - * just check there first. 
*/ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, key, &val))) { - /* found it - return it via appropriate channel */ - cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); - cb->rank = proc->rank; - cb->key = strdup(key); - cb->value_cbfunc = cbfunc; - cb->cbdata = cbdata; - /* pack the return data so the unpack routine can get it */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&cb->data, val, 1, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - } - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - /* activate the event */ - event_assign(&(cb->ev), pmix_globals.evbase, -1, - EV_WRITE, getnb_shortcut, cb); - event_active(&(cb->ev), EV_WRITE, 1); - return PMIX_SUCCESS; - } - if (PMIX_RANK_WILDCARD == proc->rank) { - /* can't be anywhere else */ - return PMIX_ERR_NOT_FOUND; - } - - /* it could still be in the job-data table, only stored under its own - * rank and not WILDCARD - e.g., this is true of data returned about - * ourselves during startup */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, proc->rank, key, &val))) { - /* found it - return it via appropriate channel */ - cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); - cb->rank = proc->rank; - cb->key = strdup(key); - cb->value_cbfunc = cbfunc; - cb->cbdata = cbdata; - /* pack the return data so the unpack routine can get it */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&cb->data, val, 1, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - } - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - /* activate the event */ - event_assign(&(cb->ev), pmix_globals.evbase, -1, - EV_WRITE, getnb_shortcut, cb); - event_active(&(cb->ev), EV_WRITE, 1); - return PMIX_SUCCESS; - } - - /* not finding it is not an error - it could be in the - * modex hash table, so check it */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, proc->rank, key, &val))) { - pmix_output_verbose(2, pmix_globals.debug_output, - 
"pmix: value retrieved from dstore"); - /* need to push this into the event library to ensure - * the callback occurs within an event */ - cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); - cb->rank = proc->rank; - cb->key = strdup(key); - cb->value_cbfunc = cbfunc; - cb->cbdata = cbdata; - /* pack the return data so the unpack routine can get it */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&cb->data, val, 1, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - } - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - /* activate the event */ - event_assign(&(cb->ev), pmix_globals.evbase, -1, - EV_WRITE, getnb_shortcut, cb); - event_active(&(cb->ev), EV_WRITE, 1); - return PMIX_SUCCESS; - } else if (PMIX_ERR_NOT_FOUND == rc) { - /* we have the modex data from this proc, but didn't find the key - * the user requested. At this time, there is no way for the - * key to eventually be found, so all we can do is return - * the error */ - pmix_output_verbose(2, pmix_globals.debug_output, - "Error requesting key=%s for rank = %d, namespace = %s", - key, proc->rank, nm); - return rc; - } - - request: - /* if we got here, then we don't have the data for this proc. 
If we - * are a server, or we are a client and not connected, then there is - * nothing more we can do */ - if (pmix_globals.server || (!pmix_globals.server && !pmix_globals.connected)) { - return PMIX_ERR_NOT_FOUND; - } - - /* we also have to check the user's directives to see if they do not want - * us to attempt to retrieve it from the server */ - for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_OPTIONAL) && - info[n].value.data.flag) { - /* they don't want us to try and retrieve it */ - pmix_output_verbose(2, pmix_globals.debug_output, - "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", - key, proc->rank, nm); - return PMIX_ERR_NOT_FOUND; - } - } - /* see if we already have a request in place with the server for data from - * this nspace:rank. If we do, then no need to ask again as the - * request will return _all_ data from that proc */ - PMIX_LIST_FOREACH(cb, &pmix_client_globals.pending_requests, pmix_cb_t) { - if (0 == strncmp(nm, cb->nspace, PMIX_MAX_NSLEN) && cb->rank == proc->rank) { - /* we do have a pending request, but we still need to track this - * outstanding request so we can satisfy it once the data is returned */ - cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); - cb->rank = proc->rank; - cb->key = strdup(key); - cb->value_cbfunc = cbfunc; - cb->cbdata = cbdata; - pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); - return PMIX_SUCCESS; - } - } - - /* we don't have a pending request, so let's create one - don't worry - * about packing the key as we return everything from that proc */ - if (NULL == (msg = pack_get(nm, proc->rank, info, ninfo, PMIX_GETNB_CMD))) { - return PMIX_ERROR; - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ + /* thread-shift so we can check global objects */ cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, 
PMIX_MAX_NSLEN); + cb->active = true; + (void)strncpy(cb->nspace, proc->nspace, PMIX_MAX_NSLEN); cb->rank = proc->rank; - cb->key = strdup(key); + cb->key = (char*)key; + cb->info = (pmix_info_t*)info; + cb->ninfo = ninfo; cb->value_cbfunc = cbfunc; cb->cbdata = cbdata; - pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); + PMIX_THREAD_SHIFT(cb, _getnbfn); - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, getnb_cbfunc, cb); return PMIX_SUCCESS; } @@ -377,6 +206,9 @@ static pmix_buffer_t* pack_get(char *nspace, int rank, return msg; } +/* this callback is coming from the usock recv, and thus + * is occurring inside of our progress thread - hence, no + * need to thread shift */ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { @@ -410,7 +242,7 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, /* look up the nspace object for this proc */ nptr = NULL; PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(cb->nspace, ns->nspace)) { + if (0 == strncmp(cb->nspace, ns->nspace, PMIX_MAX_NSLEN)) { nptr = ns; break; } @@ -458,6 +290,7 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, PMIX_RELEASE(bptr); // free's the data region if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); + rc = PMIX_ERR_SILENT; // avoid error-logging twice break; } } @@ -499,23 +332,169 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, } } -static void getnb_shortcut(int fd, short flags, void *cbdata) +static void _getnbfn(int fd, short flags, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; - pmix_value_t val; + pmix_cb_t *cbret; + pmix_buffer_t *msg; + pmix_value_t *val; pmix_status_t rc; - int32_t m; + char *nm; + pmix_nspace_t *ns, *nptr; + size_t n; pmix_output_verbose(2, pmix_globals.debug_output, - "getnb_shortcut 
called with %s cbfunc", - (NULL == cb->value_cbfunc) ? "NULL" : "NON-NULL"); + "pmix: getnbfn value for proc %s:%d key %s", + cb->nspace, cb->rank, + (NULL == cb->key) ? "NULL" : cb->key); - PMIX_VALUE_CONSTRUCT(&val); - if (NULL != cb->value_cbfunc) { - m=1; - rc = pmix_bfrop.unpack(&cb->data, &val, &m, PMIX_VALUE); - cb->value_cbfunc(rc, &val, cb->cbdata); + /* if the nspace is empty, then the caller is referencing + * our own nspace */ + if (0 == strlen(cb->nspace)) { + nm = pmix_globals.myid.nspace; + } else { + nm = (char*)cb->nspace; } - PMIX_VALUE_DESTRUCT(&val); - PMIX_RELEASE(cb); + + /* find the nspace object */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(nm, ns->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + /* we are asking for info about a new nspace - give us + * a chance to learn about it from the server. If the + * server has never heard of it, the server will return + * an error */ + nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(nptr->nspace, nm, PMIX_MAX_NSLEN); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + /* there is no point in looking for data in this nspace + * object, so let's just go generate the request */ + goto request; + } + + /* the requested data could be in the job-data table, so let's + * just check there first. 
*/ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) { + /* found it - we are in an event, so we can + * just execute the callback */ + cb->value_cbfunc(rc, val, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } + if (PMIX_RANK_WILDCARD == cb->rank) { + /* can't be anywhere else */ + cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + + /* it could still be in the job-data table, only stored under its own + * rank and not WILDCARD - e.g., this is true of data returned about + * ourselves during startup */ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { + /* found it - we are in an event, so we can + * just execute the callback */ + cb->value_cbfunc(rc, val, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } + + /* not finding it is not an error - it could be in the + * modex hash table, so check it */ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, cb->key, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: value retrieved from dstore"); + /* found it - we are in an event, so we can + * just execute the callback */ + cb->value_cbfunc(rc, val, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } else if (PMIX_ERR_NOT_FOUND == rc) { + /* we have the modex data from this proc, but didn't find the key + * the user requested. 
At this time, there is no way for the + * key to eventually be found, so all we can do is return + * the error */ + pmix_output_verbose(2, pmix_globals.debug_output, + "Error requesting key=%s for rank = %d, namespace = %s", + cb->key, cb->rank, nm); + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* protect the data */ + cb->procs = NULL; + cb->key = NULL; + cb->info = NULL; + PMIX_RELEASE(cb); + return; + } + + request: + /* if we got here, then we don't have the data for this proc. If we + * are a server, or we are a client and not connected, then there is + * nothing more we can do */ + if (pmix_globals.server || (!pmix_globals.server && !pmix_globals.connected)) { + cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + + /* we also have to check the user's directives to see if they do not want + * us to attempt to retrieve it from the server */ + for (n=0; n < cb->ninfo; n++) { + if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) && + cb->info[n].value.data.flag) { + /* they don't want us to try and retrieve it */ + pmix_output_verbose(2, pmix_globals.debug_output, + "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", + cb->key, cb->rank, nm); + cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + } + + /* see if we already have a request in place with the server for data from + * this nspace:rank. 
If we do, then no need to ask again as the + * request will return _all_ data from that proc */ + PMIX_LIST_FOREACH(cbret, &pmix_client_globals.pending_requests, pmix_cb_t) { + if (0 == strncmp(cbret->nspace, nm, PMIX_MAX_NSLEN) && + cbret->rank == cb->rank) { + /* we do have a pending request, but we still need to track this + * outstanding request so we can satisfy it once the data is returned */ + pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); + return; + } + } + + /* we don't have a pending request, so let's create one - don't worry + * about packing the key as we return everything from that proc */ + msg = pack_get(nm, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD); + if (NULL == msg) { + cb->value_cbfunc(PMIX_ERROR, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + + /* create a callback object as we need to pass it to the + * recv routine so we know which callback to use when + * the return message is recvd */ + pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); + + /* push the message into our event base to send to the server */ + PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, getnb_cbfunc, cb); } diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_ops.h rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_pub.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_pub.c rename to opal/mca/pmix/pmix112/pmix/src/client/pmix_client_pub.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_spawn.c rename to 
opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/common/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/common/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c b/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c similarity index 95% rename from opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c rename to opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c index 43a872d85b3..62b65119fc3 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c +++ b/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c @@ -1,44 +1,44 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include "src/include/pmix_globals.h" - -void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, - pmix_notification_fn_t errhandler, - pmix_errhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* common err handler registration to be added */ -} - -void PMIx_Deregister_errhandler(int errhandler_ref, - pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - /* common err handler deregistration goes here */ -} - -pmix_status_t PMIx_Notify_error(pmix_status_t status, - pmix_proc_t procs[], size_t nprocs, - pmix_proc_t error_procs[], size_t error_nprocs, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - /* common err notify goes here */ - return PMIX_SUCCESS; -} +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include "src/include/pmix_globals.h" + +void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) +{ + /* common err handler registration to be added */ +} + +void PMIx_Deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + /* common err handler deregistration goes here */ +} + +pmix_status_t PMIx_Notify_error(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_proc_t error_procs[], size_t error_nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + /* common err notify goes here */ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix1xx/pmix/src/include/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/include/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/include/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/include/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.c rename to opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h rename to opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h index 7e529737c85..2afc2db016e 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h @@ -30,7 +30,7 @@ #endif #include PMIX_EVENT_HEADER -#include +#include #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" diff --git 
a/opal/mca/pmix/pmix1xx/pmix/src/sec/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/sec/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/sec/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_munge.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_munge.c index 0336c67cbb5..70cea510f34 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c +++ b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_munge.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #include "src/util/argv.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.h b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_munge.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.h rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_munge.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_native.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_native.c index d353e827204..a116556d3dc 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c +++ b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_native.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #include "src/util/argv.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.h b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_native.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.h rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_native.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.c b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.c rename to 
opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.h b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.h rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sec.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_sec.c index 578cc05c0f7..6afcd529fad 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c +++ b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sec.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #ifdef HAVE_STRING_H diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.h b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sec.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.h rename to opal/mca/pmix/pmix112/pmix/src/sec/pmix_sec.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/server/Makefile.am similarity index 83% rename from opal/mca/pmix/pmix1xx/pmix/src/server/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/server/Makefile.am index 5422b780dc6..88b0468e477 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/src/server/Makefile.am @@ -16,4 +16,5 @@ sources += \ src/server/pmix_server.c \ src/server/pmix_server_ops.c \ src/server/pmix_server_regex.c \ - src/server/pmix_server_listener.c + src/server/pmix_server_listener.c \ + src/server/pmix_server_get.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c similarity index 93% rename from opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c rename to opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c index d16ae16212a..d43cdaae376 100644 --- 
a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #ifdef HAVE_STRING_H @@ -44,6 +44,7 @@ #include #endif #include +#include #include PMIX_EVENT_HEADER #include "src/util/argv.h" @@ -140,13 +141,13 @@ static void _queue_message(int fd, short args, void *cbdata) { pmix_usock_queue_t *queue = (pmix_usock_queue_t*)cbdata; pmix_usock_send_t *snd; + pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] queue callback called: reply to %s:%d on tag %d," - "event_is_active=%d", + "[%s:%d] queue callback called: reply to %s:%d on tag %d", __FILE__, __LINE__, (queue->peer)->info->nptr->nspace, - (queue->peer)->info->rank, (queue->tag), - (queue->peer)->send_ev_active); + (queue->peer)->info->rank, (queue->tag)); + snd = PMIX_NEW(pmix_usock_send_t); snd->hdr.pindex = pmix_globals.pindex; snd->hdr.tag = (queue->tag); @@ -179,12 +180,10 @@ static void _queue_message(int fd, short args, void *cbdata) queue->buf = (b); \ queue->tag = (t); \ pmix_output_verbose(2, pmix_globals.debug_output, \ - "[%s:%d] queue reply to %s:%d on tag %d," \ - "event_is_active=%d", \ + "[%s:%d] queue reply to %s:%d on tag %d", \ __FILE__, __LINE__, \ (queue->peer)->info->nptr->nspace, \ - (queue->peer)->info->rank, (queue->tag), \ - (queue->peer)->send_ev_active); \ + (queue->peer)->info->rank, (queue->tag)); \ event_assign(&queue->ev, pmix_globals.evbase, -1, \ EV_WRITE, _queue_message, queue); \ event_priority_set(&queue->ev, 0); \ @@ -226,12 +225,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) pmix_globals.myid.rank = strtol(evar, NULL, 10); } + /* initialize the datatype support */ + pmix_bfrop_open(); + /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); 
PMIX_CONSTRUCT(&pmix_server_globals.collectives, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.remote_pnd, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t); /* see if debug is requested */ if (NULL != (evar = getenv("PMIX_DEBUG"))) { @@ -247,9 +250,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) memset(&pmix_host_server, 0, sizeof(pmix_server_module_t)); pmix_host_server = *module; - /* initialize the datatype support */ - pmix_bfrop_open(); - /* init security */ pmix_sec_init(); security_mode = strdup(pmix_sec.name); @@ -271,17 +271,19 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid); asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path); - pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server constructed uri %s", myuri); return PMIX_SUCCESS; } -pmix_status_t PMIx_server_init(pmix_server_module_t *module) +pmix_status_t PMIx_server_init(pmix_server_module_t *module, + pmix_info_t info[], size_t ninfo) { pmix_usock_posted_recv_t *req; pmix_status_t rc; + size_t n; + pmix_kval_t kv; ++pmix_globals.init_cntr; if (1 < pmix_globals.init_cntr) { @@ -316,6 +318,35 @@ pmix_status_t PMIx_server_init(pmix_server_module_t *module) return PMIX_ERR_INIT; } + /* check the info keys for a directive about the uid/gid + * to be set for the rendezvous file, and any info we + * need to provide to every client */ + if (NULL != info) { + PMIX_CONSTRUCT(&kv, pmix_kval_t); + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key, PMIX_USERID)) { + /* the userid is in the uint32_t storage */ + chown(myaddress.sun_path, info[n].value.data.uint32, -1); + } else if (0 == strcmp(info[n].key, PMIX_GRPID)) { + /* the grpid is in the uint32_t storage */ + chown(myaddress.sun_path, -1, 
info[n].value.data.uint32); + } else { + /* store and pass along to every client */ + kv.key = info[n].key; + kv.value = &info[n].value; + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&kv); + return rc; + } + } + } + /* protect the incoming data */ + kv.key = NULL; + kv.value = NULL; + PMIX_DESTRUCT(&kv); + } + return PMIX_SUCCESS; } @@ -333,10 +364,14 @@ static void cleanup_server_state(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives); PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); + PMIX_DESTRUCT(&pmix_server_globals.gdata); if (NULL != myuri) { free(myuri); } + if (NULL != security_mode) { + free(security_mode); + } pmix_bfrop_close(); pmix_sec_finalize(); @@ -522,6 +557,13 @@ static void _register_nspace(int sd, short args, void *cbdata) goto release; } PMIX_DESTRUCT(&buf2); +#if !PMIX_HAVE_HWLOC + } else if (0 == strcmp(cd->info[i].key, PMIX_LOCAL_TOPO) || + 0 == strcmp(cd->info[i].key, PMIX_NET_TOPO)) { + /* if we weren't built with hwloc support, then we + * don't know how to deal with these */ + continue; +#endif } else { /* just a value relating to the entire job */ kv.key = cd->info[i].key; @@ -575,6 +617,45 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, return PMIX_SUCCESS; } +static void _deregister_nspace(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + pmix_nspace_t *tmp; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server _deregister_nspace %s", + cd->proc.nspace); + + /* see if we already have this nspace */ + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { + pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); + PMIX_RELEASE(tmp); + break; + } + } + + PMIX_RELEASE(cd); +} + +void PMIx_server_deregister_nspace(const char 
nspace[]) +{ + pmix_setup_caddy_t *cd; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server deregister nspace %s", + nspace); + + cd = PMIX_NEW(pmix_setup_caddy_t); + (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); + + /* we have to push this into our event library to avoid + * potential threading issues */ + event_assign(&cd->ev, pmix_globals.evbase, -1, + EV_WRITE, _deregister_nspace, cd); + event_active(&cd->ev, EV_WRITE, 1); +} + static void _execute_collective(int sd, short args, void *cbdata) { pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata; @@ -619,12 +700,10 @@ static void _execute_collective(int sd, short args, void *cbdata) pmix_bfrop.pack(&rankbuf, &info->rank, 1, PMIX_INT); PMIX_CONSTRUCT(&xfer, pmix_buffer_t); PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); + PMIX_VALUE_RELEASE(val); pmix_buffer_t *pxfer = &xfer; pmix_bfrop.pack(&rankbuf, &pxfer, 1, PMIX_BUFFER); - xfer.base_ptr = NULL; - xfer.bytes_used = 0; PMIX_DESTRUCT(&xfer); - PMIX_VALUE_RELEASE(val); /* now pack this proc's contribution into the bucket */ pmix_buffer_t *pdatabuf = &rankbuf; pmix_bfrop.pack(&databuf, &pdatabuf, 1, PMIX_BUFFER); @@ -723,7 +802,7 @@ static void _register_client(int sd, short args, void *cbdata) * someone has been waiting for a request on a remote proc * in one of our nspaces, but we didn't know all the local procs * and so couldn't determine the proc was remote */ - pmix_pending_nspace_fix(nptr); + pmix_pending_nspace_requests(nptr); } /* let the caller know we are done */ if (NULL != cd->opcbfunc) { @@ -759,6 +838,60 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, return PMIX_SUCCESS; } +static void _deregister_client(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + pmix_rank_info_t *info; + pmix_nspace_t *nptr, *tmp; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server _deregister_client for nspace %s rank %d", + cd->proc.nspace, 
cd->proc.rank); + + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { + nptr = tmp; + break; + } + } + if (NULL == nptr) { + /* nothing to do */ + goto cleanup; + } + /* find an remove this client */ + PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { + if (info->rank == cd->proc.rank) { + pmix_list_remove_item(&nptr->server->ranks, &info->super); + PMIX_RELEASE(info); + break; + } + } + + cleanup: + PMIX_RELEASE(cd); +} + +void PMIx_server_deregister_client(const pmix_proc_t *proc) +{ + pmix_setup_caddy_t *cd; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server deregister client %s:%d", + proc->nspace, proc->rank); + + cd = PMIX_NEW(pmix_setup_caddy_t); + (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + cd->proc.rank = proc->rank; + + /* we have to push this into our event library to avoid + * potential threading issues */ + event_assign(&cd->ev, pmix_globals.evbase, -1, + EV_WRITE, _deregister_client, cd); + event_active(&cd->ev, EV_WRITE, 1); +} + /* setup the envars for a child process */ pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char ***env) { @@ -1944,6 +2077,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_REQ_CMD == cmd) { reply = PMIX_NEW(pmix_buffer_t); pmix_bfrop.copy_payload(reply, &(peer->info->nptr->server->job_info)); + pmix_bfrop.copy_payload(reply, &(pmix_server_globals.gdata)); PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c new file mode 100644 index 00000000000..9d482697239 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c @@ -0,0 +1,551 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Artem Y. Polyakov . + * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include + +#include +#include "src/include/pmix_globals.h" + +#ifdef HAVE_STRING_H +#include +#endif +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif +#ifdef HAVE_SYS_UIO_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include PMIX_EVENT_HEADER + +#include "src/class/pmix_list.h" +#include "src/buffer_ops/buffer_ops.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/pmix_environ.h" +#include "src/util/progress_threads.h" +#include "src/usock/usock.h" +#include "src/sec/pmix_sec.h" + +#include "pmix_server_ops.h" + +extern pmix_server_module_t pmix_host_server; + +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_status_t status; + const char *data; + size_t ndata; + pmix_dmdx_local_t *lcd; + pmix_release_cbfunc_t relcbfunc; + void *cbdata; +} pmix_dmdx_reply_caddy_t; +static void dcd_con(pmix_dmdx_reply_caddy_t *p) +{ + p->status = PMIX_ERROR; + p->ndata = 0; + p->lcd = NULL; + p->relcbfunc = NULL; + p->cbdata = NULL; +} +PMIX_CLASS_INSTANCE(pmix_dmdx_reply_caddy_t, + pmix_object_t, dcd_con, NULL); + + +static void dmdx_cbfunc(pmix_status_t status, const char *data, + size_t ndata, void *cbdata, + pmix_release_cbfunc_t relfn, void *relcbdata); +static pmix_status_t _satisfy_request(pmix_hash_table_t *ht, int rank, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t create_local_tracker(char nspace[], int rank, + pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t 
cbfunc, + void *cbdata, + pmix_dmdx_local_t **lcd); + + +/* declare a function whose sole purpose is to + * free data that we provided to our host server + * when servicing dmodex requests */ +static void relfn(void *cbdata) +{ + char *data = (char*)cbdata; + free(data); +} + + +pmix_status_t pmix_server_get(pmix_buffer_t *buf, + pmix_modex_cbfunc_t cbfunc, + void *cbdata) +{ + int32_t cnt; + pmix_status_t rc; + int rank; + char *cptr; + char nspace[PMIX_MAX_NSLEN+1]; + pmix_nspace_t *ns, *nptr; + pmix_info_t *info=NULL; + size_t ninfo=0; + pmix_dmdx_local_t *lcd; + pmix_rank_info_t *iptr; + pmix_hash_table_t *ht; + bool local; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd GET"); + + /* setup */ + memset(nspace, 0, sizeof(nspace)); + + /* retrieve the nspace and rank of the requested proc */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cptr, &cnt, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + return rc; + } + (void)strncpy(nspace, cptr, PMIX_MAX_NSLEN); + free(cptr); + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &rank, &cnt, PMIX_INT))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* retrieve any provided info structs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 < ninfo) { + PMIX_INFO_CREATE(info, ninfo); + cnt = ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + PMIX_INFO_FREE(info, ninfo); + return rc; + } + } + + /* find the nspace object for this client */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(nspace, ns->nspace)) { + nptr = ns; + break; + } + } + + pmix_output_verbose(2, pmix_globals.debug_output, + "%s:%d EXECUTE GET FOR %s:%d", + pmix_globals.myid.nspace, + pmix_globals.myid.rank, nspace, rank); + + if (NULL == nptr || NULL == nptr->server) { + /* this is for an nspace we don't know about yet, so + * 
record the request for data from this process and + * give the host server a chance to tell us about it */ + rc = create_local_tracker(nspace, rank, info, ninfo, + cbfunc, cbdata, &lcd); + return rc; + } + + /* We have to wait for all local clients to be registered before + * we can know whether this request is for data from a local or a + * remote client because one client might ask for data about another + * client that the host RM hasn't told us about yet. Fortunately, + * we do know how many clients to expect, so first check to see if + * all clients have been registered with us */ + if (!nptr->server->all_registered) { + /* we cannot do anything further, so just track this request + * for now */ + rc = create_local_tracker(nspace, rank, info, ninfo, + cbfunc, cbdata, &lcd); + return rc; + } + + /* Since we know about all the local clients in this nspace, + * let's first try to satisfy the request with any available data. + * By default, we assume we are looking for data from a remote + * client, and then check to see if this is one of my local + * clients - if so, then we look in that hash table */ + ht = &nptr->server->remote; + local = false; + PMIX_LIST_FOREACH(iptr, &nptr->server->ranks, pmix_rank_info_t) { + if (iptr->rank == rank) { + /* it is known local client - check the local table */ + ht = &nptr->server->mylocal; + local = true; + break; + } + } + + /* see if we already have this data */ + rc = _satisfy_request(ht, rank, cbfunc, cbdata); + if( PMIX_SUCCESS == rc ){ + /* request was successfully satisfied */ + PMIX_INFO_FREE(info, ninfo); + return rc; + } + + /* If we get here, then we don't have the data at this time. 
Check + * to see if we already have a pending request for the data - if + * we do, then we can just wait for it to arrive */ + rc = create_local_tracker(nspace, rank, info, ninfo, + cbfunc, cbdata, &lcd); + if (PMIX_SUCCESS == rc) { + /* we are already waiting for the data - nothing more + * for us to do as the function added the new request + * to the tracker for us */ + return PMIX_SUCCESS; + } + if (PMIX_ERR_NOT_FOUND != rc || NULL == lcd) { + /* we have a problem - e.g., out of memory */ + return rc; + } + + /* Getting here means that we didn't already have a request for + * for data pending, and so we created a new tracker for this + * request. We know the identity of all our local clients, so + * if this is one, then we have nothing further to do - we will + * fulfill the request once the process commits its data */ + if (local) { + return PMIX_SUCCESS; + } + + /* this isn't a local client of ours, so we need to ask the host + * resource manager server to please get the info for us from + * whomever is hosting the target process */ + if (NULL != pmix_host_server.direct_modex) { + rc = pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); + } else { + /* if we don't have direct modex feature, just respond with "not found" */ + cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); + PMIX_INFO_FREE(info, ninfo); + pmix_list_remove_item(&pmix_server_globals.local_reqs, &lcd->super); + PMIX_LIST_DESTRUCT(&lcd->loc_reqs); + PMIX_RELEASE(lcd); + rc = PMIX_ERR_NOT_FOUND; + } + + return rc; +} + +static pmix_status_t create_local_tracker(char nspace[], int rank, + pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, + void *cbdata, + pmix_dmdx_local_t **ld) +{ + pmix_dmdx_local_t *lcd, *cd; + pmix_dmdx_request_t *req; + pmix_status_t rc; + + /* define default */ + *ld = NULL; + + /* see if we already have an existing request for data + * from this namespace/rank */ + lcd = NULL; + PMIX_LIST_FOREACH(cd, &pmix_server_globals.local_reqs, 
pmix_dmdx_local_t) { + if (0 != strncmp(nspace, cd->proc.nspace, PMIX_MAX_NSLEN) || + rank != cd->proc.rank ) { + continue; + } + lcd = cd; + break; + } + if (NULL != lcd) { + /* we already have a request, so just track that someone + * else wants data from the same target */ + rc = PMIX_SUCCESS; // indicates we found an existing request + goto complete; + } + /* we do not have an existing request, so let's create + * one and add it to our list */ + lcd = PMIX_NEW(pmix_dmdx_local_t); + if (NULL == lcd){ + PMIX_INFO_FREE(info, ninfo); + return PMIX_ERR_NOMEM; + } + strncpy(lcd->proc.nspace, nspace, PMIX_MAX_NSLEN); + lcd->proc.rank = rank; + lcd->info = info; + lcd->ninfo = ninfo; + pmix_list_append(&pmix_server_globals.local_reqs, &lcd->super); + rc = PMIX_ERR_NOT_FOUND; // indicates that we created a new request tracker + + complete: + /* track this specific requestor so we return the + * data to them */ + req = PMIX_NEW(pmix_dmdx_request_t); + req->cbfunc = cbfunc; + req->cbdata = cbdata; + pmix_list_append(&lcd->loc_reqs, &req->super); + *ld = lcd; + return rc; +} + +void pmix_pending_nspace_requests(pmix_nspace_t *nptr) +{ + pmix_dmdx_local_t *cd, *cd_next; + + /* Now that we know all local ranks, go along request list and ask for remote data + * for the non-local ranks, and resolve all pending requests for local procs + * that were waiting for registration to complete + */ + PMIX_LIST_FOREACH_SAFE(cd, cd_next, &pmix_server_globals.local_reqs, pmix_dmdx_local_t) { + pmix_rank_info_t *info; + bool found = false; + + if (0 != strncmp(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN) ) { + continue; + } + + PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { + if (info->rank == cd->proc.rank) { + found = true; // we will satisy this request upon commit from new proc + break; + } + } + + /* if not found - this is remote process and we need to send + * corresponding direct modex request */ + if( !found ){ + if( NULL != pmix_host_server.direct_modex ){ + 
pmix_host_server.direct_modex(&cd->proc, cd->info, cd->ninfo, dmdx_cbfunc, cd); + } else { + pmix_dmdx_request_t *req, *req_next; + PMIX_LIST_FOREACH_SAFE(req, req_next, &cd->loc_reqs, pmix_dmdx_request_t) { + req->cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, req->cbdata, NULL, NULL); + pmix_list_remove_item(&cd->loc_reqs, &req->super); + PMIX_RELEASE(req); + } + pmix_list_remove_item(&pmix_server_globals.local_reqs, &cd->super); + PMIX_RELEASE(cd); + } + } + } +} + +static pmix_status_t _satisfy_request(pmix_hash_table_t *ht, int rank, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t rc; + pmix_value_t *val; + char *data; + size_t sz; + pmix_buffer_t xfer, pbkt, *xptr; + + /* check to see if this data already has been + * obtained as a result of a prior direct modex request from + * a remote peer, or due to data from a local client + * having been committed */ + rc = pmix_hash_fetch(ht, rank, "modex", &val); + if (PMIX_SUCCESS == rc && NULL != val) { + /* the client is expecting this to arrive as a byte object + * containing a buffer, so package it accordingly */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + xptr = &xfer; + PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); + pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER); + xfer.base_ptr = NULL; // protect the passed data + xfer.bytes_used = 0; + PMIX_DESTRUCT(&xfer); + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); + PMIX_VALUE_RELEASE(val); + /* pass it back */ + cbfunc(rc, data, sz, cbdata, relfn, data); + return rc; + } + return PMIX_ERR_NOT_FOUND; +} + +/* Resolve pending requests to this namespace/rank */ +pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, int rank, + pmix_status_t status, pmix_dmdx_local_t *lcd) +{ + pmix_dmdx_local_t *cd; + + /* find corresponding request (if exists) */ + if (NULL == lcd && NULL != nptr) { + PMIX_LIST_FOREACH(cd, &pmix_server_globals.local_reqs, pmix_dmdx_local_t) { + if (0 != strncmp(nptr->nspace, 
cd->proc.nspace, PMIX_MAX_NSLEN) || + rank != cd->proc.rank) { + continue; + } + lcd = cd; + break; + } + } + + /* If somebody was interested in this rank */ + if (NULL != lcd) { + pmix_dmdx_request_t *req; + + if (PMIX_SUCCESS != status){ + /* if we've got an error for this request - just forward it*/ + PMIX_LIST_FOREACH(req, &lcd->loc_reqs, pmix_dmdx_request_t) { + /* if we can't satisfy this request - respond with error */ + req->cbfunc(status, NULL, 0, req->cbdata, NULL, NULL); + } + } else if (NULL != nptr) { + /* if we've got the blob - try to satisfy requests */ + pmix_hash_table_t *ht; + pmix_rank_info_t *iptr; + + /* by default we are looking for the remote data */ + ht = &nptr->server->remote; + /* check if this rank is local */ + PMIX_LIST_FOREACH(iptr, &nptr->server->ranks, pmix_rank_info_t) { + if (iptr->rank == rank) { + ht = &nptr->server->mylocal; + break; + } + } + + /* run through all the requests to this rank */ + PMIX_LIST_FOREACH(req, &lcd->loc_reqs, pmix_dmdx_request_t) { + pmix_status_t rc; + rc = _satisfy_request(ht, rank, req->cbfunc, req->cbdata); + if( PMIX_SUCCESS != rc ){ + /* if we can't satisfy this particular request (missing key?) 
*/ + req->cbfunc(rc, NULL, 0, req->cbdata, NULL, NULL); + } + } + } + /* remove all requests to this rank and cleanup the corresponding structure */ + pmix_list_remove_item(&pmix_server_globals.local_reqs, (pmix_list_item_t*)lcd); + PMIX_RELEASE(lcd); + } + return PMIX_SUCCESS; +} + +/* process the returned data from the host RM server */ +static void _process_dmdx_reply(int fd, short args, void *cbdata) +{ + pmix_dmdx_reply_caddy_t *caddy = (pmix_dmdx_reply_caddy_t *)cbdata; + pmix_kval_t *kp; + pmix_nspace_t *ns, *nptr; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_globals.debug_output, + "[%s:%d] process dmdx reply from %s:%d", + __FILE__, __LINE__, + caddy->lcd->proc.nspace, caddy->lcd->proc.rank); + + /* find the nspace object for this client */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(caddy->lcd->proc.nspace, ns->nspace)) { + nptr = ns; + break; + } + } + + if (NULL == nptr) { + /* should be impossible */ + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + caddy->status = PMIX_ERR_NOT_FOUND; + goto cleanup; + } + + /* if the request was successfully satisfied, then store the data + * in our hash table for remote procs. Although we could immediately + * resolve any outstanding requests on our tracking list, we instead + * store the data first so we can immediately satisfy any future + * requests. 
Then, rather than duplicate the resolve code here, we + * will let the pmix_pending_resolve function go ahead and retrieve + * it from the hash table */ + if (PMIX_SUCCESS == caddy->status) { + kp = PMIX_NEW(pmix_kval_t); + kp->key = strdup("modex"); + PMIX_VALUE_CREATE(kp->value, 1); + kp->value->type = PMIX_BYTE_OBJECT; + /* we don't know if the host is going to save this data + * or not, so we have to copy it - the client is expecting + * this to arrive as a byte object containing a buffer, so + * package it accordingly */ + kp->value->data.bo.bytes = malloc(caddy->ndata); + memcpy(kp->value->data.bo.bytes, caddy->data, caddy->ndata); + kp->value->data.bo.size = caddy->ndata; + /* store it in the appropriate hash */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->server->remote, caddy->lcd->proc.rank, kp))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kp); // maintain acctg + } + + /* always execute the callback to avoid having the client hang */ + pmix_pending_resolve(nptr, caddy->lcd->proc.rank, caddy->status, caddy->lcd); + +cleanup: + /* now call the release function so the host server + * knows it can release the data */ + if (NULL != caddy->relcbfunc) { + caddy->relcbfunc(caddy->cbdata); + } + PMIX_RELEASE(caddy); +} + +/* this is the callback function that the host RM server will call + * when it gets requested info back from a remote server */ +static void dmdx_cbfunc(pmix_status_t status, + const char *data, size_t ndata, void *cbdata, + pmix_release_cbfunc_t release_fn, void *release_cbdata) +{ + pmix_dmdx_reply_caddy_t *caddy; + + /* because the host RM is calling us from their own thread, we + * need to thread-shift into our local progress thread before + * accessing any global info */ + caddy = PMIX_NEW(pmix_dmdx_reply_caddy_t); + caddy->status = status; + /* point to the callers cbfunc */ + caddy->relcbfunc = release_fn; + caddy->cbdata = release_cbdata; + + /* point to the returned data and our own internal + * tracker */ + caddy->data = data; + 
caddy->ndata = ndata; + caddy->lcd = (pmix_dmdx_local_t *)cbdata; + pmix_output_verbose(2, pmix_globals.debug_output, + "[%s:%d] queue dmdx reply for %s:%d", + __FILE__, __LINE__, + caddy->lcd->proc.nspace, caddy->lcd->proc.rank); + event_assign(&caddy->ev, pmix_globals.evbase, -1, EV_WRITE, + _process_dmdx_reply, caddy); + event_priority_set(&caddy->ev, 0); + event_active(&caddy->ev, EV_WRITE, 1); +} + diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_listener.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c rename to opal/mca/pmix/pmix112/pmix/src/server/pmix_server_listener.c index 30cd98a5e7d..4fe76cfb9cf 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_listener.c @@ -41,6 +41,7 @@ #include #endif #include +#include #include PMIX_EVENT_HEADER #include @@ -86,6 +87,11 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address) printf("%s:%d bind() failed", __FILE__, __LINE__); return PMIX_ERROR; } + /* set the mode as required */ + if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) { + pmix_output(0, "CANNOT CHMOD %s", address->sun_path); + return PMIX_ERROR; + } /* setup listen backlog to maximum allowed by kernel */ if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) { @@ -139,10 +145,11 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address) return PMIX_ERR_OUT_OF_RESOURCE; } /* fork off the listener thread */ + pmix_server_globals.listen_thread_active = true; if (0 > pthread_create(&engine, NULL, listen_thread, NULL)) { + pmix_server_globals.listen_thread_active = false; return PMIX_ERROR; } - pmix_server_globals.listen_thread_active = true; } return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c new file mode 
100644 index 00000000000..85a9e94a8f0 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c @@ -0,0 +1,1091 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Artem Y. Polyakov . + * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include + +#include +#include "src/include/pmix_globals.h" + +#ifdef HAVE_STRING_H +#include +#endif +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif +#ifdef HAVE_SYS_UIO_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include PMIX_EVENT_HEADER + +#include "src/class/pmix_list.h" +#include "src/buffer_ops/buffer_ops.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/pmix_environ.h" +#include "src/util/progress_threads.h" +#include "src/usock/usock.h" +#include "src/sec/pmix_sec.h" + +#include "pmix_server_ops.h" + +pmix_server_module_t pmix_host_server = {0}; + +pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int32_t cnt; + pmix_status_t rc; + int status; + char *msg; + size_t nprocs; + pmix_proc_t *procs = NULL; + pmix_proc_t proc; + + pmix_output_verbose(2, pmix_globals.debug_output, "recvd ABORT"); + + /* unpack the status */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &status, &cnt, PMIX_INT))) { + return rc; + } + /* unpack the message */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &msg, &cnt, PMIX_STRING))) { + return rc; + } + /* unpack the number of 
procs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + return rc; + } + + /* unpack any provided procs - these are the procs the caller + * wants aborted */ + if (0 < nprocs) { + PMIX_PROC_CREATE(procs, nprocs); + cnt = nprocs; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { + return rc; + } + } + + /* let the local host's server execute it */ + if (NULL != pmix_host_server.abort) { + (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->rank; + rc = pmix_host_server.abort(&proc, peer->info->server_object, status, msg, + procs, nprocs, cbfunc, cbdata); + } else { + rc = PMIX_ERR_NOT_SUPPORTED; + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(rc, cbdata); + } + } + PMIX_PROC_FREE(procs, nprocs); + + /* the client passed this msg to us so we could give + * it to the host server - we are done with it now */ + if (NULL != msg) { + free(msg); + } + + return rc; +} + +pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) +{ + int32_t cnt; + pmix_status_t rc; + pmix_buffer_t *b2; + pmix_kval_t *kp; + pmix_scope_t scope; + pmix_hash_table_t *ht; + pmix_nspace_t *nptr; + pmix_rank_info_t *info; + pmix_dmdx_remote_t *dcd, *dcdnext; + pmix_buffer_t pbkt; + pmix_value_t *val; + char *data; + size_t sz; + + /* shorthand */ + info = peer->info; + nptr = info->nptr; + + /* this buffer will contain one or more buffers, each + * representing a different scope. 
These need to be locally + * stored separately so we can provide required data based + * on the requestor's location */ + cnt = 1; + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &scope, &cnt, PMIX_SCOPE))) { + if (PMIX_LOCAL == scope) { + ht = &nptr->server->mylocal; + } else if (PMIX_REMOTE == scope) { + ht = &nptr->server->myremote; + } else { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + rc = PMIX_ERR_BAD_PARAM; + return rc; + } + /* unpack and store the blob */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &b2, &cnt, PMIX_BUFFER))) { + PMIX_ERROR_LOG(rc); + return rc; + } + kp = PMIX_NEW(pmix_kval_t); + kp->key = strdup("modex"); + PMIX_VALUE_CREATE(kp->value, 1); + kp->value->type = PMIX_BYTE_OBJECT; + PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); + PMIX_RELEASE(b2); + /* store it in the appropriate hash */ + /* NOTE(review): a failed store is only logged - the loop goes on and + * the function can still report PMIX_SUCCESS for this commit */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kp); // maintain acctg + cnt = 1; + } + /* the loop is expected to end by running off the end of the buffer; + * any other status is a genuine unpack error */ + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + rc = PMIX_SUCCESS; + /* mark us as having successfully received a blob from this proc */ + info->modex_recvd = true; + + /* see if anyone remote is waiting on this data - could be more than one */ + PMIX_LIST_FOREACH_SAFE(dcd, dcdnext, &pmix_server_globals.remote_pnd, pmix_dmdx_remote_t) { + if (0 != strncmp(dcd->cd->proc.nspace, nptr->nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (dcd->cd->proc.rank == info->rank) { + /* we can now fulfill this request - collect the + * remote/global data from this proc */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + /* get any remote contribution - note that there + * may not be a contribution */ + if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) && + NULL != val) { + /* NOTE(review): pbkt is loaded from val's bytes and val is + * released right after - confirm the macros leave those + * bytes valid for the unload and callback below */ + PMIX_LOAD_BUFFER(&pbkt, val->data.bo.bytes, val->data.bo.size); + PMIX_VALUE_RELEASE(val); + } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); +
PMIX_DESTRUCT(&pbkt); + /* execute the callback */ + dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata); + if (NULL != data) { + free(data); + } + /* we have finished this request */ + pmix_list_remove_item(&pmix_server_globals.remote_pnd, &dcd->super); + PMIX_RELEASE(dcd); + } + } + /* see if anyone local is waiting on this data - could be more than one */ + return pmix_pending_resolve(nptr, info->rank, PMIX_SUCCESS, NULL); +} + +/* get an existing object for tracking LOCAL participation in a collective + * operation such as "fence". The only way this function can be + * called is if at least one local client process is participating + * in the operation. Thus, we know that at least one process is + * involved AND has called the collective operation. + * + * NOTE: the host server *cannot* call us with a collective operation + * as there is no mechanism by which it can do so. We call the host + * server only after all participating local procs have called us. + * So it is impossible for us to be called with a collective without + * us already knowing about all local participants. + * + * procs - the array of procs participating in the collective, + * regardless of location + * nprocs - the number of procs in the array + * type - the command type of the collective; a tracker only matches + * if it was created for the same type + * + * Returns the matching tracker, or NULL if there is none (or if + * procs is NULL) + */ +static pmix_server_trkr_t* get_tracker(pmix_proc_t *procs, + size_t nprocs, pmix_cmd_t type) +{ + pmix_server_trkr_t *trk; + size_t i, j; + size_t matches; + + pmix_output_verbose(5, pmix_globals.debug_output, + "get_tracker called with %d procs", (int)nprocs); + + /* bozo check - should never happen outside of programmer error */ + if (NULL == procs) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return NULL; + } + + /* there is no shortcut way to search the trackers - all + * we can do is perform a brute-force search.
Fortunately, + * it is highly unlikely that there will be more than one + * or two active at a time, and they are most likely to + * involve only a single proc with WILDCARD rank - so this + * shouldn't take long */ + PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) { + /* A collective operation is uniquely identified by + * the set of participating processes and the type of collective + */ + if (nprocs != trk->npcs) { + continue; + } + if (type != trk->type) { + continue; + } + matches = 0; + /* NOTE(review): each entry of procs is counted once if it appears + * anywhere in the tracker, so this assumes procs holds no + * duplicate entries */ + for (i=0; i < nprocs; i++) { + /* the procs may be in different order, so we have + * to do an exhaustive search */ + for (j=0; j < trk->npcs; j++) { + if (0 == strcmp(procs[i].nspace, trk->pcs[j].nspace) && + procs[i].rank == trk->pcs[j].rank) { + ++matches; + break; + } + } + } + if (trk->npcs == matches) { + return trk; + } + } + /* No tracker was found */ + return NULL; +} + +/* create a new object for tracking LOCAL participation in a collective + * operation such as "fence". The only way this function can be + * called is if at least one local client process is participating + * in the operation. Thus, we know that at least one process is + * involved AND has called the collective operation. + * + * NOTE: the host server *cannot* call us with a collective operation + * as there is no mechanism by which it can do so. We call the host + * server only after all participating local procs have called us. + * So it is impossible for us to be called with a collective without + * us already knowing about all local participants.
+ * + * procs - the array of procs participating in the collective, + * regardless of location + * nprocs - the number of procs in the array + * type - the command type of the collective being tracked + * + * Returns the new tracker, already appended to the global list of + * collectives, or NULL if procs is NULL + */ +static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, + size_t nprocs, pmix_cmd_t type) +{ + pmix_server_trkr_t *trk; + pmix_rank_info_t *iptr, *info; + size_t i; + bool all_def; + pmix_nspace_t *nptr, *ns; + + pmix_output_verbose(5, pmix_globals.debug_output, + "new_tracker called with %d procs", (int)nprocs); + + /* bozo check - should never happen outside of programmer error */ + if (NULL == procs) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return NULL; + } + + /* sanity check (active only when assertions are enabled): a tracker + * for this collective must not already exist */ + assert( NULL == get_tracker(procs, nprocs, type) ); + + pmix_output_verbose(5, pmix_globals.debug_output, + "adding new tracker with %d procs", (int)nprocs); + + /* get here if this tracker is new - create it */ + trk = PMIX_NEW(pmix_server_trkr_t); + + /* copy the procs */ + PMIX_PROC_CREATE(trk->pcs, nprocs); + trk->npcs = nprocs; + trk->type = type; + + all_def = true; + for (i=0; i < nprocs; i++) { + (void)strncpy(trk->pcs[i].nspace, procs[i].nspace, PMIX_MAX_NSLEN); + trk->pcs[i].rank = procs[i].rank; + /* is this nspace known to us? */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(procs[i].nspace, ns->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + /* cannot be a local proc */ + pmix_output_verbose(5, pmix_globals.debug_output, + "new_tracker: unknown nspace %s", + procs[i].nspace); + continue; + } + /* have all the clients for this nspace been defined? */ + if (!nptr->server->all_registered) { + /* nope, so no point in going further on this one - we'll + * process it once all the procs are known */ + all_def = false; + pmix_output_verbose(5, pmix_globals.debug_output, + "new_tracker: all clients not registered nspace %s", + procs[i].nspace); + continue; + } + /* is this one of my local ranks?
*/ + PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { + if (procs[i].rank == info->rank || + PMIX_RANK_WILDCARD == procs[i].rank) { + pmix_output_verbose(5, pmix_globals.debug_output, + "adding local proc %s.%d to tracker", + info->nptr->nspace, info->rank); + /* add a tracker for this proc - don't need more than + * the nspace pointer and rank */ + iptr = PMIX_NEW(pmix_rank_info_t); + PMIX_RETAIN(info->nptr); + iptr->nptr = info->nptr; + iptr->rank = info->rank; + pmix_list_append(&trk->ranks, &iptr->super); + /* track the count */ + ++trk->nlocal; + /* a wildcard rank matches every local rank of the nspace, + * so keep scanning; a specific rank can match only once */ + if (PMIX_RANK_WILDCARD != procs[i].rank) { + break; + } + } + } + } + /* the definition is complete unless some known nspace still had + * unregistered clients (unknown nspaces are skipped above and do + * not affect this) */ + if (all_def) { + trk->def_complete = true; + } + pmix_list_append(&pmix_server_globals.collectives, &trk->super); + return trk; +} + +/* Handle a FENCE request from a local client: unpack the participating + * procs and any directives, find or create the tracker for this + * collective, and call the host's fence_nb once every local + * participant has checked in. + * + * Returns PMIX_ERR_NOT_SUPPORTED if the host has no fence_nb, + * PMIX_ERR_BAD_PARAM if no procs were given, or an unpack error for a + * malformed request */ +pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, + pmix_buffer_t *buf, + pmix_modex_cbfunc_t modexcbfunc, + pmix_op_cbfunc_t opcbfunc) +{ + int32_t cnt; + pmix_status_t rc; + size_t nprocs; + pmix_proc_t *procs=NULL; + bool collect_data = false; + pmix_server_trkr_t *trk; + char *data = NULL; + size_t sz = 0; + pmix_buffer_t bucket, xfer; + pmix_rank_info_t *rkinfo; + pmix_value_t *val; + pmix_info_t *info = NULL; + size_t ninfo=0, n; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd FENCE"); + + if (NULL == pmix_host_server.fence_nb) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); + return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the number of procs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + return rc; + } + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd fence with %d procs", (int)nprocs); + /* there must be at least one as the client has to at least provide + * their own namespace */ + if (nprocs < 1) { + return PMIX_ERR_BAD_PARAM; + } + + /* create space for the procs */ + PMIX_PROC_CREATE(procs, nprocs); + /* unpack the procs */ + cnt = nprocs; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt,
PMIX_PROC))) { + goto cleanup; + } + + /* unpack the number of provided info structs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + return rc; + } + if (0 < ninfo) { + PMIX_INFO_CREATE(info, ninfo); + /* unpack the info */ + cnt = ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + goto cleanup; + } + /* see if we are to collect data - we don't internally care + * about any other directives */ + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key, PMIX_COLLECT_DATA)) { + collect_data = true; + break; + } + } + } + + /* find/create the local tracker for this operation */ + if (NULL == (trk = get_tracker(procs, nprocs, PMIX_FENCENB_CMD))) { + /* If no tracker was found - create and initialize it once */ + if (NULL == (trk = new_tracker(procs, nprocs, PMIX_FENCENB_CMD))) { + /* only if a bozo error occurs */ + PMIX_ERROR_LOG(PMIX_ERROR); + /* DO NOT HANG */ + if (NULL != opcbfunc) { + opcbfunc(PMIX_ERROR, cd); + } + rc = PMIX_ERROR; + goto cleanup; + } + trk->type = PMIX_FENCENB_CMD; + trk->modexcbfunc = modexcbfunc; + /* mark if they want the data back */ + if (collect_data) { + trk->collect_type = PMIX_COLLECT_YES; + } else { + trk->collect_type = PMIX_COLLECT_NO; + } + } else { + switch (trk->collect_type) { + case PMIX_COLLECT_NO: + if (collect_data) { + trk->collect_type = PMIX_COLLECT_INVALID; + } + break; + case PMIX_COLLECT_YES: + if (!collect_data) { + trk->collect_type = PMIX_COLLECT_INVALID; + } + break; + default: + break; + } + } + /* we only save the info structs from the first caller + * who provides them - it is a user error to provide + * different values from different participants */ + if (NULL == trk->info) { + trk->info = info; + trk->ninfo = ninfo; + } else { + /* cleanup */ + PMIX_INFO_FREE(info, ninfo); + info = NULL; + } + + /* add this contributor to the tracker so they get + * notified when we are done */ + PMIX_RETAIN(cd); + 
pmix_list_append(&trk->local_cbs, &cd->super); + /* if all local contributions have been received, + * let the local host's server know that we are at the + * "fence" point - they will callback once the barrier + * across all participants has been completed */ + if (trk->def_complete && + pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + pmix_output_verbose(2, pmix_globals.debug_output, + "fence complete"); + /* if the user asked us to collect data, then we have + * to provide any locally collected data to the host + * server so they can circulate it - only take data + * from the specified procs as not everyone is necessarily + * participating! And only take data intended for remote + * distribution */ + + PMIX_CONSTRUCT(&bucket, pmix_buffer_t); + + /* the payload always starts with the collect type, packed as a + * single byte */ + /* NOTE(review): the return codes of the pack calls in this + * section are not checked */ + assert( PMIX_COLLECT_MAX < UCHAR_MAX ); + unsigned char tmp = (unsigned char)trk->collect_type; + pmix_bfrop.pack(&bucket, &tmp, 1, PMIX_BYTE); + + if (PMIX_COLLECT_YES == trk->collect_type) { + pmix_buffer_t databuf; + PMIX_CONSTRUCT(&databuf, pmix_buffer_t); + pmix_output_verbose(2, pmix_globals.debug_output, + "fence - assembling data"); + PMIX_LIST_FOREACH(rkinfo, &trk->ranks, pmix_rank_info_t) { + pmix_buffer_t rankbuf; + PMIX_CONSTRUCT(&rankbuf, pmix_buffer_t); + /* get any remote contribution - note that there + * may not be a contribution */ + if (PMIX_SUCCESS == pmix_hash_fetch(&rkinfo->nptr->server->myremote, rkinfo->rank, "modex", &val) && + NULL != val) { + /* pack the proc so we know the source */ + char *foobar = rkinfo->nptr->nspace; + pmix_bfrop.pack(&rankbuf, &foobar, 1, PMIX_STRING); + pmix_bfrop.pack(&rankbuf, &rkinfo->rank, 1, PMIX_INT); + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); + PMIX_VALUE_RELEASE(val); + pmix_buffer_t *pxfer = &xfer; + pmix_bfrop.pack(&rankbuf, &pxfer, 1, PMIX_BUFFER); + PMIX_DESTRUCT(&xfer); + /* now pack this proc's contribution into the bucket */ + pmix_buffer_t *pdatabuf = &rankbuf; +
pmix_bfrop.pack(&databuf, &pdatabuf, 1, PMIX_BUFFER); + } + PMIX_DESTRUCT(&rankbuf); + } + // TODO: the data is copied several times here although a single copy would suffice + pmix_buffer_t *pbkt = &databuf; + pmix_bfrop.pack(&bucket, &pbkt, 1, PMIX_BUFFER); + PMIX_DESTRUCT(&databuf); + } + + PMIX_UNLOAD_BUFFER(&bucket, data, sz); + PMIX_DESTRUCT(&bucket); + /* NOTE(review): the status returned by fence_nb is discarded, so + * the rc returned below is still that of the last unpack */ + pmix_host_server.fence_nb(trk->pcs, trk->npcs, + trk->info, trk->ninfo, + data, sz, trk->modexcbfunc, trk); + } + + cleanup: + PMIX_PROC_FREE(procs, nprocs); + return rc; +} + +/* Handle a PUBLISH request from a local client: unpack the caller's + * effective uid and info array, append the uid as one extra + * PMIX_USERID entry, and pass the result to the host's publish + * function. The info array is freed as soon as the host call + * returns. */ +pmix_status_t pmix_server_publish(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t rc; + int32_t cnt; + size_t ninfo, einfo; + pmix_info_t *info = NULL; + pmix_proc_t proc; + uint32_t uid; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd PUBLISH"); + + if (NULL == pmix_host_server.publish) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the effective user id */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the number of info objects */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* we will be adding one for the user id */ + einfo = ninfo + 1; + PMIX_INFO_CREATE(info, einfo); + /* unpack the array of info objects */ + if (0 < ninfo) { + cnt=ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + /* the uid goes into the extra, last slot of the array */ + (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + info[einfo-1].value.type = PMIX_UINT32; + info[einfo-1].value.data.uint32 = uid; + + /* call the local server */ + (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->rank; + rc = pmix_host_server.publish(&proc, info, einfo, cbfunc, cbdata); + + cleanup: + PMIX_INFO_FREE(info, einfo); + return rc; +} + +pmix_status_t
pmix_server_lookup(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + int32_t cnt; + pmix_status_t rc; + size_t nkeys, i; + char **keys=NULL, *sptr; + pmix_info_t *info = NULL; + /* einfo starts at 0 so the cleanup path never reads an indeterminate + * count when it is reached before the info array exists */ + size_t ninfo, einfo = 0; + pmix_proc_t proc; + uint32_t uid; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd LOOKUP"); + + if (NULL == pmix_host_server.lookup) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the effective user id */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the number of keys */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the array of keys */ + for (i=0; i < nkeys; i++) { + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + /* the unpacked string is freed right after being appended to keys */ + pmix_argv_append_nosize(&keys, sptr); + free(sptr); + } + /* unpack the number of info objects */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + /* keys may already hold entries - leave through cleanup so + * they are freed */ + goto cleanup; + } + /* we will be adding one for the user id */ + einfo = ninfo + 1; + PMIX_INFO_CREATE(info, einfo); + /* unpack the array of info objects */ + if (0 < ninfo) { + cnt=ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + /* the uid goes into the extra, last slot of the array */ + (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + info[einfo-1].value.type = PMIX_UINT32; + info[einfo-1].value.data.uint32 = uid; + + /* call the local server */ + (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->rank; + rc = pmix_host_server.lookup(&proc, keys, info, einfo, cbfunc, cbdata); + + cleanup: + PMIX_INFO_FREE(info, einfo); + pmix_argv_free(keys); + return rc; +} + +pmix_status_t pmix_server_unpublish(pmix_peer_t *peer, + pmix_buffer_t
*buf, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int32_t cnt; + pmix_status_t rc; + /* einfo and info start out empty so that cleanup is safe no matter + * where it is entered from */ + size_t i, nkeys, ninfo, einfo = 0; + char **keys=NULL, *sptr; + pmix_proc_t proc; + uint32_t uid; + pmix_info_t *info = NULL; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd UNPUBLISH"); + + if (NULL == pmix_host_server.unpublish) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the effective user id */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the number of keys */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the keys */ + for (i=0; i < nkeys; i++) { + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + /* the unpacked string is freed right after being appended to keys */ + pmix_argv_append_nosize(&keys, sptr); + free(sptr); + } + /* unpack the number of info objects */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + /* keys may already hold entries - leave through cleanup so + * they are freed */ + goto cleanup; + } + /* we will be adding one for the user id */ + einfo = ninfo + 1; + PMIX_INFO_CREATE(info, einfo); + /* unpack the array of info objects */ + if (0 < ninfo) { + cnt=ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + /* the uid goes into the extra, last slot of the array */ + (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + info[einfo-1].value.type = PMIX_UINT32; + info[einfo-1].value.data.uint32 = uid; + + /* call the local server */ + (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->rank; + rc = pmix_host_server.unpublish(&proc, keys, info, einfo, cbfunc, cbdata); + + cleanup: + /* release the info array once the host call has returned, exactly as + * the publish and lookup handlers do - it was previously leaked */ + PMIX_INFO_FREE(info, einfo); + pmix_argv_free(keys); + return rc; +} + +pmix_status_t pmix_server_spawn(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_spawn_cbfunc_t cbfunc, + void *cbdata) +{ + int32_t cnt; + size_t napps, ninfo; + pmix_info_t
*info=NULL; + pmix_app_t *apps=NULL; + pmix_status_t rc; + pmix_proc_t proc; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd SPAWN"); + + if (NULL == pmix_host_server.spawn) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); + return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the number of job-level directives */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* unpack the array of job-level directives */ + if (0 < ninfo) { + PMIX_INFO_CREATE(info, ninfo); + cnt=ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + + /* unpack the number of apps */ + cnt=1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &napps, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + /* the directives may already be allocated - leave through cleanup + * so they are freed (apps is still NULL and is skipped there) */ + goto cleanup; + } + /* unpack the array of apps */ + if (0 < napps) { + PMIX_APP_CREATE(apps, napps); + cnt=napps; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, apps, &cnt, PMIX_APP))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + /* call the local server */ + (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->rank; + rc = pmix_host_server.spawn(&proc, info, ninfo, apps, napps, cbfunc, cbdata); + + cleanup: + /* both arrays are released as soon as the host call has returned */ + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); + } + if (NULL != apps) { + PMIX_APP_FREE(apps, napps); + } + return rc; +} + +/* Handle a CONNECT or DISCONNECT request (selected by the disconnect + * flag) from a local client: unpack the participating procs and any + * directives, find or create the tracker for this collective, and call + * the host's connect/disconnect function once every local participant + * has checked in */ +pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, + pmix_buffer_t *buf, bool disconnect, + pmix_op_cbfunc_t cbfunc) +{ + int32_t cnt; + pmix_status_t rc; + pmix_proc_t *procs; + size_t nprocs; + pmix_server_trkr_t *trk; + pmix_info_t *info = NULL; + size_t ninfo=0; + pmix_cmd_t type = PMIX_CONNECTNB_CMD; + + pmix_output_verbose(2, pmix_globals.debug_output, + "recvd CONNECT from peer %s:%d", + cd->peer->info->nptr->nspace, cd->peer->info->rank); + + if ((disconnect && NULL == pmix_host_server.disconnect) || + (!disconnect && NULL == pmix_host_server.connect)) { +
return PMIX_ERR_NOT_SUPPORTED; + } + + /* unpack the number of procs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* there must be at least one proc - we do not allow the client + * to send us NULL proc as the server has no idea what to do + * with that situation. Instead, the client should at least send + * us their own namespace for the use-case where the connection + * spans all procs in that namespace */ + if (nprocs < 1) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + /* unpack the procs */ + PMIX_PROC_CREATE(procs, nprocs); + cnt = nprocs; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { + PMIX_ERROR_LOG(rc); + return rc; + } + + /* unpack the number of provided info structs */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + return rc; + } + if (0 < ninfo) { + PMIX_INFO_CREATE(info, ninfo); + /* unpack the info */ + cnt = ninfo; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + goto cleanup; + } + } + + /* find/create the local tracker for this operation */ + if (disconnect) { + type = PMIX_DISCONNECTNB_CMD; + } + if (NULL == (trk = get_tracker(procs, nprocs, type))) { + if (NULL == (trk = new_tracker(procs, nprocs, type))) { + /* only if a bozo error occurs */ + PMIX_ERROR_LOG(PMIX_ERROR); + /* DO NOT HANG */ + if (NULL != cbfunc) { + cbfunc(PMIX_ERROR, cd); + } + rc = PMIX_ERROR; + goto cleanup; + } + trk->op_cbfunc = cbfunc; + } + + /* add this contributor to the tracker so they get + * notified when we are done */ + PMIX_RETAIN(cd); + pmix_list_append(&trk->local_cbs, &cd->super); + /* if all local contributions have been received, + * let the local host's server know that we are at the + * "fence" point - they will callback once the [dis]connect + * across all participants has been completed */ + if (trk->def_complete && + 
pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + /* NOTE(review): procs and info are freed at cleanup right after + * the host call returns, so the host must not keep these pointers - + * confirm against the host server API contract */ + if (disconnect) { + rc = pmix_host_server.disconnect(procs, nprocs, info, ninfo, cbfunc, trk); + } else { + rc = pmix_host_server.connect(procs, nprocs, info, ninfo, cbfunc, trk); + } + } else { + /* still waiting for other local participants */ + rc = PMIX_SUCCESS; + } + + cleanup: + PMIX_PROC_FREE(procs, nprocs); + PMIX_INFO_FREE(info, ninfo); + return rc; +} + +// instance server library classes +/* constructor: an empty tracker whose definition is not yet complete */ +static void tcon(pmix_server_trkr_t *t) +{ + t->pcs = NULL; + t->npcs = 0; + t->active = true; + t->def_complete = false; + PMIX_CONSTRUCT(&t->ranks, pmix_list_t); + PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t); + t->nlocal = 0; + t->local_cnt = 0; + t->info = NULL; + t->ninfo = 0; + /* this needs to be set explicitly */ + t->collect_type = PMIX_COLLECT_INVALID; + t->modexcbfunc = NULL; + t->op_cbfunc = NULL; +} +/* destructor: releases the proc array, both lists and any saved info */ +static void tdes(pmix_server_trkr_t *t) +{ + if (NULL != t->pcs) { + free(t->pcs); + } + PMIX_LIST_DESTRUCT(&t->ranks); + PMIX_LIST_DESTRUCT(&t->local_cbs); + PMIX_INFO_FREE(t->info, t->ninfo); +} +PMIX_CLASS_INSTANCE(pmix_server_trkr_t, + pmix_list_item_t, + tcon, tdes); + +/* constructor: no peer attached yet; the embedded send caddy is constructed */ +static void cdcon(pmix_server_caddy_t *cd) +{ + cd->peer = NULL; + PMIX_CONSTRUCT(&cd->snd, pmix_snd_caddy_t); +} +/* destructor: releases the peer, if one was attached, and the send caddy */ +static void cddes(pmix_server_caddy_t *cd) +{ + if (NULL != cd->peer) { + PMIX_RELEASE(cd->peer); + } + PMIX_DESTRUCT(&cd->snd); +} +PMIX_CLASS_INSTANCE(pmix_server_caddy_t, + pmix_list_item_t, + cdcon, cddes); + + +PMIX_CLASS_INSTANCE(pmix_snd_caddy_t, + pmix_object_t, + NULL, NULL); + +/* constructor: zeroed proc, marked active, everything else empty */ +static void scadcon(pmix_setup_caddy_t *p) +{ + memset(&p->proc, 0, sizeof(pmix_proc_t)); + p->active = true; + p->server_object = NULL; + p->nlocalprocs = 0; + p->info = NULL; + p->ninfo = 0; + p->cbfunc = NULL; + p->cbdata = NULL; +} +/* destructor: intentionally empty - nothing is released here */ +static void scaddes(pmix_setup_caddy_t *p) +{ +} +PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, + pmix_object_t, + scadcon, scaddes); + +/* constructor: marked active with empty arrays; a fresh buffer is + * allocated on the lines that follow */ +static void ncon(pmix_notify_caddy_t *p) +{ + p->active = true; + p->procs = NULL; + p->nprocs = 0; + p->error_procs = NULL; +
p->error_nprocs = 0; + p->info = NULL; + p->ninfo = 0; + p->buf = PMIX_NEW(pmix_buffer_t); +} +/* destructor: frees whichever of procs, error_procs and info were set + * and releases the buffer */ +static void ndes(pmix_notify_caddy_t *p) +{ + if (NULL != p->procs) { + PMIX_PROC_FREE(p->procs, p->nprocs); + } + if (NULL != p->error_procs) { + PMIX_PROC_FREE(p->error_procs, p->error_nprocs); + } + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->ninfo); + } + if (NULL != p->buf) { + PMIX_RELEASE(p->buf); + } +} +PMIX_CLASS_INSTANCE(pmix_notify_caddy_t, + pmix_object_t, + ncon, ndes); + +PMIX_CLASS_INSTANCE(pmix_trkr_caddy_t, + pmix_object_t, + NULL, NULL); + +/* constructor: no request attached yet */ +static void dmcon(pmix_dmdx_remote_t *p) +{ + p->cd = NULL; +} +/* destructor: releases the attached request, if any */ +static void dmdes(pmix_dmdx_remote_t *p) +{ + if (NULL != p->cd) { + PMIX_RELEASE(p->cd); + } +} +PMIX_CLASS_INSTANCE(pmix_dmdx_remote_t, + pmix_list_item_t, + dmcon, dmdes); + +PMIX_CLASS_INSTANCE(pmix_dmdx_request_t, + pmix_list_item_t, + NULL, NULL); + +/* constructor: zeroed proc, empty list of local requests, no info */ +static void lmcon(pmix_dmdx_local_t *p) +{ + memset(&p->proc, 0, sizeof(pmix_proc_t)); + PMIX_CONSTRUCT(&p->loc_reqs, pmix_list_t); + p->info = NULL; + p->ninfo = 0; +} +/* destructor: frees the info array and destructs the request list */ +/* NOTE(review): loc_reqs is torn down with PMIX_DESTRUCT whereas the + * tracker destructor uses PMIX_LIST_DESTRUCT for its lists - confirm + * that any remaining list items are released elsewhere */ +static void lmdes(pmix_dmdx_local_t *p) +{ + PMIX_INFO_FREE(p->info, p->ninfo); + PMIX_DESTRUCT(&p->loc_reqs); +} +PMIX_CLASS_INSTANCE(pmix_dmdx_local_t, + pmix_list_item_t, + lmcon, lmdes); + +PMIX_CLASS_INSTANCE(pmix_pending_connection_t, + pmix_object_t, + NULL, NULL); diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.h similarity index 91% rename from opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h rename to opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.h index c6279d53922..cfaa949d73c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include "src/usock/usock.h" #include "src/util/hash.h" @@ -138,13 +138,14 @@ typedef struct { PMIX_CLASS_DECLARATION(pmix_pending_connection_t); typedef
struct { - pmix_pointer_array_t clients; // array of pmix_peer_t local clients - pmix_list_t collectives; // list of active pmix_server_trkr_t - pmix_list_t remote_pnd; // list of pmix_dmdx_remote_t awaiting arrival of data fror servicing remote req's - pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours - bool listen_thread_active; // listen thread is running - int listen_socket; // socket listener is watching - int stop_thread[2]; // pipe used to stop listener thread + pmix_pointer_array_t clients; // array of pmix_peer_t local clients + pmix_list_t collectives; // list of active pmix_server_trkr_t + pmix_list_t remote_pnd; // list of pmix_dmdx_remote_t awaiting arrival of data for servicing remote req's + pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours + volatile bool listen_thread_active; // listen thread is running + int listen_socket; // socket listener is watching + int stop_thread[2]; // pipe used to stop listener thread + pmix_buffer_t gdata; // cache of data given to me for passing to all clients } pmix_server_globals_t; #define PMIX_PEER_CADDY(c, p, t) \ @@ -183,10 +184,7 @@ void pmix_stop_listening(void); bool pmix_server_trk_update(pmix_server_trkr_t *trk); -pmix_status_t pmix_pending_request(pmix_nspace_t *nptr, int rank, - pmix_info_t *info, size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); -void pmix_pending_nspace_fix(pmix_nspace_t *nptr); +void pmix_pending_nspace_requests(pmix_nspace_t *nptr); pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, int rank, pmix_status_t status, pmix_dmdx_local_t *lcd); diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_regex.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_regex.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_regex.c rename to opal/mca/pmix/pmix112/pmix/src/server/pmix_server_regex.c diff --git
a/opal/mca/pmix/pmix1xx/pmix/src/usock/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/usock/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/usock/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/usock/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c b/opal/mca/pmix/pmix112/pmix/src/usock/usock.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c rename to opal/mca/pmix/pmix112/pmix/src/usock/usock.c index 5156f6a3aa5..94e3f871139 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c +++ b/opal/mca/pmix/pmix112/pmix/src/usock/usock.c @@ -273,13 +273,14 @@ static void cbcon(pmix_cb_t *p) p->rank = -1; p->key = NULL; p->value = NULL; + p->procs = NULL; + p->info = NULL; + p->ninfo = 0; + p->nvals = 0; } static void cbdes(pmix_cb_t *p) { PMIX_DESTRUCT(&p->data); - if (NULL != p->key) { - free(p->key); - } } PMIX_CLASS_INSTANCE(pmix_cb_t, pmix_list_item_t, diff --git a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h b/opal/mca/pmix/pmix112/pmix/src/usock/usock.h similarity index 94% rename from opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h rename to opal/mca/pmix/pmix112/pmix/src/usock/usock.h index 42df5055cf1..6aaf0e0781c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h +++ b/opal/mca/pmix/pmix112/pmix/src/usock/usock.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #ifdef HAVE_UNISTD_H #include @@ -169,6 +169,8 @@ typedef struct { pmix_event_t ev; volatile bool active; int status; + pmix_status_t pstatus; + pmix_scope_t scope; pmix_buffer_t data; pmix_usock_cbfunc_t cbfunc; pmix_op_cbfunc_t op_cbfunc; @@ -180,10 +182,22 @@ typedef struct { int rank; char *key; pmix_value_t *value; + pmix_proc_t *procs; + pmix_info_t *info; + size_t ninfo; size_t nvals; } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); +/* an internal macro for shifting incoming requests + * to the internal event thread */ +#define PMIX_THREAD_SHIFT(c, f) \ + do { \ + event_assign(&((c)->ev), 
pmix_globals.evbase, -1, \ + EV_WRITE, (f), (c)); \ + event_active(&((c)->ev), EV_WRITE, 1); \ + } while(0); + typedef struct { pmix_object_t super; pmix_event_t ev; diff --git a/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c b/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c new file mode 100644 index 00000000000..f77e15155bd --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Artem Y. Polyakov . + * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include +#include +#include +#include +#include + +#ifdef HAVE_STRING_H +#include +#endif +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif +#ifdef HAVE_SYS_UIO_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#include "src/class/pmix_pointer_array.h" +#include "src/include/pmix_globals.h" +#include "src/server/pmix_server_ops.h" +#include "src/util/error.h" + +#include "usock.h" + +static uint32_t current_tag = 1; // 0 is reserved for system purposes + +static void lost_connection(pmix_peer_t *peer, pmix_status_t err) +{ + /* stop all events */ + if (peer->recv_ev_active) { + event_del(&peer->recv_event); + peer->recv_ev_active = false; + } + if (peer->send_ev_active) { + event_del(&peer->send_event); + peer->send_ev_active = false; + } + if (NULL != peer->recv_msg) { + PMIX_RELEASE(peer->recv_msg); + peer->recv_msg = NULL; + } + CLOSE_THE_SOCKET(peer->sd); + if (pmix_globals.server) { + /* if I am a server, then we need to + * do some cleanup as the client has + * left us */ + pmix_pointer_array_set_item(&pmix_server_globals.clients, + peer->index, NULL); + PMIX_RELEASE(peer); + } else { + /* if I am a client, 
there is only + * one connection we can have */ + pmix_globals.connected = false; + } + PMIX_REPORT_ERROR(err); +} + +static pmix_status_t send_bytes(int sd, char **buf, size_t *remain) +{ + pmix_status_t ret = PMIX_SUCCESS; + int rc; + char *ptr = *buf; + while (0 < *remain) { + rc = write(sd, ptr, *remain); + if (rc < 0) { + if (pmix_socket_errno == EINTR) { + continue; + } else if (pmix_socket_errno == EAGAIN) { + /* tell the caller to keep this message on active, + * but let the event lib cycle so other messages + * can progress while this socket is busy + */ + ret = PMIX_ERR_RESOURCE_BUSY; + goto exit; + } else if (pmix_socket_errno == EWOULDBLOCK) { + /* tell the caller to keep this message on active, + * but let the event lib cycle so other messages + * can progress while this socket is busy + */ + ret = PMIX_ERR_WOULD_BLOCK; + goto exit; + } + /* we hit an error and cannot progress this message */ + pmix_output(0, "pmix_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]", + strerror(pmix_socket_errno), + pmix_socket_errno, sd); + ret = PMIX_ERR_COMM_FAILURE; + goto exit; + } + /* update location */ + (*remain) -= rc; + ptr += rc; + } + /* we sent the full data block */ +exit: + *buf = ptr; + return ret; +} + +static int read_bytes(int sd, char **buf, size_t *remain) +{ + int ret = PMIX_SUCCESS, rc; + char *ptr = *buf; + + /* read until all bytes recvd or error */ + while (0 < *remain) { + rc = read(sd, ptr, *remain); + if (rc < 0) { + if(pmix_socket_errno == EINTR) { + continue; + } else if (pmix_socket_errno == EAGAIN) { + /* tell the caller to keep this message on active, + * but let the event lib cycle so other messages + * can progress while this socket is busy + */ + ret = PMIX_ERR_RESOURCE_BUSY; + goto exit; + } else if (pmix_socket_errno == EWOULDBLOCK) { + /* tell the caller to keep this message on active, + * but let the event lib cycle so other messages + * can progress while this socket is busy + */ + ret = PMIX_ERR_WOULD_BLOCK; + goto exit; + 
} + /* we hit an error and cannot progress this message - report + * the error back to the RML and let the caller know + * to abort this message + */ + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_usock_msg_recv: readv failed: %s (%d)", + strerror(pmix_socket_errno), + pmix_socket_errno); + ret = PMIX_ERR_UNREACH; + goto exit; + } else if (0 == rc) { + /* the remote peer closed the connection */ + ret = PMIX_ERR_UNREACH; + goto exit; + } + /* we were able to read something, so adjust counters and location */ + *remain -= rc; + ptr += rc; + } + /* we read the full data block */ +exit: + *buf = ptr; + return ret; +} + +/* + * A file descriptor is available/ready for send. Check the state + * of the socket and take the appropriate action. + */ +void pmix_usock_send_handler(int sd, short flags, void *cbdata) +{ + pmix_peer_t *peer = (pmix_peer_t*)cbdata; + pmix_usock_send_t *msg = peer->send_msg; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_globals.debug_output, + "sock:send_handler SENDING TO PEER %s:%d with %s msg", + peer->info->nptr->nspace, peer->info->rank, + (NULL == msg) ? 
"NULL" : "NON-NULL"); + if (NULL != msg) { + if (!msg->hdr_sent) { + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler SENDING HEADER"); + if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) { + /* header is completely sent */ + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler HEADER SENT"); + msg->hdr_sent = true; + /* setup to send the data */ + if (NULL == msg->data) { + /* this was a zero-byte msg - nothing more to do */ + PMIX_RELEASE(msg); + peer->send_msg = NULL; + goto next; + } else { + /* send the data as a single block */ + msg->sdptr = msg->data->base_ptr; + msg->sdbytes = msg->hdr.nbytes; + } + /* fall thru and let the send progress */ + } else if (PMIX_ERR_RESOURCE_BUSY == rc || + PMIX_ERR_WOULD_BLOCK == rc) { + /* exit this event and let the event lib progress */ + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler RES BUSY OR WOULD BLOCK"); + return; + } else { + // report the error + event_del(&peer->send_event); + peer->send_ev_active = false; + PMIX_RELEASE(msg); + peer->send_msg = NULL; + lost_connection(peer, rc); + return; + } + } + + if (msg->hdr_sent) { + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler SENDING BODY OF MSG"); + if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) { + // message is complete + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler BODY SENT"); + PMIX_RELEASE(msg); + peer->send_msg = NULL; + } else if (PMIX_ERR_RESOURCE_BUSY == rc || + PMIX_ERR_WOULD_BLOCK == rc) { + /* exit this event and let the event lib progress */ + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:send_handler RES BUSY OR WOULD BLOCK"); + return; + } else { + // report the error + pmix_output(0, "pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", + peer->sd); + event_del(&peer->send_event); + peer->send_ev_active = false; + PMIX_RELEASE(msg); + peer->send_msg = 
NULL; + lost_connection(peer, rc); + return; + } + } + + next: + /* if current message completed - progress any pending sends by + * moving the next in the queue into the "on-deck" position. Note + * that this doesn't mean we send the message right now - we will + * wait for another send_event to fire before doing so. This gives + * us a chance to service any pending recvs. + */ + peer->send_msg = (pmix_usock_send_t*) + pmix_list_remove_first(&peer->send_queue); + } + + /* if nothing else to do unregister for send event notifications */ + if (NULL == peer->send_msg && peer->send_ev_active) { + event_del(&peer->send_event); + peer->send_ev_active = false; + } +} + +/* + * Dispatch to the appropriate action routine based on the state + * of the connection with the peer. + */ + +void pmix_usock_recv_handler(int sd, short flags, void *cbdata) +{ + int rc; + pmix_peer_t *peer = (pmix_peer_t*)cbdata; + pmix_usock_recv_t *msg = NULL; + + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:recv:handler called with peer %s:%d", + (NULL == peer) ? "NULL" : peer->info->nptr->nspace, + (NULL == peer) ? 
-1 : peer->info->rank); + + if (NULL == peer) { + return; + } + /* allocate a new message and setup for recv */ + if (NULL == peer->recv_msg) { + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:recv:handler allocate new recv msg"); + peer->recv_msg = PMIX_NEW(pmix_usock_recv_t); + if (NULL == peer->recv_msg) { + pmix_output(0, "usock_recv_handler: unable to allocate recv message\n"); + goto err_close; + } + peer->recv_msg->peer = peer; // provide a handle back to the peer object + /* start by reading the header */ + peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; + peer->recv_msg->rdbytes = sizeof(pmix_usock_hdr_t); + } + msg = peer->recv_msg; + msg->sd = sd; + /* if the header hasn't been completely read, read it */ + if (!msg->hdr_recvd) { + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:recv:handler read hdr on socket %d", peer->sd); + if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { + /* completed reading the header */ + peer->recv_msg->hdr_recvd = true; + /* if this is a zero-byte message, then we are done */ + if (0 == peer->recv_msg->hdr.nbytes) { + pmix_output_verbose(2, pmix_globals.debug_output, + "RECVD ZERO-BYTE MESSAGE FROM %s:%d for tag %d", + peer->info->nptr->nspace, peer->info->rank, + peer->recv_msg->hdr.tag); + peer->recv_msg->data = NULL; // make sure + peer->recv_msg->rdptr = NULL; + peer->recv_msg->rdbytes = 0; + } else { + pmix_output_verbose(2, pmix_globals.debug_output, + "usock:recv:handler allocate data region of size %lu", + (unsigned long)peer->recv_msg->hdr.nbytes); + /* allocate a zero-filled data region - if the memory cannot be obtained, drop the connection just as we do when the recv message itself cannot be allocated */ + peer->recv_msg->data = (char*)calloc(1, peer->recv_msg->hdr.nbytes); + if (NULL == peer->recv_msg->data) { goto err_close; } + /* point to it */ + peer->recv_msg->rdptr = peer->recv_msg->data; + peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes; + } + /* fall thru and attempt to read the data */ + } else if (PMIX_ERR_RESOURCE_BUSY == rc || + PMIX_ERR_WOULD_BLOCK == rc) { + /*
exit this event and let the event lib progress */ + return; + } else { + /* the remote peer closed the connection - report that condition + * and let the caller know + */ + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_usock_msg_recv: peer closed connection"); + goto err_close; + } + } + + if (peer->recv_msg->hdr_recvd) { + /* continue to read the data block - we start from + * wherever we left off, which could be at the + * beginning or somewhere in the message + */ + if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { + /* we recvd all of the message */ + pmix_output_verbose(2, pmix_globals.debug_output, + "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + (int)peer->recv_msg->hdr.nbytes, + peer->recv_msg->hdr.tag, peer->sd); + /* post it for delivery */ + PMIX_ACTIVATE_POST_MSG(peer->recv_msg); + peer->recv_msg = NULL; + return; + } else if (PMIX_ERR_RESOURCE_BUSY == rc || + PMIX_ERR_WOULD_BLOCK == rc) { + /* exit this event and let the event lib progress */ + return; + } else { + /* the remote peer closed the connection - report that condition + * and let the caller know + */ + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_usock_msg_recv: peer closed connection"); + goto err_close; + } + } + /* success */ + return; + err_close: + /* stop all events */ + if (peer->recv_ev_active) { + event_del(&peer->recv_event); + peer->recv_ev_active = false; + } + if (peer->send_ev_active) { + event_del(&peer->send_event); + peer->send_ev_active = false; + } + if (NULL != peer->recv_msg) { + PMIX_RELEASE(peer->recv_msg); + peer->recv_msg = NULL; + } + lost_connection(peer, PMIX_ERR_UNREACH); +} + +void pmix_usock_send_recv(int fd, short args, void *cbdata) +{ + pmix_usock_sr_t *ms = (pmix_usock_sr_t*)cbdata; + pmix_usock_posted_recv_t *req; + pmix_usock_send_t *snd; + uint32_t tag; + + /* set the tag */ + tag = current_tag++; + + if (NULL != ms->cbfunc) { + /* if a callback msg is expected, setup 
a recv for it */ + req = PMIX_NEW(pmix_usock_posted_recv_t); + /* take the next tag in the sequence */ + if (UINT32_MAX == current_tag ) { + current_tag = 1; + } + req->tag = tag; + req->cbfunc = ms->cbfunc; + req->cbdata = ms->cbdata; + pmix_output_verbose(5, pmix_globals.debug_output, + "posting recv on tag %u", req->tag); + /* add it to the list of recvs - we cannot have unexpected messages + * in this subsystem as the server never sends us something that + * we didn't previously request */ + pmix_list_prepend(&pmix_usock_globals.posted_recvs, &req->super); + } + + snd = PMIX_NEW(pmix_usock_send_t); + snd->hdr.pindex = pmix_globals.pindex; + snd->hdr.tag = tag; + snd->hdr.nbytes = ms->bfr->bytes_used; + snd->data = ms->bfr; + /* always start with the header */ + snd->sdptr = (char*)&snd->hdr; + snd->sdbytes = sizeof(pmix_usock_hdr_t); + + /* if there is no message on-deck, put this one there */ + if (NULL == ms->peer->send_msg) { + ms->peer->send_msg = snd; + } else { + /* add it to the queue */ + pmix_list_append(&ms->peer->send_queue, &snd->super); + } + /* ensure the send event is active */ + if (!ms->peer->send_ev_active) { + event_add(&ms->peer->send_event, 0); + ms->peer->send_ev_active = true; + } + /* cleanup */ + PMIX_RELEASE(ms); +} + +void pmix_usock_process_msg(int fd, short flags, void *cbdata) +{ + pmix_usock_recv_t *msg = (pmix_usock_recv_t*)cbdata; + pmix_usock_posted_recv_t *rcv; + pmix_buffer_t buf; + + pmix_output_verbose(5, pmix_globals.debug_output, + "message received %d bytes for tag %u on socket %d", + (int)msg->hdr.nbytes, msg->hdr.tag, msg->sd); + + /* see if we have a waiting recv for this message - a posted tag of UINT32_MAX is treated as a wildcard that matches any incoming tag */ + PMIX_LIST_FOREACH(rcv, &pmix_usock_globals.posted_recvs, pmix_usock_posted_recv_t) { + pmix_output_verbose(5, pmix_globals.debug_output, + "checking msg on tag %u for tag %u", + msg->hdr.tag, rcv->tag); + + if (msg->hdr.tag == rcv->tag || UINT32_MAX == rcv->tag) { + if (NULL != rcv->cbfunc) { + /* construct and load the buffer */ +
PMIX_CONSTRUCT(&buf, pmix_buffer_t); + if (NULL != msg->data) { + buf.base_ptr = (char*)msg->data; + buf.bytes_allocated = buf.bytes_used = msg->hdr.nbytes; + buf.unpack_ptr = buf.base_ptr; + buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; + } + msg->data = NULL; // protect the data region + if (NULL != rcv->cbfunc) { + rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); + } + PMIX_DESTRUCT(&buf); // free's the msg data + /* also done with the recv, if not a wildcard or the error tag */ + if (UINT32_MAX != rcv->tag && 0 != rcv->tag) { + pmix_list_remove_item(&pmix_usock_globals.posted_recvs, &rcv->super); + PMIX_RELEASE(rcv); + } + PMIX_RELEASE(msg); + return; + } + } + } + + /* we get here if no matching recv was found - this is an error */ + pmix_output(0, "UNEXPECTED MESSAGE tag =%u", msg->hdr.tag); + PMIX_RELEASE(msg); + PMIX_REPORT_ERROR(PMIX_ERROR); +} diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/util/Makefile.am similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/Makefile.am rename to opal/mca/pmix/pmix112/pmix/src/util/Makefile.am diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/argv.c b/opal/mca/pmix/pmix112/pmix/src/util/argv.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/argv.c rename to opal/mca/pmix/pmix112/pmix/src/util/argv.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h b/opal/mca/pmix/pmix112/pmix/src/util/argv.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/argv.h rename to opal/mca/pmix/pmix112/pmix/src/util/argv.h index 2dfc7d20f76..6939b6c7581 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h +++ b/opal/mca/pmix/pmix112/pmix/src/util/argv.h @@ -41,7 +41,7 @@ #include #endif -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/basename.c b/opal/mca/pmix/pmix112/pmix/src/util/basename.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/basename.c rename to
opal/mca/pmix/pmix112/pmix/src/util/basename.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/basename.h b/opal/mca/pmix/pmix112/pmix/src/util/basename.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/basename.h rename to opal/mca/pmix/pmix112/pmix/src/util/basename.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/crc.c b/opal/mca/pmix/pmix112/pmix/src/util/crc.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/crc.c rename to opal/mca/pmix/pmix112/pmix/src/util/crc.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/crc.h b/opal/mca/pmix/pmix112/pmix/src/util/crc.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/crc.h rename to opal/mca/pmix/pmix112/pmix/src/util/crc.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/error.c b/opal/mca/pmix/pmix112/pmix/src/util/error.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/src/util/error.c rename to opal/mca/pmix/pmix112/pmix/src/util/error.c index 8cc4bcd978a..0bf5587d736 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/error.c @@ -31,7 +31,7 @@ #include #endif -#include +#include #include "src/util/error.h" #include "src/include/pmix_globals.h" @@ -123,6 +123,8 @@ const char* PMIx_Error_string(pmix_status_t errnum) case PMIX_EXISTS: return "EXISTS"; + case PMIX_ERR_SILENT: + return "SILENT"; case PMIX_ERROR: return "ERROR"; case PMIX_SUCCESS: diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/error.h b/opal/mca/pmix/pmix112/pmix/src/util/error.h similarity index 68% rename from opal/mca/pmix/pmix1xx/pmix/src/util/error.h rename to opal/mca/pmix/pmix112/pmix/src/util/error.h index f72227aedc5..0efabcdabfd 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/error.h +++ b/opal/mca/pmix/pmix112/pmix/src/util/error.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,20 +23,21 @@ #include #include -#include +#include #include "src/util/output.h" BEGIN_C_DECLS -#define PMIX_ERROR_LOG(r) \ - pmix_output(0, "PMIX ERROR: %s in file %s at line %d", \ - PMIx_Error_string((r)), __FILE__, __LINE__); +#define PMIX_ERROR_LOG(r) \ + do { \ + if (PMIX_ERR_SILENT != (r)) { \ + pmix_output(0, "PMIX ERROR: %s in file %s at line %d", \ + PMIx_Error_string((r)), __FILE__, __LINE__); \ + } \ + }while(0); -#define PMIX_REPORT_ERROR(e) \ - do { \ - pmix_globals.connected = false; \ - pmix_errhandler_invoke(e, NULL, 0, NULL, 0); \ - } while(0); +#define PMIX_REPORT_ERROR(e) \ + pmix_errhandler_invoke(e, NULL, 0, NULL, 0) PMIX_DECLSPEC void pmix_errhandler_invoke(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c b/opal/mca/pmix/pmix112/pmix/src/util/fd.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/src/util/fd.c rename to opal/mca/pmix/pmix112/pmix/src/util/fd.c index 7c08a6d3522..2683555c1cb 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/fd.c @@ -14,7 +14,7 @@ #include #include -#include +#include #ifdef HAVE_UNISTD_H #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/fd.h b/opal/mca/pmix/pmix112/pmix/src/util/fd.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/fd.h rename to opal/mca/pmix/pmix112/pmix/src/util/fd.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/hash.c b/opal/mca/pmix/pmix112/pmix/src/util/hash.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/hash.c rename to opal/mca/pmix/pmix112/pmix/src/util/hash.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/hash.h b/opal/mca/pmix/pmix112/pmix/src/util/hash.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/hash.h rename 
to opal/mca/pmix/pmix112/pmix/src/util/hash.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/os_path.c b/opal/mca/pmix/pmix112/pmix/src/util/os_path.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/os_path.c rename to opal/mca/pmix/pmix112/pmix/src/util/os_path.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/os_path.h b/opal/mca/pmix/pmix112/pmix/src/util/os_path.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/os_path.h rename to opal/mca/pmix/pmix112/pmix/src/util/os_path.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/output.c b/opal/mca/pmix/pmix112/pmix/src/util/output.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/output.c rename to opal/mca/pmix/pmix112/pmix/src/util/output.c index 593c8352223..d9aeab18df9 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/output.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/output.h b/opal/mca/pmix/pmix112/pmix/src/util/output.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/output.h rename to opal/mca/pmix/pmix112/pmix/src/util/output.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c rename to opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.c index d337d8bd325..94052ab827b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h b/opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.h similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h rename to opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.h index 
15e018558f5..2f8a7d9e0e3 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h +++ b/opal/mca/pmix/pmix112/pmix/src/util/pmix_environ.h @@ -37,7 +37,7 @@ #include #endif -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/printf.c b/opal/mca/pmix/pmix112/pmix/src/util/printf.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/printf.c rename to opal/mca/pmix/pmix112/pmix/src/util/printf.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/printf.h b/opal/mca/pmix/pmix112/pmix/src/util/printf.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/printf.h rename to opal/mca/pmix/pmix112/pmix/src/util/printf.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c b/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c rename to opal/mca/pmix/pmix112/pmix/src/util/progress_threads.c index 2ce45dfd370..ac38be17725 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.c @@ -25,7 +25,7 @@ #include "src/util/error.h" #include "src/util/fd.h" -#include +#include #include "src/util/progress_threads.h" static volatile bool evlib_active; diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.h b/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.h rename to opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c b/opal/mca/pmix/pmix112/pmix/src/util/timings.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/src/util/timings.c rename to opal/mca/pmix/pmix112/pmix/src/util/timings.c index 988157393cb..f1be7a83c03 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/timings.c @@ -10,7 +10,7 @@ #include #include 
-#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/timings.h b/opal/mca/pmix/pmix112/pmix/src/util/timings.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/src/util/timings.h rename to opal/mca/pmix/pmix112/pmix/src/util/timings.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/Makefile.am b/opal/mca/pmix/pmix112/pmix/test/Makefile.am similarity index 84% rename from opal/mca/pmix/pmix1xx/pmix/test/Makefile.am rename to opal/mca/pmix/pmix112/pmix/test/Makefile.am index fd864a48c2d..f3e46c45916 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/test/Makefile.am @@ -25,35 +25,37 @@ headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h tes AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api -bin_PROGRAMS = pmix_test pmix_client pmi_client pmi2_client pmix_regex +check_PROGRAMS = pmix_test pmix_client pmi_client pmi2_client pmix_regex + +# TESTS = pmix_test pmix_test_SOURCES = $(headers) \ pmix_test.c test_common.c cli_stages.c server_callbacks.c utils.c - +pmix_test_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_test_LDADD = \ $(top_builddir)/libpmix.la pmi_client_SOURCES = $(headers) \ pmi_client.c - +pmi_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmi_client_LDADD = \ $(top_builddir)/libpmix.la pmi2_client_SOURCES = $(headers) \ pmi2_client.c - +pmi2_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmi2_client_LDADD = \ $(top_builddir)/libpmix.la pmix_client_SOURCES = $(headers) \ pmix_client.c test_fence.c test_common.c test_publish.c test_spawn.c test_cd.c test_resolve_peers.c - +pmix_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_client_LDADD = \ $(top_builddir)/libpmix.la pmix_regex_SOURCES = $(headers) \ pmix_regex.c test_common.c cli_stages.c utils.c - +pmix_regex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_regex_LDADD = \ $(top_builddir)/libpmix.la diff --git a/opal/mca/pmix/pmix1xx/pmix/test/README 
b/opal/mca/pmix/pmix112/pmix/test/README similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/README rename to opal/mca/pmix/pmix112/pmix/test/README diff --git a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c b/opal/mca/pmix/pmix112/pmix/test/cli_stages.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c rename to opal/mca/pmix/pmix112/pmix/test/cli_stages.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h b/opal/mca/pmix/pmix112/pmix/test/cli_stages.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h rename to opal/mca/pmix/pmix112/pmix/test/cli_stages.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c b/opal/mca/pmix/pmix112/pmix/test/pmi2_client.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c rename to opal/mca/pmix/pmix112/pmix/test/pmi2_client.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c b/opal/mca/pmix/pmix112/pmix/test/pmi_client.c similarity index 96% rename from opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c rename to opal/mca/pmix/pmix112/pmix/test/pmi_client.c index 0bfe837513b..7dccd63a8e5 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix112/pmix/test/pmi_client.c @@ -336,24 +336,9 @@ static int test_item5(void) static int test_item6(void) { int rc = 0; - char nspace[100]; log_error("pmix does not support this functionality\n"); return rc; - if (0 == rank) { - if (PMI_SUCCESS != (rc = PMI_KVS_Create(nspace, sizeof(nspace)))) { - log_fatal("PMI_KVS_Create failed: %d\n", rc); - return rc; - } - log_info("nspace=%s\n", nspace); - - if (PMI_SUCCESS != (rc = PMI_KVS_Destroy(nspace))) { - log_fatal("PMI_KVS_Destroy failed: %d\n", rc); - return rc; - } - } - - return rc; } static int test_item7(void) diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_client.c b/opal/mca/pmix/pmix112/pmix/test/pmix_client.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/pmix_client.c rename to 
opal/mca/pmix/pmix112/pmix/test/pmix_client.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_regex.c b/opal/mca/pmix/pmix112/pmix/test/pmix_regex.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/pmix_regex.c rename to opal/mca/pmix/pmix112/pmix/test/pmix_regex.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c b/opal/mca/pmix/pmix112/pmix/test/pmix_test.c similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c rename to opal/mca/pmix/pmix112/pmix/test/pmix_test.c index 7d6ed93b445..8fd375accc2 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix112/pmix/test/pmix_test.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) } /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) { TEST_ERROR(("Init failed with error %d", rc)); FREE_TEST_PARAMS(params); return rc; diff --git a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c b/opal/mca/pmix/pmix112/pmix/test/server_callbacks.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c rename to opal/mca/pmix/pmix112/pmix/test/server_callbacks.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h b/opal/mca/pmix/pmix112/pmix/test/server_callbacks.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h rename to opal/mca/pmix/pmix112/pmix/test/server_callbacks.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/Makefile.am b/opal/mca/pmix/pmix112/pmix/test/simple/Makefile.am similarity index 81% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/Makefile.am rename to opal/mca/pmix/pmix112/pmix/test/simple/Makefile.am index 4e8387aa094..fbf04a23c6b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/test/simple/Makefile.am @@ -21,40 +21,42 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include 
-I$(top_builddir)/include -I$(top_builddir)/include/pmix -bin_PROGRAMS = simptest simpclient simppub simpdmodex simpft simpdyn +check_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex + +# TESTS = simptest simptest_SOURCES = \ simptest.c - +simptest_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simptest_LDADD = \ $(top_builddir)/libpmix.la simpclient_SOURCES = \ simpclient.c - +simpclient_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpclient_LDADD = \ $(top_builddir)/libpmix.la simppub_SOURCES = \ simppub.c - +simppub_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simppub_LDADD = \ $(top_builddir)/libpmix.la simpdmodex_SOURCES = \ simpdmodex.c - +simpdmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpdmodex_LDADD = \ $(top_builddir)/libpmix.la simpft_SOURCES = \ simpft.c - +simpft_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpft_LDADD = \ $(top_builddir)/libpmix.la simpdyn_SOURCES = \ simpdyn.c - +simpdyn_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpdyn_LDADD = \ $(top_builddir)/libpmix.la diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix112/pmix/test/simple/simpclient.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simpclient.c rename to opal/mca/pmix/pmix112/pmix/test/simple/simpclient.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix112/pmix/test/simple/simpdmodex.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simpdmodex.c rename to opal/mca/pmix/pmix112/pmix/test/simple/simpdmodex.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix112/pmix/test/simple/simpdyn.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simpdyn.c rename to opal/mca/pmix/pmix112/pmix/test/simple/simpdyn.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix112/pmix/test/simple/simpft.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c rename to 
opal/mca/pmix/pmix112/pmix/test/simple/simpft.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix112/pmix/test/simple/simppub.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simppub.c rename to opal/mca/pmix/pmix112/pmix/test/simple/simppub.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix112/pmix/test/simple/simptest.c similarity index 99% rename from opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c rename to opal/mca/pmix/pmix112/pmix/test/simple/simptest.c index 2cb82419c06..83ac54613c4 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix112/pmix/test/simple/simptest.c @@ -191,7 +191,7 @@ int main(int argc, char **argv) fprintf(stderr, "Testing version %s\n", PMIx_Get_version()); /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } @@ -501,7 +501,7 @@ static int lookup_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; pmix_list_t results; size_t i, n; - pmix_pdata_t *pd; + pmix_pdata_t *pd = NULL; pmix_status_t ret = PMIX_ERR_NOT_FOUND; pmix_output(0, "SERVER: LOOKUP"); diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_cd.c b/opal/mca/pmix/pmix112/pmix/test/test_cd.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_cd.c rename to opal/mca/pmix/pmix112/pmix/test/test_cd.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_cd.h b/opal/mca/pmix/pmix112/pmix/test/test_cd.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_cd.h rename to opal/mca/pmix/pmix112/pmix/test/test_cd.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_common.c b/opal/mca/pmix/pmix112/pmix/test/test_common.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_common.c rename to 
opal/mca/pmix/pmix112/pmix/test/test_common.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_common.h b/opal/mca/pmix/pmix112/pmix/test/test_common.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_common.h rename to opal/mca/pmix/pmix112/pmix/test/test_common.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.c b/opal/mca/pmix/pmix112/pmix/test/test_fence.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_fence.c rename to opal/mca/pmix/pmix112/pmix/test/test_fence.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.h b/opal/mca/pmix/pmix112/pmix/test/test_fence.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_fence.h rename to opal/mca/pmix/pmix112/pmix/test/test_fence.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.c b/opal/mca/pmix/pmix112/pmix/test/test_publish.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_publish.c rename to opal/mca/pmix/pmix112/pmix/test/test_publish.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.h b/opal/mca/pmix/pmix112/pmix/test/test_publish.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_publish.h rename to opal/mca/pmix/pmix112/pmix/test/test_publish.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c b/opal/mca/pmix/pmix112/pmix/test/test_resolve_peers.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c rename to opal/mca/pmix/pmix112/pmix/test/test_resolve_peers.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h b/opal/mca/pmix/pmix112/pmix/test/test_resolve_peers.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h rename to opal/mca/pmix/pmix112/pmix/test/test_resolve_peers.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c b/opal/mca/pmix/pmix112/pmix/test/test_spawn.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c rename to 
opal/mca/pmix/pmix112/pmix/test/test_spawn.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h b/opal/mca/pmix/pmix112/pmix/test/test_spawn.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h rename to opal/mca/pmix/pmix112/pmix/test/test_spawn.h diff --git a/opal/mca/pmix/pmix1xx/pmix/test/utils.c b/opal/mca/pmix/pmix112/pmix/test/utils.c similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/utils.c rename to opal/mca/pmix/pmix112/pmix/test/utils.c diff --git a/opal/mca/pmix/pmix1xx/pmix/test/utils.h b/opal/mca/pmix/pmix112/pmix/test/utils.h similarity index 100% rename from opal/mca/pmix/pmix1xx/pmix/test/utils.h rename to opal/mca/pmix/pmix112/pmix/test/utils.h diff --git a/opal/mca/pmix/pmix1xx/pmix1.h b/opal/mca/pmix/pmix112/pmix1.h similarity index 94% rename from opal/mca/pmix/pmix1xx/pmix1.h rename to opal/mca/pmix/pmix112/pmix1.h index 835096b20b5..60695b5accb 100644 --- a/opal/mca/pmix/pmix1xx/pmix1.h +++ b/opal/mca/pmix/pmix112/pmix1.h @@ -25,8 +25,8 @@ #include "opal/mca/pmix/pmix.h" #include "opal/mca/pmix/pmix_server.h" -#include "opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h" -#include "opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h" +#include "opal/mca/pmix/pmix112/pmix/include/pmix_server.h" +#include "opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h" BEGIN_C_DECLS @@ -36,9 +36,9 @@ typedef struct { bool native_launch; } mca_pmix_pmix1_component_t; -OPAL_DECLSPEC extern mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component; +OPAL_DECLSPEC extern mca_pmix_pmix1_component_t mca_pmix_pmix112_component; -OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_pmix1xx_module; +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_pmix112_module; /**** INTERNAL OBJECTS ****/ typedef struct { @@ -131,7 +131,8 @@ OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val); /**** SERVER SOUTHBOUND FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int 
pmix1_server_init(opal_pmix_server_module_t *module); +OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info); OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex); OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn); @@ -140,11 +141,13 @@ OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_nspace(opal_jobid_t jobid); OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_client(const opal_process_name_t *proc); OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env); OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix1xx/pmix1_client.c b/opal/mca/pmix/pmix112/pmix1_client.c similarity index 93% rename from opal/mca/pmix/pmix1xx/pmix1_client.c rename to opal/mca/pmix/pmix112/pmix1_client.c index f1ba0d58916..21d64fc7dea 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_client.c +++ b/opal/mca/pmix/pmix112/pmix1_client.c @@ -24,52 +24,61 @@ #endif #include "opal/hash_string.h" +#include "opal/util/argv.h" #include "opal/util/proc.h" #include "opal/mca/pmix/base/base.h" #include "pmix1.h" -#include "opal/mca/pmix/pmix1xx/pmix/include/pmix.h" -#include "opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h" +#include "opal/mca/pmix/pmix112/pmix/include/pmix.h" +#include "opal/mca/pmix/pmix112/pmix/src/buffer_ops/buffer_ops.h" static pmix_proc_t my_proc; static char *dbgvalue=NULL; static int errhdler_ref = 0; +static void release_cbfunc(void *cbdata) +{ + pmix1_opalcaddy_t *cd = 
(pmix1_opalcaddy_t*)cbdata; + OBJ_RELEASE(cd); +} static void myerr(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo) { int rc; - opal_list_t plist, ilist; opal_namelist_t *nm; opal_value_t *iptr; size_t n; + pmix1_opalcaddy_t *cd; /* convert the incoming status */ rc = pmix1_convert_rc(status); + /* setup the caddy */ + cd = OBJ_NEW(pmix1_opalcaddy_t); + /* convert the array of procs */ - OBJ_CONSTRUCT(&plist, opal_list_t); for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); - nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(cd); + return; + } nm->name.vpid = procs[n].rank; - opal_list_append(&plist, &nm->super); + opal_list_append(&cd->procs, &nm->super); } /* convert the array of info */ - OBJ_CONSTRUCT(&ilist, opal_list_t); for (n=0; n < ninfo; n++) { iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(info[n].key); pmix1_value_unload(iptr, &info[n].value); - opal_list_append(&plist, &nm->super); + opal_list_append(&cd->info, &iptr->super); } /* call the base errhandler */ - opal_pmix_base_errhandler(rc, &plist, &ilist); - OPAL_LIST_DESTRUCT(&plist); - OPAL_LIST_DESTRUCT(&ilist); + opal_pmix_base_errhandler(rc, &cd->procs, &cd->info, release_cbfunc, cd); } static void errreg_cbfunc (pmix_status_t status, @@ -105,7 +114,7 @@ int pmix1_client_init(void) if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ - mca_pmix_pmix1xx_component.native_launch = true; + mca_pmix_pmix112_component.native_launch = true; opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); } else { /* we were launched by someone else, so make the @@ -117,7 +126,7 @@ int pmix1_client_init(void) job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; 
- opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); pname.vpid = my_proc.rank; opal_proc_set_name(&pname); @@ -175,7 +184,7 @@ int pmix1_abort(int flag, const char *msg, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -210,13 +219,14 @@ int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; } } if (NULL == job) { + OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); return OPAL_ERR_NOT_FOUND; } (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); @@ -275,7 +285,7 @@ int pmix1_fence(opal_list_t *procs, int collect_data) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -337,7 +347,7 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -419,7 +429,7 @@ int pmix1_get(const opal_process_name_t *proc, const char *key, 
/* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; @@ -524,7 +534,7 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; @@ -675,7 +685,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) /* transfer the data back */ n=0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - if (mca_pmix_pmix1xx_component.native_launch) { + if (mca_pmix_pmix112_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace); @@ -686,7 +696,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; @@ -696,7 +706,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); } if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) { d->proc.vpid = OPAL_VPID_WILDCARD; @@ -738,7 +748,7 @@ static void 
lk_cbfunc(pmix_status_t status, for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); opal_list_append(&results, &d->super); - if (mca_pmix_pmix1xx_component.native_launch) { + if (mca_pmix_pmix112_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace); @@ -749,7 +759,7 @@ static void lk_cbfunc(pmix_status_t status, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; @@ -759,7 +769,7 @@ static void lk_cbfunc(pmix_status_t status, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); } if (PMIX_RANK_WILDCARD == data[n].proc.rank) { d->proc.vpid = OPAL_VPID_WILDCARD; @@ -923,7 +933,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); if (PMIX_SUCCESS == ret) { - if (mca_pmix_pmix1xx_component.native_launch) { + if (mca_pmix_pmix112_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(jobid, nspace); @@ -936,7 +946,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); } PMIX_APP_FREE(papps, napps); @@ -953,7 
+963,7 @@ static void spcbfunc(pmix_status_t status, rc = pmix1_convert_rc(status); if (PMIX_SUCCESS == status) { - if (mca_pmix_pmix1xx_component.native_launch) { + if (mca_pmix_pmix112_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&jobid, nspace); @@ -966,7 +976,7 @@ static void spcbfunc(pmix_status_t status, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); } op->spcbfunc(rc, jobid, op->cbdata); @@ -1044,7 +1054,7 @@ int pmix1_connect(opal_list_t *procs) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -1097,7 +1107,7 @@ int pmix1_connectnb(opal_list_t *procs, OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); break; @@ -1136,7 +1146,7 @@ int pmix1_disconnect(opal_list_t *procs) OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(parray[n].nspace, job->nspace, 
PMIX_MAX_NSLEN); break; @@ -1184,7 +1194,7 @@ int pmix1_disconnectnb(opal_list_t *procs, OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); break; @@ -1219,7 +1229,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, nspace = NULL; } else { job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { job = jptr; break; @@ -1238,7 +1248,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); - if (mca_pmix_pmix1xx_component.native_launch) { + if (mca_pmix_pmix112_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace); @@ -1249,7 +1259,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == nm->name.jobid) { job = jptr; break; @@ -1259,7 +1269,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super); + 
opal_list_append(&mca_pmix_pmix112_component.jobids, &job->super); } nm->name.vpid = array[n].rank; } @@ -1279,7 +1289,7 @@ int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { job = jptr; break; diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_north.c b/opal/mca/pmix/pmix112/pmix1_server_north.c similarity index 90% rename from opal/mca/pmix/pmix1xx/pmix1_server_north.c rename to opal/mca/pmix/pmix112/pmix1_server_north.c index 761cc2a6f56..c2fbfc9a265 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_server_north.c +++ b/opal/mca/pmix/pmix112/pmix1_server_north.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
@@ -80,11 +80,11 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); + pmix_op_cbfunc_t cbfunc, void *cbdata); static pmix_status_t server_listener_fn(int listening_sd, pmix_connection_cbfunc_t cbfunc); -pmix_server_module_t mymodule = { +pmix_server_module_t pmix112_module = { server_client_connected_fn, server_client_finalized_fn, server_abort_fn, @@ -101,7 +101,7 @@ pmix_server_module_t mymodule = { server_listener_fn }; -opal_pmix_server_module_t *host_module = NULL; +opal_pmix_server_module_t *pmix112_host_module = NULL; static void opal_opcbfunc(int status, void *cbdata) @@ -119,7 +119,7 @@ static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *serv int rc; opal_process_name_t proc; - if (NULL == host_module || NULL == host_module->client_connected) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->client_connected) { return PMIX_SUCCESS; } @@ -130,7 +130,7 @@ static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *serv proc.vpid = p->rank; /* pass it up */ - rc = host_module->client_connected(&proc, server_object); + rc = pmix112_host_module->client_connected(&proc, server_object); return pmix1_convert_opalrc(rc); } @@ -141,7 +141,7 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv pmix1_opalcaddy_t *opalcaddy; opal_process_name_t proc; - if (NULL == host_module || NULL == host_module->client_finalized) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->client_finalized) { return PMIX_SUCCESS; } @@ -157,7 +157,7 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv opalcaddy->cbdata = cbdata; /* pass it up */ - rc = 
host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -175,7 +175,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, int rc; pmix1_opalcaddy_t *opalcaddy; - if (NULL == host_module || NULL == host_module->abort) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->abort) { return PMIX_ERR_NOT_SUPPORTED; } @@ -206,7 +206,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, } /* pass it up */ - rc = host_module->abort(&proc, server_object, status, msg, + rc = pmix112_host_module->abort(&proc, server_object, status, msg, &opalcaddy->procs, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); @@ -252,7 +252,7 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_value_t *iptr; int rc; - if (NULL == host_module || NULL == host_module->fence_nb) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } @@ -288,7 +288,7 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, } /* pass it up */ - rc = host_module->fence_nb(&opalcaddy->procs, &opalcaddy->info, + rc = pmix112_host_module->fence_nb(&opalcaddy->procs, &opalcaddy->info, data, ndata, opmdx_response, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); @@ -306,7 +306,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, opal_value_t *iptr; size_t n; - if (NULL == host_module || NULL == host_module->direct_modex) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->direct_modex) { return PMIX_ERR_NOT_SUPPORTED; } @@ -337,7 +337,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, } /* pass it up */ - rc = host_module->direct_modex(&proc, &opalcaddy->info, opmdx_response, 
opalcaddy); + rc = pmix112_host_module->direct_modex(&proc, &opalcaddy->info, opmdx_response, opalcaddy); if (OPAL_SUCCESS != rc && OPAL_ERR_IN_PROCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -357,7 +357,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, opal_process_name_t proc; opal_value_t *oinfo; - if (NULL == host_module || NULL == host_module->publish) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->publish) { return PMIX_ERR_NOT_SUPPORTED; } @@ -388,7 +388,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, } /* pass it up */ - rc = host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -415,7 +415,7 @@ static void opal_lkupcbfunc(int status, n=0; OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { /* convert the jobid */ - (void)snprintf(d[n].proc.nspace, PMIX_MAX_NSLEN, "%s", opal_convert_jobid_to_string(p->proc.jobid)); + (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); d[n].proc.rank = p->proc.vpid; (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); pmix1_value_load(&d[n].value, &p->value); @@ -436,7 +436,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, opal_value_t *iptr; size_t n; - if (NULL == host_module || NULL == host_module->lookup) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->lookup) { return PMIX_ERR_NOT_SUPPORTED; } @@ -467,7 +467,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, } /* pass it up */ - rc = host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy); + rc = pmix112_host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -486,7 +486,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, opal_value_t 
*iptr; size_t n; - if (NULL == host_module || NULL == host_module->unpublish) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->unpublish) { return PMIX_SUCCESS; } @@ -517,7 +517,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, } /* pass it up */ - rc = host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -534,7 +534,7 @@ static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata) if (NULL != opalcaddy->spwncbfunc) { rc = pmix1_convert_opalrc(status); /* convert the jobid */ - (void)snprintf(nspace, PMIX_MAX_NSLEN, "%s", opal_convert_jobid_to_string(jobid)); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata); } OBJ_RELEASE(opalcaddy); @@ -552,7 +552,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, size_t k, n; int rc; - if (NULL == host_module || NULL == host_module->spawn) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->spawn) { return PMIX_ERR_NOT_SUPPORTED; } @@ -609,7 +609,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, } /* pass it up */ - rc = host_module->spawn(&proc, &opalcaddy->info, &opalcaddy->apps, opal_spncbfunc, opalcaddy); + rc = pmix112_host_module->spawn(&proc, &opalcaddy->info, &opalcaddy->apps, opal_spncbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(opalcaddy); @@ -629,7 +629,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, size_t n; opal_value_t *oinfo; - if (NULL == host_module || NULL == host_module->connect) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->connect) { return PMIX_ERR_NOT_SUPPORTED; } @@ -665,7 +665,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, } /* pass 
it up */ - rc = host_module->connect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->connect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -684,7 +684,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro size_t n; opal_value_t *oinfo; - if (NULL == host_module || NULL == host_module->disconnect) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->disconnect) { return PMIX_ERR_NOT_SUPPORTED; } @@ -720,7 +720,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro } /* pass it up */ - rc = host_module->disconnect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->disconnect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -753,7 +753,7 @@ static pmix_status_t server_register_events(const pmix_info_t info[], size_t nin } /* pass it up */ - rc = host_module->register_events(&opalcaddy->info, opal_opcbfunc, opalcaddy); + rc = pmix112_host_module->register_events(&opalcaddy->info, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } @@ -771,10 +771,10 @@ static pmix_status_t server_listener_fn(int listening_sd, { int rc; - if (NULL == host_module || NULL == host_module->listener) { + if (NULL == pmix112_host_module || NULL == pmix112_host_module->listener) { return PMIX_ERR_NOT_SUPPORTED; } - rc = host_module->listener(listening_sd, cbfunc); + rc = pmix112_host_module->listener(listening_sd, cbfunc); return pmix1_convert_opalrc(rc); } diff --git a/opal/mca/pmix/pmix112/pmix1_server_south.c b/opal/mca/pmix/pmix112/pmix1_server_south.c new file mode 100644 index 00000000000..510c9def271 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix1_server_south.c @@ -0,0 +1,440 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright 
(c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" +#include "opal/mca/pmix/base/base.h" +#include "pmix1.h" + +#include "pmix.h" +#include "pmix_server.h" + +/**** S.O.U.T.H.B.O.U.N.D I.N.T.E.R.F.A.C.E.S ****/ + +/* These are the interfaces used by the OMPI/ORTE/OPAL layer to call + * down into the embedded PMIx server.
*/ + +extern pmix_server_module_t pmix112_module; +extern opal_pmix_server_module_t *pmix112_host_module; +static char *dbgvalue=NULL; +static int errhdler_ref = 0; + +static void release_cbfunc(void *cbdata) +{ + pmix1_opalcaddy_t *cd = (pmix1_opalcaddy_t*)cbdata; + OBJ_RELEASE(cd); +} +static void myerr(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo) +{ + int rc; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + pmix1_opalcaddy_t *cd; + + /* convert the incoming status */ + rc = pmix1_convert_rc(status); + + /* setup the caddy */ + cd = OBJ_NEW(pmix1_opalcaddy_t); + + /* convert the array of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + nm->name.vpid = procs[n].rank; + opal_list_append(&cd->procs, &nm->super); + } + + /* convert the array of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(info[n].key); + pmix1_value_unload(iptr, &info[n].value); + opal_list_append(&cd->info, &iptr->super); + } + + /* call the base errhandler */ + opal_pmix_base_errhandler(rc, &cd->procs, &cd->info, release_cbfunc, cd); +} + +static void errreg_cbfunc(pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + +int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info) +{ + pmix_status_t rc; + int dbg; + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; + + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + 
OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&pmix112_module, pinfo, sz))) { + PMIX_INFO_FREE(pinfo, sz); + return pmix1_convert_rc(rc); + } + PMIX_INFO_FREE(pinfo, sz); + + /* record the host module */ + pmix112_host_module = module; + + /* register the errhandler */ + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL); + return OPAL_SUCCESS; +} + +int pmix1_server_finalize(void) +{ + pmix_status_t rc; + + /* deregister the errhandler */ + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); + + rc = PMIx_server_finalize(); + return pmix1_convert_rc(rc); +} + +int pmix1_server_gen_regex(const char *input, char **regex) +{ + pmix_status_t rc; + + rc = PMIx_generate_regex(input, regex); + return pmix1_convert_rc(rc); +} + + +int pmix1_server_gen_ppn(const char *input, char **ppn) +{ + pmix_status_t rc; + + rc = PMIx_generate_ppn(input, ppn); + return pmix1_convert_rc(rc); +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix1_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + +int pmix1_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + opal_value_t *kv, *k2; + pmix_info_t *pinfo, *pmap; + size_t sz, szmap, m, n; + char nspace[PMIX_MAX_NSLEN]; + pmix_status_t rc; + pmix1_opcaddy_t *op; + opal_list_t *pmapinfo; + opal_pmix1_jobid_trkr_t *job; + + /* convert the jobid */ + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + + /* store this job in our list of known nspaces */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_pmix112_component.jobids, 
&job->super); + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { + pinfo[n].value.type = PMIX_INFO_ARRAY; + /* the value contains a list of values - convert + * that list to another array */ + pmapinfo = (opal_list_t*)kv->data.ptr; + szmap = opal_list_get_size(pmapinfo); + PMIX_INFO_CREATE(pmap, szmap); + pinfo[n].value.data.array.array = (struct pmix_info_t*)pmap; + pinfo[n].value.data.array.size = szmap; + m = 0; + OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { + (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pmap[m].value, k2); + ++m; + } + } else { + pmix1_value_load(&pinfo[n].value, kv); + } + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +void pmix1_server_deregister_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + /* found it - tell the server to deregister */ + PMIx_server_deregister_nspace(jptr->nspace); + /* now get rid of it from our list */ + opal_list_remove_item(&mca_pmix_pmix112_component.jobids, &jptr->super); + OBJ_RELEASE(jptr); + return; + } + } +} + +int pmix1_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t rc; + pmix1_opcaddy_t *op; + + /* setup the 
caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + rc = PMIx_server_register_client(&op->p, uid, gid, server_object, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +void pmix1_server_deregister_client(const opal_process_name_t *proc) +{ + opal_pmix1_jobid_trkr_t *jptr; + pmix_proc_t p; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + /* found it - tell the server to deregister */ + (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + PMIx_server_deregister_client(&p); + return; + } + } +} + + +int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env) +{ + pmix_status_t rc; + pmix_proc_t p; + + /* convert the jobid */ + (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); + p.rank = proc->vpid; + + rc = PMIx_server_setup_fork(&p, env); + return pmix1_convert_rc(rc); +} + +/* this is the call back up from the embedded PMIx server that + * will contain the returned data. 
Note that the embedded server + * "owns" the data and will free it upon return from this function */ +static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbdata) +{ + int rc; + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + + rc = pmix1_convert_rc(status); + if (NULL != op->mdxcbfunc) { + op->mdxcbfunc(rc, data, sz, op->cbdata, NULL, NULL); + } + OBJ_RELEASE(op); +} + +int pmix1_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + pmix1_opcaddy_t *op; + pmix_status_t rc; + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->mdxcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + /* find the internally-cached data for this proc */ + rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +int pmix1_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, psz, esz, n; + pmix_proc_t *ps, *eps; + pmix_status_t rc; + pmix1_opcaddy_t *op; + opal_namelist_t *nm; + + /* convert the list of procs */ + if (NULL != procs) { + psz = opal_list_get_size(procs); + PMIX_PROC_CREATE(ps, psz); + n = 0; + OPAL_LIST_FOREACH(nm, procs, opal_namelist_t) { + (void)opal_snprintf_jobid(ps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + ps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + psz = 0; + ps = NULL; + } + if (NULL != error_procs) { + esz = opal_list_get_size(error_procs); + PMIX_PROC_CREATE(eps, esz); + n = 0; + OPAL_LIST_FOREACH(nm, error_procs, opal_namelist_t) { + (void)opal_snprintf_jobid(eps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + eps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + esz = 0; + eps = NULL; + } + + /* convert 
the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); ++n; /* advance to the next pinfo slot; without this every entry overwrites pinfo[0] */ + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->procs = ps; + op->nprocs = psz; + op->error_procs = eps; + op->nerror_procs = esz; + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + rc = pmix1_convert_opalrc(status); + rc = PMIx_Notify_error(rc, ps, psz, eps, esz, + pinfo, sz, opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} diff --git a/opal/mca/pmix/pmix1xx/pmix_pmix1.c b/opal/mca/pmix/pmix112/pmix_pmix1.c similarity index 86% rename from opal/mca/pmix/pmix1xx/pmix_pmix1.c rename to opal/mca/pmix/pmix112/pmix_pmix1.c index 4bcee893b04..5c95e0f87dc 100644 --- a/opal/mca/pmix/pmix1xx/pmix_pmix1.c +++ b/opal/mca/pmix/pmix112/pmix_pmix1.c @@ -37,7 +37,7 @@ #include "pmix1.h" #include "opal/mca/pmix/base/base.h" -#include "opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h" +#include "opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h" /**** C.O.M.M.O.N I.N.T.E.R.F.A.C.E.S ****/ @@ -47,56 +47,58 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid); static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); -const opal_pmix_base_module_t opal_pmix_pmix1xx_module = { +const opal_pmix_base_module_t opal_pmix_pmix112_module = { /* client APIs */ - pmix1_client_init, - pmix1_client_finalize, - pmix1_initialized, - pmix1_abort, - pmix1_commit, - pmix1_fence, - pmix1_fencenb, - pmix1_put, - pmix1_get, - pmix1_getnb, - pmix1_publish, - pmix1_publishnb, - pmix1_lookup, - pmix1_lookupnb, - pmix1_unpublish, - pmix1_unpublishnb, - pmix1_spawn, - pmix1_spawnnb, - pmix1_connect, - pmix1_connectnb, - pmix1_disconnect, -
pmix1_disconnectnb, - pmix1_resolve_peers, - pmix1_resolve_nodes, + .init = pmix1_client_init, + .finalize = pmix1_client_finalize, + .initialized = pmix1_initialized, + .abort = pmix1_abort, + .commit = pmix1_commit, + .fence = pmix1_fence, + .fence_nb = pmix1_fencenb, + .put = pmix1_put, + .get = pmix1_get, + .get_nb = pmix1_getnb, + .publish = pmix1_publish, + .publish_nb = pmix1_publishnb, + .lookup = pmix1_lookup, + .lookup_nb = pmix1_lookupnb, + .unpublish = pmix1_unpublish, + .unpublish_nb = pmix1_unpublishnb, + .spawn = pmix1_spawn, + .spawn_nb = pmix1_spawnnb, + .connect = pmix1_connect, + .connect_nb = pmix1_connectnb, + .disconnect = pmix1_disconnect, + .disconnect_nb = pmix1_disconnectnb, + .resolve_peers = pmix1_resolve_peers, + .resolve_nodes = pmix1_resolve_nodes, /* server APIs */ - pmix1_server_init, - pmix1_server_finalize, - pmix1_server_gen_regex, - pmix1_server_gen_ppn, - pmix1_server_register_nspace, - pmix1_server_register_client, - pmix1_server_setup_fork, - pmix1_server_dmodex, - pmix1_server_notify_error, + .server_init = pmix1_server_init, + .server_finalize = pmix1_server_finalize, + .generate_regex = pmix1_server_gen_regex, + .generate_ppn = pmix1_server_gen_ppn, + .server_register_nspace = pmix1_server_register_nspace, + .server_deregister_nspace = pmix1_server_deregister_nspace, + .server_register_client = pmix1_server_register_client, + .server_deregister_client = pmix1_server_deregister_client, + .server_setup_fork = pmix1_server_setup_fork, + .server_dmodex_request = pmix1_server_dmodex, + .server_notify_error = pmix1_server_notify_error, /* utility APIs */ - PMIx_Get_version, - opal_pmix_base_register_handler, - opal_pmix_base_deregister_handler, - pmix1_store_local, - pmix1_get_nspace, - pmix1_register_jobid + .get_version = PMIx_Get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = pmix1_store_local, + .get_nspace = pmix1_get_nspace, 
+ .register_jobid = pmix1_register_jobid }; static const char *pmix1_get_nspace(opal_jobid_t jobid) { opal_pmix1_jobid_trkr_t *jptr; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { return jptr->nspace; } @@ -109,7 +111,7 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) opal_pmix1_jobid_trkr_t *jptr; /* if we don't already have it, add this to our jobid tracker */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix112_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { return; } @@ -117,7 +119,7 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) jptr = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; - opal_list_append(&mca_pmix_pmix1xx_component.jobids, &jptr->super); + opal_list_append(&mca_pmix_pmix112_component.jobids, &jptr->super); } pmix_status_t pmix1_convert_opalrc(int rc) @@ -171,6 +173,8 @@ pmix_status_t pmix1_convert_opalrc(int rc) case OPAL_EXISTS: return PMIX_EXISTS; + case OPAL_ERR_SILENT: + return PMIX_ERR_SILENT; case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: @@ -254,6 +258,8 @@ int pmix1_convert_rc(pmix_status_t rc) case PMIX_EXISTS: return OPAL_EXISTS; + case PMIX_ERR_SILENT: + return OPAL_ERR_SILENT; case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: @@ -451,7 +457,8 @@ int pmix1_value_unload(opal_value_t *kv, case PMIX_BYTE_OBJECT: kv->type = OPAL_BYTE_OBJECT; if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) { - kv->data.bo.bytes = (uint8_t*)v->data.bo.bytes; + kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size); + memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size); kv->data.bo.size = (int)v->data.bo.size; } else { kv->data.bo.bytes = NULL; diff --git 
a/opal/mca/pmix/pmix1xx/pmix_pmix1_component.c b/opal/mca/pmix/pmix112/pmix_pmix1_component.c similarity index 71% rename from opal/mca/pmix/pmix1xx/pmix_pmix1_component.c rename to opal/mca/pmix/pmix112/pmix_pmix1_component.c index 840d02f4486..a53aa14d7f4 100644 --- a/opal/mca/pmix/pmix1xx/pmix_pmix1_component.c +++ b/opal/mca/pmix/pmix112/pmix_pmix1_component.c @@ -24,17 +24,17 @@ #include "pmix1.h" /* - * Public string showing the pmix pmix1xx component version number + * Public string showing the pmix pmix112 component version number */ -const char *opal_pmix_pmix1xx_component_version_string = - "OPAL pmix1xx pmix MCA component version " OPAL_VERSION; +const char *opal_pmix_pmix112_component_version_string = + "OPAL pmix112 pmix MCA component version " OPAL_VERSION; /* * Local function */ -static int pmix1xx_open(void); -static int pmix1xx_close(void); -static int pmix1xx_component_query(mca_base_module_t **module, int *priority); +static int pmix112_open(void); +static int pmix112_close(void); +static int pmix112_component_query(mca_base_module_t **module, int *priority); /* @@ -42,7 +42,7 @@ static int pmix1xx_component_query(mca_base_module_t **module, int *priority); * and pointers to our public functions in it */ -mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component = { +mca_pmix_pmix1_component_t mca_pmix_pmix112_component = { { /* First, the mca_component_t struct containing meta information about the component itself */ @@ -55,15 +55,15 @@ mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component = { /* Component name and version */ - .mca_component_name = "pmix1xx", + .mca_component_name = "pmix112", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION), /* Component open and close functions */ - .mca_open_component = pmix1xx_open, - .mca_close_component = pmix1xx_close, - .mca_query_component = pmix1xx_component_query, + .mca_open_component = pmix112_open, + .mca_close_component = pmix112_close, + 
.mca_query_component = pmix112_component_query, }, /* Next the MCA v1.0.0 component meta data */ .base_data = { @@ -74,20 +74,20 @@ mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component = { .native_launch = false }; -static int pmix1xx_open(void) +static int pmix112_open(void) { - OBJ_CONSTRUCT(&mca_pmix_pmix1xx_component.jobids, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_pmix112_component.jobids, opal_list_t); return OPAL_SUCCESS; } -static int pmix1xx_close(void) +static int pmix112_close(void) { - OPAL_LIST_DESTRUCT(&mca_pmix_pmix1xx_component.jobids); + OPAL_LIST_DESTRUCT(&mca_pmix_pmix112_component.jobids); return OPAL_SUCCESS; } -static int pmix1xx_component_query(mca_base_module_t **module, int *priority) +static int pmix112_component_query(mca_base_module_t **module, int *priority) { char *t, *id; @@ -100,6 +100,6 @@ static int pmix1xx_component_query(mca_base_module_t **module, int *priority) /* we could be a server, so we still need to be considered */ *priority = 5; } - *module = (mca_base_module_t *)&opal_pmix_pmix1xx_module; + *module = (mca_base_module_t *)&opal_pmix_pmix112_module; return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix120/Makefile.am b/opal/mca/pmix/pmix120/Makefile.am new file mode 100644 index 00000000000..2d413b2ff5e --- /dev/null +++ b/opal/mca/pmix/pmix120/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = autogen.subdirs + +SUBDIRS = pmix + +sources = \ + pmix120.h \ + pmix_pmix120_component.c \ + pmix_pmix120.c \ + pmix120_client.c \ + pmix120_server_south.c \ + pmix120_server_north.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_opal_pmix_pmix120_DSO +component_noinst = +component_install = mca_pmix_pmix120.la +else +component_noinst = libmca_pmix_pmix120.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_pmix120_la_SOURCES = $(sources) +mca_pmix_pmix120_la_CFLAGS = $(opal_pmix_pmix120_CFLAGS) +mca_pmix_pmix120_la_CPPFLAGS = \ + -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS) +mca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS) +mca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS) +mca_pmix_pmix120_la_DEPENDENCIES = $(mca_pmix_pmix120_la_LIBADD) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_pmix120_la_SOURCES =$(sources) +libmca_pmix_pmix120_la_CFLAGS = $(opal_pmix_pmix120_CFLAGS) +libmca_pmix_pmix120_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS) +libmca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS) +libmca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS) +libmca_pmix_pmix120_la_DEPENDENCIES = $(libmca_pmix_pmix120_la_LIBADD) diff --git a/opal/mca/pmix/pmix120/autogen.subdirs b/opal/mca/pmix/pmix120/autogen.subdirs new file mode 100644 index 00000000000..f4fd6e846e1 --- /dev/null +++ b/opal/mca/pmix/pmix120/autogen.subdirs @@ -0,0 +1 @@ +pmix diff --git a/opal/mca/pmix/pmix120/configure.m4 b/opal/mca/pmix/pmix120/configure.m4 new file mode 100644 index 00000000000..b6cfe6e2fe1 --- /dev/null +++ b/opal/mca/pmix/pmix120/configure.m4 @@ -0,0 +1,75 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_pmix_pmix120_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_pmix120_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/pmix120/Makefile]) + + OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_pmix120_save_CPPFLAGS opal_pmix_pmix120_save_LDFLAGS opal_pmix_pmix120_save_LIBS opal_pmix_pmix120_basedir opal_pmix_pmix120_save_CFLAGS]) + + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AC_MSG_WARN([using an external pmix; disqualifiying this component]) + opal_pmix_pmix120_happy=0], + [PMIX_VERSION= + opal_pmix_pmix120_basedir=opal/mca/pmix/pmix120 + + opal_pmix_pmix120_save_CFLAGS=$CFLAGS + opal_pmix_pmix120_save_CPPFLAGS=$CPPFLAGS + opal_pmix_pmix120_save_LDFLAGS=$LDFLAGS + opal_pmix_pmix120_save_LIBS=$LIBS + + opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + AS_IF([test "$enable_debug" = "yes"], + [opal_pmix_pmix120_args="--enable-debug $opal_pmix_pmix120_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], + [opal_pmix_pmix120_args="--disable-debug $opal_pmix_pmix120_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) + CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" + +
OPAL_CONFIG_SUBDIR([$opal_pmix_pmix120_basedir/pmix], + [$opal_pmix_pmix120_args $opal_subdir_args 'CFLAGS=$CFLAGS' 'CPPFLAGS=$CPPFLAGS'], + [opal_pmix_pmix120_happy=1], [opal_pmix_pmix120_happy=0]) + + AS_IF([test $opal_pmix_pmix120_happy -eq 1], + [PMIX_VERSION="internal v`$srcdir/$opal_pmix_pmix120_basedir/pmix/config/pmix_get_version.sh $srcdir/$opal_pmix_pmix120_basedir/pmix/VERSION`" + # Build flags for our Makefile.am + opal_pmix_pmix120_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_pmix_pmix120_basedir"'/pmix/libpmix.la' + opal_pmix_pmix120_CPPFLAGS='-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix120/pmix/include/pmix -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix120/pmix/include -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix120/pmix -I$(OPAL_TOP_SRCDIR)/opal/mca/pmix/pmix120/pmix' + AC_SUBST([opal_pmix_pmix120_LIBS]) + AC_SUBST([opal_pmix_pmix120_CPPFLAGS])]) + + CFLAGS=$opal_pmix_pmix120_save_CFLAGS + CPPFLAGS=$opal_pmix_pmix120_save_CPPFLAGS + LDFLAGS=$opal_pmix_pmix120_save_LDFLAGS + LIBS=$opal_pmix_pmix120_save_LIBS + ]) + + AS_IF([test $opal_pmix_pmix120_happy -eq 1], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix120/pmix/AUTHORS b/opal/mca/pmix/pmix120/pmix/AUTHORS new file mode 100644 index 00000000000..c429d324c00 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/AUTHORS @@ -0,0 +1,30 @@ +PMIx Authors +================ + +The following cumulative list contains the names and GitHub IDs of +all individuals who have committed code to the PMIx repository. 
+ +Email Name Affiliation(s) +------------------------------- --------------------------- ------------------- +alinask Elena Shipunova Mellanox +annu13 Annapurna Dasari Intel +artpol84 Artem Polyakov Mellanox +dsolt Dave Solt IBM +ggouaillardet Gilles Gouaillardet RIST +hjelmn Nathan Hjelm LANL +igor-ivanov Igor Ivanov Mellanox +jladd-mlnx Joshua Ladd Mellanox +jsquyres Jeff Squyres Cisco, IU +nkogteva Nadezhda Kogteva Mellanox +rhc54 Ralph Castain LANL, Cisco, Intel +------------------------------- --------------------------- ------------------- + +Affiliation abbreviations: +-------------------------- +Cisco = Cisco Systems, Inc. +IBM = International Business Machines, Inc. +Intel = Intel, Inc. +IU = Indiana University +LANL = Los Alamos National Laboratory +Mellanox = Mellanox +RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix120/pmix/INSTALL b/opal/mca/pmix/pmix120/pmix/INSTALL new file mode 100644 index 00000000000..005301463ff --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/INSTALL @@ -0,0 +1,88 @@ +Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2005 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2005 The Regents of the University of California. + All rights reserved. +Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + + +For More Information +==================== + +This file is a *very* short overview of building and installing +the PMIx library. 
Much more information is available on the +PMIx web site (e.g., see the FAQ section): + + http://pmix.github.io/pmix/master + + +Developer Builds +================ + +If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked +out from Git), you should read the HACKING file before attempting to +build PMIx. You must then run: + +shell$ ./autogen.sh + +You will need very recent versions of GNU Autoconf, Automake, and +Libtool. If autogen.sh fails, read the HACKING file. If anything +else fails, read the HACKING file. Finally, we suggest reading the +HACKING file. + +*** NOTE: Developer's copies of PMIx typically include a large +performance penalty at run-time because of extra debugging overhead. + + +User Builds +=========== + +Building PMIx is typically a combination of running "configure" +and "make". Execute the following commands to install the PMIx +system from within the directory at the top of the tree: + +shell$ ./configure --prefix=/where/to/install +[...lots of output...] +shell$ make all install + +If you need special access to install, then you can execute "make +all" as a user with write permissions in the build tree, and a +separate "make install" as a user with write permissions to the +install tree. + +Compiling support for specific compilers and environments may +require additional command line flags when running configure. See the +README file for more details. Note that VPATH builds are fully +supported. For example: + +shell$ gtar zxf pmix-X.Y.Z.tar.gz +shell$ cd pmix-X.Y.Z +shell$ mkdir build +shell$ cd build +shell$ ../configure ...your options... +[...lots of output...] +shell$ make all install + +Parallel builds are also supported (although some versions of "make", +such as GNU make, will only use the first target listed on the command +line when executing parallel builds). For example (assume GNU make): + +shell$ make -j 4 all +[...lots of output...]
+shell$ make install + +Parallel make is generally only helpful in the build phase; the +installation process is mostly serial and does not benefit much from +parallel make. + diff --git a/opal/mca/pmix/pmix120/pmix/LICENSE b/opal/mca/pmix/pmix120/pmix/LICENSE new file mode 100644 index 00000000000..f9e6f047910 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/LICENSE @@ -0,0 +1,93 @@ +Most files in this release are marked with the copyrights of the +organizations who have edited them. The copyrights below are in no +particular order and generally reflect members of the Open MPI core +team who have contributed code that may or may not have been ported +to PMIx. Per the terms of that LICENSE, we include the list here. +The copyrights for code used under license from other parties +are included in the corresponding files. + +Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2010 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2008 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights + reserved. +Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. +Copyright (c) 2006-2011 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. +Copyright (c) 2006-2010 The University of Houston. All rights reserved. +Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. +Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2010 IBM Corporation. All rights reserved. 
+Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing + Centre, Federal Republic of Germany +Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany +Copyright (c) 2007 Evergrid, Inc. All rights reserved. +Copyright (c) 2008 Chelsio, Inc. All rights reserved. +Copyright (c) 2008-2009 Institut National de Recherche en + Informatique. All rights reserved. +Copyright (c) 2007 Lawrence Livermore National Security, LLC. + All rights reserved. +Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. +Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2009 Bull SAS. All rights reserved. +Copyright (c) 2010 ARM ltd. All rights reserved. +Copyright (c) 2010-2011 Alex Brick . All rights reserved. +Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights + reserved. +Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +The following LICENSE pertains to both PMIx and any code ported +from Open MPI. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +The copyright holders provide no reassurances that the source code +provided does not infringe any patent, copyright, or any other +intellectual property rights of third parties. The copyright holders +disclaim any liability to any recipient for claims brought against +recipient by any third party for infringement of that parties +intellectual property rights. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/opal/mca/pmix/pmix120/pmix/Makefile.am b/opal/mca/pmix/pmix120/pmix/Makefile.am new file mode 100644 index 00000000000..5891e9c09f0 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/Makefile.am @@ -0,0 +1,96 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. 
All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Note that the -I directory must *exactly* match what was specified +# via AC_CONFIG_MACRO_DIR in configure.ac. +ACLOCAL_AMFLAGS = -I ./config + +headers = +sources = +nodist_headers = +EXTRA_DIST = + +# Only install the valgrind suppressions file if we're building in +# standalone mode +dist_pmixdata_DATA = +if ! PMIX_EMBEDDED_MODE +dist_pmixdata_DATA += contrib/pmix-valgrind.supp +endif + +man_MANS = \ + man/man3/pmix_init.3 \ + man/man3/pmix_finalize.3 \ + man/man3/pmix_initialized.3 \ + man/man3/pmix_abort.3 \ + man/man3/pmix_put.3 \ + man/man3/pmix_commit.3 \ + man/man7/pmix.7 \ + man/man7/pmix_constants.7 + +include config/Makefile.am +include include/Makefile.am +include src/class/Makefile.am +include src/include/Makefile.am +include src/buffer_ops/Makefile.am +include src/util/Makefile.am +include src/usock/Makefile.am +include src/client/Makefile.am +include src/server/Makefile.am +include src/sec/Makefile.am +include src/common/Makefile.am + +if PMIX_EMBEDDED_MODE +noinst_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = +else +lib_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) +endif + + +if ! PMIX_EMBEDDED_MODE +SUBDIRS = . 
test +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) +endif + +nroff: + @for file in $(man_MANS); do \ + source=`echo $$file | sed -e 's@/man[0-9]@@'`; \ + contrib/md2nroff.pl --source=$$source.md; \ + done + +EXTRA_DIST += AUTHORS README INSTALL VERSION LICENSE autogen.sh \ + config/pmix_get_version.sh $(man_MANS) \ + contrib/platform/optimized \ + test/test_common.h test/cli_stages.h \ + test/server_callbacks.h test/test_fence.h \ + test/test_publish.h test/test_resolve_peers.h \ + test/test_spawn.h test/utils.h test/test_cd.h \ + examples/client.c examples/dmodex.c examples/dynamic.c \ + examples/fault.c examples/pub.c + + +dist-hook: + env LS_COLORS= sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(PMIX_VERSION)" "$(PMIX_REPO_REV)" + diff --git a/opal/mca/pmix/pmix120/pmix/NEWS b/opal/mca/pmix/pmix120/pmix/NEWS new file mode 100644 index 00000000000..5596e5cc4c0 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/NEWS @@ -0,0 +1,46 @@ +Copyright (c) 2015 Intel, Inc. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of PMIx since +version 1.0. + +As more fully described in the "Software Version Number" section in +the README file, PMIx typically maintains two separate version +series simultaneously - the current release and one that is locked +to only bug fixes. Since these series are semi-independent of each +other, a single NEWS-worthy item might apply to different series. For +example, a bug might be fixed in the master, and then moved to the +current release as well as the "stable" bug fix release branch. 
+ + +Master (not on release branches yet) +------------------------------------ + + + +1.1.1 +----- +- Fix an issue where the example and test programs + were incorrectly being installed. Thanks to Orion + Poplawski for reporting it + +1.1.0 +----- +- major update of APIs to reflect comments received from 1.0.0 + non-production release +- fixed thread-safety issues +- fixed a range of pack/unpack issues +- added unit tests for all APIs + + +1.0.0 +------ +Initial public release of draft APIs for comment - not production +intended diff --git a/opal/mca/pmix/pmix120/pmix/README b/opal/mca/pmix/pmix120/pmix/README new file mode 100644 index 00000000000..55b7c61f5e3 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/README @@ -0,0 +1,315 @@ +Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2007 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2007 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2007 Myricom, Inc. All rights reserved. +Copyright (c) 2008 IBM Corporation. All rights reserved. +Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2011 University of Houston. All rights reserved. +Copyright (c) 2013-2015 Intel, Inc. All rights reserved +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +When submitting questions and problems, be sure to include as much +extra information as possible. 
This web page details all the +information that we request in order to provide assistance: + + http://pmix.github.io/master/community/help/ + +The best way to report bugs, send comments, or ask questions is to +sign up on the PMIx mailing list, which is hosted by GoogleGroups: + + pmix@googlegroups.com + +Because of spam, only subscribers are allowed to post to this list +(ensure that you subscribe with and post from exactly the same e-mail +address -- joe@example.com is considered different than +joe@mycomputer.example.com!). You can subscribe to the list here: + + https://groups.google.com/d/forum/pmix + +Thanks for your time. + +=========================================================================== + +More information is available in the PMIx FAQ: + + http://pmix.github.io/master/faq/ + +We are in early days, so please be patient - info will grow as questions +are addressed. + +=========================================================================== + +The following abbreviated list of release notes applies to this code +base as of this writing (12 November 2015): + +General notes +------------- + +- The majority of PMIx's documentation is here in this file, the + included man pages, and on the web site FAQ + (http://pmix.github.io/master/faq). This will eventually be + supplemented with cohesive installation and user documentation files. + +- Systems that have been tested are: + - Linux (various flavors/distros), 32 bit, with gcc + - Linux (various flavors/distros), 64 bit (x86), with gcc, Intel, + and Portland (*) + - OS X (10.7 and above), 32 and 64 bit (x86_64), with gcc (*) + +(*) Compiler Notes +-------------- + +- The Portland Group compilers prior to version 7.0 require the + "-Msignextend" compiler flag to extend the sign bit when converting + from a shorter to longer integer. This is different than other + compilers (such as GNU). 
When compiling PMIx with the Portland + compiler suite, the following flags should be passed to PMIx's + configure script: + + shell$ ./configure CFLAGS=-Msignextend ... + + This will compile PMIx with the proper compile flags. + +- Running on nodes with different endian and/or different datatype + sizes within a single parallel job is supported in this release. + However, PMIx does not resize data when datatypes differ in size + (for example, sending a 4 byte double and receiving an 8 byte + double will fail). + + +=========================================================================== + +Building PMIx +----------------- + +PMIx uses a traditional configure script paired with "make" to +build. Typical installs can be of the pattern: + +--------------------------------------------------------------------------- +shell$ ./configure [...options...] +shell$ make all install +--------------------------------------------------------------------------- + +There are many available configure options (see "./configure --help" +for a full list); a summary of the more commonly used ones follows: + +INSTALLATION OPTIONS + +--prefix=<directory> + Install PMIx into the base directory named <directory>. Hence, + PMIx will place its executables in <directory>/bin, its header + files in <directory>/include, its libraries in <directory>/lib, etc. + +--disable-shared + By default, libpmix is built as a shared library. This switch disables + this default; it is really only useful when used with + --enable-static. Specifically, this option does *not* imply + --enable-static; enabling static libraries and disabling shared + libraries are two independent options. + +--enable-static + Build libpmix as a static library. Note that this option does *not* imply + --disable-shared; enabling static libraries and disabling shared + libraries are two independent options. + +--with-platform=FILE + Load configure options for the build from FILE. Options on the + command line that are not in FILE are also used. 
Options on the + command line and in FILE are replaced by what is in FILE. + +Once PMIx has been built and installed, it is safe to run "make +clean" and/or remove the entire build tree. + +VPATH and parallel builds are fully supported. + +Generally speaking, the only thing that users need to do to use PMIx +is ensure that <prefix>/lib is in their LD_LIBRARY_PATH. Users may +need to set LD_LIBRARY_PATH in their shell setup files (e.g., +.bashrc, .cshrc) so that non-interactive rsh/ssh-based logins will +be able to find the PMIx library. + +=========================================================================== + +PMIx Version Numbers and Binary Compatibility +------------------------------------------------- + +PMIx has two sets of version numbers that are likely of interest +to end users / system administrators: + + * Software version number + * Shared library version numbers + +Both are described below, followed by a discussion of application +binary interface (ABI) compatibility implications. + +Software Version Number +----------------------- + +PMIx's version numbers are the union of several different values: +major, minor, release, and an optional quantifier. + + * Major: The major number is the first integer in the version string + (e.g., v1.2.3). Changes in the major number typically indicate a + significant change in the code base and/or end-user + functionality. The major number is always included in the version + number. + + * Minor: The minor number is the second integer in the version + string (e.g., v1.2.3). Changes in the minor number typically + indicate an incremental change in the code base and/or end-user + functionality. The minor number is always included in the version + number. + + * Release: The release number is the third integer in the version + string (e.g., v1.2.3). Changes in the release number typically + indicate a bug fix in the code base and/or end-user + functionality. 
+ + * Quantifier: PMIx version numbers sometimes have an arbitrary + string affixed to the end of the version number. Common strings + include: + + o aX: Indicates an alpha release. X is an integer indicating + the number of the alpha release (e.g., v1.2.3a5 indicates the + 5th alpha release of version 1.2.3). + o bX: Indicates a beta release. X is an integer indicating + the number of the beta release (e.g., v1.2.3b3 indicates the 3rd + beta release of version 1.2.3). + o rcX: Indicates a release candidate. X is an integer + indicating the number of the release candidate (e.g., v1.2.3rc4 + indicates the 4th release candidate of version 1.2.3). + +Although the major, minor, and release values (and optional +quantifiers) are reported in PMIx nightly snapshot tarballs, the +filenames of these snapshot tarballs follow a slightly different +convention. + +Specifically, the snapshot tarball filename contains three distinct +values: + + * Most recent Git tag name on the branch from which the tarball was + created. + + * An integer indicating how many Git commits have occurred since + that Git tag. + + * The Git hash of the tip of the branch. + +For example, a snapshot tarball filename of +"pmix-v1.0.2-57-gb9f1fd9.tar.bz2" indicates that this tarball was +created from the v1.0 branch, 57 Git commits after the "v1.0.2" tag, +specifically at Git hash gb9f1fd9. + +PMIx's Git master branch contains a single "dev" tag. For example, +"pmix-dev-8-gf21c349.tar.bz2" represents a snapshot tarball created +from the master branch, 8 Git commits after the "dev" tag, +specifically at Git hash gf21c349. + +The exact value of the "number of Git commits past a tag" integer is +fairly meaningless; its sole purpose is to provide an easy, +human-recognizable ordering for snapshot tarballs. + +Shared Library Version Number +----------------------------- + +PMIx uses the GNU Libtool shared library versioning scheme. + +NOTE: Only official releases of PMIx adhere to this versioning + scheme. 
"Beta" releases, release candidates, and nightly + tarballs, developer snapshots, and Git snapshot tarballs likely + will all have arbitrary/meaningless shared library version + numbers. + +The GNU Libtool official documentation details how the versioning +scheme works. The quick version is that the shared library versions +are a triple of integers: (current,revision,age), or "c:r:a". This +triple is not related to the PMIx software version number. There +are six simple rules for updating the values (taken almost verbatim +from the Libtool docs): + + 1. Start with version information of "0:0:0" for each shared library. + + 2. Update the version information only immediately before a public + release of your software. More frequent updates are unnecessary, + and only guarantee that the current interface number gets larger + faster. + + 3. If the library source code has changed at all since the last + update, then increment revision ("c:r:a" becomes "c:r+1:a"). + + 4. If any interfaces have been added, removed, or changed since the + last update, increment current, and set revision to 0. + + 5. If any interfaces have been added since the last public release, + then increment age. + + 6. If any interfaces have been removed since the last public release, + then set age to 0. + +Application Binary Interface (ABI) Compatibility +------------------------------------------------ + +PMIx provides forward ABI compatibility in all versions of a given +feature release series and its corresponding +super stable series. For example, on a single platform, an pmix +application linked against PMIx v1.3.2 shared libraries can be +updated to point to the shared libraries in any successive v1.3.x or +v1.4 release and still work properly (e.g., via the LD_LIBRARY_PATH +environment variable or other operating system mechanism). + +PMIx reserves the right to break ABI compatibility at new feature +release series. 
For example, the same pmix application from above +(linked against PMIx v1.3.2 shared libraries) will *not* work with +PMIx v1.5 shared libraries. + +=========================================================================== + +Common Questions +---------------- + +Many common questions about building and using PMIx are answered +on the FAQ: + + http://pmix.github.io/master/faq/ + +=========================================================================== + +Got more questions? +------------------- + +Found a bug? Got a question? Want to make a suggestion? Want to +contribute to PMIx? Please let us know! + +When submitting questions and problems, be sure to include as much +extra information as possible. This web page details all the +information that we request in order to provide assistance: + + http://pmix.github.io/master/community/help/ + +Questions and comments should generally be sent to the PMIx mailing +list (pmix@googlegroups.com). Because of spam, only +subscribers are allowed to post to this list (ensure that you +subscribe with and post from *exactly* the same e-mail address -- +joe@example.com is considered different than +joe@mycomputer.example.com!). Visit this page to subscribe to the +user's list: + + https://groups.google.com/d/forum/pmix + +Make today a PMIx day! diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix120/pmix/VERSION similarity index 98% rename from opal/mca/pmix/pmix1xx/pmix/VERSION rename to opal/mca/pmix/pmix120/pmix/VERSION index 5fac8bdaf60..b1c2150d58f 100644 --- a/opal/mca/pmix/pmix1xx/pmix/VERSION +++ b/opal/mca/pmix/pmix120/pmix/VERSION @@ -14,7 +14,7 @@ # ... major=1 -minor=1 +minor=2 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -30,7 +30,7 @@ greek=a1 # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git69c398e +repo_rev=git843fa89 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Oct 09, 2015" +date="Dec 08, 2015" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix120/pmix/autogen.sh b/opal/mca/pmix/pmix120/pmix/autogen.sh new file mode 100755 index 00000000000..b5b509eac8e --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Run all the rest of the Autotools +echo "==> Running autoreconf"; +autoreconf ${autoreconf_args:-"-ivf"} diff --git a/opal/mca/pmix/pmix120/pmix/config/Makefile.am b/opal/mca/pmix/pmix120/pmix/config/Makefile.am new file mode 100644 index 00000000000..e78b92de62a --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/Makefile.am @@ -0,0 +1,53 @@ +# PMIx copyrights: +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# +######################### +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Oracle and/or its affiliates. All rights +# reserved. 
+######################### +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST += \ + config/c_get_alignment.m4 \ + config/pmix_get_version.sh \ + config/distscript.sh \ + config/pmix_check_attributes.m4 \ + config/pmix_check_broken_qsort.m4 \ + config/pmix_check_compiler_version.m4 \ + config/pmix_check_icc.m4 \ + config/pmix_check_ident.m4 \ + config/pmix_check_munge.m4 \ + config/pmix_check_package.m4 \ + config/pmix_check_sasl.m4 \ + config/pmix_check_vendor.m4 \ + config/pmix_check_visibility.m4 \ + config/pmix_ensure_contains_optflags.m4 \ + config/pmix_functions.m4 \ + config/pmix.m4 \ + config/pmix_search_libs.m4 \ + config/pmix_setup_cc.m4 \ + config/pmix_setup_hwloc.m4 \ + config/pmix_setup_libevent.m4 + + +maintainer-clean-local: + rm -f config/pmix_get_version.sh diff --git a/opal/mca/pmix/pmix120/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix120/pmix/config/c_get_alignment.m4 new file mode 100644 index 00000000000..db379100994 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/c_get_alignment.m4 @@ -0,0 +1,72 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
+dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +# PMIX_C_GET_ALIGNMENT(type, config_var) +# -------------------------------------- +# Determine datatype alignment. +# First arg is type, 2nd arg is config var to define. +# Now that we require C99 compilers, we include stdbool.h +# in the alignment test so that we can find the definition +# of "bool" when we test for its alignment. We might be able +# to avoid this if we test for alignment of _Bool, but +# since we use "bool" in the code, let's be safe and check +# what we use. Yes, they should be the same - but "should" and +# "are" frequently differ +AC_DEFUN([PMIX_C_GET_ALIGNMENT],[ + AC_CACHE_CHECK([alignment of $1], + [AS_TR_SH([pmix_cv_c_align_$1])], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT + #include <stdbool.h> ], +[[ + struct foo { char c; $1 x; }; + struct foo *p = (struct foo *) malloc(sizeof(struct foo)); + int diff; + FILE *f=fopen("conftestval", "w"); + if (!f) exit(1); + diff = ((char *)&p->x) - ((char *)&p->c); + fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); +]])], [AS_TR_SH([pmix_cv_c_align_$1])=`cat conftestval`], + [AC_MSG_WARN([*** Problem running configure test!]) + AC_MSG_WARN([*** See config.log for details.]) + AC_MSG_ERROR([*** Cannot continue.])], + [ # cross compile - do a non-executable test. Trick + # taken from the Autoconf 2.59c. Switch to using + # AC_CHECK_ALIGNOF when we can require Autoconf 2.60. 
+ _AC_COMPUTE_INT([(long int) offsetof (pmix__type_alignof_, y)], + [AS_TR_SH([pmix_cv_c_align_$1])], + [AC_INCLUDES_DEFAULT +#include + +#ifndef offsetof +# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0) +#endif +typedef struct { char x; $1 y; } pmix__type_alignof_; +], + [AC_MSG_WARN([*** Problem running configure test!]) + AC_MSG_WARN([*** See config.log for details.]) + AC_MSG_ERROR([*** Cannot continue.])])])]) + +AC_DEFINE_UNQUOTED([$2], [$AS_TR_SH([pmix_cv_c_align_$1])], [Alignment of type $1]) +eval "$2=$AS_TR_SH([pmix_cv_c_align_$1])" + +rm -rf conftest* ]) dnl diff --git a/opal/mca/pmix/pmix120/pmix/config/distscript.sh b/opal/mca/pmix/pmix120/pmix/config/distscript.sh new file mode 100755 index 00000000000..fbb37a78716 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/distscript.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +srcdir=$1 +builddir=$PWD +distdir=$builddir/$2 +PMIX_REPO_REV=$3 + +if test x"$2" = x ; then + echo "*** ERROR: Must supply relative distdir as argv[2] -- aborting" + exit 1 +elif test ! 
-d "$distdir" ; then + echo "*** ERROR: dist dir does not exist" + echo "*** ERROR: $distdir" + exit 1 +fi + +# We can catch some hard (but possible) to do mistakes by looking at +# our repo's revision, but only if we are in the source tree. +# Otherwise, use what configure told us, at the cost of allowing one +# or two corner cases in (but otherwise VPATH builds won't work). +repo_rev=$PMIX_REPO_REV +if test -d .git ; then + repo_rev=$(config/pmix_get_version.sh VERSION --repo-rev) +fi + +# +# Update VERSION:repo_rev with the best value we have. +# +perl -pi -e 's/^repo_rev=.*/repo_rev='$repo_rev'/' -- "${distdir}/VERSION" +# need to reset the timestamp to not annoy AM dependencies +touch -r "${srcdir}/VERSION" "${distdir}/VERSION" + +echo "*** Updated VERSION file with repo rev: $repo_rev" +echo "*** (via dist-hook / config/distscript.sh)" diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix.m4 new file mode 100644 index 00000000000..4c2e757f4b0 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/pmix.m4 @@ -0,0 +1,767 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2009 IBM Corporation. All rights reserved. +dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. 
+dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +dnl Copyright (c) 2013-2015 Intel, Inc. All rights reserved +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_SETUP_CORE],[ + + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) + AC_REQUIRE([AC_CANONICAL_TARGET]) + AC_REQUIRE([AC_PROG_CC]) + + # If no prefix was defined, set a good value + m4_ifval([$1], + [m4_define([pmix_config_prefix],[$1/])], + [m4_define([pmix_config_prefix], [])]) + + # Get pmix's absolute top builddir (which may not be the same as + # the real $top_builddir) + PMIX_startdir=`pwd` + if test x"pmix_config_prefix" != "x" && test ! -d "pmix_config_prefix"; then + mkdir -p "pmix_config_prefix" + fi + if test x"pmix_config_prefix" != "x"; then + cd "pmix_config_prefix" + fi + PMIX_top_builddir=`pwd` + AC_SUBST(PMIX_top_builddir) + + # Get pmix's absolute top srcdir (which may not be the same as the + # real $top_srcdir. First, go back to the startdir incase the + # $srcdir is relative. + + cd "$PMIX_startdir" + cd "$srcdir"/pmix_config_prefix + PMIX_top_srcdir="`pwd`" + AC_SUBST(PMIX_top_srcdir) + + # Go back to where we started + cd "$PMIX_startdir" + + AC_MSG_NOTICE([pmix builddir: $PMIX_top_builddir]) + AC_MSG_NOTICE([pmix srcdir: $PMIX_top_srcdir]) + if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then + AC_MSG_NOTICE([Detected VPATH build]) + fi + + # Get the version of pmix that we are installing + AC_MSG_CHECKING([for pmix version]) + PMIX_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION`" + if test "$?" 
!= "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + PMIX_RELEASE_DATE="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --release-date`" + AC_SUBST(PMIX_VERSION) + AC_DEFINE_UNQUOTED([PMIX_VERSION], ["$PMIX_VERSION"], + [The library version is always available, contrary to VERSION]) + AC_SUBST(PMIX_RELEASE_DATE) + AC_MSG_RESULT([$PMIX_VERSION]) + + # Debug mode? + AC_MSG_CHECKING([if want pmix maintainer support]) + pmix_debug= + AS_IF([test "$pmix_debug" = "" && test "$enable_debug" = "yes"], + [pmix_debug=1 + pmix_debug_msg="enabled"]) + AS_IF([test "$pmix_debug" = ""], + [pmix_debug=0 + pmix_debug_msg="disabled"]) + # Grr; we use #ifndef for PMIX_DEBUG! :-( + AH_TEMPLATE(PMIX_ENABLE_DEBUG, [Whether we are in debugging mode or not]) + AS_IF([test "$pmix_debug" = "1"], [AC_DEFINE([PMIX_ENABLE_DEBUG])]) + AC_MSG_RESULT([$pmix_debug_msg]) + + AC_MSG_CHECKING([for pmix directory prefix]) + AC_MSG_RESULT(m4_ifval([$1], pmix_config_prefix, [(none)])) + + # Note that private/config.h *MUST* be listed first so that it + # becomes the "main" config header file. Any AC-CONFIG-HEADERS + # after that (pmix/config.h) will only have selective #defines + # replaced, not the entire file. + AC_CONFIG_HEADERS(pmix_config_prefix[include/private/autogen/config.h]) + AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h]) + + # What prefix are we using? 
+ AC_MSG_CHECKING([for pmix symbol prefix]) + AS_IF([test "$pmix_symbol_prefix_value" = ""], + [AS_IF([test "$with_pmix_symbol_prefix" = ""], + [pmix_symbol_prefix_value=pmix_], + [pmix_symbol_prefix_value=$with_pmix_symbol_prefix])]) + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX, [$pmix_symbol_prefix_value], + [The pmix symbol prefix]) + # Ensure to [] escape the whole next line so that we can get the + # proper tr tokens + [pmix_symbol_prefix_value_caps="`echo $pmix_symbol_prefix_value | tr '[:lower:]' '[:upper:]'`"] + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX_CAPS, [$pmix_symbol_prefix_value_caps], + [The pmix symbol prefix in all caps]) + AC_MSG_RESULT([$pmix_symbol_prefix_value]) + + # Give an easy #define to know if we need to transform all the + # pmix names + AH_TEMPLATE([PMIX_SYM_TRANSFORM], [Whether we need to re-define all the pmix public symbols or not]) + AS_IF([test "$pmix_symbol_prefix_value" = "pmix_"], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [0])], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [1])]) + + # GCC specifics. 
+ if test "x$GCC" = "xyes"; then + PMIX_GCC_CFLAGS="-Wall -Wmissing-prototypes -Wundef" + PMIX_GCC_CFLAGS="$PMIX_GCC_CFLAGS -Wpointer-arith -Wcast-align" + fi + + ############################################################################ + # Check for compilers and preprocessors + ############################################################################ + pmix_show_title "Compiler and preprocessor tests" + + # + # Check for some types + # + + AC_CHECK_TYPES(int8_t) + AC_CHECK_TYPES(uint8_t) + AC_CHECK_TYPES(int16_t) + AC_CHECK_TYPES(uint16_t) + AC_CHECK_TYPES(int32_t) + AC_CHECK_TYPES(uint32_t) + AC_CHECK_TYPES(int64_t) + AC_CHECK_TYPES(uint64_t) + AC_CHECK_TYPES(long long) + + AC_CHECK_TYPES(intptr_t) + AC_CHECK_TYPES(uintptr_t) + AC_CHECK_TYPES(ptrdiff_t) + + # + # Check for type sizes + # + + AC_CHECK_SIZEOF(_Bool) + AC_CHECK_SIZEOF(char) + AC_CHECK_SIZEOF(short) + AC_CHECK_SIZEOF(int) + AC_CHECK_SIZEOF(long) + if test "$ac_cv_type_long_long" = yes; then + AC_CHECK_SIZEOF(long long) + fi + AC_CHECK_SIZEOF(float) + AC_CHECK_SIZEOF(double) + + AC_CHECK_SIZEOF(void *) + AC_CHECK_SIZEOF(size_t) + if test "$ac_cv_type_ssize_t" = yes ; then + AC_CHECK_SIZEOF(ssize_t) + fi + if test "$ac_cv_type_ptrdiff_t" = yes; then + AC_CHECK_SIZEOF(ptrdiff_t) + fi + AC_CHECK_SIZEOF(wchar_t) + + AC_CHECK_SIZEOF(pid_t) + + # + # Check for type alignments + # + + PMIX_C_GET_ALIGNMENT(bool, PMIX_ALIGNMENT_BOOL) + PMIX_C_GET_ALIGNMENT(int8_t, PMIX_ALIGNMENT_INT8) + PMIX_C_GET_ALIGNMENT(int16_t, PMIX_ALIGNMENT_INT16) + PMIX_C_GET_ALIGNMENT(int32_t, PMIX_ALIGNMENT_INT32) + PMIX_C_GET_ALIGNMENT(int64_t, PMIX_ALIGNMENT_INT64) + PMIX_C_GET_ALIGNMENT(char, PMIX_ALIGNMENT_CHAR) + PMIX_C_GET_ALIGNMENT(short, PMIX_ALIGNMENT_SHORT) + PMIX_C_GET_ALIGNMENT(wchar_t, PMIX_ALIGNMENT_WCHAR) + PMIX_C_GET_ALIGNMENT(int, PMIX_ALIGNMENT_INT) + PMIX_C_GET_ALIGNMENT(long, PMIX_ALIGNMENT_LONG) + if test "$ac_cv_type_long_long" = yes; then + PMIX_C_GET_ALIGNMENT(long long, PMIX_ALIGNMENT_LONG_LONG) + fi + 
PMIX_C_GET_ALIGNMENT(float, PMIX_ALIGNMENT_FLOAT) + PMIX_C_GET_ALIGNMENT(double, PMIX_ALIGNMENT_DOUBLE) + if test "$ac_cv_type_long_double" = yes; then + PMIX_C_GET_ALIGNMENT(long double, PMIX_ALIGNMENT_LONG_DOUBLE) + fi + PMIX_C_GET_ALIGNMENT(void *, PMIX_ALIGNMENT_VOID_P) + PMIX_C_GET_ALIGNMENT(size_t, PMIX_ALIGNMENT_SIZE_T) + + + # + # Does the C compiler native support "bool"? (i.e., without <stdbool.h> + # or any other help) + # + # PMIX_NEED_C_BOOL is set to 0 when a program using a bare "bool" + # compiles with only the default includes, and to 1 otherwise. + # + + PMIX_VAR_SCOPE_PUSH([MSG]) + AC_MSG_CHECKING(for C bool type) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_INCLUDES_DEFAULT], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_NEED_C_BOOL=0 MSG=yes],[PMIX_NEED_C_BOOL=1 MSG=no]) + AC_DEFINE_UNQUOTED(PMIX_NEED_C_BOOL, $PMIX_NEED_C_BOOL, + [Whether the C compiler supports "bool" without any other help (such as <stdbool.h>)]) + AC_MSG_RESULT([$MSG]) + AC_CHECK_SIZEOF(_Bool) + PMIX_VAR_SCOPE_POP + + # + # Check for other compiler characteristics + # + + PMIX_VAR_SCOPE_PUSH([PMIX_CFLAGS_save]) + if test "$GCC" = "yes"; then + + # gcc 2.96 will emit oodles of warnings if you use "inline" with + # -pedantic (which we do in developer builds). However, + # "__inline__" is ok. So we have to force gcc to select the + # right one. If you use -pedantic, the AC_C_INLINE test will fail + # (because it names a function foo() -- without the (void)). So + # we turn off all the picky flags, turn on -ansi mode (which is + # implied by -pedantic), and set warnings to be errors. Hence, + # this does the following (for 2.96): + # + # - causes the check for "inline" to emit a warning, which then + # fails + # - checks for __inline__, which then emits no error, and works + # + # This also works nicely for gcc 3.x because "inline" will work on + # the first check, and all is fine. 
:-) + + PMIX_CFLAGS_save=$CFLAGS + CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY -Werror -ansi" + fi + AC_C_INLINE + if test "$GCC" = "yes"; then + CFLAGS=$PMIX_CFLAGS_save + fi + PMIX_VAR_SCOPE_POP + + if test "x$CC" = "xicc"; then + PMIX_CHECK_ICC_VARARGS + fi + + + ################################## + # Only after setting up + # C do we check compiler attributes. + ################################## + + pmix_show_subtitle "Compiler characteristics" + + PMIX_CHECK_ATTRIBUTES + PMIX_CHECK_COMPILER_VERSION_ID + + ################################## + # Header files + ################################## + + pmix_show_title "Header file tests" + + AC_CHECK_HEADERS([arpa/inet.h \ + fcntl.h inttypes.h libgen.h \ + netinet/in.h \ + stdint.h stddef.h \ + stdlib.h string.h strings.h \ + sys/param.h \ + sys/select.h sys/socket.h \ + stdarg.h sys/stat.h sys/time.h \ + sys/types.h sys/un.h sys/uio.h net/uio.h \ + sys/wait.h syslog.h \ + time.h unistd.h \ + crt_externs.h signal.h \ + ioLib.h sockLib.h hostLib.h limits.h]) + + # Note that sometimes we have <stdbool.h>, but it doesn't work (e.g., + # have both Portland and GNU installed; using pgcc will find GNU's + # <stdbool.h>, which all it does -- by standard -- is define "bool" to + # "_Bool" [see + # http://www.opengroup.org/onlinepubs/009695399/basedefs/stdbool.h.html], + # and Portland has no idea what to do with _Bool). + + # So first figure out if we have <stdbool.h> (i.e., check the value of + # the macro HAVE_STDBOOL_H from the result of AC_CHECK_HEADERS, + # above). If we do have it, then check to see if it actually works. + # Define PMIX_USE_STDBOOL_H as appropriate. 
+ AC_CHECK_HEADERS([stdbool.h], [have_stdbool_h=1], [have_stdbool_h=0]) + AC_MSG_CHECKING([if <stdbool.h> works]) + if test "$have_stdbool_h" = "1"; then + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT[ + #if HAVE_STDBOOL_H + #include <stdbool.h> + #endif + ]], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_USE_STDBOOL_H=1 MSG=yes],[PMIX_USE_STDBOOL_H=0 MSG=no]) + else + PMIX_USE_STDBOOL_H=0 + MSG="no (don't have <stdbool.h>)" + fi + AC_DEFINE_UNQUOTED(PMIX_USE_STDBOOL_H, $PMIX_USE_STDBOOL_H, + [Whether to use <stdbool.h> or not]) + AC_MSG_RESULT([$MSG]) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # Types + ################################## + + pmix_show_title "Type tests" + + # Each test program below must name the header it includes; the + # HAVE_*_H guards keep the program compilable where a header is absent. + AC_CHECK_TYPES([socklen_t, struct sockaddr_in, struct sockaddr_un, + struct sockaddr_in6, struct sockaddr_storage], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + #if HAVE_SYS_UN_H + #include <sys/un.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + ]) + + AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC, AF_INET6, PF_INET6], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + ]) + + # SA_RESTART in signal.h + PMIX_VAR_SCOPE_PUSH([MSG2]) + AC_MSG_CHECKING([if SA_RESTART defined in signal.h]) + AC_EGREP_CPP(yes, [ + #include <signal.h> + #ifdef SA_RESTART + yes + #endif + ], [MSG2=yes VALUE=1], [MSG2=no VALUE=0]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_SA_RESTART, $VALUE, + [Whether we have SA_RESTART in <signal.h> or not]) + AC_MSG_RESULT([$MSG2]) + PMIX_VAR_SCOPE_POP + + AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [ + #include <sys/types.h> + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [ + #include <sys/types.h> + #include <dirent.h>]) + + AC_CHECK_MEMBERS([siginfo_t.si_fd],,,[#include <signal.h>]) + AC_CHECK_MEMBERS([siginfo_t.si_band],,,[#include <signal.h>]) + + # + # Checks for struct member names in struct statfs + # + AC_CHECK_MEMBERS([struct statfs.f_type], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_VFS_H + 
#include <sys/vfs.h> + #endif + #ifdef HAVE_SYS_STATFS_H + #include <sys/statfs.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct statfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_PARAM_H + #include <sys/param.h> + #endif + #ifdef HAVE_SYS_MOUNT_H + #include <sys/mount.h> + #endif + #ifdef HAVE_SYS_VFS_H + #include <sys/vfs.h> + #endif + #ifdef HAVE_SYS_STATFS_H + #include <sys/statfs.h> + #endif + ]) + + # + # Checks for struct member names in struct statvfs + # + AC_CHECK_MEMBERS([struct statvfs.f_basetype], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include <sys/statvfs.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct statvfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include <sys/statvfs.h> + #endif + ]) + + # + # Check for ptrdiff type. Yes, there are platforms where + # sizeof(void*) != sizeof(long) (64 bit Windows, apparently). + # + # Preference order: ptrdiff_t itself, then long, then long long, + # the latter two only when their size equals sizeof(void *). + # + AC_MSG_CHECKING([for pointer diff type]) + if test $ac_cv_type_ptrdiff_t = yes ; then + pmix_ptrdiff_t="ptrdiff_t" + pmix_ptrdiff_size=$ac_cv_sizeof_ptrdiff_t + elif test $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long ; then + pmix_ptrdiff_t="long" + pmix_ptrdiff_size=$ac_cv_sizeof_long + elif test $ac_cv_type_long_long = yes && test $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long_long ; then + pmix_ptrdiff_t="long long" + pmix_ptrdiff_size=$ac_cv_sizeof_long_long + #else + # AC_MSG_ERROR([Could not find datatype to emulate ptrdiff_t. 
Cannot continue]) + fi + AC_DEFINE_UNQUOTED([PMIX_PTRDIFF_TYPE], [$pmix_ptrdiff_t], + [type to use for ptrdiff_t]) + AC_MSG_RESULT([$pmix_ptrdiff_t (size: $pmix_ptrdiff_size)]) + + ################################## + # Libraries + ################################## + + pmix_show_title "Library and Function tests" + + PMIX_SEARCH_LIBS_CORE([socket], [socket]) + + # IRIX and CentOS have dirname in -lgen, usually in libc + PMIX_SEARCH_LIBS_CORE([dirname], [gen]) + + # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib + PMIX_SEARCH_LIBS_CORE([ceil], [m]) + + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep]) + + # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get + # confused. On others, it's in the standard library, but stubbed with + # the magic glibc foo as not implemented. and on other systems, it's + # just not there. This covers all cases. + # The preprocessor probe below tests for an ntohl macro; the function + # probe tests for htonl. Either one succeeding defines HAVE_UNIX_BYTESWAP. + AC_CACHE_CHECK([for htonl define], + [ompi_cv_htonl_define], + [AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #ifdef HAVE_SYS_TYPES_H + #include <sys/types.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + #ifdef HAVE_ARPA_INET_H + #include <arpa/inet.h> + #endif],[ + #ifndef ntohl + #error "ntohl not defined" + #endif + ])], [ompi_cv_htonl_define=yes], [ompi_cv_htonl_define=no])]) + AC_CHECK_FUNC([htonl], [ompi_have_htonl=yes], [ompi_have_htonl=no]) + AS_IF([test "$ompi_cv_htonl_define" = "yes" || test "$ompi_have_htonl" = "yes"], + [AC_DEFINE_UNQUOTED([HAVE_UNIX_BYTESWAP], [1], + [whether unix byteswap routines -- htonl, htons, ntohl, ntohs -- are available])]) + + # + # Make sure we can copy va_lists (need check declared, not linkable) + # + + AC_CHECK_DECL(va_copy, PMIX_HAVE_VA_COPY=1, PMIX_HAVE_VA_COPY=0, + [#include <stdarg.h>]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_VA_COPY, $PMIX_HAVE_VA_COPY, + [Whether we have va_copy or not]) + + AC_CHECK_DECL(__va_copy, PMIX_HAVE_UNDERSCORE_VA_COPY=1, + PMIX_HAVE_UNDERSCORE_VA_COPY=0, [#include <stdarg.h>]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_UNDERSCORE_VA_COPY, 
$PMIX_HAVE_UNDERSCORE_VA_COPY, + [Whether we have __va_copy or not]) + + AC_CHECK_DECLS(__func__) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # System-specific tests + ################################## + + pmix_show_title "System-specific tests" + + AC_C_BIGENDIAN + PMIX_CHECK_BROKEN_QSORT + + ################################## + # Visibility + ################################## + + # Check the visibility declspec at the end to avoid problem with + # the previous tests that are not necessarily prepared for + # the visibility feature. + pmix_show_title "Symbol visibility feature" + + PMIX_CHECK_VISIBILITY + + ################################## + # Libevent + ################################## + pmix_show_title "Libevent" + + PMIX_LIBEVENT_CONFIG + + ################################## + # HWLOC + ################################## + pmix_show_title "HWLOC" + + PMIX_HWLOC_CONFIG + + ################################## + # SASL + ################################## + pmix_show_title "SASL" + + PMIX_SASL_CONFIG + + ################################## + # Munge + ################################## + pmix_show_title "Munge" + + PMIX_MUNGE_CONFIG + + ############################################################################ + # final compiler config + ############################################################################ + + pmix_show_subtitle "Set path-related compiler flags" + + # + # This is needed for VPATH builds, so that it will -I the appropriate + # include directory. We delayed doing it until now just so that + # '-I$(top_srcdir)' doesn't show up in any of the configure output -- + # purely aesthetic. + # + # Because pmix_config.h is created by AC_CONFIG_HEADERS, we + # don't need to -I the builddir for pmix/include. However, if we + # are VPATH building, we do need to include the source directories. 
+ # + if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then + # Note the embedded m4 directives here -- we must embed them + # rather than have successive assignments to these shell + # variables, lest the $(foo) names try to get evaluated here. + # Yuck! + CPPFLAGS='-I$(PMIX_top_srcdir) -I$(PMIX_top_builddir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_srcdir)/include -I$(PMIX_top_builddir)/include'" $CPPFLAGS" + else + CPPFLAGS='-I$(PMIX_top_srcdir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_srcdir)/include'" $CPPFLAGS" + fi + + # pmixdatadir, pmixlibdir, and pmixinclude are essentially the same as + # pkg*dir, but will always be */pmix. + pmixdatadir='${datadir}/pmix' + pmixlibdir='${libdir}/pmix' + pmixincludedir='${includedir}/pmix' + AC_SUBST(pmixdatadir) + AC_SUBST(pmixlibdir) + AC_SUBST(pmixincludedir) + + ############################################################################ + # final output + ############################################################################ + + pmix_show_subtitle "Final output" + + AC_CONFIG_FILES(pmix_config_prefix[Makefile]) + + # Success + $2 +])dnl + +AC_DEFUN([PMIX_DEFINE_ARGS],[ + # Embedded mode, or standalone? + AC_ARG_ENABLE([embedded-mode], + [AC_HELP_STRING([--enable-embedded-mode], + [Using --enable-embedded-mode causes PMIx to skip a few configure checks and install nothing. It should only be used when building PMIx within the scope of a larger package.])]) + AS_IF([test ! -z "$enable_embedded_mode" && test "$enable_embedded_mode" = "yes"], + [pmix_mode=embedded], + [pmix_mode=standalone]) + + # Change the symbol prefix? + AC_ARG_WITH([pmix-symbol-prefix], + AC_HELP_STRING([--with-pmix-symbol-prefix=STRING], + [STRING can be any valid C symbol name. It will be prefixed to all public PMIx symbols. Default: "pmix_"])) + +# +# Is this a developer copy? 
+# + +if test -d .git; then + PMIX_DEVEL=1 +else + PMIX_DEVEL=0 +fi + + +# +# Developer picky compiler options +# + +AC_MSG_CHECKING([if want developer-level compiler pickyness]) +AC_ARG_ENABLE(picky, + AC_HELP_STRING([--enable-picky], + [enable developer-level compiler pickyness when building PMIx (default: disabled)])) +if test "$enable_picky" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_PICKY_COMPILER=1 +else + AC_MSG_RESULT([no]) + WANT_PICKY_COMPILER=0 +fi +#################### Early development override #################### +if test "$WANT_PICKY_COMPILER" = "0" && test -z "$enable_picky" && test "$PMIX_DEVEL" = "1"; then + WANT_PICKY_COMPILER=1 + echo "--> developer override: enable picky compiler by default" +fi +#################### Early development override #################### + +# +# Developer debugging +# + +AC_MSG_CHECKING([if want developer-level debugging code]) +AC_ARG_ENABLE(debug, + AC_HELP_STRING([--enable-debug], + [enable developer-level debugging code (not for general PMIx users!) (default: disabled)])) +if test "$enable_debug" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_DEBUG=1 +else + AC_MSG_RESULT([no]) + WANT_DEBUG=0 +fi +#################### Early development override #################### +if test "$WANT_DEBUG" = "0" && test -z "$enable_debug" && test "$PMIX_DEVEL" = "1"; then + WANT_DEBUG=1 + echo "--> developer override: enable debugging code by default" +fi +#################### Early development override #################### +if test "$WANT_DEBUG" = "0"; then + CFLAGS="-DNDEBUG $CFLAGS" + CXXFLAGS="-DNDEBUG $CXXFLAGS" +fi +AC_DEFINE_UNQUOTED(PMIX_ENABLE_DEBUG, $WANT_DEBUG, + [Whether we want developer-level debugging code or not]) + +AC_ARG_ENABLE(debug-symbols, + AC_HELP_STRING([--disable-debug-symbols], + [Disable adding compiler flags to enable debugging symbols if --enable-debug is specified. For non-debugging builds, this flag has no effect.])) + +# +# Do we want the pretty-print stack trace feature? 
+# + +AC_MSG_CHECKING([if want pretty-print stacktrace]) +AC_ARG_ENABLE([pretty-print-stacktrace], + [AC_HELP_STRING([--enable-pretty-print-stacktrace], + [Pretty print stacktrace on process signal (default: enabled)])]) +if test "$enable_pretty_print_stacktrace" = "no" ; then + AC_MSG_RESULT([no]) + WANT_PRETTY_PRINT_STACKTRACE=0 +else + AC_MSG_RESULT([yes]) + WANT_PRETTY_PRINT_STACKTRACE=1 +fi +AC_DEFINE_UNQUOTED([PMIX_WANT_PRETTY_PRINT_STACKTRACE], + [$WANT_PRETTY_PRINT_STACKTRACE], + [if want pretty-print stack trace feature]) + +# +# Ident string +# +AC_MSG_CHECKING([if want ident string]) +AC_ARG_WITH([ident-string], + [AC_HELP_STRING([--with-ident-string=STRING], + [Embed an ident string into PMIx object files])]) +if test "$with_ident_string" = "" || test "$with_ident_string" = "no"; then + with_ident_string="%VERSION%" +fi +# This is complicated, because $PMIX_VERSION may have spaces in it. +# So put the whole sed expr in single quotes -- i.e., directly +# substitute %VERSION% for (not expanded) $PMIX_VERSION. +with_ident_string="`echo $with_ident_string | sed -e 's/%VERSION%/$PMIX_VERSION/'`" + +# Now eval an echo of that so that the "$PMIX_VERSION" token is +# replaced with its value. Enclose the whole thing in "" so that it +# ends up as 1 token. 
+with_ident_string="`eval echo $with_ident_string`" + +AC_DEFINE_UNQUOTED([PMIX_IDENT_STRING], ["$with_ident_string"], + [ident string for PMIX]) +AC_MSG_RESULT([$with_ident_string]) + +# +# Timing support +# +AC_MSG_CHECKING([if want developer-level timing support]) +AC_ARG_ENABLE(timing, + AC_HELP_STRING([--enable-timing], + [enable developer-level timing code (default: disabled)])) +if test "$enable_timing" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_TIMING=1 +else + AC_MSG_RESULT([no]) + WANT_TIMING=0 +fi + +AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_TIMING], + [Whether we want developer-level timing support or not]) + +])dnl + +# Specify the symbol prefix +AC_DEFUN([PMIX_SET_SYMBOL_PREFIX],[ + pmix_symbol_prefix_value=$1 +])dnl + +# This must be a standalone routine so that it can be called both by +# PMIX_INIT and an external caller (if PMIX_INIT is not invoked). +AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ + AS_IF([test "$pmix_did_am_conditionals" != "yes"],[ + AM_CONDITIONAL([PMIX_EMBEDDED_MODE], [test "x$pmix_mode" = "xembedded"]) + AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"]) + AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"]) + AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"]) + ]) + pmix_did_am_conditionals=yes +])dnl + diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_attributes.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_attributes.m4 new file mode 100644 index 00000000000..bbafcc2b8b5 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_attributes.m4 @@ -0,0 +1,539 @@ +# -*- shell-script -*- +# PMIx copyrights: +# Copyright (c) 2013 Intel, Inc. All rights reserved +# +######################### +# +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. 
All rights +# reserved. +# Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +######################### +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Search the generated warnings for +# keywords regarding skipping or ignoring certain attributes +# Intel: ignore +# Sun C++: skip +# +# Argument 1 is the attribute name. If conftest.err exists and is +# non-empty, it is searched case-insensitively for each of the words +# invalid, ignore, skip and undefined; on the first match the cache +# variable pmix_cv___attribute__NAME is reset to 0. +# +AC_DEFUN([_PMIX_ATTRIBUTE_FAIL_SEARCH],[ + AC_REQUIRE([AC_PROG_GREP]) + if test -s conftest.err ; then + # icc uses 'invalid attribute' and 'attribute "__XXX__" ignored' + # Sun 12.1 emits 'warning: attribute parameter "__printf__" is undefined' + for i in invalid ignore skip undefined ; do + $GREP -iq $i conftest.err + if test "$?" = "0" ; then + pmix_cv___attribute__[$1]=0 + break; + fi + done + fi +]) + +# +# Check for one specific attribute by compiling with C +# +# The last argument is for specific CFLAGS, that need to be set +# for the compiler to generate a warning on the cross-check. +# This may need adaption for future compilers / CFLAG-settings. +# +# Argument 1 is the attribute name and argument 2 is the test +# declaration that gets compiled. The result is cached in +# pmix_cv___attribute__NAME (1 = usable, 0 = not) and reported as yes/no. +# NOTE(review): the body of this macro references only arguments 1 and 2; +# the cross-check program and CFLAGS that callers pass as arguments 3 and 4 +# are not used here, so no cross-check is actually performed -- confirm +# whether that is intended. +# +AC_DEFUN([_PMIX_CHECK_SPECIFIC_ATTRIBUTE], [ + AC_MSG_CHECKING([for __attribute__([$1])]) + AC_CACHE_VAL(pmix_cv___attribute__[$1], [ + # + # Try to compile using the C compiler + # + AC_TRY_COMPILE([$2],[], + [ + # + # In case we did succeed: Fine, but was this due to the + # attribute being ignored/skipped? 
Grep for IgNoRe/skip in conftest.err + # and if found, reset the pmix_cv__attribute__var=0 + # + pmix_cv___attribute__[$1]=1 + _PMIX_ATTRIBUTE_FAIL_SEARCH([$1]) + ], + [pmix_cv___attribute__[$1]=0]) + ]) + + if test "$pmix_cv___attribute__[$1]" = "1" ; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +]) + + +# +# Test the availability of __attribute__ and with the help +# of _PMIX_CHECK_SPECIFIC_ATTRIBUTE for the support of +# particular attributes. Compilers that do not support an +# attribute most often fail with a warning (when the warning +# level is set). +# The compiler's output is parsed in _PMIX_ATTRIBUTE_FAIL_SEARCH +# +# To add a new attribute __NAME__, add the +# pmix_cv___attribute__NAME +# add a new check with _PMIX_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check) +# _PMIX_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], []) +# and define the corresponding +# AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NAME, [$pmix_cv___attribute__NAME], +# [Whether your compiler has __attribute__ NAME or not]) +# and decide on a correct macro (in pmix/include/pmix_config_bottom.h): +# # define __pmix_attribute_NAME(x) __attribute__(__NAME__) +# +# Please use the "__"-notation of the attribute in order not to +# clash with predefined names or macros (e.g. const, which some compilers +# do not like..) 
+# + + +AC_DEFUN([PMIX_CHECK_ATTRIBUTES], [ + AC_LANG(C) + AC_MSG_CHECKING(for __attribute__) + + AC_CACHE_VAL(pmix_cv___attribute__, [ + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [pmix_cv___attribute__=1], + [pmix_cv___attribute__=0], + ) + + if test "$pmix_cv___attribute__" = "1" ; then + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [pmix_cv___attribute__=1], + [pmix_cv___attribute__=0], + ) + fi + ]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE, [$pmix_cv___attribute__], + [Whether your compiler has __attribute__ or not]) + +# +# Now that we know the compiler support __attribute__ let's check which kind of +# attributed are supported. +# + if test "$pmix_cv___attribute__" = "0" ; then + AC_MSG_RESULT([no]) + pmix_cv___attribute__aligned=0 + pmix_cv___attribute__always_inline=0 + pmix_cv___attribute__cold=0 + pmix_cv___attribute__const=0 + pmix_cv___attribute__deprecated=0 + pmix_cv___attribute__deprecated_argument=0 + pmix_cv___attribute__format=0 + pmix_cv___attribute__format_funcptr=0 + pmix_cv___attribute__hot=0 + pmix_cv___attribute__malloc=0 + pmix_cv___attribute__may_alias=0 + pmix_cv___attribute__no_instrument_function=0 + pmix_cv___attribute__nonnull=0 + pmix_cv___attribute__noreturn=0 + pmix_cv___attribute__noreturn_funcptr=0 + pmix_cv___attribute__packed=0 + pmix_cv___attribute__pure=0 + pmix_cv___attribute__sentinel=0 + pmix_cv___attribute__unused=0 + pmix_cv___attribute__visibility=0 + pmix_cv___attribute__warn_unused_result=0 + pmix_cv___attribute__destructor=0 + else + AC_MSG_RESULT([yes]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([aligned], + [struct foo { char text[4]; } __attribute__ ((__aligned__(8)));], + [], + []) + + # + # Ignored by PGI-6.2.5; -- recognized by output-parser + # + 
_PMIX_CHECK_SPECIFIC_ATTRIBUTE([always_inline], + [int foo (int arg) __attribute__ ((__always_inline__));], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([cold], + [ + int foo(int arg1, int arg2) __attribute__ ((__cold__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([const], + [ + int foo(int arg1, int arg2) __attribute__ ((__const__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated], + [ + int foo(int arg1, int arg2) __attribute__ ((__deprecated__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated_argument], + [ + int foo(int arg1, int arg2) __attribute__ ((__deprecated__("compiler allows argument"))); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we want specifically the warning on format string conversion + ATTRIBUTE_CFLAGS="-we181" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format], + [ + int this_printf (void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + ], + [ + static int usage (int * argument); + extern int this_printf (int arg1, const char *my_format, ...) 
__attribute__ ((__format__ (__printf__, 2, 3))); + + static int usage (int * argument) { + return this_printf (*argument, "%d", argument); /* This should produce a format warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we want specifically the warning on format string conversion + ATTRIBUTE_CFLAGS="-we181" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format_funcptr], + [ + int (*this_printf)(void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + ], + [ + static int usage (int * argument); + extern int (*this_printf) (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + + static int usage (int * argument) { + return (*this_printf) (*argument, "%d", argument); /* This should produce a format warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([hot], + [ + int foo(int arg1, int arg2) __attribute__ ((__hot__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([malloc], + [ +#ifdef HAVE_STDLIB_H +# include +#endif + int * foo(int arg1) __attribute__ ((__malloc__)); + int * foo(int arg1) { return (int*) malloc(arg1); } + ], + [], + []) + + + # + # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers + # Ignored by intel-9.1.045 -- turn off with -wd1292 + # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check + # + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([may_alias], + [int * p_value __attribute__ ((__may_alias__));], + [], + []) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function], + [int * foo(int arg1) __attribute__ 
((__no_instrument_function__));], + [], + []) + + + # + # Attribute nonnull: + # Ignored by intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings, but rather real warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([nonnull], + [ + int square(int *arg) __attribute__ ((__nonnull__)); + int square(int *arg) { return *arg; } + ], + [ + static int usage(int * argument); + int square(int * argument) __attribute__ ((__nonnull__)); + int square(int * argument) { return (*argument) * (*argument); } + + static int usage(int * argument) { + return square( ((void*)0) ); /* This should produce an argument must be nonnull warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn], + [ +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif + void fatal(int arg1) __attribute__ ((__noreturn__)); + void fatal(int arg1) { exit(arg1); } + ], + [], + []) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn_funcptr], + [ +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif + extern void (*fatal_exit)(int arg1) __attribute__ ((__noreturn__)); + void fatal(int arg1) { fatal_exit (arg1); } + ], + [], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([packed], + [ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([pure], + [ + int square(int arg) __attribute__ ((__pure__)); + int square(int arg) { return arg * arg; } + ], + [], + []) + + # + # Attribute sentinel: + # Ignored by the intel-9.1.045 -- recognized by cross-check + # intel-10.0beta works fine + # Ignored by PGI-6.2.5 
(pgCC) -- recognized by output-parser and cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([sentinel], + [ + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + ], + [ + static int usage(int * argument); + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + + static int usage(int * argument) { + void * last_arg_should_be_null = argument; + return my_execlp ("lala", "/home/there", last_arg_should_be_null); /* This should produce a warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([unused], + [ + int square(int arg1 __attribute__ ((__unused__)), int arg2); + int square(int arg1, int arg2) { return arg2; } + ], + [], + []) + + + # + # Ignored by PGI-6.2.5 (pgCC) -- recognized by the output-parser + # + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([visibility], + [ + int square(int arg1) __attribute__ ((__visibility__("hidden"))); + ], + [], + []) + + + # + # Attribute warn_unused_result: + # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result], + [ + int foo(int arg) __attribute__ ((__warn_unused_result__)); + int foo(int arg) { return arg + 3; } + ], + [ + static int usage(int * argument); + int foo(int arg) __attribute__ ((__warn_unused_result__)); + + int foo(int arg) { 
return arg + 3; } + static int usage(int * argument) { + foo (*argument); /* Should produce an unused result warning */ + return 0; + } + + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([destructor], + [ + void foo(void) __attribute__ ((__destructor__)); + void foo(void) { return ; } + ], + [], + []) + fi + + # Now that all the values are set, define them + + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALIGNED, [$pmix_cv___attribute__aligned], + [Whether your compiler has __attribute__ aligned or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$pmix_cv___attribute__always_inline], + [Whether your compiler has __attribute__ always_inline or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_COLD, [$pmix_cv___attribute__cold], + [Whether your compiler has __attribute__ cold or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_CONST, [$pmix_cv___attribute__const], + [Whether your compiler has __attribute__ const or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED, [$pmix_cv___attribute__deprecated], + [Whether your compiler has __attribute__ deprecated or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT, [$pmix_cv___attribute__deprecated_argument], + [Whether your compiler has __attribute__ deprecated with optional argument]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT, [$pmix_cv___attribute__format], + [Whether your compiler has __attribute__ format or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT_FUNCPTR, [$pmix_cv___attribute__format_funcptr], + [Whether your compiler has __attribute__ format and it works on function pointers]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_HOT, [$pmix_cv___attribute__hot], + [Whether your compiler has __attribute__ hot or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MALLOC, [$pmix_cv___attribute__malloc], + [Whether your compiler has __attribute__ malloc or not]) + 
AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MAY_ALIAS, [$pmix_cv___attribute__may_alias], + [Whether your compiler has __attribute__ may_alias or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$pmix_cv___attribute__no_instrument_function], + [Whether your compiler has __attribute__ no_instrument_function or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NONNULL, [$pmix_cv___attribute__nonnull], + [Whether your compiler has __attribute__ nonnull or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN, [$pmix_cv___attribute__noreturn], + [Whether your compiler has __attribute__ noreturn or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN_FUNCPTR, [$pmix_cv___attribute__noreturn_funcptr], + [Whether your compiler has __attribute__ noreturn and it works on function pointers]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PACKED, [$pmix_cv___attribute__packed], + [Whether your compiler has __attribute__ packed or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PURE, [$pmix_cv___attribute__pure], + [Whether your compiler has __attribute__ pure or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_SENTINEL, [$pmix_cv___attribute__sentinel], + [Whether your compiler has __attribute__ sentinel or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_UNUSED, [$pmix_cv___attribute__unused], + [Whether your compiler has __attribute__ unused or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_VISIBILITY, [$pmix_cv___attribute__visibility], + [Whether your compiler has __attribute__ visibility or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$pmix_cv___attribute__warn_unused_result], + [Whether your compiler has __attribute__ warn unused result or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS, [$pmix_cv___attribute__weak_alias], + [Whether your compiler has __attribute__ weak alias or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DESTRUCTOR, [$pmix_cv___attribute__destructor], + [Whether your compiler has __attribute__ destructor or 
not]) +]) diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_broken_qsort.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_broken_qsort.m4 new file mode 100644 index 00000000000..da4d14047e3 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_broken_qsort.m4 @@ -0,0 +1,55 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl There was some mentioning of broken qsort happened for Solaris that could +dnl cause qsort to return a bad pointer which could cause some badness. +dnl The problem should have been corrected with these patches from SunSolve. +dnl Solaris 10 should be free from this problem. +dnl +dnl 5.8_sparc #108827-27 or later +dnl 5.8_x86 #108828-28 or later +dnl 5.9_sparc #112874-20 or later +dnl 5.9_x86 #114432-07 or later +dnl +dnl For users who could not patch their systems or are convinced that their +dnl native qsort is broken, they could specify this configure flag to use +dnl the pmix_qsort instead. 
+
+# PMIX_CHECK_BROKEN_QSORT()
+# Adds --with-broken-qsort; PMIX_HAVE_BROKEN_QSORT is 1 only for "yes".
+# --------------------------------------------------------
+AC_DEFUN([PMIX_CHECK_BROKEN_QSORT],[
+    AC_ARG_WITH([broken-qsort],
+        [AC_HELP_STRING([--with-broken-qsort],
+            [Build with FreeBSD qsort instead of native qsort (default: no)])])
+    AC_MSG_CHECKING([for broken qsort])
+
+    # Default to trusting the native qsort; only an explicit "yes" flips it.
+    result="no"
+    define_result=0
+    AS_IF([test "$with_broken_qsort" = "yes"],
+          [result="yes"
+           define_result=1])
+
+    AC_MSG_RESULT([$result])
+    AC_DEFINE_UNQUOTED([PMIX_HAVE_BROKEN_QSORT], [$define_result],
+        [whether qsort is broken or not])
+])
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_compiler_version.m4
new file mode 100644
index 00000000000..b0a347e27bf
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_compiler_version.m4
@@ -0,0 +1,92 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
+dnl Copyright (c) 2013 Intel, Inc. All rights reserved
+dnl
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+
+# PMIX_CHECK_COMPILER_VERSION_ID()
+# ----------------------------------------------------
+# Try to figure out the compiler's name and version to detect cases,
+# where users compile PMIx with one version and compile the application
+# with a different compiler.
+#
+AC_DEFUN([PMIX_CHECK_COMPILER_VERSION_ID],
+[
+    PMIX_CHECK_COMPILER(FAMILYID)
+    PMIX_CHECK_COMPILER_STRINGIFY(FAMILYNAME)
+    PMIX_CHECK_COMPILER(VERSION)
+    PMIX_CHECK_COMPILER_STRINGIFY(VERSION_STR)
+])dnl
+
+# PMIX_CHECK_COMPILER(FIELD): define PMIX_BUILD_PLATFORM_COMPILER_FIELD to the integer PLATFORM_COMPILER_FIELD (0 if the probe cannot run).
+AC_DEFUN([PMIX_CHECK_COMPILER], [
+    lower=m4_tolower($1)
+    AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1],
+    [
+            CPPFLAGS_orig=$CPPFLAGS
+            CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS"
+            AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+#include "pmix_portable_platform.h"
+
+int main (int argc, char * argv[])
+{
+    FILE * f;
+    f=fopen("conftestval", "w");
+    if (!f) exit(1);
+    fprintf (f, "%d", PLATFORM_COMPILER_$1);
+    return 0;
+}
+            ], [
+                eval pmix_cv_compiler_$1=`cat conftestval`;
+            ], [
+                eval pmix_cv_compiler_$1=0
+            ], [
+                eval pmix_cv_compiler_$1=0
+            ])
+            CPPFLAGS=$CPPFLAGS_orig
+    ])
+    AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1],
+                       [The compiler $lower which PMIx was built with])
+])dnl
+
+# PMIX_CHECK_COMPILER_STRINGIFY(FIELD): same probe, but records the stringified PLATFORM_COMPILER_FIELD (UNKNOWN if the probe cannot run).
+AC_DEFUN([PMIX_CHECK_COMPILER_STRINGIFY], [
+    lower=m4_tolower($1)
+    AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1],
+    [
+            CPPFLAGS_orig=$CPPFLAGS
+            CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS"
+            AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+#include "pmix_portable_platform.h"
+
+int main (int argc, char * argv[])
+{
+    FILE * f;
+    f=fopen("conftestval", "w");
+    if (!f) exit(1);
+    fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1));
+    return 0;
+}
+            ], [
+                eval pmix_cv_compiler_$1=`cat conftestval`;
+            ], [
+                eval pmix_cv_compiler_$1=UNKNOWN
+            ], [
+                eval pmix_cv_compiler_$1=UNKNOWN
+            ])
+            CPPFLAGS=$CPPFLAGS_orig
+    ])
+    AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1],
+                       [The compiler $lower which PMIX was built with])
+])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_icc.m4
new file mode 100644
index 00000000000..e8a06b25148
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_icc.m4
@@ -0,0 +1,62 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2014 Intel, Inc. All rights reserved.
+dnl Copyright (c) 2016 Research Organization for Information Science
+dnl                    and Technology (RIST). All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+AC_DEFUN([PMIX_CHECK_ICC_VARARGS],[
+dnl Runs a tiny varargs program and aborts configure when it does not exit cleanly.
+dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since
+dnl va_start was miscompiled...
+dnl A failed run and cross-compilation both count as a broken compiler.
+AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments])
+AC_TRY_RUN([
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void func (int c, char * f, ...)
+{
+    va_list arglist;
+    va_start (arglist, f);
+    /* vprintf (f, arglist); */
+    va_end (arglist);
+}
+
+int main ()
+{
+    FILE *f;
+    func (4711, "Help %d [%s]\n", 10, "ten");
+    f=fopen ("conftestval", "w");
+    if (!f) exit (1);
+    return 0;
+}
+
+],[pmix_ac_icc_varargs=0],[pmix_ac_icc_varargs=1],[pmix_ac_icc_varargs=1])
+
+if test "$pmix_ac_icc_varargs" = "1"; then
+    AC_MSG_WARN([*** Problem running configure test!])
+    AC_MSG_WARN([*** Your icc-8.1 compiler seems to miscompile va_start!])
+    AC_MSG_WARN([*** Please upgrade compiler to at least version 8.1.027])
+    AC_MSG_ERROR([*** Cannot continue.])
+fi
+
+AC_MSG_RESULT([yes])
+
+rm -rf conftest*])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_ident.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_ident.m4
new file mode 100644
index 00000000000..de2fa573bc9
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_ident.m4
@@ -0,0 +1,106 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
+dnl Copyright (c) 2015 Intel, Inc. All rights reserved
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+dnl defines:
+dnl   PMIX_$1_USE_PRAGMA_IDENT
+dnl   PMIX_$1_USE_IDENT
+dnl   PMIX_$1_USE_PRAGMA_COMMENT
+dnl   PMIX_$1_USE_CONST_CHAR_IDENT
+dnl
+
+# PMIX_CHECK_IDENT(compiler-env, compiler-flags,
+# file-suffix, lang) Try to compile a source file containing
+# a #pragma ident, and determine whether the ident was
+# inserted into the resulting object file
+# -----------------------------------------------------------
+AC_DEFUN([PMIX_CHECK_IDENT], [
+    AC_MSG_CHECKING([for $4 ident string support])
+
+    # Start every flavor at 0 so each define below receives a numeric value.
+    pmix_pragma_ident_happy=0
+    pmix_ident_happy=0
+    pmix_pragma_comment_happy=0
+    pmix_static_const_char_happy=0
+    _PMIX_CHECK_IDENT(
+        [$1], [$2], [$3],
+        [[#]pragma ident], [],
+        [pmix_pragma_ident_happy=1
+         pmix_message="[#]pragma ident"],
+        _PMIX_CHECK_IDENT(
+            [$1], [$2], [$3],
+            [[#]ident], [],
+            [pmix_ident_happy=1
+             pmix_message="[#]ident"],
+            _PMIX_CHECK_IDENT(
+                [$1], [$2], [$3],
+                [[#]pragma comment(exestr, ], [)],
+                [pmix_pragma_comment_happy=1
+                 pmix_message="[#]pragma comment"],
+                [pmix_static_const_char_happy=1
+                 pmix_message="static const char[[]]"])))
+
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_IDENT],
+        [$pmix_pragma_ident_happy], [Use #pragma ident strings for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_IDENT],
+        [$pmix_ident_happy], [Use #ident strings for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_COMMENT],
+        [$pmix_pragma_comment_happy], [Use #pragma comment for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_CONST_CHAR_IDENT],
+        [$pmix_static_const_char_happy], [Use static const char[] strings for $4 files])
+
+    AC_MSG_RESULT([$pmix_message])
+
+    unset pmix_pragma_ident_happy pmix_ident_happy pmix_pragma_comment_happy pmix_static_const_char_happy pmix_message
+])
+
+# _PMIX_CHECK_IDENT(compiler-env, compiler-flags,
+# file-suffix, header_prefix, header_suffix, action-if-success, action-if-fail)
+# Try to compile a source file containing a #-style ident,
+# and determine whether the ident was inserted into the
+# resulting object file
+# -----------------------------------------------------------
+AC_DEFUN([_PMIX_CHECK_IDENT], [
+    eval pmix_compiler="\$$1"
+    eval pmix_flags="\$$2"
+
+    pmix_ident="string_not_coincidentally_inserted_by_the_compiler"
+    cat > conftest.$3 <<EOF
+$4 "$pmix_ident" $5
+int main(int argc, char** argv);
+int main(int argc, char** argv) { return 0; }
+EOF
+
+    # "strings" won't always return the ident string.  objdump isn't
+    # universal (e.g., OS X doesn't have it), and ...other
+    # complications.  So just try to "grep" for the string in the
+    # resulting object file.  If the ident is found in "strings" or
+    # the grep succeeds, rule that we have this flavor of ident.
+
+    echo "configure:__oline__: $1" >&5
+    pmix_output=`$pmix_compiler $pmix_flags -c conftest.$3 -o conftest.${OBJEXT} 2>&1 1>/dev/null`
+    pmix_status=$?
+    AS_IF([test $pmix_status = 0],
+          [test -z "$pmix_output"
+           pmix_status=$?])
+    PMIX_LOG_MSG([\$? = $pmix_status], 1)
+    AS_IF([test $pmix_status = 0 && test -f conftest.${OBJEXT}],
+          [pmix_output="`strings -a conftest.${OBJEXT} | grep $pmix_ident`"
+           grep $pmix_ident conftest.${OBJEXT} 2>&1 1>/dev/null
+           pmix_status=$?
+           AS_IF([test "$pmix_output" != "" || test "$pmix_status" = "0"],
+                 [$6],
+                 [$7])],
+          [PMIX_LOG_MSG([the failed program was:])
+           PMIX_LOG_FILE([conftest.$3])
+           $7])
+
+    unset pmix_compiler pmix_flags pmix_output pmix_status
+    rm -rf conftest.* conftest${EXEEXT}
+])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_munge.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_munge.m4
new file mode 100644
index 00000000000..6410749b2e0
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_munge.m4
@@ -0,0 +1,89 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2015 Intel, Inc. All rights reserved
+# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_MUNGE_CONFIG()
+# Takes no arguments.  Adds --with-munge=DIR and --with-munge-libdir=DIR,
+# probes for munge.h and munge_encode in libmunge, and defines
+# PMIX_HAVE_MUNGE to 1 or 0.  On success the munge flags are prepended to
+# CPPFLAGS/LDFLAGS and -lmunge is appended to LIBS.
+# --------------------------------------------------------------------
+AC_DEFUN([PMIX_MUNGE_CONFIG],[
+
+    PMIX_VAR_SCOPE_PUSH([pmix_munge_dir pmix_munge_libdir])
+
+    AC_ARG_WITH([munge],
+                [AC_HELP_STRING([--with-munge=DIR],
+                                [Search for munge headers and libraries in DIR ])])
+
+    AC_ARG_WITH([munge-libdir],
+                [AC_HELP_STRING([--with-munge-libdir=DIR],
+                                [Search for munge libraries in DIR ])])
+
+    pmix_munge_support=0
+    if test "$with_munge" != "no"; then
+        AC_MSG_CHECKING([for munge in])
+        if test ! -z "$with_munge" && test "$with_munge" != "yes"; then
+            # NOTE(review): the package check appends /include to this
+            # prefix so the include/munge form looks suspicious - confirm.
+            if test -d $with_munge/include/munge; then
+                pmix_munge_dir=$with_munge/include/munge
+            else
+                pmix_munge_dir=$with_munge
+            fi
+            if test -d $with_munge/lib; then
+                pmix_munge_libdir=$with_munge/lib
+            elif test -d $with_munge/lib64; then
+                pmix_munge_libdir=$with_munge/lib64
+            else
+                AC_MSG_RESULT([Could not find $with_munge/lib or $with_munge/lib64])
+                AC_MSG_ERROR([Can not continue])
+            fi
+            AC_MSG_RESULT([$pmix_munge_dir and $pmix_munge_libdir])
+        else
+            AC_MSG_RESULT([(default search paths)])
+            pmix_munge_dir=
+        fi
+        AS_IF([test ! -z "$with_munge_libdir" && test "$with_munge_libdir" != "yes"],
+              [pmix_munge_libdir="$with_munge_libdir"])
+
+        PMIX_CHECK_PACKAGE([pmix_munge],
+                           [munge.h],
+                           [munge],
+                           [munge_encode],
+                           [-lmunge],
+                           [$pmix_munge_dir],
+                           [$pmix_munge_libdir],
+                           [pmix_munge_support=1],
+                           [pmix_munge_support=0])
+        if test "$pmix_munge_support" = "1"; then
+            CPPFLAGS="$pmix_munge_CPPFLAGS $CPPFLAGS"
+            LIBS="$LIBS -lmunge"
+            LDFLAGS="$pmix_munge_LDFLAGS $LDFLAGS"
+        fi
+    fi
+
+    if test ! -z "$with_munge" && test "$with_munge" != "no" && test "$pmix_munge_support" != "1"; then
+        AC_MSG_WARN([MUNGE SUPPORT REQUESTED AND NOT FOUND.])
+        AC_MSG_ERROR([CANNOT CONTINUE])
+    fi
+
+    AC_MSG_CHECKING([will munge support be built])
+    if test "$pmix_munge_support" != "1"; then
+        AC_MSG_RESULT([no])
+    else
+        AC_MSG_RESULT([yes])
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_HAVE_MUNGE], [$pmix_munge_support],
+                       [Whether we have munge support or not])
+
+    PMIX_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_package.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_package.m4
new file mode 100644
index 00000000000..f4833c3b0ae
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_package.m4
@@ -0,0 +1,176 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# _PMIX_CHECK_PACKAGE_HEADER(prefix, header, dir-prefix,
+#                            [action-if-found], [action-if-not-found],
+#                            includes)
+# --------------------------------------------------------------------
+AC_DEFUN([_PMIX_CHECK_PACKAGE_HEADER], [
+    # This is stolen from autoconf to peek under the covers to get the
+    # cache variable for the header check. one should not copy this
+    # code into other places unless you want much pain and suffering
+    AS_VAR_PUSHDEF([pmix_Header], [ac_cv_header_$2])
+
+    # so this sucks, but there's no way to get through the progression
+    # of header includes without killing off the cache variable and trying
+    # again...
+    unset pmix_Header
+
+    pmix_check_package_header_happy="no"
+    AS_IF([test "$3" = "/usr" || test "$3" = "/usr/local"],
+           [ # try as is...
+            AC_VERBOSE([looking for header without includes])
+            AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], [])
+            AS_IF([test "$pmix_check_package_header_happy" = "no"],
+                  [# no go on the as is - reset the cache and try again
+                   unset pmix_Header])])
+
+    AS_IF([test "$pmix_check_package_header_happy" = "no"],
+          [AS_IF([test "$3" != ""],
+                 [$1_CPPFLAGS="$$1_CPPFLAGS -I$3/include"
+                  CPPFLAGS="$CPPFLAGS -I$3/include"])
+          AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], [], [$6])
+          AS_IF([test "$pmix_check_package_header_happy" = "yes"], [$4], [$5])],
+          [$4])
+    unset pmix_check_package_header_happy
+
+    AS_VAR_POPDEF([pmix_Header])dnl
+])
+
+
+# _PMIX_CHECK_PACKAGE_LIB(prefix, library, function, extra-libraries,
+#                         dir-prefix, libdir,
+#                         [action-if-found], [action-if-not-found])
+# --------------------------------------------------------------------
+AC_DEFUN([_PMIX_CHECK_PACKAGE_LIB], [
+    # This is stolen from autoconf to peek under the covers to get the
+    # cache variable for the library check. one should not copy this
+    # code into other places unless you want much pain and suffering
+    AS_LITERAL_IF([$2],
+                  [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2_$3])],
+                  [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2''_$3])])dnl
+
+    # see comment above
+    unset pmix_Lib
+    pmix_check_package_lib_happy="no"
+    AS_IF([test "$6" != ""],
+          [ # libdir was specified - search only there
+           $1_LDFLAGS="$$1_LDFLAGS -L$6"
+           LDFLAGS="$LDFLAGS -L$6"
+           AC_CHECK_LIB([$2], [$3],
+                        [pmix_check_package_lib_happy="yes"],
+                        [pmix_check_package_lib_happy="no"], [$4])
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                 [LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                  $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                  unset pmix_Lib])],
+          [ # libdir was not specified - go through search path
+           pmix_check_package_libdir="$5"
+           AS_IF([test "$pmix_check_package_libdir" = "" || test "$pmix_check_package_libdir" = "/usr" || test "$pmix_check_package_libdir" = "/usr/local"],
+               [ # try as is...
+                AC_VERBOSE([looking for library without search path])
+                AC_CHECK_LIB([$2], [$3],
+                        [pmix_check_package_lib_happy="yes"],
+                        [pmix_check_package_lib_happy="no"], [$4])
+                AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                    [ # no go on the as is.. see what happens later...
+                     LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                     $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                     unset pmix_Lib])])
+
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+               [AS_IF([test "$pmix_check_package_libdir" != ""],
+                    [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib"
+                     LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib"
+                     AC_VERBOSE([looking for library in lib])
+                     AC_CHECK_LIB([$2], [$3],
+                               [pmix_check_package_lib_happy="yes"],
+                               [pmix_check_package_lib_happy="no"], [$4])
+                     AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                         [ # not under lib - put the flags back and fall through to lib64
+                          LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                          $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                          unset pmix_Lib])])])
+
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+               [AS_IF([test "$pmix_check_package_libdir" != ""],
+                    [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib64"
+                     LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib64"
+                     AC_VERBOSE([looking for library in lib64])
+                     AC_CHECK_LIB([$2], [$3],
+                               [pmix_check_package_lib_happy="yes"],
+                               [pmix_check_package_lib_happy="no"], [$4])
+                     AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                         [ # not under lib64 either - put the flags back and give up
+                          LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                          $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                          unset pmix_Lib])])])])
+
+    AS_IF([test "$pmix_check_package_lib_happy" = "yes"],
+          [$1_LIBS="-l$2 $4"
+           $7], [$8])
+
+    AS_VAR_POPDEF([pmix_Lib])dnl
+])
+
+
+# PMIX_CHECK_PACKAGE(prefix,
+#                    header,
+#                    library,
+#                    function,
+#                    extra-libraries,
+#                    dir-prefix,
+#                    libdir-prefix,
+#                    [action-if-found], [action-if-not-found],
+#                    includes)
+# -----------------------------------------------------------
+# check for package defined by header and libs, and probably
+# located in dir-prefix, possibly with libs in libdir-prefix.
+# Both dir-prefix and libdir-prefix can be empty. Will set
+# prefix_{CPPFLAGS, LDFLAGS, LIBS} as needed
+AC_DEFUN([PMIX_CHECK_PACKAGE],[
+    pmix_check_package_$1_save_CPPFLAGS="$CPPFLAGS"
+    pmix_check_package_$1_save_LDFLAGS="$LDFLAGS"
+    pmix_check_package_$1_save_LIBS="$LIBS"
+
+    pmix_check_package_$1_orig_CPPFLAGS="$$1_CPPFLAGS"
+    pmix_check_package_$1_orig_LDFLAGS="$$1_LDFLAGS"
+    pmix_check_package_$1_orig_LIBS="$$1_LIBS"
+
+    _PMIX_CHECK_PACKAGE_HEADER([$1], [$2], [$6],
+          [_PMIX_CHECK_PACKAGE_LIB([$1], [$3], [$4], [$5], [$6], [$7],
+                [pmix_check_package_happy="yes"],
+                [pmix_check_package_happy="no"])],
+          [pmix_check_package_happy="no"],
+          [$10])
+
+    AS_IF([test "$pmix_check_package_happy" = "yes"],
+          [$8],
+          [$1_CPPFLAGS="$pmix_check_package_$1_orig_CPPFLAGS"
+           $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+           $1_LIBS="$pmix_check_package_$1_orig_LIBS"
+           $9])
+    # The global flags are restored unconditionally; only the prefix_ variables carry the result.
+    CPPFLAGS="$pmix_check_package_$1_save_CPPFLAGS"
+    LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+    LIBS="$pmix_check_package_$1_save_LIBS"
+])
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_sasl.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_sasl.m4
new file mode 100644
index 00000000000..7dafb477a00
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_sasl.m4
@@ -0,0 +1,82 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2015 Intel, Inc. All rights reserved
+# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2015 Research Organization for Information Science
+#                    and Technology (RIST). All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_SASL_CONFIG(): no arguments; adds --with-sasl and --with-sasl-libdir, probes libsasl2, defines PMIX_HAVE_SASL to 1 or 0
+# --------------------------------------------------------------------
+AC_DEFUN([PMIX_SASL_CONFIG],[
+
+    PMIX_VAR_SCOPE_PUSH([pmix_sasl_dir pmix_sasl_libdir])
+
+    AC_ARG_WITH([sasl],
+                [AC_HELP_STRING([--with-sasl=DIR],
+                                [Search for sasl headers and libraries in DIR ])],
+                [], [with_sasl=no])
+
+    AC_ARG_WITH([sasl-libdir],
+                [AC_HELP_STRING([--with-sasl-libdir=DIR],
+                                [Search for sasl libraries in DIR ])])
+
+    pmix_sasl_support=0
+    if test "$with_sasl" != "no"; then
+        AC_MSG_CHECKING([for sasl in])
+        if test ! -z "$with_sasl" && test "$with_sasl" != "yes"; then
+            pmix_sasl_dir=$with_sasl    # the package check appends /include and the header is named sasl/sasl.h
+            if test -d $with_sasl/lib; then
+                pmix_sasl_libdir=$with_sasl/lib
+            elif test -d $with_sasl/lib64; then
+                pmix_sasl_libdir=$with_sasl/lib64
+            else
+                AC_MSG_RESULT([Could not find $with_sasl/lib or $with_sasl/lib64])
+                AC_MSG_ERROR([Can not continue])
+            fi
+            AC_MSG_RESULT([$pmix_sasl_dir and $pmix_sasl_libdir])
+        else
+            AC_MSG_RESULT([(default search paths)])
+            pmix_sasl_dir=
+        fi
+        AS_IF([test ! -z "$with_sasl_libdir" && test "$with_sasl_libdir" != "yes"],
+              [pmix_sasl_libdir="$with_sasl_libdir"])
+
+        PMIX_CHECK_PACKAGE([pmix_sasl],
+                           [sasl/sasl.h],
+                           [sasl2],
+                           [sasl_server_init],
+                           [-lsasl2],
+                           [$pmix_sasl_dir],
+                           [$pmix_sasl_libdir],
+                           [pmix_sasl_support=1],
+                           [pmix_sasl_support=0])
+        if test "$pmix_sasl_support" = "1"; then
+            CPPFLAGS="$pmix_sasl_CPPFLAGS $CPPFLAGS"
+            LIBS="$LIBS -lsasl2"
+            LDFLAGS="$pmix_sasl_LDFLAGS $LDFLAGS"
+        fi
+    fi
+
+    if test ! -z "$with_sasl" && test "$with_sasl" != "no" && test "$pmix_sasl_support" != "1"; then
+        AC_MSG_WARN([SASL SUPPORT REQUESTED AND NOT FOUND.])
+        AC_MSG_ERROR([CANNOT CONTINUE])
+    fi
+
+    AC_MSG_CHECKING([will sasl support be built])
+    if test "$pmix_sasl_support" != "1"; then
+        AC_MSG_RESULT([no])
+    else
+        AC_MSG_RESULT([yes])
+    fi
+
+    AC_DEFINE_UNQUOTED(PMIX_HAVE_SASL, [$pmix_sasl_support],
+                       [Whether we have sasl support or not])
+
+    PMIX_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_vendor.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_vendor.m4
new file mode 100644
index 00000000000..ba3f1a5a8d8
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_vendor.m4
@@ -0,0 +1,252 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+dnl Copyright (c) 2013 Intel, Inc. All rights reserved
+dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
+dnl Copyright (c) 2015 Research Organization for Information Science
+dnl                    and Technology (RIST). All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+
+# PMIX_C_COMPILER_VENDOR(VENDOR_VARIABLE)
+# ---------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the current C compiler.
+#
+# See comment for _PMIX_CHECK_COMPILER_VENDOR for a complete
+# list of currently detected compilers.
+AC_DEFUN([PMIX_C_COMPILER_VENDOR], [
+    AC_REQUIRE([AC_PROG_CC])
+
+    AC_CACHE_CHECK([for the C compiler vendor],
+        [pmix_cv_c_compiler_vendor],
+        [AC_LANG_PUSH(C)
+         _PMIX_CHECK_COMPILER_VENDOR([pmix_cv_c_compiler_vendor])
+         AC_LANG_POP(C)])
+
+    $1="$pmix_cv_c_compiler_vendor"
+])
+
+
+# workaround to avoid syntax error with Autoconf < 2.68:
+m4_ifndef([AC_LANG_DEFINES_PROVIDED],
+          [m4_define([AC_LANG_DEFINES_PROVIDED])])
+
+# PMIX_IFDEF_IFELSE(symbol, [action-if-defined],
+#                   [action-if-not-defined])
+# ----------------------------------------------
+# Run compiler to determine if preprocessor symbol "symbol" is
+# defined by the compiler.
+AC_DEFUN([PMIX_IFDEF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#ifndef $1
+#error "symbol $1 not defined"
+choke me
+#endif], [$2], [$3])])
+
+
+# PMIX_IF_IFELSE(condition, [action-if-true],
+#                [action-if-false])
+# ----------------------------------------------
+# Run compiler to determine if the preprocessor expression "condition"
+# evaluates to true; the test source hits an #error when it does not.
+AC_DEFUN([PMIX_IF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#if !( $1 )
+#error "condition $1 not met"
+choke me
+#endif], [$2], [$3])])
+
+
+# _PMIX_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE)
+# --------------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the compiler for the current language. Language must be
+# one of C, OBJC, or C++.
+#
+# thanks to http://predef.sourceforge.net/precomp.html for the list
+# of defines to check.
+AC_DEFUN([_PMIX_CHECK_COMPILER_VENDOR], [
+    pmix_check_compiler_vendor_result="unknown"
+
+    # GNU is probably the most common, so check that one as soon as
+    # possible. Intel pretends to be GNU, so need to check Intel
+    # before checking for GNU.
+
+    # Intel
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)],
+               [pmix_check_compiler_vendor_result="intel"])])
+
+    # Fujitsu
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__FUJITSU)],
+               [pmix_check_compiler_vendor_result="fujitsu"])])
+
+    # GNU
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__GNUC__],
+               [pmix_check_compiler_vendor_result="gnu"
+
+               # We do not support gccfss as a compiler so die if
+               # someone tries to use said compiler. gccfss (gcc
+               # for SPARC Systems) is a compiler that is no longer
+               # supported by Oracle and it has some major flaws
+               # that prevents it from actually compiling PMIX code.
+               # So if we detect it we automatically bail.
+
+               if ($CC --version | grep gccfss) >/dev/null 2>&1; then
+                   AC_MSG_RESULT([gccfss])
+                   AC_MSG_WARN([Detected gccfss being used to compile PMIx.])
+                   AC_MSG_WARN([Because of several issues PMIx does not support])
+                   AC_MSG_WARN([the gccfss compiler. Please use a different compiler.])
+                   AC_MSG_WARN([If you didn't think you used gccfss you may want to])
+                   AC_MSG_WARN([check to see if the compiler you think you used is])
+                   AC_MSG_WARN([actually a link to gccfss.])
+                   AC_MSG_ERROR([Cannot continue])
+               fi])])
+
+    # Borland Turbo C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TURBOC__],
+               [pmix_check_compiler_vendor_result="borland"])])
+
+    # Borland C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__BORLANDC__],
+               [pmix_check_compiler_vendor_result="borland"])])
+
+    # Comeau C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__COMO__],
+               [pmix_check_compiler_vendor_result="comeau"])])
+
+    # Compaq C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)],
+               [pmix_check_compiler_vendor_result="compaq"],
+               [PMIX_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)],
+                    [pmix_check_compiler_vendor_result="compaq"],
+                    [PMIX_IFDEF_IFELSE([__DECCXX],
+                         [pmix_check_compiler_vendor_result="compaq"])])])])
+
+    # Cray C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([_CRAYC],
+               [pmix_check_compiler_vendor_result="cray"])])
+
+    # Diab C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__DCC__],
+               [pmix_check_compiler_vendor_result="diab"])])
+
+    # Digital Mars
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)],
+               [pmix_check_compiler_vendor_result="digital mars"])])
+
+    # HP ANSI C / aC++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)],
+               [pmix_check_compiler_vendor_result="hp"])])
+
+    # IBM XL C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)],
+               [pmix_check_compiler_vendor_result="ibm"],
+               [PMIX_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)],
+                    [pmix_check_compiler_vendor_result="ibm"])])])
+
+    # KAI C++ (rest in peace)
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__KCC],
+               [pmix_check_compiler_vendor_result="kai"])])
+
+    # LCC
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__LCC__],
+               [pmix_check_compiler_vendor_result="lcc"])])
+
+    # MetaWare High C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__HIGHC__],
+               [pmix_check_compiler_vendor_result="metaware high"])])
+
+    # Metrowerks Codewarrior
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__MWERKS__],
+               [pmix_check_compiler_vendor_result="metrowerks"])])
+
+    # MIPSpro (SGI)
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(sgi) || defined(__sgi)],
+               [pmix_check_compiler_vendor_result="sgi"])])
+
+    # MPW C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)],
+               [pmix_check_compiler_vendor_result="mpw"])])
+
+    # Norcroft C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__CC_NORCROFT],
+               [pmix_check_compiler_vendor_result="norcroft"])])
+
+    # Pelles C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__POCC__],
+               [pmix_check_compiler_vendor_result="pelles"])])
+
+    # Portland Group
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__PGI],
+               [pmix_check_compiler_vendor_result="portland group"])])
+
+    # SAS/C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)],
+               [pmix_check_compiler_vendor_result="sas"])])
+
+    # Sun Workshop C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)],
+               [pmix_check_compiler_vendor_result="sun"])])
+
+    # TenDRA C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TenDRA__],
+               [pmix_check_compiler_vendor_result="tendra"])])
+
+    # Tiny C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TINYC__],
+               [pmix_check_compiler_vendor_result="tiny"])])
+
+    # USL C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__USLC__],
+               [pmix_check_compiler_vendor_result="usl"])])
+
+    # Watcom C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__WATCOMC__],
+               [pmix_check_compiler_vendor_result="watcom"])])
+
+    $1="$pmix_check_compiler_vendor_result"
+    unset pmix_check_compiler_vendor_result
+])
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_check_visibility.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_check_visibility.m4
new file mode 100644
index 00000000000..5368ead9bf1
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_check_visibility.m4
@@ -0,0 +1,90 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_CHECK_VISIBILITY
+# --------------------------------------------------------
+AC_DEFUN([PMIX_CHECK_VISIBILITY],[
+    AC_REQUIRE([AC_PROG_GREP])
+
+    # Probe for symbol-visibility support (some versions of gcc, icc and
+    # Sun Studio cc); sets PMIX_VISIBILITY_CFLAGS and PMIX_C_HAVE_VISIBILITY.
+    AC_ARG_ENABLE(visibility,
+        AC_HELP_STRING([--enable-visibility],
+            [enable visibility feature of certain compilers/linkers (default: enabled)]))
+
+    pmix_visibility_define=0
+    pmix_msg="whether to enable symbol visibility"
+
+    if test "$enable_visibility" = "no"; then
+        AC_MSG_CHECKING([$pmix_msg])
+        AC_MSG_RESULT([no (disabled)])
+    else
+        CFLAGS_orig=$CFLAGS
+
+        pmix_add=
+        case "$pmix_c_vendor" in
+        sun)
+            # Sun Studio: -xldscope=hidden, with all warnings made fatal
+            pmix_add=-xldscope=hidden
+            CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -errwarn=%all"
+            ;;
+
+        *)
+            # Everything else: -fvisibility=hidden, with warnings made fatal
+            pmix_add=-fvisibility=hidden
+            CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -Werror"
+            ;;
+        esac
+
+        AC_MSG_CHECKING([if $CC supports $pmix_add])
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+            #include <stdio.h>
+            __attribute__((visibility("default"))) int foo;
+            ]],[[fprintf(stderr, "Hello, world\n");]])],
+            [AS_IF([test -s conftest.err],
+                   [$GREP -iq visibility conftest.err
+                    # The link succeeded, but if stderr mentions "visibility"
+                    # then assume the flag or attribute was not honored
+                    AS_IF([test "$?" = "0"], [pmix_add=])])
+            ], [pmix_add=])
+        AS_IF([test "$pmix_add" = ""],
+              [AC_MSG_RESULT([no])],
+              [AC_MSG_RESULT([yes])])
+
+        CFLAGS=$CFLAGS_orig
+        PMIX_VISIBILITY_CFLAGS=$pmix_add
+
+        if test "$pmix_add" != "" ; then
+            pmix_visibility_define=1
+            AC_MSG_CHECKING([$pmix_msg])
+            AC_MSG_RESULT([yes (via $pmix_add)])
+        elif test "$enable_visibility" = "yes"; then
+            AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it.  Aborting])
+        else
+            AC_MSG_CHECKING([$pmix_msg])
+            AC_MSG_RESULT([no (unsupported)])
+        fi
+        unset pmix_add
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_C_HAVE_VISIBILITY], [$pmix_visibility_define],
+            [Whether C compiler supports symbol visibility or not])
+])
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_ensure_contains_optflags.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_ensure_contains_optflags.m4
new file mode 100644
index 00000000000..68bf36090ba
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_ensure_contains_optflags.m4
@@ -0,0 +1,67 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
+dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
+dnl Copyright (c) 2013 Intel, Inc. All rights reserved
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+AC_DEFUN([PMIX_ENSURE_CONTAINS_OPTFLAGS],[
+
+# Modularize this setup so that sub-configure.in scripts can use this
+# same setup code.
+
+##################################
+# Optimization flags
+##################################
+
+# If argument 1 holds no debug/optimization flag, prepend $OPTFLAGS to
+# it; either way the resulting flag string is left in $co_result.
+
+co_arg="$1"
+co_found=0
+for co_word in $co_arg; do
+    # See http://www.gnu.org/software/autoconf/manual/html_node/Quadrigraphs.html#Quadrigraphs
+    # for an explanation of @<:@ and @:>@ -- they m4 expand to [ and ]
+    case $co_word in
+    -g)              co_found=1 ;;
+    -g@<:@1-3@:>@)   co_found=1 ;;
+    +K@<:@0-5@:>@)   co_found=1 ;;
+    -O)              co_found=1 ;;
+    -O@<:@0-9@:>@)   co_found=1 ;;
+    -xO)             co_found=1 ;;
+    -xO@<:@0-9@:>@)  co_found=1 ;;
+    -fast)           co_found=1 ;;
+
+    # The below Sun Studio flags require or
+    # trigger -xO optimization
+    -xvector*)     co_found=1 ;;
+    -xdepend=yes)  co_found=1 ;;
+
+    esac
+done
+
+if test "$co_found" = "0"; then
+    co_result="$OPTFLAGS $co_arg"
+else
+    co_result="$co_arg"
+fi
+
+# Clean up
+
+unset co_found co_word co_arg
+])
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix120/pmix/config/pmix_functions.m4
new file mode 100644
index 00000000000..5fb6d7a58cd
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_functions.m4
@@ -0,0 +1,533 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
+dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
+dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
+dnl Copyright (c) 2013 Intel, Inc.
All rights reserved +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl Portions of this file derived from GASNet v1.12 (see "GASNet" +dnl comments, below) +dnl Copyright 2004, Dan Bonachea +dnl +dnl IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR +dnl DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT +dnl OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF +dnl CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +dnl +dnl THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, +dnl INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +dnl AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +dnl ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO +dnl PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. +dnl + +AC_DEFUN([PMIX_CONFIGURE_SETUP],[ + +# Some helper script functions. Unfortunately, we cannot use $1 kinds +# of arugments here because of the m4 substitution. So we have to set +# special variable names before invoking the function. :-\ + +pmix_show_title() { + cat <@:*) + echo installing to directory \"$prefix\" + ;; + *) + AC_MSG_ERROR(prefix "$prefix" must be an absolute directory path) + ;; +esac + +# BEGIN: Derived from GASNet + +# Suggestion from Paul Hargrove to disable --program-prefix and +# friends. Heavily influenced by GASNet 1.12 acinclude.m4 +# functionality to do the same thing (copyright listed at top of this +# file). 
+
+# echo program_prefix=$program_prefix program_suffix=$program_suffix program_transform_name=$program_transform_name
+# undo prefix autoconf automatically adds during cross-compilation
+if test "$cross_compiling" = yes && test "$program_prefix" = "${target_alias}-" ; then
+  program_prefix=NONE
+fi
+# normalize empty prefix/suffix
+if test -z "$program_prefix" ; then
+  program_prefix=NONE
+fi
+if test -z "$program_suffix" ; then
+  program_suffix=NONE
+fi
+# undo transforms caused by empty prefix/suffix
+if expr "$program_transform_name" : 's.^..$' >/dev/null || \
+   expr "$program_transform_name" : 's.$$..$' >/dev/null || \
+   expr "$program_transform_name" : 's.$$..;s.^..$' >/dev/null ; then
+  program_transform_name="s,x,x,"
+fi
+if test "$program_prefix$program_suffix$program_transform_name" != "NONENONEs,x,x," ; then
+  AC_MSG_WARN([*** The PMIx configure script does not support --program-prefix, --program-suffix or --program-transform-name. Users are recommended to instead use --prefix with a unique directory and make symbolic links as desired for renaming.])
+  AC_MSG_ERROR([*** Cannot continue])
+fi
+
+# END: Derived from GASNet
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+AC_DEFUN([PMIX_LOG_MSG],[
+# 1 is the message to append to the configure log (file descriptor 5)
+# 2 is whether to put a "configure:LINE:" prefix or not (non-empty = yes)
+if test -n "$2"; then
+    echo "configure:__oline__: $1" >&5
+else
+    echo "$1" >&5
+fi])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+AC_DEFUN([PMIX_LOG_FILE],[
+# 1 is the filename; it is copied to the log only if it names a regular file
+if test -n "$1" && test -f "$1"; then
+    cat "$1" >&5
+fi])dnl
+
+dnl #######################################################################
+dnl
#######################################################################
+dnl #######################################################################
+
+AC_DEFUN([PMIX_LOG_COMMAND],[
+# 1 is the command; it is run with stdout and stderr sent to the log
+# 2 is actions to do if success (exit status 0)
+# 3 is actions to do if fail (any other exit status)
+echo "configure:__oline__: $1" >&5
+$1 1>&5 2>&1
+pmix_status=$?
+PMIX_LOG_MSG([\$? = $pmix_status], 1)
+if test "$pmix_status" = "0"; then
+    unset pmix_status
+    $2
+else
+    unset pmix_status
+    $3
+fi])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+AC_DEFUN([PMIX_UNIQ],[
+# 1 is the variable name to be uniq-ized
+pmix_name=$1
+
+# Go through each item in the variable and only keep the unique ones
+
+pmix_count=0
+for val in ${$1}; do
+    pmix_done=0
+    pmix_i=1
+    pmix_found=0
+
+    # Loop over every token we've seen so far
+
+    pmix_done="`expr $pmix_i \> $pmix_count`"
+    while test "$pmix_found" = "0" && test "$pmix_done" = "0"; do
+
+        # Have we seen this token already?  Prefix the comparison with
+        # "x" so that "-Lfoo" values won't cause an error.
+
+        pmix_eval="expr x$val = x\$pmix_array_$pmix_i"
+        pmix_found=`eval $pmix_eval`
+
+        # Check the ending condition
+
+        pmix_done="`expr $pmix_i \>= $pmix_count`"
+
+        # Increment the counter
+
+        pmix_i="`expr $pmix_i + 1`"
+    done
+
+    # Check for special cases where we do want to allow repeated
+    # arguments (per
+    # http://www.open-mpi.org/community/lists/devel/2012/08/11362.php).
+
+    case $val in
+    -Xclang)
+        pmix_found=0
+        pmix_i=`expr $pmix_count + 1`
+        ;;
+    esac
+
+    # If we didn't find the token, add it to the "array"
+
+    if test "$pmix_found" = "0"; then
+        pmix_eval="pmix_array_$pmix_i=$val"
+        eval $pmix_eval
+        pmix_count="`expr $pmix_count + 1`"
+    else
+        pmix_i="`expr $pmix_i - 1`"
+    fi
+done
+
+# Take all the items in the "array" and assemble them back into a
+# single variable
+
+pmix_i=1
+pmix_done="`expr $pmix_i \> $pmix_count`"
+pmix_newval=
+while test "$pmix_done" = "0"; do
+    pmix_eval="pmix_newval=\"$pmix_newval \$pmix_array_$pmix_i\""
+    eval $pmix_eval
+
+    pmix_eval="unset pmix_array_$pmix_i"
+    eval $pmix_eval
+
+    pmix_done="`expr $pmix_i \>= $pmix_count`"
+    pmix_i="`expr $pmix_i + 1`"
+done
+
+# Done; do the assignment
+
+pmix_newval="`echo $pmix_newval`"
+pmix_eval="$pmix_name=\"$pmix_newval\""
+eval $pmix_eval
+
+# Clean up every scratch variable, pmix_found included
+
+unset pmix_name pmix_i pmix_done pmix_newval pmix_eval pmix_count pmix_found])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# PMIX_APPEND_UNIQ(variable, new_argument)
+# ----------------------------------------
+# Append new_argument to variable if not already in variable. This assumes a
+# space separated list.
+#
+# This could probably be made more efficient :(.
+AC_DEFUN([PMIX_APPEND_UNIQ], [
+for arg in $2; do
+    pmix_found=0;
+    for val in ${$1}; do
+        if test "x$val" = "x$arg" ; then
+            pmix_found=1
+            break
+        fi
+    done
+    if test "$pmix_found" = "0" ; then
+        if test -z "$$1"; then
+            $1="$arg"
+        else
+            $1="$$1 $arg"
+        fi
+    fi
+done
+unset pmix_found
+])
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Macro that serves as an alternative to using `which <prog>`. It is
+# preferable to simply using `which <prog>` because backticks (`) (aka
+# backquotes) invoke a sub-shell which may source a "noisy"
+# ~/.whatever file (and we do not want the error messages to be part
+# of the assignment in foo=`which <prog>`). This macro ensures that we
+# get a sane executable value.
+AC_DEFUN([PMIX_WHICH],[
+# 1 is the name of the program to look for in each directory of PATH
+# 2 is the variable name to assign the return value to
+
+PMIX_VAR_SCOPE_PUSH([pmix_prog pmix_file pmix_dir pmix_sentinel])
+
+pmix_prog=$1
+
+IFS_SAVE=$IFS
+IFS="$PATH_SEPARATOR"
+for pmix_dir in $PATH; do
+    if test -x "$pmix_dir/$pmix_prog"; then
+        $2="$pmix_dir/$pmix_prog"
+        break
+    fi
+done
+IFS=$IFS_SAVE
+
+PMIX_VAR_SCOPE_POP
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Declare some variables; use PMIX_VAR_SCOPE_POP to ensure that they
+# are cleaned up / undefined.
+AC_DEFUN([PMIX_VAR_SCOPE_PUSH],[
+
+    # Is the private index set?  If not, set it.
+    if test "x$pmix_scope_index" = "x"; then
+        pmix_scope_index=1
+    fi
+
+    # First, check to see if any of these variables are already set.
+    # This is a simple sanity check to ensure we're not already
+    # overwriting pre-existing variables (that have a non-empty
+    # value).  It's not a perfect check, but at least it's something.
+    for pmix_var in $1; do
+        pmix_str="pmix_str=\"\$$pmix_var\""
+        eval $pmix_str
+
+        if test "x$pmix_str" != "x"; then
+            AC_MSG_WARN([Found configure shell variable clash!])
+            AC_MSG_WARN([[PMIX_VAR_SCOPE_PUSH] called on "$pmix_var",])
+            AC_MSG_WARN([but it is already defined with value "$pmix_str"])
+            AC_MSG_WARN([This usually indicates an error in configure.])
+            AC_MSG_ERROR([Cannot continue])
+        fi
+    done
+
+    # Ok, we passed the simple sanity check.  Save all these names so
+    # that we can unset them at the end of the scope.
+    pmix_str="pmix_scope_$pmix_scope_index=\"$1\""
+    eval $pmix_str
+    unset pmix_str
+
+    # Advance the index so that a nested push gets its own slot.
+    pmix_scope_index=`expr $pmix_scope_index + 1`
+])dnl
+
+# Unset the variables that the matching PMIX_VAR_SCOPE_PUSH recorded
+AC_DEFUN([PMIX_VAR_SCOPE_POP],[
+    # Unwind the index
+    pmix_scope_index=`expr $pmix_scope_index - 1`
+    pmix_scope_test=`expr $pmix_scope_index \> 0`
+    if test "$pmix_scope_test" = "0"; then
+        AC_MSG_WARN([[PMIX_VAR_SCOPE_POP] popped too many PMIX configure scopes.])
+        AC_MSG_WARN([This usually indicates an error in configure.])
+        AC_MSG_ERROR([Cannot continue])
+    fi
+
+    # Get the variable names from that index
+    pmix_str="pmix_str=\"\$pmix_scope_$pmix_scope_index\""
+    eval $pmix_str
+
+    # Iterate over all the variables and unset them all
+    for pmix_var in $pmix_str; do
+        unset $pmix_var
+    done
+])dnl
+
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+#
+# PMIX_WITH_OPTION_MIN_MAX_VALUE(NAME,DEFAULT_VALUE,LOWER_BOUND,UPPER_BOUND)
+# Defines a variable PMIX_MAX_xxx, with "xxx" being specified as parameter $1 as "variable_name".
+# If not set at configure-time using --with-max-xxx, the default-value ($2) is assumed.
+# If set, value is checked against lower (value >= $3) and upper bound (value <= $4)
+#
+AC_DEFUN([PMIX_WITH_OPTION_MIN_MAX_VALUE], [
+    max_value=[$2]
+    AC_MSG_CHECKING([maximum length of ]m4_translit($1, [_], [ ]))
+    AC_ARG_WITH([max-]m4_translit($1, [_], [-]),
+        AC_HELP_STRING([--with-max-]m4_translit($1, [_], [-])[=VALUE],
+            [maximum length of ]m4_translit($1, [_], [ ])[s. VALUE argument has to be specified (default: [$2]).]))
+    if test ! -z "$with_max_[$1]" && test "$with_max_[$1]" != "no" ; then
+        # Ensure it's a number (expr fails on non-numeric input), then range-check it
+        expr $with_max_[$1] + 1 > /dev/null 2> /dev/null
+        AS_IF([test "$?" != "0"], [happy=0],
+              [AS_IF([test $with_max_[$1] -ge $3 && test $with_max_[$1] -le $4],
+                     [happy=1], [happy=0])])
+
+        # If badness in the above tests, bail
+        AS_IF([test "$happy" = "0"],
+              [AC_MSG_RESULT([bad value ($with_max_[$1])])
+               AC_MSG_WARN([--with-max-]m4_translit($1, [_], [-])[s value must be >= $3 and <= $4])
+               AC_MSG_ERROR([Cannot continue])])
+        max_value=$with_max_[$1]
+    fi
+    AC_MSG_RESULT([$max_value])
+    AC_DEFINE_UNQUOTED([PMIX_MAX_]m4_toupper($1), $max_value,
+                       [Maximum length of ]m4_translit($1, [_], [ ])[s (default is $2)])
+    [PMIX_MAX_]m4_toupper($1)=$max_value
+    AC_SUBST([PMIX_MAX_]m4_toupper($1))
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Usage: PMIX_COMPUTE_MAX_VALUE(number_bytes, variable_to_set, action if overflow)
+# Compute maximum value of datatype of
+# number_bytes, setting the result in the second argument.  Assumes a
+# signed datatype.
+AC_DEFUN([PMIX_COMPUTE_MAX_VALUE], [
+    # This is more complicated than it really should be.  But some
+    # expr implementations (OpenBSD) have an expr with a max value of
+    # 2^31 - 1, and we sometimes want to compute the max value of a
+    # type as big or bigger than that...
+    pmix_num_bits=`expr $1 \* 8 - 1`
+    newval=1
+    value=1
+    overflow=0
+
+    while test $pmix_num_bits -ne 0 ; do
+        newval=`expr $value \* 2`
+        if test 0 -eq `expr $newval \< 0` ; then
+            # if the new value is not negative, next iteration...
+            value=$newval
+            pmix_num_bits=`expr $pmix_num_bits - 1`
+            # if this was the last iteration, subtract 1 (as signed
+            # max positive is 2^num_bits - 1).  Do this here instead
+            # of outside of the while loop because we might have
+            # already subtracted 1 by then if we're trying to find the
+            # max value of the same datatype expr uses as its
+            # internal representation (ie, if we hit the else
+            # below...)
+            if test 0 -eq $pmix_num_bits ; then
+                value=`expr $value - 1`
+            fi
+        else
+            # if the new value is negative, we've overflowed.  First,
+            # try adding value - 1 instead of value (see if we can get
+            # to positive max of expr)
+            newval=`expr $value - 1 + $value`
+            if test 0 -eq `expr $newval \< 0` ; then
+                value=$newval
+                # Still positive, this is as high as we can go.  If
+                # pmix_num_bits is 1, we didn't actually overflow.
+                # Otherwise, we overflowed.
+                if test 1 -ne $pmix_num_bits ; then
+                    overflow=1
+                fi
+            else
+                # still negative.  Time to give up.
+                overflow=1
+            fi
+            pmix_num_bits=0
+        fi
+    done
+
+    AS_VAR_SET([$2], [$value])
+    AS_IF([test $overflow -ne 0], [$3])
+])dnl
diff --git a/opal/mca/pmix/pmix120/pmix/config/pmix_get_version.sh b/opal/mca/pmix/pmix120/pmix/config/pmix_get_version.sh
new file mode 100755
index 00000000000..6106af60c38
--- /dev/null
+++ b/opal/mca/pmix/pmix120/pmix/config/pmix_get_version.sh
@@ -0,0 +1,161 @@
+#!/bin/sh
+#
+# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.
All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + + +# PMIX_GET_VERSION(version_file, variable_prefix) +# ----------------------------------------------- +# parse version_file for version information, setting +# the following shell variables: +# +# prefix_VERSION +# prefix_BASE_VERSION +# prefix_MAJOR_VERSION +# prefix_MINOR_VERSION +# prefix_RELEASE_VERSION +# prefix_GREEK_VERSION +# prefix_REPO_REV +# prefix_TARBALL_VERSION +# prefix_RELEASE_DATE + + + +srcfile="$1" +option="$2" + +if test -z "$srcfile"; then + option="--help" +else + + if test -f "$srcfile"; then + srcdir=`dirname $srcfile` + pmix_vers=`sed -n " + t clear + : clear + s/^major/PMIX_MAJOR_VERSION/ + s/^minor/PMIX_MINOR_VERSION/ + s/^release/PMIX_RELEASE_VERSION/ + s/^greek/PMIX_GREEK_VERSION/ + s/^repo_rev/PMIX_REPO_REV/ + s/^tarball_version/PMIX_TARBALL_VERSION/ + s/^date/PMIX_RELEASE_DATE/ + t print + b + : print + p" < "$srcfile"` + eval "$pmix_vers" + + PMIX_VERSION="$PMIX_MAJOR_VERSION.$PMIX_MINOR_VERSION.$PMIX_RELEASE_VERSION" + PMIX_VERSION="${PMIX_VERSION}${PMIX_GREEK_VERSION}" + + if test "$PMIX_TARBALL_VERSION" = ""; then + PMIX_TARBALL_VERSION=$PMIX_VERSION + fi + + # If repo_rev was not set in the VERSION file, then get it now + if test "$PMIX_REPO_REV" = ""; then + # See if we can find the "git" command. + git_happy=0 + git --version > /dev/null 2>&1 + if test $? 
-eq 0; then + git_happy=1 + fi + + # If we're in a git repo and we found the git command, use + # git describe to get the repo rev + if test -d "$srcdir/.git" && test $git_happy -eq 1; then + if test "$srcdir" != "`pwd`"; then + git_save_dir=`pwd` + cd $srcdir + PMIX_REPO_REV=`git describe --tags --always` + cd $git_save_dir + unset git_save_dir + else + PMIX_REPO_REV=`git describe --tags --always` + fi + else + PMIX_REPO_REV="date`date '+%Y-%m-%d'`" + fi + fi + + + fi + + + if test "$option" = ""; then + option="--full" + fi +fi + +case "$option" in + --full|-v|--version) + echo $PMIX_VERSION + ;; + --major) + echo $PMIX_MAJOR_VERSION + ;; + --minor) + echo $PMIX_MINOR_VERSION + ;; + --release) + echo $PMIX_RELEASE_VERSION + ;; + --greek) + echo $PMIX_GREEK_VERSION + ;; + --repo-rev) + echo $PMIX_REPO_REV + ;; + --tarball) + echo $PMIX_TARBALL_VERSION + ;; + --release-date) + echo $PMIX_RELEASE_DATE + ;; + --all) + echo ${PMIX_VERSION} : ${PMIX_MAJOR_VERSION} : ${PMIX_MINOR_VERSION} : ${PMIX_RELEASE_VERSION} : ${PMIX_GREEK_VERSION} : ${PMIX_REPO_REV} : ${PMIX_TARBALL_VERSION} + ;; + -h|--help) + cat <