diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 119cb0b08f7..27ecbb6c97d 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -29,6 +29,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 02c42dae831..30f42fd8b12 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -31,6 +31,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" @@ -42,7 +43,7 @@ /* * We want to minimize the amount of temporary memory needed while allowing as many ranks * to exchange data simultaneously. We use a variation of the ring algorithm, where in a - * single step a process echange the data with both neighbors at distance k (on the left + * single step a process exchange the data with both neighbors at distance k (on the left * and the right on a logical ring topology). With this approach we need to pack the data * for a single of the two neighbors, as we can then use the original buffer (and datatype * and count) to send the data to the other. @@ -57,16 +58,22 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts ptrdiff_t extent; ompi_request_t *req = MPI_REQUEST_NULL; char *tmp_buffer; - size_t packed_size = 0, max_size; + size_t packed_size = 0, max_size, type_size; opal_convertor_t convertor; /* Initialize. */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); + ompi_datatype_type_size(rdtype, &type_size); - ompi_datatype_type_size(rdtype, &max_size); - max_size *= rcounts[rank]; + for (i = 0, max_size = 0 ; i < size ; ++i) { + if (i == rank) { + continue; + } + packed_size = rcounts[i] * type_size; + max_size = packed_size > max_size ? packed_size : max_size; + } /* Easy way out */ if ((1 == size) || (0 == max_size) ) {