Open
Description
There are multiple issues with bcast algorithms when the tasks involved specify different counts and datatypes for a given bcast operation.
One low hanging fix is :
--- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
+++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
@@ -252,7 +252,7 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
/* Handle messages of small and intermediate size, and
single-element broadcasts */
- if ((message_size < small_message_size) || (count <= 1)) {
+ if (message_size < small_message_size) {
/* Binomial without segmentation */
segsize = 0;
return ompi_coll_base_bcast_intra_binomial(buff, count, datatype,
I'm seeing issues with both the pipelined and split binary tree algorithms. This is probably a known issue with pipelined algorithms in tuned module. We should probably start discussing the right approach to solve this.
Here's a reproducer:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define MAXLEN 10000000
int
main(int argc, char **argv)
{
int root,*out,i,j,k;
int myself,tasks;
MPI_Datatype type;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&myself);
MPI_Comm_size(MPI_COMM_WORLD,&tasks);
MPI_Type_contiguous(10, MPI_INT, &type);
MPI_Type_commit(&type);
/* Add this section because some os/architectures can't allocate
this much memory on the stack */
out = malloc(sizeof(int) * MAXLEN);
if (out == NULL) {
printf( "Doh! Rank %d was not able to allocate enough memory. MPI test aborted!\n", myself);
exit(1);
}
root = tasks-1;
for (j=1000000;j<=MAXLEN;j*=10) {
if (myself == root)
for (i=0;i<j;i++)
out[i] = i;
fprintf(stderr,"bcast %d integers\n",j);
if(myself == root) {
MPI_Bcast(out,j,MPI_INT,root,MPI_COMM_WORLD);
} else {
MPI_Bcast(out,j/10,type,root,MPI_COMM_WORLD);
}
for (k=0;k<j;k++) {
if (out[k] != k) {
printf("bad answer (%d) at index %d of %d (should be %d)\n",out[k],k,j,k);
exit(1);
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Rank %d exiting successfully\n", myself);
MPI_Type_free(&type);
MPI_Finalize();
/* Clean up, 'cause it's the Right Thing to do */
free(out);
return 0;
}