diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index c0cb947dc2..4fce9744ff 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -230,10 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT #ifndef TRANSA range_m[num_cpu] = num_cpu * ((m + 15) & ~15); - if (range_m[num_cpu] > m) range_m[num_cpu] = m; + if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu; #else range_m[num_cpu] = num_cpu * ((n + 15) & ~15); - if (range_m[num_cpu] > n) range_m[num_cpu] = n; + if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu; #endif queue[num_cpu].mode = mode; diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index e59451c5a3..50efa350ac 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -246,7 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -286,7 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -318,7 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * ((n + 15) & ~15); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 0b40874301..e52b08d0ef 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -246,7 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; @@ -286,7 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; diff --git a/driver/level2/symv_thread.c b/driver/level2/symv_thread.c index 8d4cd249ca..ab783de2b6 100644 --- a/driver/level2/symv_thread.c +++ b/driver/level2/symv_thread.c @@ -177,7 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode; queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel; @@ -226,7 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = symv_kernel; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 35e5ff1cd3..67109b53f2 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -288,7 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -328,7 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -358,7 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index b7eb105715..a077591a57 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -303,7 +303,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel; @@ -343,7 +343,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel;