Skip to content

Commit 719fcc5

Browse files
authored
Merge pull request #1262 from martin-frbg/xmv_thread-splitting
Make sure that range limit of last thread never exceeds data size
2 parents 1296c89 + 0ba64ce commit 719fcc5

File tree

7 files changed

+18
-2
lines changed

7 files changed

+18
-2
lines changed

driver/level2/gbmv_thread.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -230,8 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
230230

231231
#ifndef TRANSA
232232
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
233+
if (range_m[num_cpu] > m) range_m[num_cpu] = m;
233234
#else
234235
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
236+
if (range_m[num_cpu] > n) range_m[num_cpu] = n;
235237
#endif
236238

237239
queue[num_cpu].mode = mode;

driver/level2/sbmv_thread.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
246246

247247
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
248248
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
249+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
249250

250251
queue[num_cpu].mode = mode;
251252
queue[num_cpu].routine = sbmv_kernel;
@@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
285286

286287
range_m[num_cpu + 1] = range_m[num_cpu] + width;
287288
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
289+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
288290

289291
queue[num_cpu].mode = mode;
290292
queue[num_cpu].routine = sbmv_kernel;
@@ -316,6 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
316318
range_m[num_cpu + 1] = range_m[num_cpu] + width;
317319

318320
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
321+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
319322

320323
queue[num_cpu].mode = mode;
321324
queue[num_cpu].routine = sbmv_kernel;

driver/level2/spmv_thread.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
246246

247247
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
248248
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
249+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
249250

250251
queue[num_cpu].mode = mode;
251252
queue[num_cpu].routine = spmv_kernel;
@@ -285,6 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
285286

286287
range_m[num_cpu + 1] = range_m[num_cpu] + width;
287288
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
289+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
288290

289291
queue[num_cpu].mode = mode;
290292
queue[num_cpu].routine = spmv_kernel;

driver/level2/symv_thread.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,8 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
177177

178178
range_m[num_cpu + 1] = range_m[num_cpu] + width;
179179
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
180-
180+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
181+
181182
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
182183
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
183184
queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args;
@@ -225,6 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
225226

226227
range_m[num_cpu + 1] = range_m[num_cpu] + width;
227228
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
229+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
228230

229231
queue[num_cpu].mode = mode;
230232
queue[num_cpu].routine = symv_kernel;

driver/level2/tbmv_thread.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
288288

289289
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
290290
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
291+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
291292

292293
queue[num_cpu].mode = mode;
293294
queue[num_cpu].routine = trmv_kernel;
@@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
327328

328329
range_m[num_cpu + 1] = range_m[num_cpu] + width;
329330
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
331+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
330332

331333
queue[num_cpu].mode = mode;
332334
queue[num_cpu].routine = trmv_kernel;
@@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
356358

357359
range_m[num_cpu + 1] = range_m[num_cpu] + width;
358360
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
361+
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
359362

360363
queue[num_cpu].mode = mode;
361364
queue[num_cpu].routine = trmv_kernel;

driver/level2/tpmv_thread.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,8 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
307307

308308
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
309309
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
310-
310+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
311+
311312
queue[num_cpu].mode = mode;
312313
queue[num_cpu].routine = tpmv_kernel;
313314
queue[num_cpu].args = &args;
@@ -346,6 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
346347

347348
range_m[num_cpu + 1] = range_m[num_cpu] + width;
348349
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
350+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
349351

350352
queue[num_cpu].mode = mode;
351353
queue[num_cpu].routine = tpmv_kernel;

driver/level2/trmv_thread.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
346346

347347
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
348348
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
349+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
349350

350351
queue[num_cpu].mode = mode;
351352
queue[num_cpu].routine = trmv_kernel;
@@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
385386

386387
range_m[num_cpu + 1] = range_m[num_cpu] + width;
387388
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
389+
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
388390

389391
queue[num_cpu].mode = mode;
390392
queue[num_cpu].routine = trmv_kernel;

0 commit comments

Comments
 (0)