Skip to content

Commit f8950f4

Browse files
authored
Merge pull request #2836 from austinpagan/gordon_trsm
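Fixing a performance bug in trsm_[LR].c. (With the old `>` test, a remaining width exactly equal to GEMM_UNROLL_N*3 fell through to the `else` branch and was clipped to GEMM_UNROLL_N; `>=` lets that case take the full GEMM_UNROLL_N*3 step.)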
2 parents 91c84e1 + 274d6e0 commit f8950f4

File tree

2 files changed: 6 additions (+6) and 6 deletions (-6).

driver/level3/trsm_L.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
131 131
132 132   for(jjs = js; jjs < js + min_j; jjs += min_jj){
133 133   min_jj = min_j + js - jjs;
134     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    134 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
135 135   else
136 136   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
137 137

@@ -197,7 +197,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
197 197
198 198   for(jjs = js; jjs < js + min_j; jjs += min_jj){
199 199   min_jj = min_j + js - jjs;
200     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    200 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
201 201   else
202 202   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
203 203

driver/level3/trsm_R.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
126 126
127 127   for(jjs = js; jjs < js + min_j; jjs += min_jj){
128 128   min_jj = min_j + js - jjs;
129     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    129 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
130 130   else
131 131   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
132 132

@@ -182,7 +182,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
182 182
183 183   for(jjs = 0; jjs < min_j - min_l - ls + js; jjs += min_jj){
184 184   min_jj = min_j - min_l - ls + js - jjs;
185     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    185 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
186 186   else
187 187   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
188 188

@@ -243,7 +243,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
243 243
244 244   for(jjs = js; jjs < js + min_j; jjs += min_jj){
245 245   min_jj = min_j + js - jjs;
246     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    246 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
247 247   else
248 248   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
249 249

@@ -304,7 +304,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
304 304
305 305   for(jjs = 0; jjs < min_j - js + ls; jjs += min_jj){
306 306   min_jj = min_j - js + ls - jjs;
307     - if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
    307 + if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
308 308   else
309 309   if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
310 310

0 commit comments

Comments
 (0)