@@ -32,7 +32,7 @@ __global__ void KeRowConv(real* y, const real* x, const real* w,
3232 for (int i = tidy; i < context; i += blky) {
3333 sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0 ;
3434 }
35-
35+
3636 __syncthreads ();
3737
3838 for (int i = 0 ; i < numSeq; ++i) {
@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
144144 int yoff = start + j;
145145
146146 // transpose
147- sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0 ;
148- sh_dy[tidx][tidy + context - 1 ] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0 ;
147+ sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
148+ x[yoff * width + xoff] : 0.0 ;
149+ sh_dy[tidx][tidy + context - 1 ] = (xoff < width && yoff < end) ?
150+ dy[yoff * width + xoff] : 0.0 ;
149151 __syncthreads ();
150152 if (tidy < (context - 1 )) {
151153 yoff = yoff - context + 1 ;
152- sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0 ;
154+ sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ?
155+ dy[yoff * width + xoff] : 0.0 ;
153156 }
154157 __syncthreads ();
155158
@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
199202 int yoff = start + j;
200203
201204 // transpose
202- sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0 ;
205+ sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
206+ x[yoff * width + xoff] : 0.0 ;
203207 __syncthreads ();
204208
205209 for (int t = 0 ; t < context; t++) {
206- sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0 ;
210+ sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start &&
211+ yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0 ;
207212 __syncthreads ();
208213
209214 real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
@@ -239,7 +244,7 @@ __global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
239244 for (int i = tidy; i < context; i += blky) {
240245 sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0 ;
241246 }
242-
247+
243248 __syncthreads ();
244249
245250 for (int i = 0 ; i < numSeq; ++i) {
@@ -312,7 +317,7 @@ void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG,
312317 dim3 dimBlock (32 , 32 );
313318 dim3 dimGrid (DIVUP (width, dimBlock.x ), 1 );
314319 real* dw = filterG.getData ();
315- if (contextLength <= 32 ) {
320+ if (contextLength <= 32 ) {
316321 KeRowConvBwWeight<32 , 32 , 32 >
317322 <<<dimGrid, dimBlock, 0 , STREAM_DEFAULT>>>
318323 (dw, x, dy, starts, height, width, numSeq, contextLength);
0 commit comments