@@ -2,50 +2,45 @@ export CuVec
22module CuVec
33using .. Mocha
44
# Number of CUDA threads per block used for the element-wise vector kernels.
const THREADS_PER_BLOCK = 128

# Compute the one-dimensional launch geometry for a kernel over `len` elements.
#
# Returns the tuple `(num_blocks, THREADS_PER_BLOCK)`, where `num_blocks` is
# `len / THREADS_PER_BLOCK` rounded up, so that
# `num_blocks * THREADS_PER_BLOCK >= len`. A `len` of 0 yields 0 blocks.
function cuda_geometry(len::Int)
    # Integer ceiling division; exact for every Int and avoids a round-trip
    # through Float64.
    x_block = fld(len + THREADS_PER_BLOCK - 1, THREADS_PER_BLOCK)
    return (x_block, THREADS_PER_BLOCK)
end
1210
# Generate the raw-pointer wrappers around the element-wise CUDA kernels, once
# per element type. `ctype` is the suffix used in the kernel name and `dtype`
# is the Julia element type the generated methods dispatch on.
for (ctype, dtype) in [(:float, Float32), (:double, Float64)]
    # define add!, sub!, mul!, div!, div2!
    # Each method looks up kernel `elem_<name>_<ctype>` on `backend.mocha` and
    # launches it with the arguments (X, Y, len); X and Y are both converted to
    # untyped pointers first.
    for name in [:add, :sub, :mul, :div, :div2]
        @eval begin
            function $(symbol("$(name)!"))(backend::GPUBackend, ::Type{$dtype},
                                           X, Y, len::Int)
                X = convert(Ptr{Void}, X)
                Y = convert(Ptr{Void}, Y)
                cuda_dim = cuda_geometry(len)
                kernel = backend.mocha.$(symbol("elem_$(name)_$ctype"))
                CUDA.launch(kernel, cuda_dim..., (X, Y, len))
            end
        end
    end

    # define add_scal!, mul_scal!
    # Same shape as above, except that Y is a scalar converted to `dtype`
    # rather than a pointer, and the kernel is named `<name>_<ctype>`.
    for name in [:add_scal, :mul_scal]
        @eval begin
            function $(symbol("$(name)!"))(backend::GPUBackend, ::Type{$dtype},
                                           X, Y, len::Int)
                X = convert(Ptr{Void}, X)
                Y = convert($dtype, Y)
                cuda_dim = cuda_geometry(len)
                kernel = backend.mocha.$(symbol("$(name)_$ctype"))
                CUDA.launch(kernel, cuda_dim..., (X, Y, len))
            end
        end
    end
end
# Blob-level add!, sub!, mul!, div!, div2!: forward the device pointers of X
# and Y, together with the element count of X, to the raw-pointer method of
# the same name for element type T.
for name in [:add, :sub, :mul, :div, :div2]
    @eval begin
        function $(symbol("$(name)!")){T}(backend::GPUBackend, X::CuTensorBlob{T},
                                          Y::CuTensorBlob{T})
            # NOTE(review): only X's length is used; Y's length is not checked
            # here. Confirm callers guarantee Y holds at least length(X) elements.
            len = length(X)
            $(symbol("$(name)!"))(backend, T, X.ptr.p, Y.ptr.p, len)
        end
    end
end
# Blob-level add_scal!: convert the scalar Y to the blob's element type T and
# forward X's device pointer and element count to the raw-pointer add_scal!.
function add_scal!{T}(backend::GPUBackend, X::CuTensorBlob{T}, Y)
    Y = convert(T, Y)
    len = length(X)
    add_scal!(backend, T, X.ptr.p, Y, len)
end
# Blob-level mul_scal!: convert the scalar Y to the blob's element type T and
# forward X's device pointer and element count to the raw-pointer mul_scal!.
function mul_scal!{T}(backend::GPUBackend, X::CuTensorBlob{T}, Y)
    Y = convert(T, Y)
    len = length(X)
    mul_scal!(backend, T, X.ptr.p, Y, len)
end
7567
# Generate the raw-pointer pow! methods. Each table entry pairs a kernel-name
# suffix with the element type `dt1` the method dispatches on and the exponent
# type `dt2` accepted for Y; the kernel launched is `elem_pow_<postfix>`.
for (postfix, dt1, dt2) in [(:fi, Float32, Int), (:di, Float64, Int),
                            (:ff, Float32, Float32), (:dd, Float64, Float64)]
    @eval begin
        function pow!(backend::GPUBackend, ::Type{$dt1}, X, Y::$dt2, len::Int)
            X = convert(Ptr{Void}, X)
            cuda_dim = cuda_geometry(len)
            kernel = backend.mocha.$(symbol("elem_pow_$postfix"))
            # Y is passed to the kernel as-is (no conversion beyond the
            # dispatch constraint above).
            CUDA.launch(kernel, cuda_dim..., (X, Y, len))
        end
    end
end
0 commit comments