-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd_numbers.cu
More file actions
71 lines (57 loc) · 1.33 KB
/
add_numbers.cu
File metadata and controls
71 lines (57 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <stdio.h>
/*
 * Copies one element of a row-major 2D int matrix from d_in to d_out.
 *
 * Launch layout: a 2D grid of 2D blocks where the x dimension spans the
 * columns and the y dimension spans the rows, one thread per element.
 * The kernel receives no size arguments, so the row width is taken to be the
 * total number of threads launched along x (gridDim.x * blockDim.x); the
 * launch must therefore cover the matrix exactly, otherwise threads would
 * index outside the buffers.
 *
 * d_out: device buffer receiving the copy.
 * d_in:  device buffer holding the source matrix.
 */
__global__ void add_2d_numbers(int *d_out,int *d_in)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Row-major linearisation: each (row, col) pair maps to a unique element.
    // The previous row * col + row made several threads collide on the same
    // element and left other elements of d_out unwritten.
    int width = gridDim.x * blockDim.x;
    int index = row * width + col;

    // Debug marker printed by the thread that handles linear element 8.
    if(index == 8){
        printf("Checkpoint!\n");
    }

    d_out[index] = d_in[index];
}
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cuda_call_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host-side demo: fills a 5x5 matrix with i + j, prints it, copies it to the
 * device, runs add_2d_numbers with one thread per element, copies the result
 * back and prints it.
 *
 * Every CUDA runtime call is checked. On the first failure a message is
 * written to stderr, the result matrix is not printed, and the device buffers
 * are still released.
 */
void call_2d_parallel_computing(void)
{
    const int N_ROWS = 5;
    const int N_COLS = 5;
    const int BYTES_SIZE = N_ROWS * N_COLS * sizeof(int);

    // Define Host matrix
    int h_2d_in[N_ROWS][N_COLS];
    int h_2d_out[N_ROWS][N_COLS];
    for(int i = 0; i < N_ROWS;i++)
    {
        for(int j = 0; j < N_COLS;j++)
        {
            h_2d_in[i][j] = i + j;
            printf("%d ",i + j);
        }
        printf("\n");
    }
    printf("\n");

    // Define device matrix; start as NULL so the cleanup below is safe on
    // every path (cudaFree on a null pointer performs no operation).
    int * d_2d_in = NULL;
    int * d_2d_out = NULL;

    bool ok = cuda_call_ok(cudaMalloc((void **) &d_2d_in,BYTES_SIZE), "cudaMalloc(d_2d_in)")
           && cuda_call_ok(cudaMalloc((void **) &d_2d_out,BYTES_SIZE), "cudaMalloc(d_2d_out)")
           && cuda_call_ok(cudaMemcpy(d_2d_in,h_2d_in,BYTES_SIZE,cudaMemcpyHostToDevice),
                           "cudaMemcpy host-to-device");

    if (ok)
    {
        // dim3 is (x, y): x indexes columns and y indexes rows in the kernel.
        dim3 dimBlock(N_COLS,N_ROWS);
        dim3 dimGrid(1,1);
        add_2d_numbers<<<dimGrid,dimBlock>>>(d_2d_out,d_2d_in);

        // A launch does not return a status; configuration errors are read
        // with cudaGetLastError, and the blocking copy back surfaces errors
        // raised while the kernel was executing.
        ok = cuda_call_ok(cudaGetLastError(), "add_2d_numbers launch")
          && cuda_call_ok(cudaMemcpy(h_2d_out,d_2d_out,BYTES_SIZE,cudaMemcpyDeviceToHost),
                          "cudaMemcpy device-to-host");
    }

    if (ok)
    {
        printf("Result : \n" );
        for(int i = 0 ; i < N_ROWS;i++)
        {
            for(int j = 0 ; j < N_COLS;j++)
            {
                printf("%d ",h_2d_out[i][j]);
            }
            printf("\n");
        }
        printf("\n");
    }

    cuda_call_ok(cudaFree(d_2d_in), "cudaFree(d_2d_in)");
    cuda_call_ok(cudaFree(d_2d_out), "cudaFree(d_2d_out)");
}
/* Program entry point: runs the 2D copy demo and reports success.
 * Command-line arguments are accepted but not used. */
int main(int argc,char ** argv)
{
    (void)argc;
    (void)argv;

    call_2d_parallel_computing();

    return 0;
}