Skip to content

Commit 432c25d

Browse files
author
Rohan Yadav
committed
cuda: fix windowing test with cuda
Fixes #422. This commit ensures that the allocation clearing logic is applied to the CUDA backend as well. The windowing test caught this because TACO was automatically parallelizing the loop onto the GPU.
1 parent d61cc2a commit 432c25d

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

src/codegen/codegen_cuda.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1096,6 +1096,20 @@ void CodeGen_CUDA::visit(const Allocate* op) {
10961096
op->num_elements.accept(this);
10971097
parentPrecedence = TOP;
10981098
stream << "));" << endl;
1099+
// If the operation wants the input cleared, then memset it to zero.
1100+
if (op->clear) {
1101+
doIndent();
1102+
stream << "gpuErrchk(cudaMemset(";
1103+
op->var.accept(this);
1104+
stream << variable_name;
1105+
stream << ", 0, ";
1106+
stream << "sizeof(" << elementType << ")";
1107+
stream << " * ";
1108+
parentPrecedence = MUL;
1109+
op->num_elements.accept(this);
1110+
parentPrecedence = TOP;
1111+
stream << "));" << endl;
1112+
}
10991113

11001114
if(op->is_realloc) {
11011115
doIndent();

0 commit comments

Comments
 (0)