@@ -23,7 +23,7 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
23
23
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
24
24
// CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]]
25
25
// CHECK: %[[fatBuf:.*]] = llvm.addrspacecast %[[rsrc]] : !llvm.ptr<8> to !llvm.ptr<7>
26
- // CHECK: %[[ret0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<7>, ptr<7>, i64, array<1 x i64>, array<1 x i64>)>
26
+ // CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64, array<1 x i64>, array<1 x i64>)>
27
27
// CHECK: %[[ret1:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret0]][0]
28
28
// CHECK: %[[ret2:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret1]][1]
29
29
// CHECK: %[[ret3:.*]] = llvm.insertvalue %[[offset]], %[[ret2]][2]
@@ -34,6 +34,26 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
34
34
return %ret : memref <8 xi32 , #amdgpu.address_space <fat_raw_buffer >>
35
35
}
36
36
37
+ // CHECK-LABEL: func @fat_raw_buffer_cast_0d
+ // Rank-0 case: casts a memref<i32> in the global address space to a fat raw
+ // buffer. The directives below expect a memref descriptor with only the two
+ // pointers and the offset (no size/stride arrays), a numRecords constant of 4
+ // (presumably the byte size of the single i32 element - confirm against the
+ // lowering), and a stride argument of 0.
38
+ func.func @fat_raw_buffer_cast_0d(%buf: memref<i32, #gpu_global_addrspace>) -> memref<i32, #amdgpu.address_space<fat_raw_buffer>> {
39
+ // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<i32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64)>
40
+ // CHECK-DAG: %[[base:.*]] = llvm.extractvalue %[[desc]][1]
41
+ // CHECK-DAG: %[[offset:.*]] = llvm.extractvalue %[[desc]][2]
42
+ // CHECK-DAG: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32) : i32
43
+ // CHECK-DAG: %[[strideArg:.*]] = llvm.mlir.constant(0 : i16) : i16
44
+ // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
45
+ // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
46
+ // CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]]
47
+ // CHECK: %[[fatBuf:.*]] = llvm.addrspacecast %[[rsrc]] : !llvm.ptr<8> to !llvm.ptr<7>
48
+ // CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64)>
49
+ // CHECK: %[[ret1:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret0]][0]
50
+ // CHECK: %[[ret2:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret1]][1]
51
+ // CHECK: %[[ret3:.*]] = llvm.insertvalue %[[offset]], %[[ret2]][2]
52
+ // CHECK: builtin.unrealized_conversion_cast %[[ret3]]
53
+ %ret = amdgpu.fat_raw_buffer_cast %buf : memref<i32, #gpu_global_addrspace> to memref<i32, #amdgpu.address_space<fat_raw_buffer>>
54
+ return %ret : memref<i32, #amdgpu.address_space<fat_raw_buffer>>
55
+ }
56
+
37
57
// CHECK-LABEL: func @fat_raw_buffer_cast_dyn_size_offset
38
58
func.func @fat_raw_buffer_cast_dyn_size_offset (%buf: memref <?xi32 , strided <[1 ], offset : ?>, #gpu_global_addrspace >) -> memref <?xi32 , strided <[1 ], offset : ?>, #amdgpu.address_space <fat_raw_buffer >> {
39
59
// CHECK: %[[size0:.*]] = llvm.extractvalue %{{.*}}[3, 0]
0 commit comments