Skip to content

Commit ec9e497

Browse files
authored
[mlir][sparse] add sparse convolution with 5x5 kernel (#74793)
Also unifies some of the test set up parts in other conv tests
1 parent 6e1f191 commit ec9e497

File tree

5 files changed

+220
-4
lines changed

5 files changed

+220
-4
lines changed

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
//
2626
// Do the same run, but now with direct IR generation and vectorization.
2727
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
28-
2928
// RUN: %{compile} | %{run} | FileCheck %s
3029
//
3130
// Do the same run, but now with direct IR generation and VLA vectorization.

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
//
2626
// Do the same run, but now with direct IR generation and vectorization.
2727
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
28-
2928
// RUN: %{compile} | %{run} | FileCheck %s
3029
//
3130
// Do the same run, but now with direct IR generation and VLA vectorization.
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
//--------------------------------------------------------------------------------------------------
2+
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
3+
//
4+
// Set-up that's shared across all tests in this directory. In principle, this
5+
// config could be moved to lit.local.cfg. However, there are downstream users that
6+
// do not use these LIT config files. Hence why this is kept inline.
7+
//
8+
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
9+
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
10+
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
11+
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
12+
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
13+
// DEFINE: %{run_opts} = -e entry -entry-point-result=void
14+
// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
15+
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
16+
//
17+
// DEFINE: %{env} =
18+
//--------------------------------------------------------------------------------------------------
19+
20+
// Baseline run, using the runtime-library configuration defined above.
// RUN: %{compile} | %{run} | FileCheck %s
21+
//
22+
// Do the same run, but now with direct IR generation.
23+
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
24+
// RUN: %{compile} | %{run} | FileCheck %s
25+
//
26+
// Do the same run, but now with direct IR generation and vectorization.
27+
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
28+
//
29+
// Do the same run, but now with direct IR generation and VLA vectorization.
30+
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
31+
32+
// 2-D sparse encoding: level d0 is dense, level d1 is compressed.
#CSR = #sparse_tensor.encoding<{
33+
map = (d0, d1) -> (d0 : dense,
34+
d1 : compressed)
35+
}>
36+
37+
// 2-D sparse encoding: both levels d0 and d1 are compressed.
#DCSR = #sparse_tensor.encoding<{
38+
map = (d0, d1) -> (d0 : compressed,
39+
d1 : compressed)
40+
}>
41+
42+
// An example of a 2D convolution with sparse data and filter.
43+
module {
44+
// Dense reference kernel: linalg.conv_2d over a 10x10 input and a 5x5 filter,
// with the 6x6 %output tensor passed as the outs operand.
func.func @conv2d(%input: tensor<10x10xi32>,
45+
%filter: tensor<5x5xi32>,
46+
%output: tensor<6x6xi32>) -> tensor<6x6xi32> {
47+
%conv = linalg.conv_2d
48+
ins (%input, %filter: tensor<10x10xi32>, tensor<5x5xi32>)
49+
outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
50+
return %conv : tensor<6x6xi32>
51+
}
52+
53+
// Same convolution as the dense kernel, but with both the input and the
// filter annotated with the #CSR encoding; the output stays a dense tensor.
func.func @conv2d_ss(%input: tensor<10x10xi32, #CSR>,
54+
%filter: tensor<5x5xi32, #CSR>,
55+
%output: tensor<6x6xi32>) -> tensor<6x6xi32> {
56+
%conv = linalg.conv_2d
57+
ins (%input, %filter: tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>)
58+
outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
59+
return %conv : tensor<6x6xi32>
60+
}
61+
62+
// Same convolution again, this time with a #DCSR input and a #CSR filter;
// the output stays a dense tensor.
func.func @conv2d_bs(%input: tensor<10x10xi32, #DCSR>,
63+
%filter: tensor<5x5xi32, #CSR>,
64+
%output: tensor<6x6xi32>) -> tensor<6x6xi32> {
65+
%conv = linalg.conv_2d
66+
ins (%input, %filter: tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>)
67+
outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
68+
return %conv : tensor<6x6xi32>
69+
}
70+
71+
// Test driver: builds a dense ("stress") and a mostly-zero ("sparse") data
// set, converts them to the sparse encodings, calls all three kernels on each
// data set, prints the six 6x6 results, and releases the sparse tensors.
func.func @entry() {
72+
// Index 0 is the read offset and i32 0 the padding value used by the
// vector.transfer_read ops near the end of this function.
%c0 = arith.constant 0 : index
73+
%i0 = arith.constant 0 : i32
74+
75+
// Dense filter and input to "stress" test sparsity.
76+
77+
%filter = arith.constant dense<[
78+
[ -1, -2, -3, -4, -5 ],
79+
[ -6, -7, -8, -9, -10 ],
80+
[ -11, -12, -13, -14, -15 ],
81+
[ -16, -17, -18, -19, -20 ],
82+
[ -21, -22, -23, -24, -25 ]
83+
]> : tensor<5x5xi32>
84+
85+
%input = arith.constant dense<[
86+
[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
87+
[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ],
88+
[ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 ],
89+
[ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ],
90+
[ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ],
91+
[ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 ],
92+
[ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69 ],
93+
[ 70, 71, 72, 73, 74, 75, 76, 77, 78, 79 ],
94+
[ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89 ],
95+
[ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ]
96+
]> : tensor<10x10xi32>
97+
98+
// Sparse filter and input to test true sparsity.
99+
100+
%sfilter = arith.constant dense<[
101+
[ 0, -1, 0, -2, 0 ],
102+
[ 0, 0, 0, 0, 0 ],
103+
[ 0, 0, 8, 0, 0 ],
104+
[ -3, 0, 0, -4, 0 ],
105+
[ 0, 0, -5, 0, -6 ]
106+
]> : tensor<5x5xi32>
107+
108+
%sinput = arith.constant dense<[
109+
[ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0 ],
110+
[ 0, 4, 0, 0, 5, 0, 0, 0, 0, 0 ],
111+
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
112+
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
113+
[ 0, 0, 0, 0, 0, 0, 6, 0, 0, 7 ],
114+
[ 0, 0, 0, 0, 0, 0, 0, 8, 0, 0 ],
115+
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
116+
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
117+
[ 0, 9, 0, 0, 0, 0, 0, 0, 0, 0 ],
118+
[ 0, 0, 0, 0, 10, 0, 0, 0, 0, 0 ]
119+
]> : tensor<10x10xi32>
120+
121+
// Set up sparse tensors.
// Each input is converted to both #CSR and #DCSR; each filter only to #CSR.
122+
123+
%input_CSR = sparse_tensor.convert %input : tensor<10x10xi32> to tensor<10x10xi32, #CSR>
124+
%input_DCSR = sparse_tensor.convert %input : tensor<10x10xi32> to tensor<10x10xi32, #DCSR>
125+
%filter_CSR = sparse_tensor.convert %filter : tensor<5x5xi32> to tensor<5x5xi32, #CSR>
126+
127+
%sinput_CSR = sparse_tensor.convert %sinput : tensor<10x10xi32> to tensor<10x10xi32, #CSR>
128+
%sinput_DCSR = sparse_tensor.convert %sinput : tensor<10x10xi32> to tensor<10x10xi32, #DCSR>
129+
%sfilter_CSR = sparse_tensor.convert %sfilter : tensor<5x5xi32> to tensor<5x5xi32, #CSR>
130+
131+
// Call the kernels with stress input.
// Every call receives its own zero-filled 6x6 tensor as the output operand.
132+
%output0 = arith.constant dense<0> : tensor<6x6xi32>
133+
%0 = call @conv2d(%input, %filter, %output0)
134+
: (tensor<10x10xi32>, tensor<5x5xi32>, tensor<6x6xi32>) -> tensor<6x6xi32>
135+
%output1 = arith.constant dense<0> : tensor<6x6xi32>
136+
%1 = call @conv2d_ss(%input_CSR, %filter_CSR, %output1)
137+
: (tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
138+
%output2 = arith.constant dense<0> : tensor<6x6xi32>
139+
%2 = call @conv2d_bs(%input_DCSR, %filter_CSR, %output2)
140+
: (tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
141+
142+
// Call the kernels with sparse input.
143+
%output3 = arith.constant dense<0> : tensor<6x6xi32>
144+
%3 = call @conv2d(%sinput, %sfilter, %output3)
145+
: (tensor<10x10xi32>, tensor<5x5xi32>, tensor<6x6xi32>) -> tensor<6x6xi32>
146+
%output4 = arith.constant dense<0> : tensor<6x6xi32>
147+
%4 = call @conv2d_ss(%sinput_CSR, %sfilter_CSR, %output4)
148+
: (tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
149+
%output5 = arith.constant dense<0> : tensor<6x6xi32>
150+
%5 = call @conv2d_bs(%sinput_DCSR, %sfilter_CSR, %output5)
151+
: (tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
152+
153+
// Verify the output.
// The expectations follow the print order below: three identical matrices
// for the stress data (%v0, %v1, %v2), then three identical matrices for the
// sparse data (%v3, %v4, %v5).
154+
//
155+
// CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ),
156+
// CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ),
157+
// CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ),
158+
// CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ),
159+
// CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ),
160+
// CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) )
161+
//
162+
// CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ),
163+
// CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ),
164+
// CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ),
165+
// CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ),
166+
// CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ),
167+
// CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) )
168+
//
169+
// CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ),
170+
// CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ),
171+
// CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ),
172+
// CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ),
173+
// CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ),
174+
// CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) )
175+
//
176+
// CHECK: ( ( -7, -2, -39, 0, -30, -42 ),
177+
// CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ),
178+
// CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ),
179+
// CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ),
180+
// CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ),
181+
// CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) )
182+
//
183+
// CHECK: ( ( -7, -2, -39, 0, -30, -42 ),
184+
// CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ),
185+
// CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ),
186+
// CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ),
187+
// CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ),
188+
// CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) )
189+
//
190+
// CHECK: ( ( -7, -2, -39, 0, -30, -42 ),
191+
// CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ),
192+
// CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ),
193+
// CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ),
194+
// CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ),
195+
// CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) )
196+
//
197+
%v0 = vector.transfer_read %0[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
198+
vector.print %v0 : vector<6x6xi32>
199+
%v1 = vector.transfer_read %1[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
200+
vector.print %v1 : vector<6x6xi32>
201+
%v2 = vector.transfer_read %2[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
202+
vector.print %v2 : vector<6x6xi32>
203+
%v3 = vector.transfer_read %3[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
204+
vector.print %v3 : vector<6x6xi32>
205+
%v4 = vector.transfer_read %4[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
206+
vector.print %v4 : vector<6x6xi32>
207+
%v5 = vector.transfer_read %5[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32>
208+
vector.print %v5 : vector<6x6xi32>
209+
210+
// Release sparse resources.
// Only the six tensors produced by sparse_tensor.convert are deallocated.
211+
bufferization.dealloc_tensor %input_CSR : tensor<10x10xi32, #CSR>
212+
bufferization.dealloc_tensor %input_DCSR : tensor<10x10xi32, #DCSR>
213+
bufferization.dealloc_tensor %filter_CSR : tensor<5x5xi32, #CSR>
214+
bufferization.dealloc_tensor %sinput_CSR : tensor<10x10xi32, #CSR>
215+
bufferization.dealloc_tensor %sinput_DCSR : tensor<10x10xi32, #DCSR>
216+
bufferization.dealloc_tensor %sfilter_CSR : tensor<5x5xi32, #CSR>
217+
218+
return
219+
}
220+
}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nchw_fchw.mlir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
//
2626
// Do the same run, but now with direct IR generation and vectorization.
2727
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
28-
2928
// RUN: %{compile} | %{run} | FileCheck %s
3029
//
3130
// Do the same run, but now with direct IR generation and VLA vectorization.

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
//
2626
// Do the same run, but now with direct IR generation and vectorization.
2727
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
28-
2928
// RUN: %{compile} | %{run} | FileCheck %s
3029
//
3130
// Do the same run, but now with direct IR generation and VLA vectorization.

0 commit comments

Comments
 (0)