|
| 1 | +//-------------------------------------------------------------------------------------------------- |
| 2 | +// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS. |
| 3 | +// |
| 4 | +// Set-up that's shared across all tests in this directory. In principle, this |
| 5 | +// config could be moved to lit.local.cfg. However, there are downstream users that |
| 6 | +// do not use these LIT config files. Hence why this is kept inline. |
| 7 | +// |
| 8 | +// DEFINE: %{sparsifier_opts} = enable-runtime-library=true |
| 9 | +// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts} |
| 10 | +// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" |
| 11 | +// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" |
| 12 | +// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils |
| 13 | +// DEFINE: %{run_opts} = -e entry -entry-point-result=void |
| 14 | +// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} |
| 15 | +// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} |
| 16 | +// |
| 17 | +// DEFINE: %{env} = |
| 18 | +//-------------------------------------------------------------------------------------------------- |
| 19 | + |
| 20 | +// RUN: %{compile} | %{run} | FileCheck %s |
| 21 | +// |
| 22 | +// Do the same run, but now with direct IR generation. |
| 23 | +// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false |
| 24 | +// RUN: %{compile} | %{run} | FileCheck %s |
| 25 | +// |
| 26 | +// Do the same run, but now with direct IR generation and vectorization. |
| 27 | +// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true |
| 28 | +// RUN: %{compile} | %{run} | FileCheck %s |
| 29 | +// Do the same run, but now with direct IR generation and VLA vectorization (the SVE options expand %{sparsifier_opts}, so the settings above still apply). |
| 30 | +// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %} |
| 31 | + |
| 32 | +// Two-dimensional sparse encoding: the first dimension is dense, the second is compressed. |
| 33 | +#CSR = #sparse_tensor.encoding<{ |
| 34 | + map = (i, j) -> (i : dense, j : compressed) |
| 35 | +}> |
| 36 | + |
| 37 | +// Two-dimensional sparse encoding: both the first and the second dimension are compressed. |
| 38 | +#DCSR = #sparse_tensor.encoding<{ |
| 39 | + map = (i, j) -> (i : compressed, j : compressed) |
| 40 | +}> |
| 41 | + |
| 42 | +// An example of a 2D convolution with sparse data and filter. |
| 43 | +module { |
| 44 | + // Dense variant: linalg.conv_2d on a dense 10x10 input and a dense 5x5 filter, with a 6x6 outs operand. |
| 45 | + func.func @conv2d(%image: tensor<10x10xi32>, %kernel: tensor<5x5xi32>, |
| 46 | + %out: tensor<6x6xi32>) -> tensor<6x6xi32> { |
| 47 | + %conv = linalg.conv_2d |
| 48 | + ins (%image, %kernel: tensor<10x10xi32>, tensor<5x5xi32>) |
| 49 | + outs (%out: tensor<6x6xi32>) -> tensor<6x6xi32> |
| 50 | + return %conv : tensor<6x6xi32> |
| 51 | + } |
| 52 | + |
| 53 | + // Sparse variant: linalg.conv_2d on a #CSR 10x10 input and a #CSR 5x5 filter, with a dense 6x6 outs operand. |
| 54 | + func.func @conv2d_ss(%image: tensor<10x10xi32, #CSR>, %kernel: tensor<5x5xi32, #CSR>, |
| 55 | + %out: tensor<6x6xi32>) -> tensor<6x6xi32> { |
| 56 | + %conv = linalg.conv_2d |
| 57 | + ins (%image, %kernel: tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>) |
| 58 | + outs (%out: tensor<6x6xi32>) -> tensor<6x6xi32> |
| 59 | + return %conv : tensor<6x6xi32> |
| 60 | + } |
| 61 | + |
| 62 | + // Sparse variant: linalg.conv_2d on a #DCSR 10x10 input and a #CSR 5x5 filter, with a dense 6x6 outs operand. |
| 63 | + func.func @conv2d_bs(%image: tensor<10x10xi32, #DCSR>, %kernel: tensor<5x5xi32, #CSR>, |
| 64 | + %out: tensor<6x6xi32>) -> tensor<6x6xi32> { |
| 65 | + %conv = linalg.conv_2d |
| 66 | + ins (%image, %kernel: tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>) |
| 67 | + outs (%out: tensor<6x6xi32>) -> tensor<6x6xi32> |
| 68 | + return %conv : tensor<6x6xi32> |
| 69 | + } |
| 70 | + |
| 71 | + func.func @entry() { |
| 72 | + %c0 = arith.constant 0 : index |
| 73 | + %i0 = arith.constant 0 : i32 |
| 74 | + |
| 75 | + // Filter and input with (almost) no zero entries, to "stress" test the sparse kernels on dense data. |
| 76 | + |
| 77 | + %filter = arith.constant dense<[ |
| 78 | + [ -1, -2, -3, -4, -5 ], |
| 79 | + [ -6, -7, -8, -9, -10 ], |
| 80 | + [ -11, -12, -13, -14, -15 ], |
| 81 | + [ -16, -17, -18, -19, -20 ], |
| 82 | + [ -21, -22, -23, -24, -25 ] |
| 83 | + ]> : tensor<5x5xi32> |
| 84 | + |
| 85 | + %input = arith.constant dense<[ |
| 86 | + [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], |
| 87 | + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ], |
| 88 | + [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 ], |
| 89 | + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], |
| 90 | + [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], |
| 91 | + [ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 ], |
| 92 | + [ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69 ], |
| 93 | + [ 70, 71, 72, 73, 74, 75, 76, 77, 78, 79 ], |
| 94 | + [ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89 ], |
| 95 | + [ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ] |
| 96 | + ]> : tensor<10x10xi32> |
| 97 | + |
| 98 | + // Filter and input that consist mostly of zeros, to test the kernels on truly sparse data. |
| 99 | + |
| 100 | + %sfilter = arith.constant dense<[ |
| 101 | + [ 0, -1, 0, -2, 0 ], |
| 102 | + [ 0, 0, 0, 0, 0 ], |
| 103 | + [ 0, 0, 8, 0, 0 ], |
| 104 | + [ -3, 0, 0, -4, 0 ], |
| 105 | + [ 0, 0, -5, 0, -6 ] |
| 106 | + ]> : tensor<5x5xi32> |
| 107 | + |
| 108 | + %sinput = arith.constant dense<[ |
| 109 | + [ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0 ], |
| 110 | + [ 0, 4, 0, 0, 5, 0, 0, 0, 0, 0 ], |
| 111 | + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], |
| 112 | + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], |
| 113 | + [ 0, 0, 0, 0, 0, 0, 6, 0, 0, 7 ], |
| 114 | + [ 0, 0, 0, 0, 0, 0, 0, 8, 0, 0 ], |
| 115 | + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], |
| 116 | + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], |
| 117 | + [ 0, 9, 0, 0, 0, 0, 0, 0, 0, 0 ], |
| 118 | + [ 0, 0, 0, 0, 10, 0, 0, 0, 0, 0 ] |
| 119 | + ]> : tensor<10x10xi32> |
| 120 | + |
| 121 | + // Convert the dense constants above into the #CSR and #DCSR encoded tensors the sparse kernels take. |
| 122 | + |
| 123 | + %input_CSR = sparse_tensor.convert %input : tensor<10x10xi32> to tensor<10x10xi32, #CSR> |
| 124 | + %input_DCSR = sparse_tensor.convert %input : tensor<10x10xi32> to tensor<10x10xi32, #DCSR> |
| 125 | + %filter_CSR = sparse_tensor.convert %filter : tensor<5x5xi32> to tensor<5x5xi32, #CSR> |
| 126 | + |
| 127 | + %sinput_CSR = sparse_tensor.convert %sinput : tensor<10x10xi32> to tensor<10x10xi32, #CSR> |
| 128 | + %sinput_DCSR = sparse_tensor.convert %sinput : tensor<10x10xi32> to tensor<10x10xi32, #DCSR> |
| 129 | + %sfilter_CSR = sparse_tensor.convert %sfilter : tensor<5x5xi32> to tensor<5x5xi32, #CSR> |
| 130 | + |
| 131 | + // Call all three kernels with the stress data, each with its own zero-filled 6x6 output tensor. |
| 132 | + %output0 = arith.constant dense<0> : tensor<6x6xi32> |
| 133 | + %0 = call @conv2d(%input, %filter, %output0) |
| 134 | + : (tensor<10x10xi32>, tensor<5x5xi32>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 135 | + %output1 = arith.constant dense<0> : tensor<6x6xi32> |
| 136 | + %1 = call @conv2d_ss(%input_CSR, %filter_CSR, %output1) |
| 137 | + : (tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 138 | + %output2 = arith.constant dense<0> : tensor<6x6xi32> |
| 139 | + %2 = call @conv2d_bs(%input_DCSR, %filter_CSR, %output2) |
| 140 | + : (tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 141 | + |
| 142 | + // Call all three kernels with the sparse data, each with its own zero-filled 6x6 output tensor. |
| 143 | + %output3 = arith.constant dense<0> : tensor<6x6xi32> |
| 144 | + %3 = call @conv2d(%sinput, %sfilter, %output3) |
| 145 | + : (tensor<10x10xi32>, tensor<5x5xi32>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 146 | + %output4 = arith.constant dense<0> : tensor<6x6xi32> |
| 147 | + %4 = call @conv2d_ss(%sinput_CSR, %sfilter_CSR, %output4) |
| 148 | + : (tensor<10x10xi32, #CSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 149 | + %output5 = arith.constant dense<0> : tensor<6x6xi32> |
| 150 | + %5 = call @conv2d_bs(%sinput_DCSR, %sfilter_CSR, %output5) |
| 151 | + : (tensor<10x10xi32, #DCSR>, tensor<5x5xi32, #CSR>, tensor<6x6xi32>) -> tensor<6x6xi32> |
| 152 | + |
| 153 | + // Verify the output. The six results are printed further down in the order %0 through %5, so the |
| 154 | + // first three expectations cover the stress data and the last three cover the sparse data. |
| 155 | + // CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ), |
| 156 | + // CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ), |
| 157 | + // CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ), |
| 158 | + // CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ), |
| 159 | + // CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ), |
| 160 | + // CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) ) |
| 161 | + // |
| 162 | + // CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ), |
| 163 | + // CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ), |
| 164 | + // CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ), |
| 165 | + // CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ), |
| 166 | + // CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ), |
| 167 | + // CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) ) |
| 168 | + // |
| 169 | + // CHECK: ( ( -9700, -10025, -10350, -10675, -11000, -11325 ), |
| 170 | + // CHECK-SAME: ( -12950, -13275, -13600, -13925, -14250, -14575 ), |
| 171 | + // CHECK-SAME: ( -16200, -16525, -16850, -17175, -17500, -17825 ), |
| 172 | + // CHECK-SAME: ( -19450, -19775, -20100, -20425, -20750, -21075 ), |
| 173 | + // CHECK-SAME: ( -22700, -23025, -23350, -23675, -24000, -24325 ), |
| 174 | + // CHECK-SAME: ( -25950, -26275, -26600, -26925, -27250, -27575 ) ) |
| 175 | + // |
| 176 | + // CHECK: ( ( -7, -2, -39, 0, -30, -42 ), |
| 177 | + // CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ), |
| 178 | + // CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ), |
| 179 | + // CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ), |
| 180 | + // CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ), |
| 181 | + // CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) ) |
| 182 | + // |
| 183 | + // CHECK: ( ( -7, -2, -39, 0, -30, -42 ), |
| 184 | + // CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ), |
| 185 | + // CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ), |
| 186 | + // CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ), |
| 187 | + // CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ), |
| 188 | + // CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) ) |
| 189 | + // |
| 190 | + // CHECK: ( ( -7, -2, -39, 0, -30, -42 ), |
| 191 | + // CHECK-SAME: ( -4, -10, 0, -77, 0, -40 ), |
| 192 | + // CHECK-SAME: ( 0, 0, 0, 0, 16, 0 ), |
| 193 | + // CHECK-SAME: ( 0, 0, 0, 0, 0, 64 ), |
| 194 | + // CHECK-SAME: ( 0, 0, 0, -12, 0, -6 ), |
| 195 | + // CHECK-SAME: ( -60, -27, -50, 0, -16, 0 ) ) |
| 196 | + // |
| 197 | + %v0 = vector.transfer_read %0[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 198 | + vector.print %v0 : vector<6x6xi32> |
| 199 | + %v1 = vector.transfer_read %1[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 200 | + vector.print %v1 : vector<6x6xi32> |
| 201 | + %v2 = vector.transfer_read %2[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 202 | + vector.print %v2 : vector<6x6xi32> |
| 203 | + %v3 = vector.transfer_read %3[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 204 | + vector.print %v3 : vector<6x6xi32> |
| 205 | + %v4 = vector.transfer_read %4[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 206 | + vector.print %v4 : vector<6x6xi32> |
| 207 | + %v5 = vector.transfer_read %5[%c0, %c0], %i0 : tensor<6x6xi32>, vector<6x6xi32> |
| 208 | + vector.print %v5 : vector<6x6xi32> |
| 209 | + |
| 210 | + // Release the six sparse tensors produced by the sparse_tensor.convert operations above. |
| 211 | + bufferization.dealloc_tensor %input_CSR : tensor<10x10xi32, #CSR> |
| 212 | + bufferization.dealloc_tensor %input_DCSR : tensor<10x10xi32, #DCSR> |
| 213 | + bufferization.dealloc_tensor %filter_CSR : tensor<5x5xi32, #CSR> |
| 214 | + bufferization.dealloc_tensor %sinput_CSR : tensor<10x10xi32, #CSR> |
| 215 | + bufferization.dealloc_tensor %sinput_DCSR : tensor<10x10xi32, #DCSR> |
| 216 | + bufferization.dealloc_tensor %sfilter_CSR : tensor<5x5xi32, #CSR> |
| 217 | + |
| 218 | + return |
| 219 | + } |
| 220 | +} |
0 commit comments