|
4 | 4 |
|
; Scalar subtract-with-overflow intrinsics (signed and unsigned) for i32 and i8.
; Each returns a two-element struct: the difference and an i1 overflow bit.
; The i8 variants are called by usub_always_overflow / ssub_always_overflow.
5 | 5 | declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
|
6 | 6 | declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
|
| 7 | +declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone |
| 8 | +declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone |
| 9 | + |
7 | 10 |
|
; Vector subtract-with-overflow intrinsics (signed and unsigned) for <4 x i32>
; and <4 x i8>. Each returns the per-lane difference plus a <4 x i1> vector of
; per-lane overflow bits.
8 | 11 | declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
9 | 12 | declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
| 13 | +declare { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone |
| 14 | +declare { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8>) nounwind readnone |
10 | 15 |
|
11 | 16 | ; fold (ssub x, 0) -> x
|
12 | 17 | define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
|
@@ -148,3 +153,79 @@ define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
|
148 | 153 | %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
|
149 | 154 | ret <4 x i32> %4
|
150 | 155 | }
|
| 156 | + |
; usub.with.overflow where unsigned wrap is impossible:
;   %aa = %a | 0x80000000  -> bit 31 is always set   (%aa >= 0x80000000)
;   %bb = %b & 0x7FFFFFFF  -> bit 31 is always clear (%bb <= 0x7FFFFFFF)
; so %aa - %bb never borrows. The expected code keeps the plain subl and
; zeroes %edx with xorl instead of reading the carry flag.
| 157 | +define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) { |
| 158 | +; CHECK-LABEL: combine_usub_nuw: |
| 159 | +; CHECK: # %bb.0: |
| 160 | +; CHECK-NEXT: movl %edi, %eax |
| 161 | +; CHECK-NEXT: orl $-2147483648, %eax # imm = 0x80000000 |
| 162 | +; CHECK-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF |
| 163 | +; CHECK-NEXT: subl %esi, %eax |
| 164 | +; CHECK-NEXT: xorl %edx, %edx |
| 165 | +; CHECK-NEXT: retq |
| 166 | + %aa = or i32 %a, 2147483648 |
| 167 | + %bb = and i32 %b, 2147483647 |
| 168 | + %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb) |
| 169 | + ret { i32, i1 } %x |
| 170 | +} |
| 171 | + |
; usub.with.overflow that always wraps:
;   %y = %x | 64 has bit 6 set, so as an unsigned i8 it is >= 64, and
;   63 - %y therefore always borrows.
; The expected code below still computes the flag at run time (subb + setb)
; rather than materializing a constant overflow bit.
| 172 | +define { i8, i1 } @usub_always_overflow(i8 %x) nounwind { |
| 173 | +; CHECK-LABEL: usub_always_overflow: |
| 174 | +; CHECK: # %bb.0: |
| 175 | +; CHECK-NEXT: orb $64, %dil |
| 176 | +; CHECK-NEXT: movb $63, %al |
| 177 | +; CHECK-NEXT: subb %dil, %al |
| 178 | +; CHECK-NEXT: setb %dl |
| 179 | +; CHECK-NEXT: retq |
| 180 | + %y = or i8 %x, 64 |
| 181 | + %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y) |
| 182 | + ret { i8, i1 } %a |
| 183 | +} |
| 184 | + |
; ssub.with.overflow that always overflows:
;   %y = (%x > 29) ? %x : 29, i.e. the signed max of %x and 29, so %y is in
;   [29, 127]; -100 - %y is then in [-227, -129], always below the i8
;   minimum of -128.
; The expected code below still computes the flag at run time (subb + seto)
; rather than materializing a constant overflow bit.
| 185 | +define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind { |
| 186 | +; CHECK-LABEL: ssub_always_overflow: |
| 187 | +; CHECK: # %bb.0: |
| 188 | +; CHECK-NEXT: cmpb $30, %dil |
| 189 | +; CHECK-NEXT: movl $29, %ecx |
| 190 | +; CHECK-NEXT: cmovgel %edi, %ecx |
| 191 | +; CHECK-NEXT: movb $-100, %al |
| 192 | +; CHECK-NEXT: subb %cl, %al |
| 193 | +; CHECK-NEXT: seto %dl |
| 194 | +; CHECK-NEXT: retq |
| 195 | + %c = icmp sgt i8 %x, 29 |
| 196 | + %y = select i1 %c, i8 %x, i8 29 |
| 197 | + %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y) |
| 198 | + ret { i8, i1 } %a |
| 199 | +} |
| 200 | + |
; Constant-vector usub.with.overflow where every lane wraps:
;   0 - 1 borrows in each i8 lane, giving 255 with the overflow bit set.
; The call is folded away entirely: the expected code only sets xmm0 and
; xmm1 to all-ones (pcmpeqd / vpcmpeqd of a register with itself).
| 201 | +define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind { |
| 202 | +; SSE-LABEL: always_usub_const_vector: |
| 203 | +; SSE: # %bb.0: |
| 204 | +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 |
| 205 | +; SSE-NEXT: pcmpeqd %xmm1, %xmm1 |
| 206 | +; SSE-NEXT: retq |
| 207 | +; |
| 208 | +; AVX-LABEL: always_usub_const_vector: |
| 209 | +; AVX: # %bb.0: |
| 210 | +; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| 211 | +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| 212 | +; AVX-NEXT: retq |
| 213 | + %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>) |
| 214 | + ret { <4 x i8>, <4 x i1> } %x |
| 215 | +} |
| 216 | + |
; Constant-vector usub.with.overflow where no lane wraps:
;   255 - <128, 0, 255, 1> = <127, 255, 0, 254>, and 255 is the largest
;   unsigned i8, so no lane can borrow.
; The call is folded away entirely: the expected code loads the constant
; <127,255,0,254> into xmm0 and zeroes xmm1 (xorps / vxorps).
| 217 | +define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind { |
| 218 | +; SSE-LABEL: never_usub_const_vector: |
| 219 | +; SSE: # %bb.0: |
| 220 | +; SSE-NEXT: movaps {{.*#+}} xmm0 = <127,255,0,254,u,u,u,u,u,u,u,u,u,u,u,u> |
| 221 | +; SSE-NEXT: xorps %xmm1, %xmm1 |
| 222 | +; SSE-NEXT: retq |
| 223 | +; |
| 224 | +; AVX-LABEL: never_usub_const_vector: |
| 225 | +; AVX: # %bb.0: |
| 226 | +; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [127,255,0,254,127,255,0,254,127,255,0,254,127,255,0,254] |
| 227 | +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| 228 | +; AVX-NEXT: retq |
| 229 | + %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>) |
| 230 | + ret { <4 x i8>, <4 x i1> } %x |
| 231 | +} |
0 commit comments