|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 |
| 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,SSE2-LV |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -early-live-intervals | FileCheck %s --check-prefixes=CHECK,SSE2,SSE2-LIS |
3 | 4 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,XOP
|
4 | 5 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
|
5 | 6 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
|
@@ -114,30 +115,56 @@ define i32 @combine_rot_select_zero(i32, i32) {
|
114 | 115 | }
|
115 | 116 |
|
116 | 117 | define <4 x i32> @combine_vec_rot_select_zero(<4 x i32>, <4 x i32>) {
|
117 |
| -; SSE2-LABEL: combine_vec_rot_select_zero: |
118 |
| -; SSE2: # %bb.0: |
119 |
| -; SSE2-NEXT: pxor %xmm2, %xmm2 |
120 |
| -; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 |
121 |
| -; SSE2-NEXT: pslld $23, %xmm1 |
122 |
| -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
123 |
| -; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
124 |
| -; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 |
125 |
| -; SSE2-NEXT: movdqa %xmm0, %xmm3 |
126 |
| -; SSE2-NEXT: pmuludq %xmm1, %xmm3 |
127 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] |
128 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] |
129 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] |
130 |
| -; SSE2-NEXT: pmuludq %xmm5, %xmm1 |
131 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] |
132 |
| -; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] |
133 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] |
134 |
| -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] |
135 |
| -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] |
136 |
| -; SSE2-NEXT: por %xmm4, %xmm3 |
137 |
| -; SSE2-NEXT: pand %xmm2, %xmm0 |
138 |
| -; SSE2-NEXT: pandn %xmm3, %xmm2 |
139 |
| -; SSE2-NEXT: por %xmm2, %xmm0 |
140 |
| -; SSE2-NEXT: retq |
| 118 | +; SSE2-LV-LABEL: combine_vec_rot_select_zero: |
| 119 | +; SSE2-LV: # %bb.0: |
| 120 | +; SSE2-LV-NEXT: pxor %xmm2, %xmm2 |
| 121 | +; SSE2-LV-NEXT: pcmpeqd %xmm1, %xmm2 |
| 122 | +; SSE2-LV-NEXT: pslld $23, %xmm1 |
| 123 | +; SSE2-LV-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 124 | +; SSE2-LV-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 125 | +; SSE2-LV-NEXT: cvttps2dq %xmm1, %xmm1 |
| 126 | +; SSE2-LV-NEXT: movdqa %xmm0, %xmm3 |
| 127 | +; SSE2-LV-NEXT: pmuludq %xmm1, %xmm3 |
| 128 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] |
| 129 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] |
| 130 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] |
| 131 | +; SSE2-LV-NEXT: pmuludq %xmm5, %xmm1 |
| 132 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] |
| 133 | +; SSE2-LV-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] |
| 134 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] |
| 135 | +; SSE2-LV-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] |
| 136 | +; SSE2-LV-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] |
| 137 | +; SSE2-LV-NEXT: por %xmm4, %xmm3 |
| 138 | +; SSE2-LV-NEXT: pand %xmm2, %xmm0 |
| 139 | +; SSE2-LV-NEXT: pandn %xmm3, %xmm2 |
| 140 | +; SSE2-LV-NEXT: por %xmm2, %xmm0 |
| 141 | +; SSE2-LV-NEXT: retq |
| 142 | +; |
| 143 | +; SSE2-LIS-LABEL: combine_vec_rot_select_zero: |
| 144 | +; SSE2-LIS: # %bb.0: |
| 145 | +; SSE2-LIS-NEXT: pxor %xmm2, %xmm2 |
| 146 | +; SSE2-LIS-NEXT: pcmpeqd %xmm1, %xmm2 |
| 147 | +; SSE2-LIS-NEXT: pslld $23, %xmm1 |
| 148 | +; SSE2-LIS-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 149 | +; SSE2-LIS-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 150 | +; SSE2-LIS-NEXT: cvttps2dq %xmm1, %xmm1 |
| 151 | +; SSE2-LIS-NEXT: movdqa %xmm0, %xmm3 |
| 152 | +; SSE2-LIS-NEXT: pmuludq %xmm1, %xmm3 |
| 153 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] |
| 154 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] |
| 155 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] |
| 156 | +; SSE2-LIS-NEXT: pmuludq %xmm5, %xmm1 |
| 157 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] |
| 158 | +; SSE2-LIS-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] |
| 159 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] |
| 160 | +; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] |
| 161 | +; SSE2-LIS-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] |
| 162 | +; SSE2-LIS-NEXT: por %xmm4, %xmm3 |
| 163 | +; SSE2-LIS-NEXT: pand %xmm2, %xmm0 |
| 164 | +; SSE2-LIS-NEXT: pandn %xmm3, %xmm2 |
| 165 | +; SSE2-LIS-NEXT: por %xmm0, %xmm2 |
| 166 | +; SSE2-LIS-NEXT: movdqa %xmm2, %xmm0 |
| 167 | +; SSE2-LIS-NEXT: retq |
141 | 168 | ;
|
142 | 169 | ; XOP-LABEL: combine_vec_rot_select_zero:
|
143 | 170 | ; XOP: # %bb.0:
|
|
0 commit comments