Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 52d7ce6

Browse files
committed
[X86] Move the promotion of v16i16->v16i8 for avx512f but not avx512bw from lowering to isel. Change to use vpmovzx instead of vpmovsx.
With avx512f but not avx512bw we need to extend to v16i32 and then truncate that to v16i8. Previously we emitted both nodes during lowering, but I'm trying to switch to using target independent nodes and with that switched the extend+truncate wou[ld … (text truncated in the source)]. This patch changes the implementation to what will be necessary with that patch which helps minimize test diffs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346552 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 3593f97 commit 52d7ce6

14 files changed

+156
-146
lines changed

lib/Target/X86/X86ISelLowering.cpp

+6-8
Original file line numberDiff line numberDiff line change
@@ -17789,15 +17789,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
1778917789

1779017790
// vpmovqb/w/d, vpmovdb/w, vpmovwb
1779117791
if (Subtarget.hasAVX512()) {
17792-
// word to byte only under BWI
17793-
if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8
17794-
// Make sure we're allowed to promote 512-bits.
17795-
if (Subtarget.canExtendTo512DQ())
17796-
return DAG.getNode(ISD::TRUNCATE, DL, VT,
17797-
DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
17798-
} else {
17792+
// word to byte only under BWI. Otherwise we have to promote to v16i32
17793+
// and then truncate that. But we should only do that if we haven't been
17794+
// asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
17795+
// handled by isel patterns.
17796+
if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
17797+
Subtarget.canExtendTo512DQ())
1779917798
return Op;
17800-
}
1780117799
}
1780217800

1780317801
unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);

lib/Target/X86/X86InstrAVX512.td

+12
Original file line numberDiff line numberDiff line change
@@ -9774,6 +9774,18 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
97749774
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
97759775
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
97769776

9777+
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9778+
// ext+trunc aggressively making it impossible to legalize the DAG to this
9779+
// pattern directly.
9780+
let Predicates = [HasAVX512, NoBWI] in {
9781+
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9782+
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9783+
def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
9784+
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9785+
def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
9786+
(VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9787+
}
9788+
97779789
//===----------------------------------------------------------------------===//
97789790
// GATHER - SCATTER Operations
97799791

test/CodeGen/X86/avx512-trunc.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -440,9 +440,9 @@ define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
440440
define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
441441
; KNL-LABEL: trunc_wb_512:
442442
; KNL: ## %bb.0:
443-
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
443+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
444444
; KNL-NEXT: vpmovdb %zmm0, %xmm0
445-
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
445+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
446446
; KNL-NEXT: vpmovdb %zmm1, %xmm1
447447
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
448448
; KNL-NEXT: retq
@@ -458,9 +458,9 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
458458
define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
459459
; KNL-LABEL: trunc_wb_512_mem:
460460
; KNL: ## %bb.0:
461-
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
461+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
462462
; KNL-NEXT: vpmovdb %zmm0, %xmm0
463-
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
463+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
464464
; KNL-NEXT: vpmovdb %zmm1, %xmm1
465465
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
466466
; KNL-NEXT: vmovdqa %ymm0, (%rdi)
@@ -480,7 +480,7 @@ define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
480480
define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
481481
; KNL-LABEL: trunc_wb_256:
482482
; KNL: ## %bb.0:
483-
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
483+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
484484
; KNL-NEXT: vpmovdb %zmm0, %xmm0
485485
; KNL-NEXT: vzeroupper
486486
; KNL-NEXT: retq
@@ -562,7 +562,7 @@ define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
562562
; KNL-LABEL: usat_trunc_wb_256:
563563
; KNL: ## %bb.0:
564564
; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
565-
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
565+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
566566
; KNL-NEXT: vpmovdb %zmm0, %xmm0
567567
; KNL-NEXT: vzeroupper
568568
; KNL-NEXT: retq
@@ -836,7 +836,7 @@ define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
836836
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
837837
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
838838
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
839-
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
839+
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
840840
; KNL-NEXT: vpmovdb %zmm0, %xmm0
841841
; KNL-NEXT: vzeroupper
842842
; KNL-NEXT: retq

test/CodeGen/X86/pmul.ll

+14-14
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind {
5050
; AVX512F: # %bb.0: # %entry
5151
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
5252
; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
53-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
53+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
5454
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
5555
; AVX512F-NEXT: vzeroupper
5656
; AVX512F-NEXT: retq
@@ -186,7 +186,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind {
186186
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1
187187
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
188188
; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
189-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
189+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
190190
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
191191
; AVX512F-NEXT: vzeroupper
192192
; AVX512F-NEXT: retq
@@ -443,12 +443,12 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
443443
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
444444
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
445445
; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1
446-
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
446+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
447447
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
448448
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
449449
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
450450
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
451-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
451+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
452452
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
453453
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
454454
; AVX512F-NEXT: retq
@@ -628,14 +628,14 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
628628
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm2
629629
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm3
630630
; AVX512F-NEXT: vpmullw %ymm2, %ymm3, %ymm2
631-
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
631+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
632632
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
633633
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
634634
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1
635635
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
636636
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
637637
; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
638-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
638+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
639639
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
640640
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
641641
; AVX512F-NEXT: retq
@@ -859,22 +859,22 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
859859
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
860860
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
861861
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
862-
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
862+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
863863
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
864864
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
865865
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
866866
; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0
867-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
867+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
868868
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
869869
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
870870
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm2
871871
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
872-
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
872+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
873873
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
874874
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
875875
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1
876876
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
877-
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
877+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
878878
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
879879
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
880880
; AVX512F-NEXT: retq
@@ -1035,27 +1035,27 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
10351035
; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm4
10361036
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm5
10371037
; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm4
1038-
; AVX512F-NEXT: vpmovsxwd %ymm4, %zmm4
1038+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
10391039
; AVX512F-NEXT: vpmovdb %zmm4, %xmm4
10401040
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2
10411041
; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2
10421042
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
10431043
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
10441044
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
1045-
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1045+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
10461046
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
10471047
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm0
10481048
; AVX512F-NEXT: vpmovsxbw %xmm3, %ymm2
10491049
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm4
10501050
; AVX512F-NEXT: vpmullw %ymm2, %ymm4, %ymm2
1051-
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
1051+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
10521052
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
10531053
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm3
10541054
; AVX512F-NEXT: vpmovsxbw %xmm3, %ymm3
10551055
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
10561056
; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1
10571057
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
1058-
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
1058+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
10591059
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
10601060
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
10611061
; AVX512F-NEXT: retq

test/CodeGen/X86/prefer-avx256-trunc.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define <16 x i8> @testv16i16_trunc_v16i8(<16 x i16> %x) {
2121
;
2222
; AVX512NOBW-LABEL: testv16i16_trunc_v16i8:
2323
; AVX512NOBW: # %bb.0:
24-
; AVX512NOBW-NEXT: vpmovsxwd %ymm0, %zmm0
24+
; AVX512NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
2525
; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
2626
; AVX512NOBW-NEXT: vzeroupper
2727
; AVX512NOBW-NEXT: retq

test/CodeGen/X86/shuffle-vs-trunc-512.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,9 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
8989
define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
9090
; AVX512F-LABEL: trunc_v32i16_to_v32i8:
9191
; AVX512F: # %bb.0:
92-
; AVX512F-NEXT: vpmovsxwd (%rdi), %zmm0
92+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
9393
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
94-
; AVX512F-NEXT: vpmovsxwd 32(%rdi), %zmm1
94+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
9595
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
9696
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
9797
; AVX512F-NEXT: vmovdqa %ymm0, (%rsi)
@@ -100,9 +100,9 @@ define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
100100
;
101101
; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
102102
; AVX512VL: # %bb.0:
103-
; AVX512VL-NEXT: vpmovsxwd (%rdi), %zmm0
103+
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
104104
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
105-
; AVX512VL-NEXT: vpmovsxwd 32(%rdi), %zmm1
105+
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
106106
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
107107
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
108108
; AVX512VL-NEXT: vmovdqa %ymm0, (%rsi)

0 commit comments

Comments
 (0)