
Commit 079924b

Revert r363802, r363850, and r363856 "[TargetLowering] SimplifyDemandedBits..."
This reverts the following patches:

"[TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG"
"[TargetLowering] SimplifyDemandedBits ZERO_EXTEND_VECTOR_INREG -> ANY_EXTEND_VECTOR_INREG"
"[TargetLowering] SimplifyDemandedBits - add ANY_EXTEND_VECTOR_INREG support"

We can end up with an any_extend_vector_inreg with a 256 bit result type and a 128 bit source type. This is allowed by the ISD opcode, but the generic operation legalizer is only able to expand cases where the total vector width is the same.

The X86 backend creates these mismatched cases for zext_vec_inreg/sext_vec_inreg. The SimplifyDemandedBits changes allowed those nodes to become aext_vec_inreg. For the zext/sext cases, the X86 backend has Custom handling and never lets them reach the generic legalizer. We need to do the same for aext_vec_inreg.

llvm-svn: 364264
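The mismatch described above is an any_extend_vector_inreg whose result vector is 256 bits wide while its source vector is only 128 bits wide; the generic legalizer can only expand the in-reg extends when both vector types have the same total width. The fix direction named in the last sentence amounts to giving ANY_EXTEND_VECTOR_INREG the same Custom registration that, per the message, X86 already has for the sign/zero variants. A minimal sketch of that registration follows; it is illustrative only, and the VT list and its placement in the X86TargetLowering constructor are assumptions, not code from this commit or its follow-ups:

    // Hypothetical sketch for X86ISelLowering.cpp (AVX vector types assumed).
    // Marking the node Custom keeps the mismatched-width cases out of the
    // generic operation legalizer, which can only expand same-width cases.
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); // already Custom per the message
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); // already Custom per the message
      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  VT, Custom); // the missing piece
    }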
1 parent 81eb828 commit 079924b

6 files changed: +217 / -192 lines


llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 20 additions & 26 deletions
@@ -1381,11 +1381,18 @@ bool TargetLowering::SimplifyDemandedBits(
     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    if (DemandedBits.getActiveBits() <= InBits)
-      return TLO.CombineTo(
-          Op, TLO.DAG.getNode(IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG
-                                         : ISD::ANY_EXTEND,
-                              dl, VT, Src));
+    // TODO: Add ZERO_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
+    if (DemandedBits.getActiveBits() <= InBits) {
+      // If we only need the non-extended bits of the bottom element
+      // then we can just bitcast to the result.
+      if (IsVecInReg && DemandedElts == 1 &&
+          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+          TLO.DAG.getDataLayout().isLittleEndian())
+        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+      if (!IsVecInReg)
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+    }
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
@@ -1406,11 +1413,9 @@ bool TargetLowering::SimplifyDemandedBits(
     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    if (DemandedBits.getActiveBits() <= InBits)
-      return TLO.CombineTo(
-          Op, TLO.DAG.getNode(IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG
-                                         : ISD::ANY_EXTEND,
-                              dl, VT, Src));
+    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
+    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
@@ -1429,28 +1434,17 @@ bool TargetLowering::SimplifyDemandedBits(
     Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    if (Known.isNonNegative())
-      return TLO.CombineTo(
-          Op, TLO.DAG.getNode(IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG
-                                         : ISD::ZERO_EXTEND,
-                              dl, VT, Src));
+    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ZERO_EXTEND_VECTOR_INREG fold.
+    if (Known.isNonNegative() && !IsVecInReg)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
     break;
   }
-  case ISD::ANY_EXTEND:
-  case ISD::ANY_EXTEND_VECTOR_INREG: {
+  case ISD::ANY_EXTEND: {
+    // TODO: Add ISD::ANY_EXTEND_VECTOR_INREG support.
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
     unsigned InBits = SrcVT.getScalarSizeInBits();
     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
-    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
-
-    // If we only need the bottom element then we can just bitcast.
-    // TODO: Handle ANY_EXTEND?
-    if (IsVecInReg && DemandedElts == 1 &&
-        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
-        TLO.DAG.getDataLayout().isLittleEndian())
-      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
-
     APInt InDemandedBits = DemandedBits.trunc(InBits);
     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,

llvm/test/CodeGen/X86/combine-pmuldq.ll

Lines changed: 10 additions & 7 deletions
@@ -91,15 +91,18 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
 define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) {
 ; SSE-LABEL: combine_zext_pmuludq_256:
 ; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
 ; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
 ; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; SSE-NEXT: movdqa {{.*#+}} xmm4 = [715827883,715827883]
-; SSE-NEXT: pmuludq %xmm4, %xmm0
-; SSE-NEXT: pmuludq %xmm4, %xmm1
-; SSE-NEXT: pmuludq %xmm4, %xmm2
-; SSE-NEXT: pmuludq %xmm4, %xmm3
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883]
+; SSE-NEXT: pmuludq %xmm1, %xmm0
+; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: pmuludq %xmm1, %xmm4
+; SSE-NEXT: pmuludq %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm4, %xmm1
 ; SSE-NEXT: retq
 ;
 ; AVX2-LABEL: combine_zext_pmuludq_256:

llvm/test/CodeGen/X86/pmul.ll

Lines changed: 60 additions & 41 deletions
@@ -1016,24 +1016,30 @@ entry:
 define <4 x i32> @mul_v4i64_zero_upper(<4 x i32> %val1, <4 x i32> %val2) {
 ; SSE2-LABEL: mul_v4i64_zero_upper:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; SSE2-NEXT: pmuludq %xmm3, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; SSE2-NEXT: pmuludq %xmm4, %xmm2
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: pmuludq %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: mul_v4i64_zero_upper:
 ; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pmuludq %xmm2, %xmm4
 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
-; SSE41-NEXT: pmuludq %xmm2, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSE41-NEXT: pmuludq %xmm3, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE41-NEXT: pmuludq %xmm3, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: mul_v4i64_zero_upper:
@@ -1165,37 +1171,48 @@ entry:
 define <8 x i32> @mul_v8i64_zero_upper(<8 x i32> %val1, <8 x i32> %val2) {
 ; SSE2-LABEL: mul_v8i64_zero_upper:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,1,3,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,1,1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,1,3,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
-; SSE2-NEXT: pmuludq %xmm5, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm6, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,3,3]
-; SSE2-NEXT: pmuludq %xmm7, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm6[2],xmm0[3],xmm6[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
+; SSE2-NEXT: pmuludq %xmm7, %xmm4
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSE2-NEXT: pmuludq %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm2[1,3]
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; SSE2-NEXT: pmuludq %xmm0, %xmm5
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm3[1,3]
+; SSE2-NEXT: movaps %xmm4, %xmm0
+; SSE2-NEXT: movaps %xmm5, %xmm1
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: mul_v8i64_zero_upper:
 ; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,2,3,3]
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm7 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pmuludq %xmm4, %xmm1
 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
-; SSE41-NEXT: pmuludq %xmm4, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
-; SSE41-NEXT: pmuludq %xmm5, %xmm1
+; SSE41-NEXT: pmuludq %xmm5, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: pmuludq %xmm6, %xmm2
 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
-; SSE41-NEXT: pmuludq %xmm6, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,3,3]
-; SSE41-NEXT: pmuludq %xmm7, %xmm2
+; SSE41-NEXT: pmuludq %xmm7, %xmm1
 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
 ; SSE41-NEXT: retq
 ;
@@ -1309,13 +1326,15 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; SSE41-NEXT: pmovsxwq %xmm3, %xmm6
 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm7
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm3
 ; SSE41-NEXT: pmuldq %xmm4, %xmm3
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; SSE41-NEXT: pmovsxdq %xmm2, %xmm2
 ; SSE41-NEXT: pmuldq %xmm5, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm4
 ; SSE41-NEXT: pmuldq %xmm6, %xmm4
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: pmovsxdq %xmm1, %xmm0
 ; SSE41-NEXT: pmuldq %xmm7, %xmm0
 ; SSE41-NEXT: movdqa %xmm4, %xmm1
 ; SSE41-NEXT: retq
