diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c146e1e6c0334..8ca247e5e1b78 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9482,16 +9482,19 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
   unsigned NumZero = 0;
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
+  SmallSet<SDValue, 4> Undefs;
   for (unsigned i = 0; i != NumOperands; ++i) {
     SDValue SubVec = Op.getOperand(i);
     if (SubVec.isUndef())
       continue;
     if (ISD::isFreezeUndef(SubVec.getNode())) {
       // If the freeze(undef) has multiple uses then we must fold to zero.
-      if (SubVec.hasOneUse())
+      if (SubVec.hasOneUse()) {
         ++NumFreezeUndef;
-      else
+      } else {
         ++NumZero;
+        Undefs.insert(SubVec);
+      }
     }
     else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
       ++NumZero;
@@ -9518,6 +9521,11 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
                         : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
                                           : DAG.getUNDEF(ResVT));
 
+  // Replace multi-use freeze(undef) operands with zero vectors.
+  for (SDValue U : Undefs)
+    DAG.ReplaceAllUsesWith(
+        U, getZeroVector(U.getSimpleValueType(), Subtarget, DAG, dl));
+
   MVT SubVT = Op.getOperand(0).getSimpleValueType();
   unsigned NumSubElems = SubVT.getVectorNumElements();
   for (unsigned i = 0; i != NumOperands; ++i) {
diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll
index 44ab33ad67f27..70b3b99b46ce9 100644
--- a/llvm/test/CodeGen/X86/avx2-arith.ll
+++ b/llvm/test/CodeGen/X86/avx2-arith.ll
@@ -260,3 +260,31 @@ define <4 x i32> @mul_const11(<4 x i32> %x) {
   %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
   ret <4 x i32> %m
 }
+
+; Check that we zero both vectors.
+define void @multi_freeze(<2 x double> %x, <2 x double> %y) nounwind {
+; X86-LABEL: multi_freeze:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovaps %xmm0, %xmm0
+; X86-NEXT:    vmovaps %xmm1, %xmm1
+; X86-NEXT:    calll foo@PLT
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: multi_freeze:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    vmovaps %xmm0, %xmm0
+; X64-NEXT:    vmovaps %xmm1, %xmm1
+; X64-NEXT:    callq foo@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %1 = freeze <2 x double> poison
+  %2 = shufflevector <2 x double> %x, <2 x double> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = shufflevector <2 x double> %y, <2 x double> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  call void @foo(<4 x double> %2, <4 x double> %3)
+  ret void
+}
+
+declare void @foo(<4 x double>, <4 x double>)