-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c) #127210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
[DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c): Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector. I've limited this to targets that just have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now, although we can probably always perform this fold before LegalOperations.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-powerpc. Author: Simon Pilgrim (RKSimon). Changes: Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector. I've limited this to targets that have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now, although we can probably always perform this fold before LegalOperations. Full diff: https://github.com/llvm/llvm-project/pull/127210.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c6fd72b6b76f4..82c4cbf793ee7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -626,6 +626,7 @@ namespace {
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
+ SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
@@ -26102,8 +26103,7 @@ static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
-static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
- SelectionDAG &DAG) {
+SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
@@ -26126,7 +26126,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// Now see if we can access that element as a scalar via a real insert element
// instruction.
// TODO: We can try harder to locate the element as a scalar. Examples: it
- // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ // could be an operand of BUILD_VECTOR, or a constant.
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
"Shuffle mask value must be from operand 0");
@@ -26149,6 +26149,16 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Elt, NewInsIndex);
}
+ if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
+ return SDValue();
+
+ if (sd_match(Op0, m_UnaryOp(ISD::SCALAR_TO_VECTOR, m_Value(Elt))) &&
+ Mask[ShufOp0Index] == 0) {
+ SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Elt, NewInsIndex);
+ }
+
return SDValue();
}
@@ -26220,7 +26230,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
- if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN))
return InsElt;
// A shuffle of a single vector that is a splatted value can always be folded.
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 402a4f34e62b2..d98b78dfdd3b0 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -239,13 +239,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: li r3, 0
-; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1
+; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm v2, v2, vs0
; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
; CHECK-LE-P9-NEXT: blr
;
@@ -263,14 +260,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: li r3, 0
-; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-BE-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-BE-P9-NEXT: stxv vs0, 0(r5)
+; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
@@ -286,13 +280,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: li r4, 0
-; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
-; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -308,13 +299,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
-; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = extractelement <2 x i32> %vec, i64 0
|
ping? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
[DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c) (llvm#127210): Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector. I've limited this to targets that have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now, although we can probably always perform this fold before LegalOperations.
Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector.
I've limited this to targets that have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now - although we can probably always fold this before LegalOperations.