-
Notifications
You must be signed in to change notification settings - Fork 24
[AIE2P] Support all 64-bit-destination G_BUILD_VECTOR in legalizer #467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,11 +65,11 @@ isValidVectorMergeUnmergeOp(const unsigned BigVectorId, | |
}; | ||
} | ||
|
||
static LegalityPredicate isValidVectorAIEP(const unsigned TypeIdx) { | ||
static LegalityPredicate isValidVectorAIE2P(const unsigned TypeIdx) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is used for other opcodes as well; we should add tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't actually like the name. We were excluding 64-bit vectors as invalid AIE2P vectors when it was just a question of missing support. For instance, we have legal vector types for all those sizes from 32 bits upwards. |
||
return [=](const LegalityQuery &Query) { | ||
const LLT DstTy = Query.Types[TypeIdx]; | ||
const unsigned DstSize = DstTy.getSizeInBits(); | ||
return DstTy.isVector() && (DstSize == 32 || DstSize > 64); | ||
return DstTy.isVector() && DstSize >= 32; | ||
}; | ||
} | ||
|
||
|
@@ -605,7 +605,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) | |
.customIf(typeInSet(0, {V2S8, S16})); | ||
|
||
const LegalityPredicate IsNotValidDestinationVector = | ||
negatePredicate(isValidVectorAIEP(0)); | ||
negatePredicate(isValidVectorAIE2P(0)); | ||
|
||
getActionDefinitionsBuilder(G_MERGE_VALUES).legalFor({{S64, S32}}); | ||
getActionDefinitionsBuilder(G_UNMERGE_VALUES) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,3 +180,40 @@ body: | | |
$bmll0 = COPY %1(<8 x s64>) | ||
PseudoRET implicit $lr, implicit $bmll0 | ||
... | ||
|
||
|
||
--- | ||
name: test_build_vector_v8s8 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please include legalizer tests for the 16-bit and 32-bit element type cases. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should already have the one for v2s32, since it was already legal before my change. I already added the test for 4x16 in the end-to-end test buildvector.ll, but I can add it here as well if needed. |
||
stack: | ||
- {id: 0, name: "", type: default, offset: 0, size: 128, alignment: 32} | ||
body: | | ||
bb.1.entry: | ||
; CHECK-LABEL: name: test_build_vector_v8s8 | ||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF | ||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[DEF]](s32) | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[COPY]](s32) | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[COPY1]](s32) | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[COPY2]](s32) | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[COPY3]](s32) | ||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[COPY4]](s32) | ||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[COPY5]](s32) | ||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[COPY6]](s32) | ||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 | ||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C]](s32) | ||
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s64) | ||
; CHECK-NEXT: $l0 = COPY [[BITCAST1]](<8 x s8>) | ||
; CHECK-NEXT: PseudoRET implicit $lr, implicit $l0 | ||
%2:_(s8) = G_IMPLICIT_DEF | ||
%1:_(<8 x s8>) = G_BUILD_VECTOR %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8) | ||
$l0 = COPY %1(<8 x s8>) | ||
PseudoRET implicit $lr, implicit $l0 | ||
... |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates | ||
# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates | ||
# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s | ||
|
||
--- | ||
|
@@ -24,3 +24,72 @@ body: | | |
%2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %1(<16 x s32>), shufflemask(14, 15) | ||
PseudoRET implicit $lr, implicit %2 | ||
... | ||
|
||
--- | ||
name: test_v8s8 | ||
body: | | ||
bb.0: | ||
|
||
; CHECK-LABEL: name: test_v8s8 | ||
; CHECK: [[COPY:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 | ||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 | ||
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY2]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<64 x s8>), [[C]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8 | ||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY3]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<64 x s8>), [[C1]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 8 | ||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 | ||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY4]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<64 x s8>), [[C2]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT2:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT2]], 8 | ||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 | ||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY5]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<64 x s8>), [[C3]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT3:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT3]], 8 | ||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 | ||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY6]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS4]](<64 x s8>), [[C4]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT4:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT4]], 8 | ||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 | ||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY7]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS5]](<64 x s8>), [[C5]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT5:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT5]], 8 | ||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 | ||
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY8]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS6]](<64 x s8>), [[C6]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT6:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT6]], 8 | ||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 | ||
; CHECK-NEXT: [[CONCAT_VECTORS7:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[DEF]](<32 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS7]](<64 x s8>), [[C7]](s32) | ||
; CHECK-NEXT: [[ASSERT_SEXT7:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT7]], 8 | ||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[ASSERT_SEXT]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[ASSERT_SEXT1]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[ASSERT_SEXT2]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[ASSERT_SEXT3]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[ASSERT_SEXT4]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[ASSERT_SEXT5]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[ASSERT_SEXT6]](s32) | ||
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[ASSERT_SEXT7]](s32) | ||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 | ||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) | ||
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C8]](s32) | ||
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT8]](s64) | ||
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST1]](<8 x s8>) | ||
%0:_(<32 x s8>) = COPY $wh0 | ||
%143:_(<32 x s8>) = COPY $wh0 | ||
%2:_(<8 x s8>) = G_SHUFFLE_VECTOR %143:_(<32 x s8>), %143:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: maybe use a mask that cannot be optimized by the pre-legalizer combiner in a real scenario. For example There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, and we can keep both. We should be able to combine this later. |
||
PseudoRET implicit $lr, implicit %2 | ||
... | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,3 +83,23 @@ entry: | |
ret void | ||
} | ||
|
||
define void @test_buildvector_64bits() { | ||
; CHECK-LABEL: test_buildvector_64bits: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: mova r0, #777; nopx | ||
; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Super follow-up: can we combine this as a VBCST.16 + VEXTRACT.64? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I used that pattern on purpose. |
||
; CHECK-NEXT: ret lr | ||
; CHECK-NEXT: mova p0, #0 // Delay Slot 5 | ||
; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 4 | ||
; CHECK-NEXT: mova p0, #4 // Delay Slot 3 | ||
; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 2 | ||
; CHECK-NEXT: nop // Delay Slot 1 | ||
entry: | ||
store <4 x i16> <i16 777, i16 777, i16 777, i16 777>, ptr addrspace(6) null, align 32 | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am starting to feel like we are overusing vector registers; maybe we could consider keeping it in the scalar registers instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I already tried the pack approach using shifts, but it needed more instructions than passing through the 512-bit vector. I don't mind either approach; both are pretty expensive without any pattern to combine.