diff --git a/llvm/lib/Target/AIE/AIELegalizerHelper.cpp b/llvm/lib/Target/AIE/AIELegalizerHelper.cpp index 24ef97844444..ddb9d58cbf29 100644 --- a/llvm/lib/Target/AIE/AIELegalizerHelper.cpp +++ b/llvm/lib/Target/AIE/AIELegalizerHelper.cpp @@ -193,8 +193,8 @@ bool AIELegalizerHelper::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper, assert((EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64) && "non-existent integer size"); - assert(DstVecSize == 32 || (DstVecSize > 64 && DstVecSize <= 1024 && - "non-native vectors are not supported")); + assert(DstVecSize >= 32 && DstVecSize <= 1024 && + "non-native vectors are not supported"); assert(DstVecSize < 1024 && "vadd takes a 512-bit argument"); // If our vector is 32-bit we can store it as packed integer vector @@ -263,6 +263,16 @@ bool AIELegalizerHelper::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper, : MIRBuilder.buildBitcast(VecTy, Vec512Reg).getReg(0); MIRBuilder.buildInstr(II->getGenericUnpadVectorOpcode(), {DstReg}, {NewSrc2}); + } else if (DstVecSize == 64) { + // E.G. 
AIE2 doesn't have a 64-bit extract vector element instruction +    assert(ST.isAIE2P() && "Only AIE2P is supported for now"); +    Register Zero = MIRBuilder.buildConstant(S32, 0).getReg(0); +    Register DstTemp = MRI.createGenericVirtualRegister(S64); +    Register NewSrc = MIRBuilder.buildBitcast(V8S64, Src).getReg(0); + +    MIRBuilder.buildInstr(II->getGenericExtractVectorEltOpcode(true), {DstTemp}, +                          {NewSrc, Zero}); +    MIRBuilder.buildBitcast(DstReg, DstTemp); } MI.eraseFromParent(); diff --git a/llvm/lib/Target/AIE/AIELegalizerHelper.h b/llvm/lib/Target/AIE/AIELegalizerHelper.h index 8951a3609cc7..9a5ab6fd6930 100644 --- a/llvm/lib/Target/AIE/AIELegalizerHelper.h +++ b/llvm/lib/Target/AIE/AIELegalizerHelper.h @@ -39,6 +39,8 @@ class AIELegalizerHelper { const LLT V32FP32 = LLT::fixed_vector(32, 32); const LLT V32ACC32 = LLT::fixed_vector(32, 32); const LLT V64FP32 = LLT::fixed_vector(64, 32); + const LLT S64 = LLT::scalar(64); + const LLT V8S64 = LLT::fixed_vector(8, 64); public: AIELegalizerHelper(const AIEBaseSubtarget &ST); diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp index 5ea47376c667..d442c0133b23 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp @@ -65,11 +65,11 @@ isValidVectorMergeUnmergeOp(const unsigned BigVectorId, }; } -static LegalityPredicate isValidVectorAIEP(const unsigned TypeIdx) { +static LegalityPredicate isValidVectorAIE2P(const unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT DstTy = Query.Types[TypeIdx]; const unsigned DstSize = DstTy.getSizeInBits(); - return DstTy.isVector() && (DstSize == 32 || DstSize > 64); + return DstTy.isVector() && DstSize >= 32; }; } @@ -605,7 +605,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) .customIf(typeInSet(0, {V2S8, S16})); const LegalityPredicate IsNotValidDestinationVector = 
negatePredicate(isValidVectorAIE2P(0)); getActionDefinitionsBuilder(G_MERGE_VALUES).legalFor({{S64, S32}}); getActionDefinitionsBuilder(G_UNMERGE_VALUES) diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-build-vector.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-build-vector.mir index 1c16e907097f..7e475d1ca956 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-build-vector.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-build-vector.mir @@ -180,3 +180,40 @@ body: | $bmll0 = COPY %1(<8 x s64>) PseudoRET implicit $lr, implicit $bmll0 ... + + +--- +name: test_build_vector_v8s8 +stack: + - {id: 0, name: "", type: default, offset: 0, size: 128, alignment: 32} +body: | + bb.1.entry: + ; CHECK-LABEL: name: test_build_vector_v8s8 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[DEF]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[COPY4]](s32) + ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[COPY5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s64) + ; CHECK-NEXT: $l0 = COPY [[BITCAST1]](<8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $l0 + %2:_(s8) = G_IMPLICIT_DEF + %1:_(<8 x s8>) = G_BUILD_VECTOR %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8) + $l0 = COPY %1(<8 x s8>) + PseudoRET implicit $lr, implicit $l0 +... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-shuffle-vector-64.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-shuffle-vector-64.mir index cea46ce4be0f..e9ba58c84bda 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-shuffle-vector-64.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-shuffle-vector-64.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s --- @@ -24,3 +24,72 @@ body: | %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %1(<16 x s32>), shufflemask(14, 15) PseudoRET implicit $lr, implicit %2 ... 
+ +--- +name: test_v8s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY2]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY3]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<64 x s8>), [[C1]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 8 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY4]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<64 x s8>), [[C2]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT2:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT2]], 8 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY5]](<32 x s8>) + ; CHECK-NEXT: 
[[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<64 x s8>), [[C3]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT3:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT3]], 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY6]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS4]](<64 x s8>), [[C4]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT4:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT4]], 8 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY7]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS5]](<64 x s8>), [[C5]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT5:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT5]], 8 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY8]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS6]](<64 x s8>), [[C6]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT6:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT6]], 8 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[CONCAT_VECTORS7:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS7]](<64 x s8>), 
[[C7]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT7:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT7]], 8 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[ASSERT_SEXT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[ASSERT_SEXT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[ASSERT_SEXT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[ASSERT_SEXT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[ASSERT_SEXT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[ASSERT_SEXT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[ASSERT_SEXT7]](s32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C8]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT8]](s64) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST1]](<8 x s8>) + %0:_(<32 x s8>) = COPY $wh0 + %143:_(<32 x s8>) = COPY $wh0 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %143:_(<32 x s8>), %143:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31) + PseudoRET implicit $lr, implicit %2 +... 
+ diff --git a/llvm/test/CodeGen/AIE/aie2p/buildvector.ll b/llvm/test/CodeGen/AIE/aie2p/buildvector.ll index 15f549863845..66f1ef2cd282 100644 --- a/llvm/test/CodeGen/AIE/aie2p/buildvector.ll +++ b/llvm/test/CodeGen/AIE/aie2p/buildvector.ll @@ -83,3 +83,23 @@ entry: ret void } +define void @test_buildvector_64bits() { +; CHECK-LABEL: test_buildvector_64bits: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova r0, #777; nopx +; CHECK-NEXT: vpush.hi.16 x0, x0, r0 +; CHECK-NEXT: vpush.hi.16 x0, x0, r0 +; CHECK-NEXT: vpush.hi.16 x0, x0, r0 +; CHECK-NEXT: vpush.hi.16 x0, x0, r0 +; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 +; CHECK-NEXT: ret lr +; CHECK-NEXT: mova p0, #0 // Delay Slot 5 +; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 4 +; CHECK-NEXT: mova p0, #4 // Delay Slot 3 +; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + store <4 x i16> <i16 777, i16 777, i16 777, i16 777>, ptr addrspace(6) null, align 32 + ret void +}