Skip to content

Commit 116ca8f

Browse files
SC llvm team
authored and committed
Merged main:71d6b0b0c1e5 into amd-gfx:5fe77763bcaf
Local branch amd-gfx 5fe7776 Merged main:40e734e041fa into amd-gfx:709a102901c2 Remote branch main 71d6b0b [AArch64][GlobalISel] Lower shuffle vector with scalar destinations. (llvm#121384)
2 parents 5fe7776 + 71d6b0b commit 116ca8f

File tree

10 files changed

+433
-38
lines changed

10 files changed

+433
-38
lines changed

compiler-rt/test/builtins/Unit/truncxfhf2_test.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,17 @@ int main() {
4848

4949
// Positive infinity
5050
if (test_truncxfhf2(UINT16_C(0x7fff), UINT64_C(0x8000000000000000),
51-
UINT16_C(0x7c00U)))
51+
UINT16_C(0x7c00)))
5252
return 1;
5353

5454
// Negative infinity
5555
if (test_truncxfhf2(UINT16_C(0xffff), UINT64_C(0x8000000000000000),
56-
UINT16_C(0xfc00U)))
56+
UINT16_C(0xfc00)))
5757
return 1;
5858

5959
// NaN
6060
if (test_truncxfhf2(UINT16_C(0x7fff), UINT64_C(0xc000000000000000),
61-
UINT16_C(0x7e00U)))
61+
UINT16_C(0x7e00)))
6262
return 1;
6363

6464
return 0;

lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,17 @@ def read_register_values(self, reg_infos, endian, thread_id=None):
14101410
p_response = context.get("p_response")
14111411
self.assertIsNotNone(p_response)
14121412
self.assertTrue(len(p_response) > 0)
1413-
self.assertFalse(p_response[0] == "E")
1413+
1414+
# on x86 Darwin, 4 GPR registers are often
1415+
# unavailable, this is expected and correct.
1416+
if (
1417+
self.getArchitecture() == "x86_64"
1418+
and self.platformIsDarwin()
1419+
and p_response[0] == "E"
1420+
):
1421+
values[reg_index] = 0
1422+
else:
1423+
self.assertFalse(p_response[0] == "E")
14141424

14151425
values[reg_index] = unpack_register_hex_unsigned(endian, p_response)
14161426

lldb/test/API/commands/register/register/register_command/TestRegisters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ def test_register_commands(self):
5858
# could not be read. This is expected.
5959
error_str_matched = True
6060

61+
if self.getArchitecture() == "x86_64" and self.platformIsDarwin():
62+
# debugserver on x86 will provide ds/es/ss/gsbase when the
63+
# kernel provides them, but most of the time they will be
64+
# unavailable. So "register read -a" will report that
65+
# 4 registers were unavailable; this is expected.
66+
error_str_matched = True
67+
6168
self.expect(
6269
"register read -a",
6370
MISSING_EXPECTED_REGISTERS,

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 522552
19+
#define LLVM_MAIN_REVISION 522558
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -979,26 +979,24 @@ defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b001100
979979
defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
980980
defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
981981
defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
982-
983-
} // [HasSME2, HasFP8]
982+
}
984983

985984
let Predicates = [HasSME2, HasFAMINMAX] in {
986985
defm FAMAX_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famax", 0b0010100>;
987986
defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
988987

989988
defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
990989
defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
991-
} //[HasSME2, HasFAMINMAX]
992-
990+
}
993991

994992
let Predicates = [HasSME_LUTv2] in {
995993
defm MOVT_TIZ : sme2_movt_zt_to_zt<"movt", 0b0011111, int_aarch64_sme_write_lane_zt, int_aarch64_sme_write_zt>;
996994
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
997-
} //[HasSME_LUTv2]
995+
}
998996

999997
let Predicates = [HasSME2p1, HasSME_LUTv2] in {
1000998
def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
1001-
} //[HasSME2p1, HasSME_LUTv2]
999+
}
10021000

10031001
let Predicates = [HasSMEF8F16] in {
10041002
defm FVDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fvdot", 0b110, int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2>;
@@ -1014,17 +1012,15 @@ defm FMLAL_MZZI_BtoH : sme2_fp8_fmlal_index_za16<"fmlal", int_aarch64_
10141012
defm FMLAL_VG2_M2ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx2<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2>;
10151013
defm FMLAL_VG4_M4ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx4<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4>;
10161014

1017-
// FP8 FMLAL (single)
10181015
defm FMLAL_VG2_MZZ_BtoH : sme2_fp8_fmlal_single_za16<"fmlal", int_aarch64_sme_fp8_fmlal_single_za16_vg2x1>;
1019-
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x2, [FPMR, FPCR]>;
1016+
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x2, [FPMR, FPCR]>;
10201017
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x4, [FPMR, FPCR]>;
10211018

1022-
// FP8 FMLALL (multi)
10231019
defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlal_multi_za16_vg2x2, [FPMR, FPCR]>;
10241020
defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlal_multi_za16_vg2x4, [FPMR, FPCR]>;
10251021

10261022
defm FMOPA_MPPZZ_BtoH : sme2_fp8_fmopa_za16<"fmopa", int_aarch64_sme_fp8_fmopa_za16>;
1027-
} //[HasSMEF8F16]
1023+
}
10281024

10291025
let Predicates = [HasSMEF8F32] in {
10301026
defm FDOT_VG2_M2ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x2<"fdot", int_aarch64_sme_fp8_fdot_lane_za32_vg1x2>;
@@ -1042,17 +1038,15 @@ defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b0
10421038
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2, [FPMR, FPCR]>;
10431039
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4, [FPMR, FPCR]>;
10441040

1045-
// FP8 FMLALL (single)
10461041
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x1, [FPMR, FPCR]>;
10471042
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x2, [FPMR, FPCR]>;
10481043
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x4, [FPMR, FPCR]>;
10491044

1050-
// FP8 FMLALL (multi)
10511045
defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlall_multi_za32_vg4x2, [FPMR, FPCR]>;
10521046
defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlall_multi_za32_vg4x4, [FPMR, FPCR]>;
10531047

10541048
defm FMOPA_MPPZZ_BtoS : sme2_fp8_fmopa_za32<"fmopa", int_aarch64_sme_fp8_fmopa_za32>;
1055-
} //[HasSMEF8F32]
1049+
}
10561050

10571051
let Predicates = [HasSME2, HasSVEBFSCALE] in {
10581052
defm BFSCALE : sme2_bfscale_single<"bfscale">;
@@ -1077,31 +1071,31 @@ let Predicates = [HasSME2p2] in {
10771071

10781072
defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">;
10791073
defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">;
1080-
} // [HasSME2p2]
1074+
}
10811075

10821076
let Predicates = [HasSME2p2, HasSMEB16B16] in {
10831077
def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">;
1084-
} // [HasSME2p2, HasSMEB16B16]
1078+
}
10851079

10861080
let Predicates = [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR] in {
10871081
def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">;
1088-
} // [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR]
1082+
}
10891083

10901084
let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
10911085
def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
10921086
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">;
1093-
} // [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR]
1087+
}
10941088

10951089
let Predicates = [HasSME2p2, HasSMEF16F16] in {
10961090
def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
10971091
defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">;
10981092
defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">;
1099-
} // [HasSME2p2, HasSMEF16F16]
1093+
}
11001094

11011095
let Predicates = [HasSME2, HasSVEBFSCALE] in {
11021096
defm BFMUL : sme2_bfmul_single<"bfmul">;
11031097
defm BFMUL : sme2_bfmul_multi<"bfmul">;
1104-
} //[HasSME2, HasSVEBFSCALE]
1098+
}
11051099

11061100
let Uses = [FPMR, FPCR] in {
11071101
let Predicates = [HasSME2p2, HasSMEF8F32] in {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,10 +1062,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10621062
return llvm::is_contained(
10631063
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
10641064
})
1065-
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1066-
// just want those lowered into G_BUILD_VECTOR
1065+
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
1066+
// destinations, we just want those lowered into G_BUILD_VECTOR or
1067+
// G_EXTRACT_VECTOR_ELT.
10671068
.lowerIf([=](const LegalityQuery &Query) {
1068-
return !Query.Types[1].isVector();
1069+
return !Query.Types[0].isVector() || !Query.Types[1].isVector();
10691070
})
10701071
.moreElementsIf(
10711072
[](const LegalityQuery &Query) {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple aarch64 -O0 -global-isel -o - %s | FileCheck %s
3+
4+
define <1 x i1> @shuffle_extract_4(<8 x i1> %a, <8 x i1> %b) {
5+
; CHECK-LABEL: shuffle_extract_4:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
8+
; CHECK-NEXT: umov w8, v0.h[4]
9+
; CHECK-NEXT: and w0, w8, #0x1
10+
; CHECK-NEXT: ret
11+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 4>
12+
ret <1 x i1> %extractvec60
13+
}
14+
15+
define <1 x i1> @shuffle_extract_12(<8 x i1> %a, <8 x i1> %b) {
16+
; CHECK-LABEL: shuffle_extract_12:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: ushll v0.8h, v1.8b, #0
19+
; CHECK-NEXT: umov w8, v0.h[4]
20+
; CHECK-NEXT: and w0, w8, #0x1
21+
; CHECK-NEXT: ret
22+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
23+
ret <1 x i1> %extractvec60
24+
}
25+
26+
define <1 x i1> @shuffle_extract_p(<8 x i1> %a, <8 x i1> %b) {
27+
; CHECK-LABEL: shuffle_extract_p:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: // implicit-def: $w8
30+
; CHECK-NEXT: and w0, w8, #0x1
31+
; CHECK-NEXT: ret
32+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 poison>
33+
ret <1 x i1> %extractvec60
34+
}
35+
36+
define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
37+
; CHECK-LABEL: shufflevector_v1i32:
38+
; CHECK: // %bb.0:
39+
; CHECK-NEXT: fmov d0, d1
40+
; CHECK-NEXT: ret
41+
%c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
42+
ret <1 x i32> %c
43+
}

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,3 +618,32 @@ body: |
618618
RET_ReallyLR implicit $q0
619619
620620
...
621+
---
622+
name: shuffle_v8i1_v1i8
623+
alignment: 4
624+
tracksRegLiveness: true
625+
body: |
626+
bb.1:
627+
liveins: $d0, $d1
628+
; CHECK-LABEL: name: shuffle_v8i1_v1i8
629+
; CHECK: liveins: $d0, $d1
630+
; CHECK-NEXT: {{ $}}
631+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d1
632+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
633+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[COPY]](<8 x s8>)
634+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ANYEXT]](<8 x s16>), [[C]](s64)
635+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
636+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
637+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
638+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
639+
; CHECK-NEXT: RET_ReallyLR implicit $w0
640+
%2:_(<8 x s8>) = COPY $d0
641+
%0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
642+
%3:_(<8 x s8>) = COPY $d1
643+
%1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
644+
%4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
645+
%5:_(s8) = G_ZEXT %4:_(s1)
646+
%6:_(s32) = G_ANYEXT %5:_(s8)
647+
$w0 = COPY %6:_(s32)
648+
RET_ReallyLR implicit $w0
649+
...

0 commit comments

Comments
 (0)