diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index eb18677780803..556eaaad7a84b 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -735,7 +735,9 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { case AMDGPU::V_ASHRREV_I16_e32: case AMDGPU::V_LSHLREV_B16_e32: case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_LSHRREV_B16_vop3_e64: case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_vop3_e64: case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -758,11 +760,13 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { break; if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_vop3_e64 || Opcode == AMDGPU::V_LSHLREV_B16_e64) return std::make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); return std::make_unique( Src1, Dst, BYTE_1, false, false, Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_vop3_e64 && Opcode != AMDGPU::V_LSHRREV_B16_e64); break; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0c7e20fc1ebf3..67fb68a3eee83 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -211,6 +211,10 @@ multiclass VOP2Inst_e64_t16 { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { defm NAME : VOP2Inst; + let SubtargetPredicate = isGFX10Only in { + def _vop3_e64 : VOP3InstBase , node, 1>, + Commutable_REV; + } } let SubtargetPredicate = UseRealTrue16Insts in { defm _t16 : VOP2Inst_e64, node, revOp#"_t16">; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 0252c4f1b0929..597202d47591d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1932,16 +1932,14 @@ defm V_DIV_FIXUP_F16 : defm V_ADD_NC_U16 : VOP3OpSel_Real_gfx10<0x303>; defm V_SUB_NC_U16 : VOP3OpSel_Real_gfx10<0x304>; -// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these -// (they do not support SDWA or DPP). -defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">; -defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">; -defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">; -defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16", "v_max_u16">; -defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16", "v_max_i16">; -defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16", "v_min_u16">; -defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16", "v_min_i16">; -defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16", "v_lshlrev_b16">; +defm V_MUL_LO_U16 : VOP3OpSel_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_vop3", "v_mul_lo_u16">; +defm V_LSHRREV_B16 : VOP3OpSel_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_vop3", "v_lshrrev_b16">; +defm V_ASHRREV_I16 : VOP3OpSel_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_vop3", "v_ashrrev_i16">; +defm V_MAX_U16 : VOP3OpSel_Real_gfx10_with_name<0x309, "V_MAX_U16_vop3", "v_max_u16">; +defm V_MAX_I16 : VOP3OpSel_Real_gfx10_with_name<0x30a, "V_MAX_I16_vop3", "v_max_i16">; +defm V_MIN_U16 : VOP3OpSel_Real_gfx10_with_name<0x30b, "V_MIN_U16_vop3", "v_min_u16">; +defm V_MIN_I16 : VOP3OpSel_Real_gfx10_with_name<0x30c, "V_MIN_I16_vop3", "v_min_i16">; +defm V_LSHLREV_B16 : VOP3OpSel_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_vop3", "v_lshlrev_b16">; defm V_PERMLANE16_B32 : VOP3OpSel_Real_gfx10<0x377>; defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index 4c3f4d9b06ed1..461021112cfef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -100,7 +100,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 @@ -193,7 +193,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 @@ -238,7 +238,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] @@ -292,7 +292,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec @@ -442,7 +442,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index 4769b5f77e3b2..c17b32d5c1676 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -98,7 +98,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 @@ -191,7 +191,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 @@ -236,7 +236,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] @@ -290,7 +290,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec @@ -440,7 +440,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir index 19143c52b3f43..db5490ac7b90c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s --- @@ -34,6 +34,15 @@ body: | ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] ; + ; GFX10-LABEL: name: smed3_s16_vvv + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] + ; ; GFX11-LABEL: name: smed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -88,6 +97,16 @@ body: | ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; + ; GFX10-LABEL: name: smed3_s16_vvv_multiuse0 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MAX_I16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_vop3_e64_]] + ; ; GFX11-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -143,6 +162,16 @@ body: | ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]] ; + ; GFX10-LABEL: name: smed3_s16_vvv_multiuse1 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MIN_I16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_vop3_e64_]] + ; ; GFX11-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -199,6 +228,17 @@ body: | ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; + ; GFX10-LABEL: name: smed3_s16_vvv_multiuse2 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MIN_I16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MAX_I16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_vop3_e64 0, [[V_MIN_I16_vop3_e64_]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_vop3_e64_]] + ; ; GFX11-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir index b7f48d34b8f96..c3dd6e8e521db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s --- @@ -34,6 +34,15 @@ body: | ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] ; + ; GFX10-LABEL: name: umed3_s16_vvv + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] + ; ; GFX11-LABEL: name: umed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -88,6 +97,16 @@ body: | ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; + ; GFX10-LABEL: name: umed3_s16_vvv_multiuse0 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MAX_U16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_vop3_e64_]] + ; ; GFX11-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -143,6 +162,16 @@ body: | ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]] ; + ; GFX10-LABEL: name: umed3_s16_vvv_multiuse1 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MIN_U16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_vop3_e64_]] + ; ; GFX11-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -199,6 +228,17 @@ body: | ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; + ; GFX10-LABEL: name: umed3_s16_vvv_multiuse2 + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[V_MIN_U16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_vop3_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MAX_U16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_vop3_e64 0, [[V_MIN_U16_vop3_e64_]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_vop3_e64_]] + ; ; GFX11-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index 73f164ed10df1..632b68fe80b2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -36,6 +36,7 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX9-LABEL: name: shl_s16_s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -45,6 +46,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX10-LABEL: name: shl_s16_s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} @@ -54,6 +56,7 @@ body: | ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX11-LABEL: name: shl_s16_s16_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} @@ -86,6 +89,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -93,13 +97,15 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-NEXT: [[V_LSHLREV_B16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_vop3_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -132,6 +138,7 @@ body: | ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX9-LABEL: name: shl_s16_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -140,6 +147,7 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX10-LABEL: name: shl_s16_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -148,6 +156,7 @@ body: | ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX11-LABEL: name: shl_s16_s32_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -179,6 +188,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -186,13 +196,15 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-NEXT: [[V_LSHLREV_B16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_vop3_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -224,6 +236,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -231,15 +244,17 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHLREV_B16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_e64_]], implicit $exec + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_vop3_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -276,6 +291,7 @@ body: | ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -285,18 +301,20 @@ body: | ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_LSHLREV_B16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_vop3_e64_]], implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX11-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -335,6 +353,7 @@ body: | ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX9-LABEL: name: shl_s16_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -343,6 +362,7 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX10-LABEL: name: shl_s16_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} @@ -351,6 +371,7 @@ body: | ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX11-LABEL: name: shl_s16_s32_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} @@ -382,6 +403,7 @@ body: | ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX9-LABEL: name: shl_s16_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -390,6 +412,7 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX10-LABEL: name: shl_s16_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -398,6 +421,7 @@ body: | ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX11-LABEL: name: shl_s16_s32_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -428,6 +452,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -435,13 +460,15 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-NEXT: [[V_LSHLREV_B16_vop3_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_vop3_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_vop3_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -473,6 +500,7 @@ body: | ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX9-LABEL: name: shl_s16_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -481,6 +509,7 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX10-LABEL: name: shl_s16_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -489,6 +518,7 @@ body: | ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; ; GFX11-LABEL: name: shl_s16_s32_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s index c151bf99b76c5..6bb0f4b1dff2d 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s @@ -8974,6 +8974,9 @@ v_mul_lo_u16 v5, v1, 0.5 v_mul_lo_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x05,0xd7,0x01,0xef,0x01,0x00] +v_mul_lo_u16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x05,0xd7,0x01,0x05,0x02,0x00] + v_lshrrev_b16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -9052,6 +9055,9 @@ v_lshrrev_b16 v5, v1, 0.5 v_lshrrev_b16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x07,0xd7,0x01,0xef,0x01,0x00] +v_lshrrev_b16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x07,0xd7,0x01,0x05,0x02,0x00] + v_ashrrev_i16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x08,0xd7,0x01,0x05,0x02,0x00] @@ -9130,6 +9136,9 @@ v_ashrrev_i16 v5, v1, 0.5 v_ashrrev_i16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x08,0xd7,0x01,0xef,0x01,0x00] +v_ashrrev_i16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x08,0xd7,0x01,0x05,0x02,0x00] + v_max_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] @@ -9208,6 +9217,9 @@ v_max_u16 v5, v1, 0.5 v_max_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x09,0xd7,0x01,0xef,0x01,0x00] +v_max_u16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x09,0xd7,0x01,0x05,0x02,0x00] + v_max_i16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] @@ -9286,6 +9298,9 @@ v_max_i16 v5, v1, 0.5 v_max_i16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x0a,0xd7,0x01,0xef,0x01,0x00] +v_max_i16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x0a,0xd7,0x01,0x05,0x02,0x00] + v_min_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] @@ -9364,6 +9379,9 @@ v_min_u16 v5, v1, 0.5 v_min_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x0b,0xd7,0x01,0xef,0x01,0x00] +v_min_u16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x0b,0xd7,0x01,0x05,0x02,0x00] + v_min_i16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] @@ -9442,6 +9460,9 @@ v_min_i16 v5, v1, 0.5 v_min_i16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x0c,0xd7,0x01,0xef,0x01,0x00] +v_min_i16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x0c,0xd7,0x01,0x05,0x02,0x00] + v_add_nc_i16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] @@ -10009,6 +10030,9 @@ v_lshlrev_b16 v5, v1, 0.5 v_lshlrev_b16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x14,0xd7,0x01,0xef,0x01,0x00] +v_lshlrev_b16 v5, v1, v2 op_sel:[1,1,1] +// GFX10: encoding: [0x05,0x58,0x14,0xd7,0x01,0x05,0x02,0x00] + v_mad_u16 v5, 0, v2, v3 // GFX10: encoding: [0x05,0x00,0x40,0xd7,0x80,0x04,0x0e,0x04] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt index 6da1423fe8278..721babdd64245 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -1503,6 +1503,9 @@ # GFX10: v_ashrrev_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_ashrrev_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x08,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x08,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_ashrrev_i32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x18,0xd5,0x01,0x05,0x02,0x00 @@ -8309,6 +8312,9 @@ # GFX10: v_lshlrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_lshlrev_b16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x14,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x14,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_lshlrev_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00 @@ -8537,6 +8543,9 @@ # GFX10: v_lshrrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_lshrrev_b16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x07,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x07,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_lshrrev_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x16,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x16,0xd5,0x01,0x05,0x02,0x00 @@ -11292,6 +11301,9 @@ # GFX10: v_max_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_max_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x0a,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_max_i32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x12,0xd5,0x01,0x05,0x02,0x00 @@ -11448,6 +11460,9 @@ # GFX10: v_max_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_max_u16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x09,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x09,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_max_u32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x14,0xd5,0x01,0x05,0x02,0x00 @@ -13728,6 +13743,9 @@ # GFX10: v_min_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_min_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x0c,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_min_i32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x11,0xd5,0x01,0x05,0x02,0x00 @@ -13884,6 +13902,9 @@ # GFX10: v_min_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_min_u16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x0b,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_min_u32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x13,0xd5,0x01,0x05,0x02,0x00 @@ -15228,6 +15249,9 @@ # GFX10: v_mul_lo_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_mul_lo_u16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x05,0xd7,0x01,0x05,0x02,0x00] +0x05,0x58,0x05,0xd7,0x01,0x05,0x02,0x00 + # GFX10: v_mul_lo_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x69,0xd5,0x01,0x05,0x02,0x00] 0xff,0x00,0x69,0xd5,0x01,0x05,0x02,0x00