Skip to content

[AMDGPU] Do not allow M0 as v_readlane_b32 dst #128867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4997,7 +4997,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
Register NewActiveBitsReg = MRI.createVirtualRegister(WaveMaskRegClass);

Register FF1Reg = MRI.createVirtualRegister(DstRegClass);
Register LaneValueReg = MRI.createVirtualRegister(DstRegClass);
Register LaneValueReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

bool IsWave32 = ST.isWave32();
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {

case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
&AMDGPU::SReg_32_XM0RegClass);
break;

case AMDGPU::V_MOV_B64_PSEUDO: {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2202,6 +2202,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
// Don't need to write VGPR out.
}

MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();

// Restore clobbered registers in the specified restore block.
MI = RestoreMBB.end();
SB.setMI(&RestoreMBB, MI);
Expand All @@ -2216,6 +2218,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
SB.NumSubRegs == 1
? SB.SuperReg
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
SubReg)
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
}

def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
let Outs32 = (outs SReg_32:$vdst);
let Outs32 = (outs SReg_32_XM0:$vdst);
let Outs64 = Outs32;
let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
let Ins64 = Ins32;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX908-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
; GFX908-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
; GFX908-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down Expand Up @@ -175,7 +175,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down Expand Up @@ -247,7 +247,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down Expand Up @@ -188,7 +188,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down Expand Up @@ -274,11 +274,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]]
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]]
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]]
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]]
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]]
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]]
; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY14]], implicit $exec
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX908-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; GFX908-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
Expand Down Expand Up @@ -136,7 +136,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX90A_GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX90A_GFX942-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
; GFX90A_GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; GFX90A_GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX90A-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
Expand Down Expand Up @@ -159,7 +159,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
; GFX942-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
Expand Down Expand Up @@ -239,11 +239,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_2]]
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_2]]
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed [[V_READLANE_B32_]], killed [[S_MOV_B32_3]], [[V_MOV_B32_dpp5]]
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 31
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_4]]
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_4]]
; GFX11-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_1]], implicit $exec
; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_LO_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2
; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2
; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]]
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sgpr_32 = S_MAX_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc
; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]]
; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2
; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2
; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]]
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sgpr_32 = S_MIN_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc
; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]]
; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ body: |
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]]
; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY8]]
; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY7]], [[V_READLANE_B32_]], implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 1, [[S_FF1_I32_B32_]], implicit-def dead $scc
; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[COPY8]], [[S_LSHL_B32_]], implicit-def dead $scc
Expand Down Expand Up @@ -311,7 +311,7 @@ body: |
%2:sreg_32 = COPY killed %25
%3:sreg_32 = COPY killed %26
%14:sreg_32 = S_FF1_I32_B32 %3
%15:sreg_32 = V_READLANE_B32 %8, %14
%15:sreg_32_xm0 = V_READLANE_B32 %8, %14
%4:sreg_32 = S_ADD_I32 killed %2, killed %15, implicit-def dead $scc
%17:sreg_32 = S_LSHL_B32 1, killed %14, implicit-def dead $scc
%5:sreg_32 = S_ANDN2_B32 killed %3, killed %17, implicit-def dead $scc
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/wqm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ body: |
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
%12:sreg_32_xm0 = V_READLANE_B32 %11:vgpr_32, 63
early-clobber %13:vgpr_32 = STRICT_WWM %9:vgpr_32, implicit $exec

BUFFER_STORE_DWORD_OFFSET_exact killed %13, %4, %5, 4, 0, 0, implicit $exec
Expand Down