Skip to content

[AMDGPU] Remove s_wakeup_barrier instruction #122277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,6 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
Expand Down
15 changes: 0 additions & 15 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,6 @@ void test_s_barrier_join(void *bar)
__builtin_amdgcn_s_barrier_join(bar);
}

// CHECK-LABEL: @test_s_wakeup_barrier(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT: ret void
//
// Passes a `void *` barrier argument straight to
// __builtin_amdgcn_s_wakeup_barrier. The CHECK lines preceding this function
// expect a call to @llvm.amdgcn.s.wakeup.barrier on that pointer after an
// addrspacecast to addrspace(3).
void test_s_wakeup_barrier(void *bar)
{
__builtin_amdgcn_s_wakeup_barrier(bar);
}

// CHECK-LABEL: @test_s_barrier_leave(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,6 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;

// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
// The %barrier argument must be uniform, otherwise behavior is undefined.
// Bound to the Clang builtin __builtin_amdgcn_s_wakeup_barrier; takes a single
// local_ptr_ty operand and produces no result.
def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;

// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2239,7 +2239,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_s_get_named_barrier_state:
return selectNamedBarrierInst(I, IntrinsicID);
case Intrinsic::amdgcn_s_get_barrier_state:
Expand Down Expand Up @@ -5839,8 +5838,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_IMM;
case Intrinsic::amdgcn_s_wakeup_barrier:
return AMDGPU::S_WAKEUP_BARRIER_IMM;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_IMM;
};
Expand All @@ -5850,8 +5847,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_M0;
case Intrinsic::amdgcn_s_wakeup_barrier:
return AMDGPU::S_WAKEUP_BARRIER_M0;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_M0;
};
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
case Intrinsic::amdgcn_s_barrier_wait:
case Intrinsic::amdgcn_s_barrier_leave:
case Intrinsic::amdgcn_s_get_barrier_state:
case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_wave_barrier:
case Intrinsic::amdgcn_sched_barrier:
case Intrinsic::amdgcn_sched_group_barrier:
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3304,7 +3304,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_init:
Expand Down Expand Up @@ -5272,7 +5271,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_init:
Expand Down
27 changes: 5 additions & 22 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10076,8 +10076,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
return SDValue(NewMI, 0);
}
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier: {
case Intrinsic::amdgcn_s_barrier_join: {
// this intrinsic has one operand: barrier pointer
SDValue Chain = Op->getOperand(0);
SmallVector<SDValue, 2> Ops;
Expand All @@ -10086,32 +10085,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,

if (isa<ConstantSDNode>(BarOp)) {
uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue();
switch (IntrinsicID) {
default:
return SDValue();
case Intrinsic::amdgcn_s_barrier_join:
Opc = AMDGPU::S_BARRIER_JOIN_IMM;
break;
case Intrinsic::amdgcn_s_wakeup_barrier:
Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
break;
}
Opc = AMDGPU::S_BARRIER_JOIN_IMM;

// extract the BarrierID from bits 4-9 of the immediate
unsigned BarID = (BarVal >> 4) & 0x3F;
SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
Ops.push_back(K);
Ops.push_back(Chain);
} else {
switch (IntrinsicID) {
default:
return SDValue();
case Intrinsic::amdgcn_s_barrier_join:
Opc = AMDGPU::S_BARRIER_JOIN_M0;
break;
case Intrinsic::amdgcn_s_wakeup_barrier:
Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
break;
}
Opc = AMDGPU::S_BARRIER_JOIN_M0;

// extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0]
SDValue M0Val;
M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
Expand Down
12 changes: 0 additions & 12 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,6 @@ def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
let isConvergent = 1;
}

// Pseudo for the `s_wakeup_barrier m0` form. It declares no explicit
// operands; the enclosing block (closed on the line after this def) sets
// Uses = [M0]. Scheduled as WriteBarrier and marked convergent.
def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
} // End Uses = [M0]

def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
Expand All @@ -514,11 +509,6 @@ def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
let isConvergent = 1;
}

// Pseudo for the immediate form of `s_wakeup_barrier`. It takes one
// SplitBarrier input operand ($src0) and has no outputs; the enclosing block
// (closed on the line after this def) sets has_sdst = 0. Scheduled as
// WriteBarrier and marked convergent.
def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
} // End has_sdst = 0

def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
Expand Down Expand Up @@ -2092,13 +2082,11 @@ defm S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_M0_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_M0 : SOP1_M0_Real_gfx12<0x050>;
defm S_BARRIER_INIT_M0 : SOP1_M0_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_M0 : SOP1_M0_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_M0 : SOP1_M0_Real_gfx12<0x057>;
defm S_BARRIER_SIGNAL_IMM : SOP1_IMM_Real_gfx12<0x04e>;
defm S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_IMM_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_IMM : SOP1_IMM_Real_gfx12<0x050>;
defm S_BARRIER_INIT_IMM : SOP1_IMM_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_IMM : SOP1_IMM_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_IMM : SOP1_IMM_Real_gfx12<0x057>;
defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;

//===----------------------------------------------------------------------===//
Expand Down
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/AMDGPU/s-barrier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_barrier_wait 1
; GFX12-SDAG-NEXT: s_barrier_leave
; GFX12-SDAG-NEXT: s_wakeup_barrier m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
; GFX12-SDAG-NEXT: s_wakeup_barrier m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
; GFX12-SDAG-NEXT: s_get_barrier_state s2, m0
Expand Down Expand Up @@ -176,8 +172,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-GISEL-NEXT: s_barrier_join m0
; GFX12-GISEL-NEXT: s_barrier_wait 1
; GFX12-GISEL-NEXT: s_barrier_leave
; GFX12-GISEL-NEXT: s_wakeup_barrier 2
; GFX12-GISEL-NEXT: s_wakeup_barrier m0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
Expand Down Expand Up @@ -218,8 +212,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
call void @llvm.amdgcn.s.barrier.leave(i16 1)
call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar)
call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %in)
%state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
%state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier()
Expand Down Expand Up @@ -295,7 +287,6 @@ declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
declare void @llvm.amdgcn.s.barrier.leave(i16) #1
declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1
declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1

Expand Down
9 changes: 0 additions & 9 deletions llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -726,15 +726,6 @@ s_barrier_join -2
s_barrier_join m0
// GFX12: encoding: [0x7d,0x52,0x80,0xbe]

s_wakeup_barrier 1
// GFX12: encoding: [0x81,0x57,0x80,0xbe]

s_wakeup_barrier -1
// GFX12: encoding: [0xc1,0x57,0x80,0xbe]

s_wakeup_barrier m0
// GFX12: encoding: [0x7d,0x57,0x80,0xbe]

s_get_barrier_state s3, -1
// GFX12: encoding: [0xc1,0x50,0x83,0xbe]

Expand Down
9 changes: 0 additions & 9 deletions llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -726,15 +726,6 @@
# GFX12: s_barrier_join m0 ; encoding: [0x7d,0x52,0x80,0xbe]
0x7d,0x52,0x80,0xbe

# GFX12: s_wakeup_barrier 1 ; encoding: [0x81,0x57,0x80,0xbe]
0x81,0x57,0x80,0xbe

# GFX12: s_wakeup_barrier -1 ; encoding: [0xc1,0x57,0x80,0xbe]
0xc1,0x57,0x80,0xbe

# GFX12: s_wakeup_barrier m0 ; encoding: [0x7d,0x57,0x80,0xbe]
0x7d,0x57,0x80,0xbe

# GFX12: s_get_barrier_state s3, -1 ; encoding: [0xc1,0x50,0x83,0xbe]
0xc1,0x50,0x83,0xbe

Expand Down
Loading