diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index a99fd25477553..52d00485385c2 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -669,6 +669,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
           if (!MO.isReg() || !MO.getReg().isVirtual())
             continue;
           const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
+          if (SrcRC == &AMDGPU::VReg_1RegClass)
+            continue;
+
           if (TRI->hasVectorRegisters(SrcRC)) {
             const TargetRegisterClass *DestRC =
                 TRI->getEquivalentSGPRClass(SrcRC);
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 757458363284c..0a420396f52a9 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -121,6 +121,68 @@ endloop: ; preds = %if1, %Flow2
   ret void
 }
 
+define amdgpu_ps void @i1_copy_assert(i1 %v4) {
+; ISA-LABEL: i1_copy_assert:
+; ISA: ; %bb.0: ; %start
+; ISA-NEXT: v_and_b32_e32 v0, 1, v0
+; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; ISA-NEXT: s_mov_b32 s8, 0
+; ISA-NEXT: s_mov_b64 s[0:1], 0
+; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
+; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
+; ISA-NEXT: s_branch .LBB1_3
+; ISA-NEXT: .LBB1_1: ; %endif1
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_and_b64 s[8:9], vcc, exec
+; ISA-NEXT: s_mov_b64 s[6:7], 0
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; ISA-NEXT: .LBB1_2: ; %Flow
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_and_b64 s[8:9], exec, s[4:5]
+; ISA-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
+; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; ISA-NEXT: s_mov_b32 s8, 1
+; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
+; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; ISA-NEXT: s_cbranch_execz .LBB1_5
+; ISA-NEXT: .LBB1_3: ; %loop
+; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_cmp_lg_u32 s8, 0
+; ISA-NEXT: s_cbranch_scc1 .LBB1_1
+; ISA-NEXT: ; %bb.4: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_mov_b64 s[6:7], -1
+; ISA-NEXT: s_branch .LBB1_2
+; ISA-NEXT: .LBB1_5: ; %Flow2
+; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
+; ISA-NEXT: v_mov_b32_e32 v0, 0
+; ISA-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[2:3]
+; ISA-NEXT: exp mrt0 off, off, off, off
+; ISA-NEXT: s_endpgm
+start:
+  br label %loop
+
+loop:                                             ; preds = %Flow, %start
+  %v1 = phi i32 [ 0, %start ], [ 1, %Flow ]
+  %v2 = icmp ugt i32 %v1, 0
+  br i1 %v2, label %endif1, label %Flow
+
+Flow2:                                            ; preds = %Flow
+  %spec.select = select i1 %i1, float 1.000000e+00, float 0.000000e+00
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %spec.select, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i1 false, i1 false)
+  ret void
+
+endif1:                                           ; preds = %loop
+  br label %Flow
+
+Flow:                                             ; preds = %endif1, %loop
+  %i = phi i1 [ %v4, %endif1 ], [ true, %loop ]
+  %i1 = phi i1 [ false, %endif1 ], [ true, %loop ]
+  br i1 %i, label %Flow2, label %loop
+}
+
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare float @llvm.sqrt.f32(float) #0
 
diff --git a/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
new file mode 100644
index 0000000000000..1b5999c689178
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=si-fix-sgpr-copies,si-i1-copies -o - %s | FileCheck %s
+
+# Make sure SIFixSGPRCopies does not assert on a phi with vreg_1
+# inputs.
+
+---
+name: i1_copy_assert
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: i1_copy_assert
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $vgpr0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[COPY]], 1, implicit $exec
+  ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %14, %bb.4
+  ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %8, %bb.4
+  ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_MOV_B64_1]]
+  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PHI]], $exec, implicit-def $scc
+  ; CHECK-NEXT: S_CMP_LG_U32 [[DEF2]], killed [[S_MOV_B32_]], implicit-def $scc
+  ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8
+  ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+  ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_MOV_B32_e32_]], [[COPY2]], implicit $exec
+  ; CHECK-NEXT: S_ENDPGM 0, implicit killed [[V_CNDMASK_B32_e64_]]
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[S_OR_B64_]], $exec, implicit-def $scc
+  ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
+  ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_OR_B64_]], %bb.1, [[S_OR_B64_1]], %bb.3
+  ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY1]], %bb.1, [[S_MOV_B64_2]], %bb.3
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[PHI2]]
+  ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI1]], $exec, implicit-def $scc
+  ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
+  ; CHECK-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
+  ; CHECK-NEXT: SI_LOOP [[DEF3]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_BRANCH %bb.2
+  bb.0:
+    liveins: $vgpr0
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+    %2:sreg_64 = S_MOV_B64 0
+    %3:vreg_1 = COPY %1
+
+  bb.1:
+    %4:sreg_64 = S_MOV_B64 -1
+    %5:vreg_1 = COPY %4
+    %6:sreg_32 = S_MOV_B32 0
+    %7:sreg_32 = IMPLICIT_DEF
+    S_CMP_LG_U32 %7, killed %6, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.3, implicit $scc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %8:vreg_1 = PHI %9, %bb.4
+    %10:sreg_64_xexec = COPY %8
+    %11:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %12:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %11, %10, implicit $exec
+    S_ENDPGM 0, implicit killed %12
+
+  bb.3:
+    %13:sreg_64 = S_MOV_B64 0
+
+  bb.4:
+    %14:vreg_1 = PHI %5, %bb.1, %3, %bb.3
+    %9:sreg_64 = PHI %5, %bb.1, %13, %bb.3
+    %15:sreg_64 = COPY %14
+    %16:sreg_64 = IMPLICIT_DEF
+    SI_LOOP %16, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+...