diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 86de1f3be9047..6a0175ca283b2 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5719,6 +5719,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl &DstRegs, ArrayRef Src1Regs, ArrayRef Src2Regs, LLT NarrowTy) { + const LLT S1 = LLT::scalar(1); MachineIRBuilder &B = MIRBuilder; unsigned SrcParts = Src1Regs.size(); unsigned DstParts = DstRegs.size(); @@ -5731,6 +5732,8 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl &DstRegs, unsigned CarrySumPrevDstIdx; SmallVector Factors; + const Register Zero = B.buildConstant(NarrowTy, 0).getReg(0); + for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { // Collect low parts of muls for DstIdx. for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1; @@ -5755,15 +5758,16 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl &DstRegs, // Add all factors and accumulate all carries into CarrySum. if (DstIdx != DstParts - 1) { MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); + B.buildUAddo(NarrowTy, S1, Factors[0], Factors[1]); FactorSum = Uaddo.getReg(0); CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); for (unsigned i = 2; i < Factors.size(); ++i) { MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); + B.buildUAddo(NarrowTy, S1, FactorSum, Factors[i]); FactorSum = Uaddo.getReg(0); - MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); - CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); + // A carry-out belongs to the next limb: count it in CarrySum only. + CarrySum = B.buildUAdde(NarrowTy, S1, CarrySum, Zero, Uaddo.getReg(1)) + .getReg(0); } } else { // Since value for the next index is not calculated, neither is CarrySum. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll new file mode 100644 index 0000000000000..5d8546c72d570 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll @@ -0,0 +1,210 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +define i8 @mul_i8(i8 %x, i8 %y) { +; CHECK-LABEL: mul_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mul w0, w0, w1 +; CHECK-NEXT: ret + %mul = mul i8 %x, %y + ret i8 %mul +} + +define i16 @mul_i16(i16 %x, i16 %y) { +; CHECK-LABEL: mul_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mul w0, w0, w1 +; CHECK-NEXT: ret + %mul = mul i16 %x, %y + ret i16 %mul +} + +define i32 @mul_i32(i32 %x, i32 %y) { +; CHECK-LABEL: mul_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mul w0, w0, w1 +; CHECK-NEXT: ret + %mul = mul i32 %x, %y + ret i32 %mul +} + +define i64 @mul_i64(i64 %x, i64 %y) { +; CHECK-LABEL: mul_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x0, x0, x1 +; CHECK-NEXT: ret + %mul = mul i64 %x, %y + ret i64 %mul +} + +define i96 @mul_i96(i96 %x, i96 %y) { +; CHECK-LABEL: mul_i96: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x9, x0, x3 +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: madd x9, x1, x2, x9 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x1, x9, x10 +; CHECK-NEXT: ret + %mul = mul i96 %x, %y + ret i96 %mul +} + +define i128 @mul_i128(i128 %x, i128 %y) { +; CHECK-LABEL: mul_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x9, x0, x3 +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: madd x9, x1, x2, x9 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x1, x9, x10 +; CHECK-NEXT: ret + %mul = mul i128 %x, %y + ret i128 %mul +} + +define i160 @mul_i160(i160 %x, i160 %y) { +; CHECK-LABEL: mul_i160: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, 
x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: umulh x14, x1, x4 +; CHECK-NEXT: adcs x1, x8, x10 +; CHECK-NEXT: adc x9, xzr, xzr +; CHECK-NEXT: adds x10, x11, x12 +; CHECK-NEXT: umulh x8, x0, x5 +; CHECK-NEXT: cset w11, hs +; CHECK-NEXT: adc x10, x10, x13 +; CHECK-NEXT: cmp w11, #1 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: adc x10, x10, x14 +; CHECK-NEXT: adc x8, x10, x8 +; CHECK-NEXT: adc x2, x8, x9 +; CHECK-NEXT: ret + %mul = mul i160 %x, %y + ret i160 %mul +} + +define i192 @mul_i192(i192 %x, i192 %y) { +; CHECK-LABEL: mul_i192: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: umulh x14, x1, x4 +; CHECK-NEXT: adcs x1, x8, x10 +; CHECK-NEXT: adc x9, xzr, xzr +; CHECK-NEXT: adds x10, x11, x12 +; CHECK-NEXT: umulh x8, x0, x5 +; CHECK-NEXT: cset w11, hs +; CHECK-NEXT: adc x10, x10, x13 +; CHECK-NEXT: cmp w11, #1 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: adc x10, x10, x14 +; CHECK-NEXT: adc x8, x10, x8 +; CHECK-NEXT: adc x2, x8, x9 +; CHECK-NEXT: ret + %mul = mul i192 %x, %y + ret i192 %mul +} + +define i224 @mul_i224(i224 %x, i224 %y) { +; CHECK-LABEL: mul_i224: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: mul x14, x2, x5 +; CHECK-NEXT: adc x10, xzr, xzr +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: adds x11, x11, x12 +; CHECK-NEXT: umulh x15, x1, x4 +; CHECK-NEXT: madd x14, x3, x4, x14 +; CHECK-NEXT: umulh x16, x0, x5 +; CHECK-NEXT: madd x12, x1, x6, x14 +; CHECK-NEXT: cset w14, hs +; CHECK-NEXT: adcs x11, x11, x13 +; CHECK-NEXT: adc x13, xzr, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: 
umulh x17, x2, x4 +; CHECK-NEXT: adcs x11, x11, x15 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x9, x1, x5 +; CHECK-NEXT: adcs x11, x11, x16 +; CHECK-NEXT: mov x1, x8 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x18, x0, x6 +; CHECK-NEXT: adcs x2, x11, x10 +; CHECK-NEXT: adc x10, x13, xzr +; CHECK-NEXT: madd x12, x0, x7, x12 +; CHECK-NEXT: add x9, x17, x9 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: add x9, x9, x18 +; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x3, x12, x9 +; CHECK-NEXT: ret + %mul = mul i224 %x, %y + ret i224 %mul +} + +define i256 @mul_i256(i256 %x, i256 %y) { +; CHECK-LABEL: mul_i256: +; CHECK: // %bb.0: +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: mul x14, x2, x5 +; CHECK-NEXT: adc x10, xzr, xzr +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: adds x11, x11, x12 +; CHECK-NEXT: umulh x15, x1, x4 +; CHECK-NEXT: madd x14, x3, x4, x14 +; CHECK-NEXT: umulh x16, x0, x5 +; CHECK-NEXT: madd x12, x1, x6, x14 +; CHECK-NEXT: cset w14, hs +; CHECK-NEXT: adcs x11, x11, x13 +; CHECK-NEXT: adc x13, xzr, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x17, x2, x4 +; CHECK-NEXT: adcs x11, x11, x15 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x9, x1, x5 +; CHECK-NEXT: adcs x11, x11, x16 +; CHECK-NEXT: mov x1, x8 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x18, x0, x6 +; CHECK-NEXT: adcs x2, x11, x10 +; CHECK-NEXT: adc x10, x13, xzr +; CHECK-NEXT: madd x12, x0, x7, x12 +; CHECK-NEXT: add x9, x17, x9 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: add x9, x9, x18 +; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x3, x12, x9 +; CHECK-NEXT: ret + %mul = mul i256 %x, %y + ret i256 %mul +} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 2bf8649e76242..a79f1db9b8cb2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -619,25 +619,24 @@ body: | ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD 
[[ADD3]], [[UMULH2]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH2]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UADDE2]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDE]](s32), [[ADD4]](s32) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; ; GFX89-LABEL: name: test_mul_s96 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index f9ec3bca78931..81e13b6cf6745 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -49,6 +49,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -87,6 +88,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -125,6 +127,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -244,6 +247,7 @@ body: | ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_sdiv_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -313,6 +317,7 @@ body: | ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_sdiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -382,6 +387,7 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_sdiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -506,6 +512,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -515,89 +522,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], 
[[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: 
[[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; 
GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; 
GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; 
GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -610,9 +601,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] @@ -620,9 +611,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = 
G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -635,6 +626,7 @@ body: | ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_sdiv_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -683,89 +675,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), 
[[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: 
[[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + 
; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], 
[[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), 
[[ADD2]](s32) ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -782,9 +759,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), 
[[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -792,9 +769,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -807,6 +784,7 @@ body: | ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: 
[[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_sdiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -855,89 +833,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: 
[[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; 
GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD 
[[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -954,9 +917,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), 
[[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -964,9 +927,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -979,6 +942,7 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_sdiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; 
GFX10-NEXT: {{ $}} @@ -1027,93 +991,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: 
[[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL 
[[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; 
GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] 
- ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: 
[[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD 
[[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1126,9 +1075,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: 
[[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] @@ -1136,9 +1085,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1208,6 +1157,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1217,89 +1167,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = 
G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], 
[[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], 
[[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: 
[[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; 
GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = 
G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1312,9 +1246,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV26]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV27]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -1322,9 +1256,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV28]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), 
[[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV29]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1340,14 +1274,14 @@ body: | ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO25]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO27]] + ; 
GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -1371,94 +1305,79 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH16]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; 
GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: 
[[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH18]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX6-NEXT: 
[[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE62]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO33]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO32]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO32]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[MUL24]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH21]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH23]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX6-NEXT: 
[[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE76]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]] ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO38]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE78]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO38]] + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH25]], [[UADDO41]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE78]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO38]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE78]] + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH27]], [[UADDO43]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX6-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX6-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE78]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE90]] + ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32) ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDE88]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDE88]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDE88]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD17]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD17]] ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -1470,9 +1389,9 @@ body: | ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX6-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] - ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV56]] + ; GFX6-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV57]], [[UADDO45]] + ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] @@ -1480,9 +1399,9 @@ body: | ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX6-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] - ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV58]] + ; GFX6-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV59]], [[UADDO47]] + ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ne), [[SELECT4]](s32), [[C6]] @@ -1496,6 +1415,7 @@ body: | ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_sdiv_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1546,89 +1466,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; 
GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = 
G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: 
[[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1645,9 +1550,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO36]], [[UV38]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] @@ -1655,9 +1560,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1673,14 +1578,14 @@ body: | ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], 
[[C]](s32) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -1709,85 +1614,70 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] ; GFX8-NEXT: 
[[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX8-NEXT: 
[[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), 
[[UADDO32]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], 
[[UADDO69]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]] ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL 
[[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]] + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX8-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX8-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32) ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]] ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] @@ -1803,9 +1693,9 @@ body: | ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]] + ; GFX8-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]] + ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] @@ -1813,9 +1703,9 @@ body: | ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]] + ; GFX8-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]] + ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -1829,6 +1719,7 @@ body: | ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_sdiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1879,89 +1770,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: 
[[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; 
GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: 
[[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: 
[[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: 
[[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], 
[[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1978,9 +1854,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] @@ -1988,9 +1864,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; 
GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -2006,14 +1882,14 @@ body: | ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -2042,85 +1918,70 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] ; GFX9-NEXT: 
[[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), 
[[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX9-NEXT: 
[[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = 
G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]] ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), 
[[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]] + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX9-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX9-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX9-NEXT: 
[[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32) ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]] ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] @@ -2136,9 +1997,9 @@ body: | ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE 
[[USUBE14]], [[C6]], [[USUBO13]] ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]] + ; GFX9-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]] + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] @@ -2146,9 +2007,9 @@ body: | ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]] + ; GFX9-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]] + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX9-NEXT: 
[[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -2162,6 +2023,7 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_sdiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2212,93 +2074,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: 
[[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 
[[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: 
[[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; 
GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: 
[[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 
[[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -2311,9 +2158,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV32]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) 
= G_UADDE [[ADD15]], [[UV33]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV32]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV33]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] @@ -2321,9 +2168,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV34]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV34]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV35]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -2339,14 +2186,14 @@ body: | ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), 
[[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO25]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO27]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -2369,95 +2216,80 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] ; GFX10-NEXT: 
[[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV54]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE 
[[FPTOUI3]], [[ADD11]], [[UADDO33]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX10-NEXT: 
[[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV56]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV56]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV56]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; 
GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]] ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO68]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), 
[[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO38]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE78]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO38]] + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE78]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO38]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE78]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX10-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX10-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE78]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32) ; 
GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDO78]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDE88]], [[C5]] ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDO78]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDE88]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV58]], [[UV64]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD35]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD17]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD17]] ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -2469,9 +2301,9 @@ body: | ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]] ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE 
[[USUBE14]], [[C6]], [[USUBO13]] ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV68]] - ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]] - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV68]] + ; GFX10-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV69]], [[UADDO45]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV66]] @@ -2479,9 +2311,9 @@ body: | ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV70]] - ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]] - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV70]] + ; GFX10-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV71]], [[UADDO47]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX10-NEXT: 
[[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -2547,6 +2379,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2587,6 +2420,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2627,6 +2461,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2763,6 +2598,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2844,6 +2680,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2922,6 +2759,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB7]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_sdiv_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3052,6 
+2890,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3092,6 +2931,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3132,6 +2972,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3227,6 +3068,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3267,6 +3109,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3307,6 +3150,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3407,6 +3251,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: 
[[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -3416,89 +3261,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; 
GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = 
G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: 
[[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; 
GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: 
[[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -3511,9 +3340,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] @@ -3521,9 +3350,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: 
[[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3536,6 +3365,7 @@ body: | ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_sdiv_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -3586,89 +3416,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), 
[[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: 
[[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = 
G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), 
[[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) 
= G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3685,9 +3500,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -3695,9 +3510,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], 
[[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3710,6 +3525,7 @@ body: | ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_sdiv_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3760,89 +3576,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; 
GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), 
[[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; 
GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), 
[[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) 
; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3859,9 +3660,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -3869,9 +3670,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO20]], [[UV36]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3884,6 +3685,7 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_sdiv_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -3934,93 +3736,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL 
[[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), 
[[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), 
[[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: 
[[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = 
G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -4033,9 +3820,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: 
[[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] @@ -4043,9 +3830,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: 
[[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index 08bb589b6ded2..bb2b47d6ee663 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -45,6 +45,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -79,6 +80,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -113,6 +115,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -221,6 +224,7 @@ body: | ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_srem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -283,6 +287,7 @@ body: | ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_srem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; 
GFX9-NEXT: {{ $}} @@ -345,6 +350,7 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_srem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -462,6 +468,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -471,88 +478,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], 
[[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; 
GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], 
[[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: 
[[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: 
[[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) 
= G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] @@ -586,6 +577,7 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_srem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -634,88 +626,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: 
[[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; 
GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: 
[[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; 
GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -753,6 +730,7 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_srem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -801,88 +779,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], 
[[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), 
[[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = 
G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), 
[[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), 
[[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -920,6 +883,7 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; 
GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_srem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -968,92 +932,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], 
[[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), 
[[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: 
[[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] @@ -1144,6 +1093,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1153,88 +1103,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = 
G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; 
GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], 
[[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], 
[[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; 
GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] @@ -1271,14 +1205,14 @@ body: | ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) 
= G_UADDO [[UV30]], [[UV32]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO21]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO23]] + ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1302,93 +1236,78 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; 
GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH16]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD 
[[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO61]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH18]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE58]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO28]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO28]] + ; GFX6-NEXT: 
[[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[MUL24]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH21]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH23]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE72]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]] ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) 
- ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO34]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE74]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO34]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), 
[[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH25]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE74]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO34]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE74]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH27]], [[UADDO39]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE74]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE86]] ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDE84]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDE84]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDE84]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: 
[[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD17]], [[USUBO13]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD17]] ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] @@ -1423,6 +1342,7 @@ body: | ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_srem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1473,88 +1393,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH 
[[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; 
GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], 
[[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; 
GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), 
[[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1595,14 +1500,14 @@ body: | ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), 
[[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1631,84 +1536,69 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; 
GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = 
G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD4]], [[UADDO35]] ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[XOR3]](s64) ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]] + ; GFX8-NEXT: 
[[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]] ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] @@ -1747,6 +1637,7 @@ body: | ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_srem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1797,88 +1688,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), 
[[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: 
[[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = 
G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), 
[[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), 
[[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1919,14 +1795,14 @@ body: | ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: 
[[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1955,84 +1831,69 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: 
[[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], 
[[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: 
[[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDE60]], [[ADD4]], [[UADDO35]] ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]] + ; 
GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]] ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] @@ -2071,6 +1932,7 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_srem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2121,92 +1983,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), 
[[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), 
[[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; 
GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH 
[[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: 
[[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), 
[[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), 
[[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] @@ -2243,14 +2090,14 @@ body: | ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO21]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO23]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX10-NEXT: 
[[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -2273,94 +2120,79 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV50]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: 
[[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = 
G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV52]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], 
[[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV52]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV52]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV52]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]] ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO64]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: 
[[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO34]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE74]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO34]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE74]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO34]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE74]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH 
[[UV57]], [[UADDE74]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDO74]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDE84]], [[C5]] ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDO74]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDE84]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV54]], [[UV60]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD35]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD17]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD17]] ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV63]] @@ -2445,6 +2277,7 @@ body: | ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; 
GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_srem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2483,6 +2316,7 @@ body: | ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_srem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2521,6 +2355,7 @@ body: | ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_srem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2648,6 +2483,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2722,6 +2558,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2793,6 +2630,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_srem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2912,6 +2750,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ 
-2948,6 +2787,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2984,6 +2824,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3071,6 +2912,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3107,6 +2949,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3143,6 +2986,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3239,6 +3083,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH 
[[USUBO]], [[FPTOUI]] @@ -3248,88 +3093,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), 
[[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), 
[[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: 
[[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = 
G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], 
[[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), 
[[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] @@ -3363,6 +3192,7 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_srem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -3413,88 +3243,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: 
[[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], 
[[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), 
[[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: 
[[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: 
[[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), 
[[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3532,6 +3347,7 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_srem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3582,88 +3398,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) 
= G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: 
[[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), 
[[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], 
[[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) 
= G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3701,6 +3502,7 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_srem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -3751,92 +3553,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], 
[[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], 
[[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), 
[[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: 
[[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH 
[[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) 
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index bfba201e264b1..6d9a168aa65e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -381,6 +381,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -390,89 +391,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; 
GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: 
[[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], 
[[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: 
[[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; 
GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: 
[[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -485,9 +470,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] @@ -495,9 +480,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -537,89 +522,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], 
[[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; 
GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - 
; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + 
; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], 
[[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], 
[[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), 
[[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -636,9 +606,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -646,9 +616,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -688,89 +658,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; 
GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: 
[[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: 
[[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], 
[[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: 
[[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -787,9 +742,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE 
[[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -797,9 +752,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; 
GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -839,93 +794,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; 
GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = 
G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: 
[[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: 
[[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -938,9 +878,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: 
[[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] @@ -948,9 +888,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -998,6 +938,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1007,89 +948,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) 
= G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), 
[[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; 
GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], 
[[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], 
[[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), 
[[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; 
GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1102,9 +1027,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV18]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV19]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] @@ -1112,9 +1037,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV20]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), 
[[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV21]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1140,94 +1065,79 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH16]], [[UADDO21]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE 
[[C5]], [[C5]], [[UADDE45]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], 
[[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH18]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), 
[[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE54]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO24]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO24]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[MUL24]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH21]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH23]], [[UADDO29]] + ; GFX6-NEXT: 
[[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE68]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] - ; 
GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO30]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE70]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO30]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH25]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE70]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO30]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE70]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH27]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE70]] + ; 
GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE82]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32) ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDE80]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDE80]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDE80]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD17]], [[USUBO9]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD17]] ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -1239,9 +1149,9 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = 
G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV36]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV37]], [[UADDO37]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] @@ -1249,9 +1159,9 @@ body: | ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV39]], [[UADDO39]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX6-NEXT: 
[[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1294,89 +1204,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: 
[[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; 
GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH 
[[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1393,9 +1288,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]] + ; GFX8-NEXT: 
[[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] @@ -1403,9 +1298,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1436,85 +1331,70 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: 
[[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] ; GFX8-NEXT: 
[[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX8-NEXT: 
[[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]] + ; GFX8-NEXT: 
[[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]] ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]] + ; GFX8-NEXT: 
[[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32) ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]] ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]] + ; 
GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] @@ -1530,9 +1410,9 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] @@ -1540,9 +1420,9 @@ body: | ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: 
[[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1585,89 +1465,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), 
[[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: 
[[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX9-NEXT: 
[[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: 
[[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) 
= G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1684,9 +1549,9 @@ body: | ; GFX9-NEXT: 
[[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] @@ -1694,9 +1559,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: 
[[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1727,85 +1592,70 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; 
GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: 
[[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX9-NEXT: 
[[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]] ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), 
[[ADD23]](s32) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32) ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]] ; GFX9-NEXT: 
[[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] @@ -1821,9 +1671,9 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]] + 
; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] @@ -1831,9 +1681,9 @@ body: | ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1876,93 +1726,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX10-NEXT: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: 
[[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], 
[[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE 
[[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: 
[[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), 
[[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; 
GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1975,9 +1810,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV24]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV24]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV25]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] @@ -1985,9 +1820,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV26]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), 
[[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV26]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV27]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -2012,95 +1847,80 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV34]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; 
GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], 
[[ADD10]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV36]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX10-NEXT: 
[[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV36]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV36]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: 
[[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV36]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]] ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO60]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), 
[[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO30]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE70]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO30]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE70]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], 
[[UADDO30]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE70]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE70]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDO70]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDE80]], [[C4]] ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDO70]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDE80]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: 
[[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV44]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD35]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD17]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD17]] ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -2112,9 +1932,9 @@ body: | ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]] ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV48]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV48]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV49]], [[UADDO37]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV46]] @@ -2122,9 +1942,9 @@ body: | ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), 
[[UV47]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV50]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV50]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV51]], [[UADDO39]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -2855,6 +2675,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -2864,89 +2685,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), 
[[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], 
[[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; 
GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), 
[[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), 
[[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: 
[[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -2959,9 +2764,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 1 ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] @@ -2969,9 +2774,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), 
[[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3014,89 +2819,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] 
- ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: 
[[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], 
[[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 
[[UV18]](s32), [[UADDE36]], [[C5]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3113,9 +2903,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX8-NEXT: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -3123,9 +2913,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3168,89 +2958,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: 
[[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: 
[[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], 
[[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3267,9 +3042,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -3277,9 +3052,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) 
- ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3322,93 +3097,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL 
[[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO 
[[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], 
[[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], 
[[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), 
[[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; 
GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -3421,9 +3181,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), 
[[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] @@ -3431,9 +3191,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: 
[[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 3a919f004964b..54a2327b5e20f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -77,28 +77,24 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: 
[[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; ; GFX9-LABEL: name: test_umulh_s64 @@ -108,28 +104,24 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), 
[[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -152,52 +144,43 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: 
[[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; 
GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX8-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; @@ -210,52 +193,43 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; 
GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], 
[[UMULH4]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir index 13c52d08b4941..2917b06936944 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -115,42 +115,38 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[UV9]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64) ; ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -159,42 +155,38 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = 
G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UMULO %0, %1 @@ -218,79 +210,70 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), 
[[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] ; GFX8-NEXT: 
[[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UV24]](s32), [[UV26]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]] ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]] ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) @@ -304,79 +287,70 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: 
[[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; 
GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]] ; GFX9-NEXT: 
[[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 44f44123bb736..b01e0eda6768e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -349,6 +349,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -358,88 +359,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = 
G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: 
[[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), 
[[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), 
[[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; 
GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] @@ -501,88 +486,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: 
[[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], 
[[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; 
GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) 
- ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; 
GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -648,88 +618,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), 
[[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], 
[[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: 
[[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -795,92 +750,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = 
G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = 
G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; 
GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], 
[[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] @@ -950,6 +890,7 @@ body: | ; GFX6-NEXT: 
[[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -959,88 +900,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: 
[[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD 
[[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - 
; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE 
[[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: 
[[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] @@ -1088,93 +1013,78 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: 
[[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH16]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL 
[[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDO18]], [[UMULH18]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE50]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO20]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO20]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[MUL24]] + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH21]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), 
[[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH23]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE64]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], 
[[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO26]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE66]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH25]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE66]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE66]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH27]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], 
[[UADDE77]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE66]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE78]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE76]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE76]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE76]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD17]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD17]] ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] @@ -1239,88 +1149,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], 
[[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: 
[[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD 
[[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), 
[[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX8-NEXT: 
[[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1377,84 +1272,69 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX8-NEXT: 
[[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]] + ; 
GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), 
[[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), 
[[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV42]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]] ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]] ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] @@ -1523,88 +1403,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: 
[[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: 
[[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) 
= G_MUL [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], 
[[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL 
[[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1661,84 +1526,69 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX9-NEXT: 
[[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], 
[[UV42]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]] ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: 
[[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]] ; 
GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] @@ -1807,92 +1657,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], 
[[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; 
GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: 
[[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], 
[[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] @@ -1939,94 +1774,79 @@ body: | ; GFX10-NEXT: 
[[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV30]] - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; 
GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX10-NEXT: 
[[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV32]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV32]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV32]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV32]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], 
[[UADDE59]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; 
GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO26]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE66]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE66]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE66]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE66]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDO66]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDE76]], [[C4]] ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDO66]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDE76]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV40]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD35]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD17]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD17]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV43]] @@ -2723,6 +2543,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: 
[[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -2732,88 +2553,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], 
[[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: 
[[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH 
[[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH 
[[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE 
[[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] @@ -2878,88 +2683,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: 
[[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX8-NEXT: 
[[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: 
[[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: 
[[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3028,88 +2818,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX9-NEXT: 
[[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL 
[[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), 
[[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: 
[[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: 
[[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3178,92 +2953,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), 
[[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - 
; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL 
[[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX10-NEXT: 
[[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), 
[[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], 
[[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 377fa24cb4755..cafd4c1c5c813 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -26,133 +26,116 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v1, v3, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v2 -; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3 -; CHECK-NEXT: v_trunc_f32_e32 v8, v6 -; CHECK-NEXT: v_mac_f32_e32 
v3, 0xcf800000, v8 -; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v8 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0 -; CHECK-NEXT: v_mov_b32_e32 v3, v7 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4] -; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v9, v7 -; CHECK-NEXT: v_mul_lo_u32 v14, v12, v7 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v6, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v7, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v7 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v7, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v3, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v8, v2 +; CHECK-NEXT: v_mul_lo_u32 v14, v11, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v12, vcc +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; 
CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v12, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9 +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v10, v8, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v2 +; CHECK-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v12, vcc +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v14, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; 
CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v3 -; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v6, vcc -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0 -; CHECK-NEXT: v_mov_b32_e32 v3, v7 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4] -; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v10 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v10, vcc -; CHECK-NEXT: v_xor_b32_e32 v8, v3, v10 -; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, v9, v7 -; CHECK-NEXT: v_xor_b32_e32 v11, v4, v10 -; CHECK-NEXT: v_mul_hi_u32 v4, v9, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v11, v3 -; CHECK-NEXT: v_mul_lo_u32 v6, v8, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v11, v3 -; CHECK-NEXT: 
v_mul_hi_u32 v9, v11, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v11, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v8, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v5 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v7, v[4:5] -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v4, vcc -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v11, v4 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v10, vcc +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v1, v8, vcc +; CHECK-NEXT: 
v_mul_hi_u32 v10, v4, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, 0 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v10, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v8, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v4, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v8 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v9, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v7 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0 -; CHECK-NEXT: v_cndmask_b32_e32 
v2, v6, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -235,18 +218,13 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc @@ -258,21 +236,16 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: 
v_mul_lo_u32 v6, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -280,38 +253,33 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_lo_u32 v3, s12, v1 ; CHECK-NEXT: v_mul_hi_u32 v4, s12, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s13, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s13, v1 +; CHECK-NEXT: v_mov_b32_e32 v5, s13 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s13, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, s12, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], 
s10, v4, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, s13, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, s12, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, s13, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v5, s13 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s12, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v3, s11 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v4, s11 ; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s13, v1 ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v2 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s11, v1 @@ -321,11 +289,11 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: v_add_i32_e32 
v1, vcc, 1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc ; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] ; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 @@ -404,105 +372,90 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 ; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v17, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v5 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v11, vcc +; GISEL-NEXT: 
v_addc_u32_e32 v9, vcc, v9, v11, vcc ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; GISEL-NEXT: v_mov_b32_e32 v5, v12 -; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[5:6] -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13] -; GISEL-NEXT: v_xor_b32_e32 v5, v0, v9 -; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 -; GISEL-NEXT: v_xor_b32_e32 v15, v1, v9 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v13, v0, v5 +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v14, v12 +; GISEL-NEXT: v_xor_b32_e32 v16, v1, v5 ; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v15 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 
v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v5, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v11 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v11 -; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v14, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v13, v[11:12] -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v15, v11, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v15, v11 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v16, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 
v9, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v16, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v0, v11, vcc +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v14, 0 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v11, v9 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v15, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v14, v[11:12] +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v16, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], v16, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v12, v11, v12, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v5 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v7, v1, v5 -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v9 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v15, v6 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v13 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v14, vcc -; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v15 +; 
GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v14 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v15, vcc +; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 @@ -515,130 +468,115 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v18 ; GISEL-NEXT: v_subb_u32_e32 v21, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v15, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v4, v13, v10, vcc ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v20, v18, v[1:2] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v16 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v21, v19, v[10:11] -; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v16, v1, vcc ; GISEL-NEXT: v_mul_lo_u32 v1, v18, v0 ; GISEL-NEXT: v_mul_lo_u32 v11, v19, v10 ; GISEL-NEXT: v_mul_hi_u32 v16, v19, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc ; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v18, v10 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_mul_hi_u32 v11, v19, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, 
vcc, v16, v11 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v18, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v10 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v10, v18, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v19, v0 -; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v18, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v10, 0 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v0 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v18, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v13, v9, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v11, v[1:2] -; GISEL-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v14, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v21, v10, v[8:9] -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; GISEL-NEXT: v_xor_b32_e32 v1, v4, v13 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v14, vcc -; GISEL-NEXT: v_xor_b32_e32 v9, v2, v14 -; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v4, v10, v8 -; GISEL-NEXT: v_xor_b32_e32 v15, v3, v14 -; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v12, 
v14, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v20, v17, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v5, v8 +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[10:11] +; GISEL-NEXT: v_cndmask_b32_e32 v13, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v16, v4 +; GISEL-NEXT: v_xor_b32_e32 v11, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v3, v16, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v4, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v4, v9, v2 -; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v11, v15, v2 +; GISEL-NEXT: 
v_add_i32_e32 v0, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v17, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 +; GISEL-NEXT: v_xor_b32_e32 v10, v12, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v15, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GISEL-NEXT: v_mul_hi_u32 v4, v9, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v2 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v0, v4, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v11, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0 +; GISEL-NEXT: v_xor_b32_e32 v13, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v4, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v11, v[0:1] -; GISEL-NEXT: v_xor_b32_e32 v8, v12, v13 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v13 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, v[3:4] -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v13, vcc -; 
GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v14, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v1 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v12, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v13, v1, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v8, v9, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v10 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v12 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v14, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, 
v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_xor_b32_e32 v4, v14, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -662,131 +600,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v5, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v1, v3, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v1 -; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; CGP-NEXT: v_trunc_f32_e32 v5, v4 -; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v12, v3 -; CGP-NEXT: v_cvt_u32_f32_e32 v15, v5 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] -; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v17, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v18, v15, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 -; CGP-NEXT: v_mul_hi_u32 v16, v12, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, 
v17, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v18, v3 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v4, vcc -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v5, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v3, v2 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v18, v15, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v16, v12, v2 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v16, vcc +; CGP-NEXT: 
v_addc_u32_e64 v16, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v2, vcc +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] ; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11 -; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v13 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v13 -; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v14, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 +; CGP-NEXT: v_xor_b32_e32 v11, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 ; CGP-NEXT: v_xor_b32_e32 v10, v10, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v15, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_mul_hi_u32 v14, v12, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v10, v3 -; 
CGP-NEXT: v_mul_lo_u32 v12, v11, v4 -; CGP-NEXT: v_mul_hi_u32 v14, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v10, v3 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_mul_hi_u32 v12, v11, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v2 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v16, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v15, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v2 +; CGP-NEXT: v_mul_hi_u32 v14, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v10, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v3, v5 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v14, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v5 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v12, v[4:5] -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v11, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v14, v[4:5] -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v10, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v10, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CGP-NEXT: v_subb_u32_e32 
v4, vcc, v4, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v14, v11, v2 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v1, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v14, v10, v2 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v12, 0 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v14, v[2:3] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, v[2:3] +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v10, v2, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v10, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v11, s[4:5] -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v14 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v14, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v15, 
v2, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc ; CGP-NEXT: v_xor_b32_e32 v3, v13, v0 -; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v2, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 ; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -835,131 +758,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: .LBB2_7: ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v5, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] -; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] -; CGP-NEXT: 
v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v10, v6 -; CGP-NEXT: v_mul_lo_u32 v16, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v6 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v5 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v6, vcc -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v7, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v7 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v7, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v5, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v5 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 +; CGP-NEXT: v_mul_lo_u32 
v15, v10, v4 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; CGP-NEXT: v_mul_hi_u32 v14, v10, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v14, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] ; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v11 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc -; CGP-NEXT: v_xor_b32_e32 v9, v7, v11 -; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 +; CGP-NEXT: v_xor_b32_e32 v9, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 ; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_mul_hi_u32 v12, v10, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: 
v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v8, v5 -; CGP-NEXT: v_mul_lo_u32 v10, v9, v6 -; CGP-NEXT: v_mul_hi_u32 v12, v9, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v8, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v4 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v8, v3 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v8, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v12, 0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v7 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[6:7] -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[6:7] -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v8, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v8, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v12, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v3, v10, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, 0 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v12, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v10, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v8, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v8, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5] -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v9, 
s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v5, v11, v2 -; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 @@ -1069,18 +977,13 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; 
CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1095,23 +998,18 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, 
v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1121,24 +1019,19 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v7, v[1:2] ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 @@ -1149,8 +1042,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: 
v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -1161,8 +1054,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 @@ -1182,6 +1075,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1199,18 +1094,13 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: 
v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1226,23 +1116,18 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, 
vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1252,28 +1137,21 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v9, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 
s[4:5], v0, v5 @@ -1282,107 +1160,92 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9] -; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v14 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v14, 
v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v13, v15, v13, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v14, v[1:2] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: 
v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 -; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; 
GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc ; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, 
v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1393,7 +1256,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc @@ -1405,12 +1268,12 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: @@ -1423,222 +1286,192 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 
v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, 
v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v17, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v13 -; 
CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v15, v0 -; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v13, vcc -; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v18, v13 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v16, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v17, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v11, vcc +; CGP-NEXT: v_sub_i32_e64 v11, s[4:5], v18, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v16 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v16 ; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_mov_b32_e32 v0, v10 
+; CGP-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v15 -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v17, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v19, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v13 +; 
CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v18, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v11, v18, v11, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v13, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: 
v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; 
CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] -; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v6, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v9 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v8, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, 
s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, -1, v7, s[4:5] +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v7 ; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -1673,130 +1506,115 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: .LBB7_3: ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v6, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v1, v5, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v2 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v2 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CHECK-NEXT: v_trunc_f32_e32 v7, v6 -; CHECK-NEXT: v_mac_f32_e32 
v5, 0xcf800000, v7 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7 -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v12, v8, v5 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_lo_u32 v7, v11, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v13, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v14, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CHECK-NEXT: v_mul_hi_u32 v12, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v6, vcc -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v7, v3, v9 -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v12, v4, v9 -; CHECK-NEXT: v_mul_hi_u32 v4, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 
v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 -; CHECK-NEXT: v_mul_hi_u32 v10, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v11, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v12, v3 -; CHECK-NEXT: v_mul_lo_u32 v6, v7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v7, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v12, v3 -; CHECK-NEXT: v_mul_hi_u32 v10, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v7, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v3, v5 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v8, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v5 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, v[4:5] -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v12, v4, vcc -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 -; CHECK-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v10, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v8 -; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v7, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v8, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v7 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v8 +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 +; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v5, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v5 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v13 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, s[4:5] +; 
CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v10 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v10, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v10 +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v9, v5 +; CHECK-NEXT: v_xor_b32_e32 v11, v3, v10 +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v5 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v2 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, 
vcc +; CHECK-NEXT: v_mul_hi_u32 v6, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v5, 0 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v5, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v11, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v9, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v10, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 +; CHECK-NEXT: 
v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 @@ -1868,21 +1686,16 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 ; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v18, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v18, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v18, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, v17, v12 +; GISEL-NEXT: v_mul_hi_u32 v18, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], v7, v18, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v7 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v11, vcc @@ -1898,23 +1711,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v15, v14, v12 
; GISEL-NEXT: v_xor_b32_e32 v16, v1, v7 ; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v17, v12 +; GISEL-NEXT: v_mul_hi_u32 v1, v17, v11 ; GISEL-NEXT: v_mul_hi_u32 v15, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc @@ -1922,28 +1730,22 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v1 ; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 -; GISEL-NEXT: v_mul_hi_u32 v15, v16, v1 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v16, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: 
v_mul_hi_u32 v12, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v0, v11 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v1 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, vcc +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v0, v12, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v16, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v14, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v11 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v12, v11 ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v8, v15, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v14, v[11:12] -; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v16, v11, vcc ; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v16, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 @@ -1993,116 +1795,102 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc ; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v18, v11 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; GISEL-NEXT: v_mul_hi_u32 v12, v19, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 
0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v18, v11 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v11, v18, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v0 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v18, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v11, 0 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v0 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v18, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v14, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v20, v12, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v13, v7 -; GISEL-NEXT: v_ashrrev_i32_e32 v13, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v11, v[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v20, v17, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[11:12] ; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v8, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v13, vcc -; GISEL-NEXT: v_xor_b32_e32 v5, v2, v13 -; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v4 -; GISEL-NEXT: v_xor_b32_e32 v15, v3, v13 -; GISEL-NEXT: v_mul_hi_u32 v3, v11, 
v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v14, v2 -; GISEL-NEXT: v_mul_hi_u32 v14, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v14 -; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v4 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v16, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v17, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 -; 
GISEL-NEXT: v_mul_hi_u32 v11, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v12, v15, v2 +; GISEL-NEXT: v_xor_b32_e32 v11, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v13, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v15, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GISEL-NEXT: v_mul_hi_u32 v4, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v13, v5, v2 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v4, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v13, 0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v1 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v4, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[0:1] -; GISEL-NEXT: v_xor_b32_e32 v8, v8, v7 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v11, v[3:4] -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v14, 
v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v1 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v13, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v10 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v9 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v11 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v8, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v13 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v14, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc -; GISEL-NEXT: v_xor_b32_e32 v4, v13, v6 -; 
GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v7, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -2129,131 +1917,119 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v12, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v1, v10, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v13 ; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v11, v1 -; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; CGP-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; CGP-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 -; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 -; CGP-NEXT: v_trunc_f32_e32 v12, v11 -; CGP-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] -; CGP-NEXT: v_mul_hi_u32 v17, v13, v10 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_mul_lo_u32 v12, v16, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v18, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v19, v16, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; CGP-NEXT: v_mul_hi_u32 v17, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v18, v12 +; CGP-NEXT: 
v_sub_i32_e32 v15, vcc, 0, v13 +; CGP-NEXT: v_subb_u32_e32 v16, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v12, v10 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v17, v12 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0 +; CGP-NEXT: v_mov_b32_e32 v1, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v1, v17, v10 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12] +; CGP-NEXT: v_mul_hi_u32 v12, v14, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v17, v10 +; CGP-NEXT: v_mul_lo_u32 v18, v14, v11 +; CGP-NEXT: v_mul_lo_u32 v19, v17, v11 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v18 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v14, v11 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v19, v10 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v10 -; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] -; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v8, v14 -; CGP-NEXT: v_mul_lo_u32 v8, v16, v10 -; CGP-NEXT: 
v_mul_lo_u32 v15, v13, v11 -; CGP-NEXT: v_xor_b32_e32 v17, v9, v14 -; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v16, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_mul_hi_u32 v15, v13, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v16, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v17, v8 -; CGP-NEXT: v_mul_lo_u32 v11, v12, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v12, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v17, v8 -; CGP-NEXT: v_mul_hi_u32 v15, v17, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v17, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v12, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v8, v10 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v17, v11 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v10, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, 
s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v10 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v1 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, v17, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0 +; CGP-NEXT: v_mov_b32_e32 v1, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2] +; CGP-NEXT: v_ashrrev_i32_e32 v15, 31, v9 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v8, v15 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v15, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v1, v15 +; CGP-NEXT: v_mul_lo_u32 v1, v17, v10 +; CGP-NEXT: v_mul_lo_u32 v9, v14, v11 +; CGP-NEXT: v_xor_b32_e32 v16, v8, v15 +; CGP-NEXT: v_mul_hi_u32 v8, v14, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v17, v11 +; CGP-NEXT: v_mul_hi_u32 v8, v17, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v14, v11 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v10, vcc +; CGP-NEXT: v_mul_hi_u32 v10, v17, v11 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v17, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v16, v1 +; CGP-NEXT: v_mul_lo_u32 v10, v12, v8 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v1 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v16, v8 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v8 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v11, 
vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v1, v10, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v10, v16, v8 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, 0 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v10, v1 +; CGP-NEXT: v_mov_b32_e32 v1, v9 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v14, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v8 ; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v12, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10] -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v17, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v17, v9 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 -; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v16, v9, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v16, v9 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v13 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v13 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v4 ; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, v12, v15, s[4:5] -; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v13 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v12, s[4:5] +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v14, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v4 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: 
v_cmp_eq_u32_e32 vcc, v9, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v4, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v12 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v13 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v8, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v8, v14, v0 -; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v8, v15, v0 +; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v4, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 @@ -2304,133 +2080,116 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: .LBB8_7: ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v10, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v6, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v10, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v11, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v10 -; CGP-NEXT: 
v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v6, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] -; CGP-NEXT: v_mul_lo_u32 v6, v14, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_mul_hi_u32 v10, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v10, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v10 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v8, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v8 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_mul_hi_u32 v9, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 ; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 -; CGP-NEXT: v_mul_lo_u32 v15, v11, v9 -; CGP-NEXT: v_mul_lo_u32 v16, v14, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v16, v8 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 
v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v14, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v6 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v6, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] ; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v10, v5, v12 -; CGP-NEXT: v_mul_lo_u32 v5, v14, v8 -; CGP-NEXT: v_mul_lo_u32 v7, v11, v9 -; CGP-NEXT: v_xor_b32_e32 v13, v6, v12 -; CGP-NEXT: v_mul_hi_u32 v6, v11, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v12 +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v9, v11, v8 +; CGP-NEXT: v_xor_b32_e32 v13, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v14, v8 +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_addc_u32_e64 
v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v14, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v11, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_mul_hi_u32 v8, v14, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v14, v6, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v6 -; CGP-NEXT: v_mul_hi_u32 v9, v10, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 -; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_mul_hi_u32 v8, v10, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v14, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v8, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 ; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, 0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v7 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v8, v[6:7] -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[6:7] -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v13, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v3, v8, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v9, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v8, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v13, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: 
v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[4:5] -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v11, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10 +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v7, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v5, v12, v2 -; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 @@ -2536,235 +2295,204 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], 0, 0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 -; GISEL-NEXT: 
v_cvt_f32_u32_e32 v5, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 ; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v9, v7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v9 ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, v8 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v14, v10, v7 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 ; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v14, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 
1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v4 ; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v7, vcc ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, v8 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v13, v7 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] ; GISEL-NEXT: v_mul_hi_u32 v0, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 ; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_and_b32_e32 v12, 0xffffff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v4, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; 
GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v13, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_and_b32_e32 v10, 0xffffff, v2 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, 
v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v7 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v7 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v4, 0 +; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[5:6] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9] -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v11, v7 -; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[8:9] ; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v2 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v11, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v6, v1 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v5, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v4 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, 0, v2 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v11, 0 -; GISEL-NEXT: 
v_subb_u32_e32 v15, vcc, 0, v3, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v13, v[5:6] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v10 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v11, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v4, v[8:9] +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v11, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v2 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v1 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v7, s[4:5] +; GISEL-NEXT: v_trunc_f32_e32 v7, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v5 +; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v2 +; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v7 +; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v8, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v9, v1 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7] +; GISEL-NEXT: v_mul_lo_u32 v7, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v20, v12, v5 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v0, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc +; 
GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v18 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, v15, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_mul_hi_u32 v20, v12, v6 +; GISEL-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v18, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v5 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v13, v4 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v18, v1, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v13, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v19, v1, vcc +; GISEL-NEXT: v_mov_b32_e32 v1, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[1:2] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v16 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v17, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v15, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 0, v10 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v15, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v6 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v5, v13, v5 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v1 -; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v13, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v8, 0 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v16 -; GISEL-NEXT: v_mov_b32_e32 v1, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v11, v[1:2] -; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v8, v[5:6] -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v13, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v9, v17, v18, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_mul_lo_u32 v6, v11, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v8, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], 0, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 -; GISEL-NEXT: v_cndmask_b32_e64 
v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 -; GISEL-NEXT: v_mul_hi_u32 v7, v8, v5 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v12, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 -; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v11, v5, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v10, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v9, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v10, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v10, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v15, v6, vcc +; 
GISEL-NEXT: v_mul_lo_u32 v8, v3, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v6 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v10, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v8, v4, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v6 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v5, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v6, v9 ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[0:1] ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 ; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, v8, v10, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v7 +; 
GISEL-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v8 ; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc @@ -2772,13 +2500,13 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v7 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index b666f45521661..73157c28ca82b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -147,23 +147,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_ashr_i32 s2, s9, 31 -; GFX8-NEXT: s_ashr_i32 s12, s11, 31 -; GFX8-NEXT: s_add_u32 s0, s8, s2 -; GFX8-NEXT: s_addc_u32 s1, s9, s2 -; GFX8-NEXT: s_add_u32 s8, s10, s12 -; GFX8-NEXT: s_mov_b32 s13, s12 -; GFX8-NEXT: s_addc_u32 s9, s11, s12 -; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] +; GFX8-NEXT: s_ashr_i32 s12, s9, 31 +; GFX8-NEXT: s_ashr_i32 s14, s11, 31 +; GFX8-NEXT: s_add_u32 s0, s8, s12 +; GFX8-NEXT: s_addc_u32 s1, s9, s12 +; 
GFX8-NEXT: s_add_u32 s2, s10, s14 +; GFX8-NEXT: s_mov_b32 s15, s14 +; GFX8-NEXT: s_addc_u32 s3, s11, s14 +; GFX8-NEXT: s_xor_b64 s[8:9], s[2:3], s[14:15] ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX8-NEXT: s_mov_b32 s3, s2 -; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] +; GFX8-NEXT: s_mov_b32 s13, s12 +; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[12:13] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s14, 0, s8 -; GFX8-NEXT: s_subb_u32 s15, 0, s9 +; GFX8-NEXT: s_sub_u32 s18, 0, s8 +; GFX8-NEXT: s_subb_u32 s19, 0, s9 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX8-NEXT: v_trunc_f32_e32 v2, v1 @@ -171,10 +171,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -182,44 +182,34 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; 
GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[16:17], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 
v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -227,28 +217,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s11, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s11, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v6, s11 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s10, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; 
GFX8-NEXT: v_mul_hi_u32 v4, s11, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s10, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s9 ; GFX8-NEXT: v_subb_u32_e64 v2, s[0:1], v6, v1, vcc ; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s11, v1 @@ -261,8 +246,8 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v0 ; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 @@ -280,20 +265,20 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX8-NEXT: v_xor_b32_e32 v0, s0, v4 -; GFX8-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX8-NEXT: s_xor_b64 s[0:1], s[12:13], s[14:15] +; GFX8-NEXT: v_xor_b32_e32 v0, s0, v3 +; GFX8-NEXT: v_xor_b32_e32 v1, s1, v4 ; GFX8-NEXT: 
v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc -; GFX8-NEXT: v_xor_b32_e32 v3, s2, v5 -; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2 -; GFX8-NEXT: v_mov_b32_e32 v5, s2 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3 +; GFX8-NEXT: v_xor_b32_e32 v3, s12, v5 +; GFX8-NEXT: v_xor_b32_e32 v4, s12, v2 +; GFX8-NEXT: v_mov_b32_e32 v5, s12 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s12, v3 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v4, v5, vcc ; GFX8-NEXT: v_mov_b32_e32 v4, s4 ; GFX8-NEXT: v_mov_b32_e32 v5, s5 @@ -307,23 +292,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s2, s9, 31 -; GFX9-NEXT: s_ashr_i32 s12, s11, 31 -; GFX9-NEXT: s_add_u32 s0, s8, s2 -; GFX9-NEXT: s_addc_u32 s1, s9, s2 -; GFX9-NEXT: s_add_u32 s8, s10, s12 -; GFX9-NEXT: s_mov_b32 s13, s12 -; GFX9-NEXT: s_addc_u32 s9, s11, s12 -; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] +; GFX9-NEXT: s_ashr_i32 s12, s9, 31 +; GFX9-NEXT: s_ashr_i32 s14, s11, 31 +; GFX9-NEXT: s_add_u32 s0, s8, s12 +; GFX9-NEXT: s_addc_u32 s1, s9, s12 +; GFX9-NEXT: s_add_u32 s2, s10, s14 +; GFX9-NEXT: s_mov_b32 s15, s14 +; GFX9-NEXT: s_addc_u32 s3, s11, s14 +; GFX9-NEXT: s_xor_b64 s[8:9], s[2:3], s[14:15] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX9-NEXT: s_mov_b32 s3, s2 -; GFX9-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] +; GFX9-NEXT: s_mov_b32 s13, s12 +; GFX9-NEXT: s_xor_b64 s[10:11], s[0:1], s[12:13] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s14, 0, s8 -; GFX9-NEXT: s_subb_u32 s15, 0, s9 +; GFX9-NEXT: s_sub_u32 s18, 0, s8 +; GFX9-NEXT: s_subb_u32 s19, 0, s9 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 
@@ -331,10 +316,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -342,92 +327,79 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[16:17], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; 
GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s9 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s11, 
v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s11, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] ; GFX9-NEXT: v_mov_b32_e32 v6, s11 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s10, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v4, s11, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v5, s9 ; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 ; GFX9-NEXT: v_sub_u32_e32 v1, s11, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] ; GFX9-NEXT: 
v_cmp_le_u32_e64 s[0:1], s8, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[0:1] +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[0:1] ; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v0 ; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s8, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc @@ -435,26 +407,27 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; 
GFX9-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] -; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX9-NEXT: v_xor_b32_e32 v0, s0, v5 -; GFX9-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX9-NEXT: s_xor_b64 s[0:1], s[12:13], s[14:15] +; GFX9-NEXT: v_xor_b32_e32 v0, s0, v3 +; GFX9-NEXT: v_xor_b32_e32 v1, s1, v4 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: v_xor_b32_e32 v3, s2, v6 -; GFX9-NEXT: v_xor_b32_e32 v5, s2, v2 -; GFX9-NEXT: v_mov_b32_e32 v6, s2 -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v6, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] +; GFX9-NEXT: v_xor_b32_e32 v3, s12, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-NEXT: v_xor_b32_e32 v4, s12, v2 +; GFX9-NEXT: v_mov_b32_e32 v5, s12 +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s12, v3 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v5, vcc +; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_i64: @@ -470,10 +443,11 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: s_addc_u32 s9, s11, s12 ; GFX10-NEXT: s_mov_b32 s3, s2 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] -; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX10-NEXT: s_sub_u32 s10, 0, s8 +; GFX10-NEXT: s_sub_u32 s14, 0, s8 +; GFX10-NEXT: s_subb_u32 s15, 0, s9 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -484,85 +458,72 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 
; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s11, s10, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s11, s10, v4, v[1:2] -; GFX10-NEXT: s_subb_u32 s11, 0, s9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s14, s11, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s14, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v6, s14, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v0, s14, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v2, s14, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s14 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s14, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s14 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s16, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], 
s14, s10, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s10, v4, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s11, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v6, s10, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v2, s10, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 
s0, v1 -; GFX10-NEXT: v_mul_lo_u32 v5, s1, v1 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v0, s10, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_co_u32 v0, s10, v0, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v5, s10, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, s1, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v5, 0 -; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s8, v3, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s9, v5, v[1:2] -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v6, s1, v1 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v4, s10, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, s11, v0 +; GFX10-NEXT: v_mul_lo_u32 v3, s10, v1 +; GFX10-NEXT: v_mul_lo_u32 v5, s11, v1 +; GFX10-NEXT: v_mul_hi_u32 v6, s10, v1 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v3 +; GFX10-NEXT: v_add_co_u32 v0, s0, v5, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v6, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_mul_hi_u32 v2, s11, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s8, v4, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v2, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s8, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s9, v4, 
v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s10, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v6, s11, v1 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s11, v1, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s9, v6, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo @@ -576,7 +537,7 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 ; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v5, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v1 @@ -586,12 +547,12 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v7, v8, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo ; GFX10-NEXT: s_xor_b64 s[8:9], s[2:3], s[12:13] -; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 @@ -1272,25 +1233,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-LABEL: sdivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX8-NEXT: 
s_load_dwordx4 s[4:7], s[4:5], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_ashr_i32 s4, s13, 31 -; GFX8-NEXT: s_ashr_i32 s6, s1, 31 -; GFX8-NEXT: s_add_u32 s16, s12, s4 -; GFX8-NEXT: s_addc_u32 s17, s13, s4 -; GFX8-NEXT: s_add_u32 s0, s0, s6 -; GFX8-NEXT: s_mov_b32 s7, s6 -; GFX8-NEXT: s_addc_u32 s1, s1, s6 -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] +; GFX8-NEXT: s_ashr_i32 s16, s13, 31 +; GFX8-NEXT: s_ashr_i32 s18, s5, 31 +; GFX8-NEXT: s_add_u32 s0, s12, s16 +; GFX8-NEXT: s_addc_u32 s1, s13, s16 +; GFX8-NEXT: s_add_u32 s2, s4, s18 +; GFX8-NEXT: s_mov_b32 s19, s18 +; GFX8-NEXT: s_addc_u32 s3, s5, s18 +; GFX8-NEXT: s_xor_b64 s[12:13], s[2:3], s[18:19] ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX8-NEXT: s_mov_b32 s5, s4 -; GFX8-NEXT: s_xor_b64 s[16:17], s[16:17], s[4:5] +; GFX8-NEXT: s_mov_b32 s17, s16 +; GFX8-NEXT: s_xor_b64 s[4:5], s[0:1], s[16:17] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s18, 0, s12 -; GFX8-NEXT: s_subb_u32 s19, 0, s13 +; GFX8-NEXT: s_sub_u32 s22, 0, s12 +; GFX8-NEXT: s_subb_u32 s23, 0, s13 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX8-NEXT: v_trunc_f32_e32 v2, v1 @@ -1298,10 +1259,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; 
GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1309,243 +1270,213 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[20:21], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX8-NEXT: s_xor_b64 s[20:21], s[16:17], s[18:19] +; GFX8-NEXT: s_ashr_i32 s18, s7, 31 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX8-NEXT: s_mov_b32 s19, s18 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX8-NEXT: s_xor_b64 s[18:19], s[4:5], s[6:7] -; GFX8-NEXT: s_ashr_i32 s6, s15, 
31 -; GFX8-NEXT: s_mov_b32 s7, s6 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s17, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s16, v1 -; GFX8-NEXT: v_mul_hi_u32 v4, s16, v0 -; GFX8-NEXT: v_mul_hi_u32 v0, s17, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s17, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, s5, v0 +; GFX8-NEXT: v_mul_lo_u32 v3, s4, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX8-NEXT: v_mov_b32_e32 v6, s5 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s17, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s16, v1 -; GFX8-NEXT: 
v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v3, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v6, s17 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s16, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s5, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s4, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, s5, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s4, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s13 -; GFX8-NEXT: s_ashr_i32 s16, s3, 31 +; GFX8-NEXT: s_ashr_i32 s4, s15, 31 ; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc -; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s17, v1 +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s5, v1 ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v6 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 ; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v6 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s12, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 
v1, v2, s[0:1] -; GFX8-NEXT: v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc -; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 +; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, s12, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v8, v1, v2, s[0:1] +; GFX8-NEXT: v_subbrev_u32_e64 v10, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v11, s[0:1], 0, v4, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v1 -; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX8-NEXT: s_add_u32 s0, s14, s6 -; GFX8-NEXT: s_addc_u32 s1, s15, s6 -; GFX8-NEXT: s_add_u32 s2, s2, s16 -; GFX8-NEXT: s_mov_b32 s17, s16 -; GFX8-NEXT: s_addc_u32 s3, s3, s16 -; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] -; GFX8-NEXT: v_cvt_f32_u32_e32 v14, s3 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v2 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v11, s[0:1] +; GFX8-NEXT: s_add_u32 s0, s14, s4 +; GFX8-NEXT: s_addc_u32 s1, s15, s4 +; GFX8-NEXT: s_add_u32 s2, s6, s18 +; GFX8-NEXT: s_addc_u32 s3, s7, s18 +; GFX8-NEXT: s_xor_b64 s[6:7], s[2:3], s[18:19] +; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX8-NEXT: v_cvt_f32_u32_e32 v15, s6 ; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s2 -; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, s12, v8 -; GFX8-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v0, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14 -; GFX8-NEXT: v_add_f32_e32 v0, v0, v5 
-; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v5, v1, v12, vcc -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; GFX8-NEXT: v_add_f32_e32 v1, v1, v15 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s12, v9 +; GFX8-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v0, vcc +; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v11, v1 -; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v11 +; GFX8-NEXT: v_trunc_f32_e32 v16, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v16 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v0 -; GFX8-NEXT: s_sub_u32 s5, 0, s2 -; GFX8-NEXT: s_subb_u32 s20, 0, s3 -; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v12, 0 -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1] -; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v10, v3, v10, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v15, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[14:15], s5, v5, v[1:2] -; GFX8-NEXT: v_mul_lo_u32 v3, v5, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[14:15], s20, v12, v[1:2] -; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v16, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1] -; GFX8-NEXT: v_mul_lo_u32 v8, v12, v1 -; GFX8-NEXT: v_mul_hi_u32 v2, v12, v0 -; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0 -; GFX8-NEXT: v_xor_b32_e32 v9, s19, v10 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v3, v5, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_mul_hi_u32 v8, v12, v1 -; GFX8-NEXT: 
v_add_u32_e32 v0, vcc, v3, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_cvt_u32_f32_e32 v17, v0 +; GFX8-NEXT: s_mov_b32 s5, s4 +; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[4:5] +; GFX8-NEXT: s_sub_u32 s14, 0, s6 +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v17, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc +; GFX8-NEXT: v_cvt_u32_f32_e32 v13, v16 +; GFX8-NEXT: s_subb_u32 s15, 0, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v13, v[1:2] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, v3, v12, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s15, v17, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v13, v0 +; GFX8-NEXT: v_mul_hi_u32 v10, v17, v0 +; GFX8-NEXT: v_mul_lo_u32 v3, v17, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc +; GFX8-NEXT: v_mul_hi_u32 v0, v13, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v10, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v13, v1 +; GFX8-NEXT: v_mul_hi_u32 v10, v17, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_mul_hi_u32 v1, v13, v1 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v10, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v12, v0 -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v8, 0 -; 
GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc -; GFX8-NEXT: v_xor_b32_e32 v1, s18, v4 +; GFX8-NEXT: v_add_u32_e32 v10, vcc, v17, v0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[2:3], s14, v10, 0 +; GFX8-NEXT: v_addc_u32_e32 v11, vcc, v13, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v0, v3 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v5, v[0:1] -; GFX8-NEXT: v_mov_b32_e32 v10, s19 -; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s18, v1 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v8, v[3:4] -; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v9, v10, vcc -; GFX8-NEXT: v_xor_b32_e32 v4, s4, v7 -; GFX8-NEXT: v_mul_lo_u32 v7, v5, v2 -; GFX8-NEXT: v_mul_lo_u32 v9, v8, v3 -; GFX8-NEXT: v_mul_hi_u32 v11, v8, v2 -; GFX8-NEXT: v_mul_hi_u32 v2, v5, v2 -; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v11, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v8, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v11, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 -; GFX8-NEXT: v_mul_hi_u32 v3, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc -; GFX8-NEXT: v_mov_b32_e32 v10, s4 -; GFX8-NEXT: v_mul_lo_u32 v7, s13, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, s12, v3 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 -; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v10, vcc -; GFX8-NEXT: v_mul_hi_u32 v6, s12, v2 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: 
v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, s13, v3 +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v11, v[0:1] +; GFX8-NEXT: v_xor_b32_e32 v7, s20, v8 +; GFX8-NEXT: v_xor_b32_e32 v8, s21, v4 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s15, v10, v[0:1] +; GFX8-NEXT: v_mov_b32_e32 v9, s21 +; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s20, v7 +; GFX8-NEXT: v_xor_b32_e32 v4, s16, v5 +; GFX8-NEXT: v_mul_lo_u32 v5, v11, v2 +; GFX8-NEXT: v_mul_lo_u32 v7, v10, v3 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v8, v9, vcc +; GFX8-NEXT: v_mul_hi_u32 v8, v10, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_mul_hi_u32 v2, v11, v2 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, v11, v3 +; GFX8-NEXT: v_mul_hi_u32 v8, v10, v3 +; GFX8-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v3, v11, v3 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v8, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, s13, v2 +; GFX8-NEXT: v_mul_lo_u32 v7, s12, v3 +; GFX8-NEXT: v_mul_hi_u32 v9, s12, v2 ; GFX8-NEXT: v_mul_hi_u32 v2, s13, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mul_hi_u32 v8, s12, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6 -; GFX8-NEXT: v_mul_hi_u32 v9, s13, v3 -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v8, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, 
vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6 -; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[3:4] +; GFX8-NEXT: v_mul_hi_u32 v10, s13, v3 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, s13, v3 +; GFX8-NEXT: v_mul_hi_u32 v9, s12, v3 +; GFX8-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v9, vcc +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v2, v7, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s6, v9, 0 +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc +; GFX8-NEXT: v_xor_b32_e32 v6, s16, v6 +; GFX8-NEXT: v_mov_b32_e32 v8, s16 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s16, v4 +; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v8, vcc +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v10, v7 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s6, v8, v[3:4] ; GFX8-NEXT: v_mov_b32_e32 v10, s13 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s12, v2 -; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v8, v[6:7] -; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s7, v9, v[6:7] +; GFX8-NEXT: v_mov_b32_e32 v3, s7 ; GFX8-NEXT: v_subb_u32_e64 v7, s[0:1], v10, v6, vcc ; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s13, v6 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v7 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v7 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v7 ; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s2, v2 +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s6, v2 ; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc -; 
GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v8 -; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v12 +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v9 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v8, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v11 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v11 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v12 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s2, v11 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v12 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v16, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v16, s[0:1], 1, v13 ; GFX8-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc @@ -1556,20 +1487,20 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v13, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v13, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v6, v2, v6, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[6:7], s[16:17] -; GFX8-NEXT: v_xor_b32_e32 v2, s0, v8 -; GFX8-NEXT: v_xor_b32_e32 v3, s1, v9 +; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[18:19] +; GFX8-NEXT: v_xor_b32_e32 v2, s0, v9 +; GFX8-NEXT: v_xor_b32_e32 v3, s1, v8 ; GFX8-NEXT: v_mov_b32_e32 v8, s1 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v2 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc -; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 -; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 -; GFX8-NEXT: v_mov_b32_e32 v8, s6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v6 +; GFX8-NEXT: 
v_xor_b32_e32 v6, s4, v6 +; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7 +; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s4, v6 ; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v7, v8, vcc ; GFX8-NEXT: v_mov_b32_e32 v8, s8 ; GFX8-NEXT: v_mov_b32_e32 v9, s9 @@ -1583,25 +1514,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-LABEL: sdivrem_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX9-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s4, s13, 31 -; GFX9-NEXT: s_ashr_i32 s6, s1, 31 -; GFX9-NEXT: s_add_u32 s16, s12, s4 -; GFX9-NEXT: s_addc_u32 s17, s13, s4 -; GFX9-NEXT: s_add_u32 s0, s0, s6 +; GFX9-NEXT: s_ashr_i32 s6, s17, 31 +; GFX9-NEXT: s_add_u32 s0, s12, s4 +; GFX9-NEXT: s_addc_u32 s1, s13, s4 +; GFX9-NEXT: s_add_u32 s2, s16, s6 ; GFX9-NEXT: s_mov_b32 s7, s6 -; GFX9-NEXT: s_addc_u32 s1, s1, s6 -; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] +; GFX9-NEXT: s_addc_u32 s3, s17, s6 +; GFX9-NEXT: s_xor_b64 s[12:13], s[2:3], s[6:7] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s12 ; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_xor_b64 s[16:17], s[16:17], s[4:5] +; GFX9-NEXT: s_xor_b64 s[16:17], s[0:1], s[4:5] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s18, 0, s12 -; GFX9-NEXT: s_subb_u32 s19, 0, s13 +; GFX9-NEXT: s_sub_u32 s22, 0, s12 +; GFX9-NEXT: s_subb_u32 s23, 0, s13 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 @@ -1609,10 +1540,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], 
s18, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1620,239 +1551,214 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[20:21], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s13 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX9-NEXT: s_xor_b64 s[20:21], s[4:5], s[6:7] +; 
GFX9-NEXT: s_ashr_i32 s6, s15, 31 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX9-NEXT: s_mov_b32 s7, s6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX9-NEXT: s_xor_b64 s[18:19], s[4:5], s[6:7] -; GFX9-NEXT: s_ashr_i32 s6, s15, 31 -; GFX9-NEXT: s_mov_b32 s7, s6 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s17, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s16, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s16, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s17, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s17, v1 -; GFX9-NEXT: 
v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s17, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s16, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v4, v3, v0, v6 -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s12, v4, v[0:1] ; GFX9-NEXT: v_mov_b32_e32 v6, s17 -; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, s16, v1 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s13, v5, v[2:3] -; GFX9-NEXT: s_ashr_i32 s16, s3, 31 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v2, vcc -; GFX9-NEXT: v_sub_u32_e32 v1, s17, v2 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s17, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s16, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v3, s17, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v3, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v5, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s16, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v3, s13 +; GFX9-NEXT: s_ashr_i32 s16, s19, 31 +; GFX9-NEXT: 
v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX9-NEXT: v_sub_u32_e32 v1, s17, v1 ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v6 -; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s12, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v3, s[0:1] -; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v3, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v4, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s12, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v2, v8, s[0:1] +; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e64 v11, s[0:1], 1, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v5, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v10 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v10 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v2, v13, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v3 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v12, s[0:1] ; GFX9-NEXT: s_add_u32 s0, s14, s6 ; GFX9-NEXT: s_addc_u32 s1, s15, s6 -; GFX9-NEXT: s_add_u32 s2, s2, s16 +; GFX9-NEXT: s_add_u32 s2, s18, s16 ; GFX9-NEXT: s_mov_b32 s17, s16 -; GFX9-NEXT: s_addc_u32 s3, s3, s16 -; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v16, 
s2 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc +; GFX9-NEXT: s_addc_u32 s3, s19, s16 +; GFX9-NEXT: s_xor_b64 s[14:15], s[2:3], s[16:17] +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v16, s14 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 ; GFX9-NEXT: v_add_f32_e32 v2, v2, v16 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s12, v10 -; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v1, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v16, vcc, s12, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v17, vcc, 0, v1, vcc ; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 -; GFX9-NEXT: v_trunc_f32_e32 v17, v2 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v17 +; GFX9-NEXT: v_trunc_f32_e32 v3, v2 +; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v3 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v18, v1 ; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; GFX9-NEXT: s_sub_u32 s5, 0, s2 +; GFX9-NEXT: s_sub_u32 s5, 0, s14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v18, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v13, v3, v14, vcc -; GFX9-NEXT: v_cvt_u32_f32_e32 v14, v17 -; GFX9-NEXT: s_subb_u32 s20, 0, s3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v3 +; GFX9-NEXT: s_subb_u32 s18, 0, s15 +; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v14, v[2:3] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, v4, v12, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[14:15], s20, v18, v[2:3] -; GFX9-NEXT: v_mul_lo_u32 v3, v14, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v16, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v18, v2 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v13, v[2:3] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 +; GFX9-NEXT: 
v_cndmask_b32_e64 v8, v4, v11, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[2:3], s18, v18, v[2:3] +; GFX9-NEXT: v_mul_lo_u32 v3, v13, v1 ; GFX9-NEXT: v_mul_hi_u32 v11, v18, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, v14, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v4, v18, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v16, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v17, vcc +; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v11, v14, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v18, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v14, v2 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v11, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 -; GFX9-NEXT: v_add_u32_e32 v4, v11, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v11, vcc +; GFX9-NEXT: v_mul_lo_u32 v3, v13, v2 +; GFX9-NEXT: v_mul_hi_u32 v11, v18, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[2:3], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v1, s[2:3], v1, v11, vcc +; GFX9-NEXT: v_mul_hi_u32 v2, v13, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v18, v1 -; GFX9-NEXT: v_add3_u32 v2, v4, v3, v2 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[14:15], s5, v11, 0 -; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v14, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[2:3], s5, v11, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v13, v2, 
vcc ; GFX9-NEXT: v_mov_b32_e32 v1, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[0:1] ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v12, v[1:2] -; GFX9-NEXT: v_xor_b32_e32 v8, s18, v5 -; GFX9-NEXT: v_xor_b32_e32 v9, s19, v9 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s20, v11, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v10, s19 -; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s18, v8 +; GFX9-NEXT: v_xor_b32_e32 v9, s21, v5 +; GFX9-NEXT: v_xor_b32_e32 v8, s20, v8 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s18, v11, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v10, s21 +; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s20, v8 ; GFX9-NEXT: v_xor_b32_e32 v5, s4, v7 ; GFX9-NEXT: v_mul_lo_u32 v7, v12, v3 ; GFX9-NEXT: v_mul_lo_u32 v8, v11, v4 ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v9, v10, vcc ; GFX9-NEXT: v_mul_hi_u32 v9, v11, v3 ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v9, v12, v4 ; GFX9-NEXT: v_mul_hi_u32 v3, v12, v3 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v11, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v9, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, v12, v4 +; GFX9-NEXT: v_mul_hi_u32 v9, v11, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3 ; GFX9-NEXT: v_mul_hi_u32 v4, v12, v4 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v9, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 -; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v4, v8, v7, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1] +; 
GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v11, v3 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v12, v4, vcc ; GFX9-NEXT: v_mul_lo_u32 v7, s13, v3 ; GFX9-NEXT: v_mul_lo_u32 v8, s12, v4 ; GFX9-NEXT: v_mul_hi_u32 v10, s12, v3 ; GFX9-NEXT: v_mul_hi_u32 v3, s13, v3 -; GFX9-NEXT: v_mul_hi_u32 v12, s13, v4 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, s13, v4 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, s12, v4 ; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v10, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v3, v7 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v11, 0 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v10, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, s13, v4 +; GFX9-NEXT: v_mul_hi_u32 v10, s12, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v10, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v3, v8, vcc +; GFX9-NEXT: v_mul_hi_u32 v8, s13, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v10, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc ; GFX9-NEXT: v_mov_b32_e32 v9, s4 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s4, v5 -; GFX9-NEXT: v_add_u32_e32 v8, v10, v8 ; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_add3_u32 v9, v8, v7, v12 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s2, v9, v[4:5] -; GFX9-NEXT: 
v_mov_b32_e32 v10, s13 +; GFX9-NEXT: v_add_u32_e32 v9, v8, v7 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v9, v[4:5] +; GFX9-NEXT: v_mov_b32_e32 v11, s13 ; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s12, v3 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s3, v11, v[7:8] -; GFX9-NEXT: v_mov_b32_e32 v4, s3 -; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v10, v7, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v8 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v10, v[7:8] +; GFX9-NEXT: v_mov_b32_e32 v4, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v11, v7, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v8 ; GFX9-NEXT: v_sub_u32_e32 v7, s13, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v8 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v8 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s14, v3 ; GFX9-NEXT: v_subbrev_co_u32_e64 v13, s[0:1], 0, v7, vcc -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v10 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v13 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v13 ; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v12 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v12 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v7, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v13 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s2, v12 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v13 +; GFX9-NEXT: 
v_subrev_co_u32_e32 v7, vcc, s14, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v17, s[0:1], 1, v14 ; GFX9-NEXT: v_subbrev_co_u32_e32 v4, vcc, 0, v4, vcc @@ -1860,10 +1766,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v17, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v14, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v3, v7, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v4, s[0:1] @@ -1897,26 +1803,26 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: s_addc_u32 s1, s1, s16 ; GFX10-NEXT: s_mov_b32 s5, s4 ; GFX10-NEXT: s_xor_b64 s[6:7], s[0:1], s[16:17] -; GFX10-NEXT: s_xor_b64 s[0:1], s[12:13], s[4:5] +; GFX10-NEXT: s_xor_b64 s[12:13], s[12:13], s[4:5] ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7 -; GFX10-NEXT: s_sub_u32 s21, 0, s6 -; GFX10-NEXT: s_subb_u32 s20, 0, s7 -; GFX10-NEXT: s_ashr_i32 s12, s15, 31 -; GFX10-NEXT: s_xor_b64 s[18:19], s[4:5], s[16:17] -; GFX10-NEXT: s_ashr_i32 s16, s3, 31 -; GFX10-NEXT: s_add_u32 s14, s14, s12 -; GFX10-NEXT: s_addc_u32 s15, s15, s12 +; GFX10-NEXT: s_sub_u32 s25, 0, s6 +; GFX10-NEXT: s_subb_u32 s24, 0, s7 +; GFX10-NEXT: s_xor_b64 s[20:21], s[4:5], s[16:17] +; GFX10-NEXT: s_ashr_i32 s16, s15, 31 +; GFX10-NEXT: s_ashr_i32 s18, s3, 31 +; GFX10-NEXT: s_add_u32 s0, s14, s16 +; GFX10-NEXT: s_addc_u32 s1, s15, s16 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s6 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; GFX10-NEXT: s_add_u32 s2, s2, s16 +; GFX10-NEXT: s_add_u32 s2, s2, s18 +; GFX10-NEXT: s_mov_b32 
s19, s18 +; GFX10-NEXT: s_addc_u32 s3, s3, s18 ; GFX10-NEXT: s_mov_b32 s17, s16 -; GFX10-NEXT: s_addc_u32 s3, s3, s16 -; GFX10-NEXT: s_mov_b32 s13, s12 -; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] +; GFX10-NEXT: s_xor_b64 s[14:15], s[2:3], s[18:19] ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s3 -; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s2 -; GFX10-NEXT: s_xor_b64 s[14:15], s[14:15], s[12:13] +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 +; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX10-NEXT: s_xor_b64 s[22:23], s[0:1], s[16:17] ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 @@ -1927,255 +1833,230 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v1 ; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 ; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v2 +; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v2 ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX10-NEXT: v_trunc_f32_e32 v6, v4 -; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0 -; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v6 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s21, v7, 0 -; GFX10-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX10-NEXT: s_sub_u32 s5, 0, s2 +; GFX10-NEXT: v_trunc_f32_e32 v4, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v0 +; GFX10-NEXT: v_mul_f32_e32 v5, 0xcf800000, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s25, v6, 0 +; GFX10-NEXT: v_add_f32_e32 v3, v5, v3 +; GFX10-NEXT: s_sub_u32 s2, 0, s14 +; GFX10-NEXT: s_subb_u32 s3, 0, s15 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v3 -; GFX10-NEXT: v_mul_hi_u32 v10, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s22, s5, v8, 0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s22, s21, v9, v[1:2] -; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v6 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v10, v7, v0 +; GFX10-NEXT: v_mad_u64_u32 
v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v1, v3 -; GFX10-NEXT: v_mul_hi_u32 v6, v7, v0 -; GFX10-NEXT: s_subb_u32 s22, 0, s3 +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v0 +; GFX10-NEXT: v_mul_hi_u32 v5, v7, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, v9, v2 ; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2 -; GFX10-NEXT: v_mul_lo_u32 v11, v5, v2 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s23, s20, v7, v[4:5] -; GFX10-NEXT: v_mul_lo_u32 v4, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s5, v5, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v2, v5, v2 -; GFX10-NEXT: v_mul_lo_u32 v13, v7, v3 -; GFX10-NEXT: v_mul_lo_u32 v14, v9, v3 -; GFX10-NEXT: v_mul_hi_u32 v15, v7, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s22, v8, v[0:1] -; GFX10-NEXT: v_mul_hi_u32 v1, v9, v3 -; GFX10-NEXT: v_add_co_u32 v3, s23, v4, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v10, s23, v14, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s23 -; GFX10-NEXT: v_mul_lo_u32 v14, v8, v0 -; GFX10-NEXT: v_add_co_u32 v3, s23, v3, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v6, s23, v10, v15 -; GFX10-NEXT: v_mul_lo_u32 v15, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 -; GFX10-NEXT: v_mul_hi_u32 v16, v8, v0 -; GFX10-NEXT: v_mul_hi_u32 v17, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v4, v3 -; GFX10-NEXT: v_add_co_u32 v4, s23, v11, v14 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v13, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v2, s23, v15, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v0, s23, v6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v4, s23, v4, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v16 -; GFX10-NEXT: v_add3_u32 v1, v3, v6, v1 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v7, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v10, v4 -; GFX10-NEXT: v_cndmask_b32_e64 
v12, 0, 1, s23 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v9, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s21, v6, 0 -; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 -; GFX10-NEXT: v_mov_b32_e32 v10, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v13, v6, v4 +; GFX10-NEXT: v_mul_lo_u32 v14, v7, v4 +; GFX10-NEXT: v_mul_hi_u32 v15, v6, v4 +; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2 +; GFX10-NEXT: v_mul_hi_u32 v4, v7, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1] +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v10, v13 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, s0, v14, v5 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v10, v8, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v15, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_lo_u32 v13, v9, v0 +; GFX10-NEXT: v_mul_hi_u32 v14, v8, v0 +; GFX10-NEXT: v_mul_hi_u32 v15, v9, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v1, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v11, v10 +; GFX10-NEXT: v_add_co_u32 v2, s0, v13, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v4, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v12, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v14, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v2, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s25, v6, 0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 -; GFX10-NEXT: v_mul_hi_u32 v11, v7, v0 -; GFX10-NEXT: v_add3_u32 v3, v4, v3, v17 -; GFX10-NEXT: 
v_add_co_ci_u32_e32 v9, vcc_lo, v5, v3, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s23, s5, v8, 0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s21, s21, v7, v[1:2] +; GFX10-NEXT: v_add_nc_u32_e32 v3, v15, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mul_lo_u32 v10, v7, v0 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v1, v3 -; GFX10-NEXT: v_mul_lo_u32 v12, v9, v2 -; GFX10-NEXT: v_mul_hi_u32 v13, v8, v2 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s20, s20, v6, v[4:5] -; GFX10-NEXT: v_mul_lo_u32 v4, v7, v0 -; GFX10-NEXT: v_mul_hi_u32 v5, v6, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s5, v9, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v0 +; GFX10-NEXT: v_mul_hi_u32 v5, v7, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, v9, v2 +; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v13, v6, v4 +; GFX10-NEXT: v_mul_lo_u32 v14, v7, v4 +; GFX10-NEXT: v_mul_hi_u32 v15, v6, v4 +; GFX10-NEXT: v_mul_hi_u32 v4, v7, v4 ; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2 -; GFX10-NEXT: v_mul_lo_u32 v14, v6, v3 -; GFX10-NEXT: v_mul_lo_u32 v15, v7, v3 -; GFX10-NEXT: v_mul_hi_u32 v16, v6, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s22, v8, v[0:1] -; GFX10-NEXT: v_mul_hi_u32 v1, v7, v3 -; GFX10-NEXT: v_add_co_u32 v3, s5, v4, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v11, s5, v15, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v5 -; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v5, s5, v11, v16 -; GFX10-NEXT: v_mul_lo_u32 v16, v9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1] +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v10, v13 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 
v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, s0, v14, v5 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v10, v8, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v15, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_lo_u32 v13, v9, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v11, v10 +; GFX10-NEXT: v_mul_hi_u32 v14, v8, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 +; GFX10-NEXT: v_add_co_u32 v2, s0, v13, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v12, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v6, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v7, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v14, s0 +; GFX10-NEXT: v_mul_lo_u32 v6, s13, v1 +; GFX10-NEXT: v_mul_lo_u32 v10, s12, v3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_hi_u32 v7, s12, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, s13, v1 +; GFX10-NEXT: v_mul_lo_u32 v11, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v2, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v5, s12, v3 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v10 +; GFX10-NEXT: v_add_co_u32 v1, s0, v11, v1 ; GFX10-NEXT: v_mul_hi_u32 v0, v9, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v14, v11 -; GFX10-NEXT: v_add_co_u32 v11, s5, v12, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v2, s5, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v11, s5, v11, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v17 -; GFX10-NEXT: v_add3_u32 v1, v4, v5, v1 -; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, 
v6, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v1, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v6, s1, v3 -; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v14, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_mul_lo_u32 v11, s0, v1 -; GFX10-NEXT: v_mul_hi_u32 v7, s0, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX10-NEXT: v_mul_lo_u32 v12, s1, v1 -; GFX10-NEXT: v_add3_u32 v0, v5, v4, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v7, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v5, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s1 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v0, v4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, vcc_lo, v1, v5, s0 +; GFX10-NEXT: v_mul_hi_u32 v3, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2 -; GFX10-NEXT: v_mul_hi_u32 v4, s0, v1 -; GFX10-NEXT: v_mul_hi_u32 v5, s1, v1 -; GFX10-NEXT: v_add_co_u32 v1, s5, v6, v11 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v9, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v12, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v7 -; GFX10-NEXT: v_mul_lo_u32 v0, s15, v2 -; GFX10-NEXT: v_mul_lo_u32 v12, s14, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v4 -; GFX10-NEXT: v_mul_hi_u32 v9, s14, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_mul_hi_u32 v2, s15, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, s15, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v6, v1 -; GFX10-NEXT: v_add_co_u32 v6, s5, v0, v12 -; GFX10-NEXT: v_mul_hi_u32 v13, s14, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v12, s5, v3, v1 -; GFX10-NEXT: v_add_co_u32 v2, 
s20, v7, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s6, v12, 0 -; GFX10-NEXT: v_add_co_u32 v6, s5, v6, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v9, s5, v2, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s20 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 -; GFX10-NEXT: v_add3_u32 v4, v4, v7, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v11, v6 -; GFX10-NEXT: v_mul_hi_u32 v5, s15, v8 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s6, v4, v[1:2] -; GFX10-NEXT: v_add_co_u32 v6, s5, v9, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v4, vcc_lo -; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v7, 1 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s7, v12, v[1:2] -; GFX10-NEXT: v_add3_u32 v5, v3, v9, v5 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s2, v6, 0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v8, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s0, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v9, s1, v1 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s0, s1, v1, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v14, s6 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v14 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1] -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v19, v18, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, 
v15 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v17, v20, v17, s0 -; GFX10-NEXT: v_sub_co_u32 v1, s0, v3, s6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v9, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s6, v5, 0 +; GFX10-NEXT: v_mul_lo_u32 v7, s23, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v6 +; GFX10-NEXT: v_mul_lo_u32 v6, s22, v4 +; GFX10-NEXT: v_mul_hi_u32 v9, s22, v2 +; GFX10-NEXT: v_mul_hi_u32 v10, s23, v2 +; GFX10-NEXT: v_mul_lo_u32 v11, s23, v4 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s6, v3, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v12, s22, v4 +; GFX10-NEXT: v_mul_hi_u32 v4, s23, v4 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v7, v6 +; GFX10-NEXT: v_sub_co_u32 v6, s2, s12, v0 +; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v9, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s3, 0, 0, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s7, v5, v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, s0, v11, v10 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v12, s0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, vcc_lo, s13, v1, s2 +; GFX10-NEXT: v_sub_nc_u32_e32 v7, s13, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, v2, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v7, vcc_lo, s7, v7, s2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v11, vcc_lo, v6, s6 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s2, 0, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v13, s2, v5, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s2, 0, v3, s2 +; GFX10-NEXT: v_cmp_eq_u32_e64 s2, s7, v9 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s7, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, 0, 0, s1 +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s7, v12 +; GFX10-NEXT: v_cmp_le_u32_e64 s1, s6, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, 
s0, 0, v1, s0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v17, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, -1, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s1 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v12 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v16, s0 +; GFX10-NEXT: v_add_co_u32 v16, s0, v13, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v14, s0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX10-NEXT: v_sub_co_u32 v2, s0, v11, s6 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v7, s0, 0, v7, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v10 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v2, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s15, v17, v[1:2] +; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v18, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v13, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s22, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v10, s1, s23, v1, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s23, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v14, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v10 +; GFX10-NEXT: v_xor_b32_e32 v0, s20, v2 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s15, v1, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v7 +; GFX10-NEXT: v_xor_b32_e32 v2, s21, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, -1, s0 +; GFX10-NEXT: v_xor_b32_e32 v5, s4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v7, s14 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v9, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v0, s0, v0, s20 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s21, v2, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v10 +; GFX10-NEXT: v_xor_b32_e32 v2, s4, v6 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, 
s15, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 +; GFX10-NEXT: v_add_co_u32 v14, s0, v17, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, 0, v4, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v11, s0 +; GFX10-NEXT: v_add_co_u32 v11, s0, v14, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v15, s0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: v_sub_co_u32 v6, s0, v12, s14 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s3, v6, v[0:1] -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 -; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s14, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v8, s0 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s1, s15, v0, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s15, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v14, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v8 -; GFX10-NEXT: v_xor_b32_e32 v1, s18, v1 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 -; GFX10-NEXT: v_xor_b32_e32 v4, s19, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 -; GFX10-NEXT: v_xor_b32_e32 v3, s4, v3 -; GFX10-NEXT: v_xor_b32_e32 v7, s4, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v2, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s0, 0, v11, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v0, s0, v1, s18 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s19, v4, s0 -; GFX10-NEXT: 
v_cmp_eq_u32_e64 s0, s3, v8 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v4, v9, v12, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 -; GFX10-NEXT: v_add_co_u32 v15, s0, v6, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v5, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v12, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, v15, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: v_sub_co_u32 v9, s0, v13, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v11, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v12, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v4 -; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v13, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v12, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v5, v15, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v4, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v9, s0 -; GFX10-NEXT: s_xor_b64 s[0:1], s[12:13], s[16:17] -; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v3, s4 -; GFX10-NEXT: v_xor_b32_e32 v3, s0, v6 -; GFX10-NEXT: v_xor_b32_e32 v6, s1, v11 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v7, vcc_lo -; GFX10-NEXT: v_xor_b32_e32 v7, s12, v2 -; GFX10-NEXT: v_xor_b32_e32 v8, s12, v8 -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s0 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v7, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s12, v8, vcc_lo -; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[8:9] -; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v11, v14, v11, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3 +; GFX10-NEXT: 
v_cndmask_b32_e32 v14, v15, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v12, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v13, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v9, v17, v11, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v11, v4, v14, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s0 +; GFX10-NEXT: s_xor_b64 s[0:1], s[16:17], s[18:19] +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v2, s4 +; GFX10-NEXT: v_xor_b32_e32 v2, s0, v9 +; GFX10-NEXT: v_xor_b32_e32 v7, s1, v11 +; GFX10-NEXT: v_xor_b32_e32 v9, s16, v3 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v5, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v10, s16, v6 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v2, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v7, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v9, s16 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s16, v10, vcc_lo +; GFX10-NEXT: global_store_dwordx4 v8, v[0:3], s[8:9] +; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[10:11] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, ptr addrspace(1) %out0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 83ebc84e1f84a..8aa2238a90d1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -24,135 +24,120 @@ define i64 @v_srem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v1 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_xor_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CHECK-NEXT: 
v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v6, v3 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v6 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v6 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4] -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v3, vcc -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9 -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v10, v4, v9 -; CHECK-NEXT: v_mul_hi_u32 v4, 
v8, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, 
v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v1 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; 
CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v2, v4 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v7, v[3:4] -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v9, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v1 +; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v0, v7, vcc +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CHECK-NEXT: 
v_sub_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v2, v0 -; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, 
vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v9 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -229,18 +214,13 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc @@ -252,21 +232,16 @@ define 
amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -274,41 +249,36 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1 ; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1 +; CHECK-NEXT: v_mov_b32_e32 v5, s11 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 
v3, s10, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, s10, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, s11, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v5, s11 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v3, s9 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v4, s9 ; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CHECK-NEXT: 
v_cndmask_b32_e64 v2, v3, v5, s[0:1] +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -394,21 +364,16 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 ; GISEL-NEXT: v_mul_lo_u32 v11, v12, v10 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v10 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v4 ; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v9, vcc @@ -424,23 +389,18 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_lo_u32 v13, v12, v10 ; GISEL-NEXT: 
v_xor_b32_e32 v14, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v12, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v1, v15, v9 ; GISEL-NEXT: v_mul_hi_u32 v13, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v15, v1, vcc @@ -448,159 +408,139 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_lo_u32 v10, v11, v1 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v1 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v14, v1 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v10, v11, v1 -; GISEL-NEXT: 
v_add_i32_e32 v0, vcc, v12, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v9 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v14, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v0, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v14, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v12, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v9, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v11, v0 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v8, v12, v[9:10] -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v11, v0 -; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v14, v9, vcc +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v14, v9, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v14, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v12, v1, v9, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v0, v8, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v8 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v0, v8, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, v1, v9, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 
v6, v0 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v0, vcc ; GISEL-NEXT: v_xor_b32_e32 v6, v1, v0 ; GISEL-NEXT: v_xor_b32_e32 v7, v7, v0 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 -; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v9, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v11, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc ; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v14, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v5 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, v15, v1, s[4:5] ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v16, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v16 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v0 -; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, v7, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16 -; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v9, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v18, v16, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v17, v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v20, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v16, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v17, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v14, v17, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 -; GISEL-NEXT: v_add_i32_e32 v1, 
vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v17, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v8, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v16, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v13, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v18, v14, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v10, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v13, v[8:9] +; GISEL-NEXT: v_trunc_f32_e32 v17, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v17 +; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v19, s[4:5], 0, v6 +; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, v7, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v17 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v21, v9, v10, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v14, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v18, v[9:10] +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v15, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, 
v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v18, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v10 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v8 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 +; GISEL-NEXT: v_mul_hi_u32 v8, v17, v8 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v0 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v17, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v10, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v14, v[1:2] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v10, v[8:9] +; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 ; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 -; GISEL-NEXT: v_mul_lo_u32 v9, v13, v8 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, 
v2, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; GISEL-NEXT: v_mul_hi_u32 
v8, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v3 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v2 ; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v10, 0 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[8:9] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v10, v[8:9] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 @@ -611,24 +551,24 @@ define <2 x i64> 
@v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v6 ; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v7 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64: @@ -646,131 +586,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v1 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 -; CGP-NEXT: v_xor_b32_e32 v1, v2, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v0 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 
0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; CGP-NEXT: v_trunc_f32_e32 v4, v3 -; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v2 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4] -; CGP-NEXT: v_mul_hi_u32 v15, v5, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4] -; CGP-NEXT: v_mul_lo_u32 v4, v14, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 -; CGP-NEXT: v_mul_lo_u32 v16, v5, v3 -; CGP-NEXT: v_mul_lo_u32 v17, v14, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v3 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v16, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v17, v2 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v2 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v3, vcc -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v3, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v4, vcc 
+; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v2, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v16, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v16 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v15, vcc +; CGP-NEXT: v_mul_hi_u32 v15, v5, v1 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v17, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v15, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] ; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4] -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v12 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v12 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v4, v12 -; CGP-NEXT: v_mul_lo_u32 v4, v14, v2 -; CGP-NEXT: v_mul_lo_u32 v13, v5, v3 -; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 +; CGP-NEXT: v_xor_b32_e32 v11, v2, v12 +; 
CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v5, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 ; CGP-NEXT: v_xor_b32_e32 v10, v10, v12 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v14, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v5, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v14, v3, vcc -; CGP-NEXT: v_mul_lo_u32 v4, v10, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v13, v11, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v10, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v14, v1 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v1 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v13, vcc +; CGP-NEXT: 
v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v14, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v2, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v13, 0 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v13, v[3:4] -; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v1 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v0, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v10, v1 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v5, v[1:2] +; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v10, v1, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v10, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; 
CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v0 -; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v3, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v3 +; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v1 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 ; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v12 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 @@ -815,131 +740,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] ; CGP-NEXT: .LBB2_7: -; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v7 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v3, vcc -; 
CGP-NEXT: v_xor_b32_e32 v2, v2, v3 -; CGP-NEXT: v_xor_b32_e32 v3, v4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v3 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v12, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6] -; CGP-NEXT: v_mul_hi_u32 v13, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6] -; CGP-NEXT: v_mul_lo_u32 v6, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v14, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v12, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v14, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v5, v12, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v4 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6] +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc +; CGP-NEXT: 
v_xor_b32_e32 v5, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v6, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v5 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v6, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6] -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v10 ; CGP-NEXT: 
v_addc_u32_e32 v8, vcc, v9, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v9, v6, v10 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v11, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v4, v12, v4 +; CGP-NEXT: v_xor_b32_e32 v9, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 ; CGP-NEXT: v_xor_b32_e32 v8, v8, v10 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v12, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_mul_hi_u32 v11, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_hi_u32 v5, v12, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v12, v5, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v5 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 -; CGP-NEXT: v_mul_hi_u32 v12, v8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_mul_hi_u32 v7, v9, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v12, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, 
v4, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v8, v2 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v11, 0 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v11, v[5:6] -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v8, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v8, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v8, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v2, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v8, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v7, 0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v8, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, 
s[4:5], v8, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 -; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v4, v2 -; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v6 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -999,18 +909,13 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, 
v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1025,23 +930,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 
+; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1051,23 +951,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 
v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] @@ -1110,6 +1005,8 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x1000 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1127,18 +1024,13 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1154,23 +1046,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, 
v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1180,104 +1067,86 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; 
GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 
-1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v15, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v13, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: 
v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc @@ -1285,30 +1154,26 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, 
v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1323,18 +1188,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: 
v_cndmask_b32_e32 v2, v2, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2k_denom: @@ -1347,218 +1212,188 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, 
v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: 
v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; 
CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x1000 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v16, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v15, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13 -; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v11 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v18, v11, vcc ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, 
vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4 -; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; CGP-NEXT: v_sub_i32_e32 v16, vcc, v12, v5 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 ; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4 +; CGP-NEXT: v_mov_b32_e32 v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v11, -1, v1, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v5 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v18, vcc +; CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v5 ; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v1, vcc ; CGP-NEXT: v_cndmask_b32_e32 v16, v17, v18, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: 
v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v16, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v13, v16, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, 
v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: 
v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v6 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: 
v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -1594,18 +1429,13 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; 
CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1620,23 +1450,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; 
CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1646,23 +1471,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] @@ -1705,6 +1525,8 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1722,18 +1544,13 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1749,23 +1566,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: 
v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1775,104 +1587,86 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: 
v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, 
v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v15, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v13, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] 
+; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc @@ -1880,30 +1674,26 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; 
GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1918,18 +1708,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, 
v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_oddk_denom: @@ -1942,218 +1732,188 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: 
v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; 
CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; 
CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v16, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v15, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13 -; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v11 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v18, v11, vcc ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4 -; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; CGP-NEXT: v_sub_i32_e32 v16, vcc, v12, v5 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 ; CGP-NEXT: 
v_subbrev_u32_e32 v17, vcc, 0, v0, vcc -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4 +; CGP-NEXT: v_mov_b32_e32 v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v11, -1, v1, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v5 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v18, vcc +; CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v5 ; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v1, vcc ; CGP-NEXT: v_cndmask_b32_e32 v16, v17, v18, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; 
CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v16, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v13, v16, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: 
v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v6 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: 
v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -2186,137 +1946,120 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB7_3: -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v1 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_xor_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v5 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v7, v5 
-; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v7 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7 -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, v6 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3] -; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v11, v6 +; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v5 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; 
CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v1 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, 
vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v5, vcc -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, v6 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v7, v2, v9 -; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v4, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v10, v3, v9 -; CHECK-NEXT: v_mul_hi_u32 v3, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, v7, v3 -; 
CHECK-NEXT: v_mul_hi_u32 v6, v7, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_hi_u32 v5, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v2, v4 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4] -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v2, v0 -; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v9, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: 
v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v0, v7, vcc +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v3, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, 
v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v9 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -2381,21 +2124,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v11 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v17, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 ; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, 
vcc +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v4 ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc @@ -2411,23 +2149,18 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v14, v13, v11 ; GISEL-NEXT: v_xor_b32_e32 v15, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v13, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v1, v16, v10 ; GISEL-NEXT: v_mul_hi_u32 v14, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc @@ -2435,24 +2168,19 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v11, v12, v1 ; GISEL-NEXT: v_mul_hi_u32 v13, v12, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; 
GISEL-NEXT: v_mul_hi_u32 v14, v15, v1 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v10 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v15, v1 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v11, vcc +; GISEL-NEXT: v_mul_hi_u32 v11, v15, v1 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v5, v10, v[1:2] ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v7, v13, v[10:11] ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v12, v0 @@ -2463,7 +2191,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v7 -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v0, v7, vcc +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v0, v7, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v13, v1, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v0 @@ 
-2472,64 +2200,58 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_xor_b32_e32 v8, v8, v0 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v8 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v11, v5 -; GISEL-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v11, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc ; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v15, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v5 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, v9, v1, s[4:5] ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v9, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v0 -; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, v8, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v20, v9 -; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v10, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v18, v20, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v14, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[9:10] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v7, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v20, v0 -; GISEL-NEXT: v_mul_lo_u32 v10, v17, v9 -; GISEL-NEXT: v_mul_hi_u32 v14, v17, v0 +; GISEL-NEXT: v_trunc_f32_e32 v17, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v17 +; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, 
s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v19, s[4:5], 0, v6 +; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, v8, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v17 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v21, v9, v10, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v14, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v20, v18, v[9:10] +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v15, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v18, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v10 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v9 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v17, v9 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v9, v1 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v20, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v20, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v17, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GISEL-NEXT: v_mul_hi_u32 v9, v20, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v0 -; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v20, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v14, 0 +; GISEL-NEXT: v_addc_u32_e64 v15, s[4:5], v17, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v14, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v18, v15, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v15, v[1:2] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v14, v[9:10] -; GISEL-NEXT: v_cndmask_b32_e32 v5, v12, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v20, v14, v[9:10] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v11, v2, v7 @@ -2539,21 +2261,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v3, v14, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v9 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v15, v9 ; GISEL-NEXT: v_mul_hi_u32 v10, v14, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: 
v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v15, v2, vcc @@ -2561,29 +2278,25 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v9, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v10, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, 
v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2 ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v0 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v9, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v13, v[9:10] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc @@ -2636,134 +2349,116 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v1 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 -; CGP-NEXT: v_xor_b32_e32 v1, v4, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v10, v1 -; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v0 -; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v10 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v12, v10 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v12 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v11 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], 
v14, v16, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_mul_hi_u32 v12, v13, v10 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v12 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v12 +; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v10, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v10 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11] +; CGP-NEXT: v_mul_hi_u32 v11, v13, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v17, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v17 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v13, v10 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v18, v0 ; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v17, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v18, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v17, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v18, v10 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, 
v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v4 -; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v11 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5] +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2] ; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v14 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v8, v14 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11] ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v14, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v4, v14 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v9, v13, v11 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v9, v13, v10 ; CGP-NEXT: v_xor_b32_e32 v15, v8, v14 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v13, v11 -; 
CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_mul_hi_u32 v8, v13, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v16, v10 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v16, v10 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v15, v0 +; CGP-NEXT: v_mul_lo_u32 v9, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v15, v1 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v1 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v15, v1 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v16, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v15, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v12, v8 -; 
CGP-NEXT: v_mul_hi_u32 v11, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v15, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v15, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v12, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v4 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10] -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v15, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v15, v9 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v8, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v10, v[8:9] +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v15, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v15, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v0 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v1 -; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v11, vcc, v4, v0 -; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, 
v9, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v12 +; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v8, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v1 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v14 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v14 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 @@ -2810,137 +2505,120 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] ; CGP-NEXT: .LBB8_7: -; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v10 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v3 -; CGP-NEXT: v_xor_b32_e32 v3, v4, v3 
-; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v11, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v4, v6, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_mul_hi_u32 v10, v11, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v9 -; CGP-NEXT: v_mul_lo_u32 v15, v6, v9 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v8, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 ; CGP-NEXT: 
v_add_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v4 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v7, v4, v12 -; CGP-NEXT: v_mul_lo_u32 v4, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, v11, v9 -; CGP-NEXT: v_xor_b32_e32 v13, v5, v12 -; CGP-NEXT: v_mul_hi_u32 v5, v11, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, 
vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v10, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v12, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v5, v2 +; CGP-NEXT: v_mul_lo_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 +; CGP-NEXT: 
v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v5, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v2, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v9, 0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v13, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v13, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v13, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v4, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6] -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mad_u64_u32 
v[3:4], s[4:5], v8, v9, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v5, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 -; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v4, v2 -; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v6 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, 
v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v12 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v12 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -3036,223 +2714,189 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], 0, 0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v3 ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 ; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v9, v7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v9 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 -; GISEL-NEXT: v_mul_hi_u32 v14, v10, v7 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: 
v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v13, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v4 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v7 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 -; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: 
v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v4 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v13, v4 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] -; GISEL-NEXT: v_mul_hi_u32 v0, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 -; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v10, v7 ; GISEL-NEXT: v_and_b32_e32 v12, 0xffffff, v2 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v5, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v13, v7 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v7 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], 
v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0 -; GISEL-NEXT: v_mov_b32_e32 v0, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v5, v[0:1] -; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9] -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 
-; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v0 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v2 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v6, v9, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v5, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v7, v1 -; GISEL-NEXT: v_subbrev_u32_e64 v13, s[4:5], 0, v10, vcc -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v6, v4 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: v_sub_i32_e64 v15, s[4:5], 0, v2 -; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, v3, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v3 -; GISEL-NEXT: v_mov_b32_e32 v0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v17, v[0:1] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v16, v14, v[5:6] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v13, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v6, v18, v0, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v0, v17, v4 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v5 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v4 -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v10, v3, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v19, v17, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v18, v0 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v19, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v18 -; 
GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GISEL-NEXT: v_mul_hi_u32 v5, v17, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v18, v4 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v11, v1 -; GISEL-NEXT: v_mov_b32_e32 v0, v5 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v15, v17, v[0:1] -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1] -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v18, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v6, v13, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v17, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v14, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v14, v4 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], 0, v12 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v10, v17, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v14, v0 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v10, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5 -; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v5 -; 
GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v14, v4 -; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v17, v0, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc ; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v6, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v11, v4 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v4 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v10, 0 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[7:8] +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], v3, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v3, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v5, v8, s[4:5] +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 +; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], 0, v2 +; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[4:5], 
s[4:5], v11, v10, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v6, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v7, v1 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v4 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v3 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_addc_u32_e64 v17, s[4:5], v17, v20, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v5 +; GISEL-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v20, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v17, s[4:5], 0, v17, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v4 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v9, v5, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v17, v6, v19, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v14, v3, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v16, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; 
GISEL-NEXT: v_mul_lo_u32 v13, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 0, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v5 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v9, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v12, v5 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v4 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v4, v5 -; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 
v0, v6 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], v8, v7, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v5 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v7, v4 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v7, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v5 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v7 ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[0:1] ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index d15551365707b..b152c06b9fda6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -16,10 +16,10 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: 
s_cbranch_execnz .LBB0_4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -50,18 +50,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -79,18 +74,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: 
v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc @@ -102,39 +92,34 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v6, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v6, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v0 +; CHECK-NEXT: 
v_mul_lo_u32 v7, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v10 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v5, v6, vcc -; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v5, v7, vcc +; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v7 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 @@ -144,7 +129,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc @@ -152,7 +137,7 @@ define i64 
@v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: ; implicit-def: $vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 @@ -197,10 +182,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 -; CHECK-NEXT: s_sub_u32 s4, 0, s2 +; CHECK-NEXT: s_sub_u32 s10, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 -; CHECK-NEXT: s_subb_u32 s5, 0, s3 +; CHECK-NEXT: s_subb_u32 s11, 0, s3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 @@ -208,10 +193,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s10, v1 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 @@ -222,25 +207,20 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; 
CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s10, v4 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 @@ -251,18 +231,13 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: 
v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v5, vcc, v5, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -274,47 +249,42 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v1, vcc, v1, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; 
CHECK-NEXT: v_mul_hi_u32 v7, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v1 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v8 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v6, vcc +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v9, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; CHECK-NEXT: s_mov_b32 s4, 0 @@ -368,233 +338,203 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 ; GISEL-NEXT: 
v_cvt_f32_u32_e32 v11, v5 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v7 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v7, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: 
v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; 
GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 
0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 -; 
GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; 
GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v8 ; GISEL-NEXT: v_mul_lo_u32 v8, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 
v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v18, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v16, v8 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v19 -; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v14, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v16, v5, v8 -; GISEL-NEXT: v_mul_hi_u32 v17, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v19, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v8 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9 -; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13 -; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10 -; 
GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14 -; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v8, s[6:7], v8, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v9, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v8 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v19, v10 +; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], 1, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v21, v11 +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], 1, v18 +; GISEL-NEXT: v_sub_i32_e64 v0, s[8:9], v0, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[10:11], 1, v19 +; GISEL-NEXT: v_sub_i32_e64 v2, s[12:13], v2, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v10 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v4 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v4 -; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v6 -; GISEL-NEXT: v_mul_lo_u32 v20, v4, v10 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v4 +; GISEL-NEXT: v_sub_i32_e64 v0, s[16:17], v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[18:19], v0, v4 ; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v4, v6, v12 +; GISEL-NEXT: v_mul_lo_u32 v4, v6, v11 +; GISEL-NEXT: 
v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, 0, v11, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 -; GISEL-NEXT: v_addc_u32_e64 v2, s[6:7], 0, v12, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_add_i32_e64 v4, s[20:21], v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, v0, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v6 +; GISEL-NEXT: v_addc_u32_e64 v2, s[10:11], 0, v15, s[10:11] ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[16:17] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v19, v4 -; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, v0, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 -; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, v2, s[12:13] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], v1, v16, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v16 -; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v3, v4, s[8:9] -; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[22:23] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v5 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v7 -; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[8:9] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[18:19] -; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21] -; GISEL-NEXT: v_cndmask_b32_e64 v16, v16, v20, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[10:11], v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v17 +; GISEL-NEXT: v_subb_u32_e64 
v17, vcc, v1, v13, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v13 +; GISEL-NEXT: v_subb_u32_e64 v13, vcc, v3, v4, s[12:13] +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[18:19] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v17, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v17, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[6:7] +; GISEL-NEXT: v_subb_u32_e64 v1, s[6:7], v1, v5, s[8:9] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v7 +; GISEL-NEXT: v_subb_u32_e64 v3, s[8:9], v3, v7, s[12:13] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v13, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v6, v13, v6, s[10:11] +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[16:17] +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, v5 @@ -602,19 +542,19 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v16 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v18, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v19, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v5, v2, v17, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v20, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v12, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v16, s[6:7] +; 
GISEL-NEXT: v_cndmask_b32_e64 v5, v15, v2, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v11, v5, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i64: @@ -630,7 +570,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 @@ -658,18 +598,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v14, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v13, vcc, v13, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; CGP-NEXT: 
v_addc_u32_e32 v2, vcc, v2, v12, vcc @@ -687,18 +622,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v15, vcc +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v13, v12 +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, vcc, v3, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -710,49 +640,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: 
v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v13, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CGP-NEXT: v_mul_lo_u32 v2, v4, v1 +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v1 ; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 ; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v14 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v2, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v11, vcc -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; 
CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v13, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc @@ -761,7 +686,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 ; CGP-NEXT: .LBB2_2: ; %Flow1 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -791,10 +716,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB2_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB2_8 ; CGP-NEXT: .LBB2_6: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -825,18 +750,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 ; 
CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc @@ -854,18 +774,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v10 +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v14, 
s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -877,49 +792,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v6, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v3 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 
v5, v11 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v12 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v9, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v7, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 +; CGP-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v11, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc @@ -927,7 +837,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, 
<2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr6 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_6 ; CGP-NEXT: .LBB2_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -989,18 +899,13 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v2, v0, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v4, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc ; CHECK-NEXT: v_mul_hi_u32 v1, v1, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20 @@ -1030,33 +935,23 @@ define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v2, v5 ; CHECK-NEXT: v_mul_hi_u32 v3, v3, v5 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; 
CHECK-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v8 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v10 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v5, v0, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[8:9], v11, v12 +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], v5, v13, s[8:9] +; CHECK-NEXT: v_add_i32_e64 v4, s[10:11], v14, v4 +; CHECK-NEXT: v_addc_u32_e64 v2, s[12:13], v4, v2, s[10:11] +; CHECK-NEXT: v_addc_u32_e64 v4, s[14:15], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[8:9] +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[12:13] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v6, s[10:11] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 20 ; 
CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1079,10 +974,10 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB7_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB7_4 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -1113,18 +1008,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc @@ -1142,18 +1032,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: 
v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -1165,57 +1050,52 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v6, 
v0 -; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v2, v5, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v10 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v4, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v4, v7, vcc +; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v7 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CHECK-NEXT: 
v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v3, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v4, v11, v12, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB7_2 ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -1255,253 +1135,223 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v5 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4 -; GISEL-NEXT: 
v_subb_u32_e32 v15, vcc, 0, v8, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v5, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, 
v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: 
v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; 
GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v6 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, 
v10, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v6 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; 
GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v12, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v6 ; GISEL-NEXT: v_mul_lo_u32 v6, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v18, v6 +; GISEL-NEXT: 
v_add_i32_e64 v6, s[6:7], v16, v6 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v19 -; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v17, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v18, v4, v9 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v4, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9 -; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13 -; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10 -; GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14 -; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v4 -; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v7 -; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v4 
-; GISEL-NEXT: v_mul_lo_u32 v20, v7, v10 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v7 -; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; GISEL-NEXT: v_mul_lo_u32 v2, v4, v12 -; GISEL-NEXT: v_add_i32_e64 v4, s[24:25], v16, v20 -; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, v12, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], v19, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v17 -; GISEL-NEXT: v_subb_u32_e64 v17, s[6:7], v1, v4, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v6, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v6, vcc, v9, v6, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v8, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v6 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v19, v10 +; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], 1, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v21, v11 +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], 1, v18 +; GISEL-NEXT: v_sub_i32_e64 v0, s[8:9], v0, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[10:11], 1, v19 +; GISEL-NEXT: v_sub_i32_e64 v2, s[12:13], v2, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[14:15], v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v11 +; 
GISEL-NEXT: v_add_i32_e64 v15, s[14:15], v16, v15 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 +; GISEL-NEXT: v_sub_i32_e64 v0, s[14:15], v0, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v0, v7 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], 0, v11, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; GISEL-NEXT: v_sub_i32_e64 v2, s[18:19], v2, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[20:21], v2, v4 +; GISEL-NEXT: v_addc_u32_e64 v2, s[6:7], 0, v16, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v13, v14 +; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, v0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v15, v17 +; GISEL-NEXT: v_subb_u32_e64 v14, s[6:7], v1, v4, s[8:9] ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[16:17] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v17, v8 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[14:15], v17, v8 -; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, v0, s[10:11] -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[18:19] +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v14, v8 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v14, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[8:9] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v1, v8 -; GISEL-NEXT: v_addc_u32_e64 v1, s[12:13], 0, v7, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[22:23] -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, vcc, v3, v2, s[8:9] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v3, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 -; GISEL-NEXT: v_subb_u32_e64 v2, s[8:9], v2, v5, s[8:9] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 
0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v2, vcc, 0, v2, s[20:21] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v16, s[14:15] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v4, s[8:9] -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v1, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[16:17] +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[20:21] +; GISEL-NEXT: v_subb_u32_e64 v15, s[10:11], v3, v13, s[12:13] +; GISEL-NEXT: v_sub_i32_e64 v3, s[10:11], v3, v13 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[10:11], v15, v5 +; GISEL-NEXT: v_subb_u32_e64 v3, s[12:13], v3, v5, s[12:13] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[12:13], v15, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[18:19] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v4, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v15, v14, s[12:13] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v19, s[6:7] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v8, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v13, v18, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v17, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v5, v7, v1, s[8:9] 
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v20, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v12, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v16, v2, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v5, v0, v7, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v11, v5, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom: @@ -1520,7 +1370,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 @@ -1548,18 +1398,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v15, s[10:11], 0, 0, vcc +; CGP-NEXT: 
v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v14, vcc, v14, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v16, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc @@ -1577,18 +1422,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v16, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v13 +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v12, vcc, v12, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -1600,58 +1440,53 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 -; CGP-NEXT: v_mul_hi_u32 v14, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v14, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v2, v1 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v0 +; CGP-NEXT: v_mul_hi_u32 v13, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v1 ; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v0 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v15 ; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12 -; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v9, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v2 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 +; CGP-NEXT: 
v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v12, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v9, v12 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 +; CGP-NEXT: v_cndmask_b32_e32 v8, v13, v12, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v3 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v13, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v14, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB8_4 @@ -1683,10 +1518,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 
s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB8_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB8_8 ; CGP-NEXT: .LBB8_6: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -1717,18 +1552,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc @@ -1746,18 +1576,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 
-; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v11, v8 +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v6, vcc, v6, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -1769,57 +1594,52 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v9, v2 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, 
vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v6, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v9, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v8, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v12 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v9 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v7, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v7, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e32 v6, v11, v7, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v7, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; 
CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v11, vcc +; CGP-NEXT: v_cndmask_b32_e32 v6, v13, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB8_6 ; CGP-NEXT: .LBB8_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -1958,39 +1778,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 
v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v20, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v19, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v23 +; GISEL-NEXT: v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[10:11], v24, v17 +; GISEL-NEXT: v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v19, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 @@ -2016,39 +1826,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x 
i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v12 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v15 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v20, v13 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: 
v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v14, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 @@ -2066,78 +1866,76 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 -; GISEL-NEXT: v_mul_hi_u32 v13, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; 
GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7 -; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v6 ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v7 ; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v10 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v15 ; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v16, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v18 ; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v19, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v8, vcc -; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v13 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v10 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v13, 
s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v13, s[6:7], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v9 +; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v11, s[6:7], 0, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v14, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 -; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v15, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v12, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v13, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v11, s[4:5] ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v13, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v11, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v18, v17, s[4:5] ; GISEL-NEXT: 
v_cndmask_b32_e32 v3, v16, v20, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, v19, v21, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index a58397eccaba7..e5dbe607de9bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -116,8 +116,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX8-NEXT: s_sub_u32 s2, 0, s10 -; GFX8-NEXT: s_subb_u32 s3, 0, s11 +; GFX8-NEXT: s_sub_u32 s14, 0, s10 +; GFX8-NEXT: s_subb_u32 s15, 0, s11 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -128,10 +128,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -139,44 +139,34 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, 
ptr addrspace(1) ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[12:13], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, 
v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -184,28 +174,23 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] ; 
GFX8-NEXT: v_mov_b32_e32 v6, s9 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, s9, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s11 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v0 ; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc ; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s9, v1 @@ -218,8 +203,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1] ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s10, v2 ; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 @@ -235,8 +220,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v9, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[0:1] +; 
GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v14, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] @@ -255,8 +240,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX9-NEXT: s_sub_u32 s2, 0, s10 -; GFX9-NEXT: s_subb_u32 s3, 0, s11 +; GFX9-NEXT: s_sub_u32 s14, 0, s10 +; GFX9-NEXT: s_subb_u32 s15, 0, s11 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -267,10 +252,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -278,71 +263,59 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: 
v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[12:13], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; 
GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] ; GFX9-NEXT: v_mov_b32_e32 v6, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v5, v[1:2] +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v0, v3, vcc 
+; GFX9-NEXT: v_mul_hi_u32 v4, s9, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] ; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v0 ; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v6 @@ -351,19 +324,19 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v6 -; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[0:1] ; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s10, v2 ; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s10, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s10, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 ; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc @@ -372,14 +345,14 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: 
v_cndmask_b32_e32 v0, v10, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v10, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v15, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v15, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v5, s[0:1] -; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v4, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v7, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v7, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_i64: @@ -388,7 +361,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX10-NEXT: s_sub_u32 s0, 0, s10 +; GFX10-NEXT: s_sub_u32 s2, 0, s10 +; GFX10-NEXT: s_subb_u32 s3, 0, s11 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -399,82 +373,69 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s0, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s0, v4, v[1:2] -; GFX10-NEXT: s_subb_u32 s1, 0, s11 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, s1, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, 
v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s2, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v6, s2, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v2, s2, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s2 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s12, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s0, v4, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s1, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, 
v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX10-NEXT: v_mul_hi_u32 v0, s9, v0 ; GFX10-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX10-NEXT: v_mul_lo_u32 v5, s9, v1 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX10-NEXT: v_mul_hi_u32 v6, s8, v1 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v3 ; GFX10-NEXT: v_add_co_u32 v0, s0, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v3 -; GFX10-NEXT: 
v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v5, s0, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v6, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0 ; GFX10-NEXT: v_mul_hi_u32 v2, s9, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v5, 0 -; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v4, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v2, v3 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s10, v3, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s11, v5, v[1:2] -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s11, v4, v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s8, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, s9, v1 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s0, s9, v1, vcc_lo @@ -491,7 +452,7 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 ; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v5, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v8 @@ -500,13 +461,13 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_sub_co_u32 v10, s0, v6, s10 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v0, s0, 0, v0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo -; 
GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v10, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v5, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v4, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v6, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v8, v9, s0 ; GFX10-NEXT: global_store_dwordx2 v10, v[0:1], s[4:5] @@ -980,12 +941,11 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-LABEL: udivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX8-NEXT: s_sub_u32 s2, 0, s12 -; GFX8-NEXT: s_subb_u32 s3, 0, s13 +; GFX8-NEXT: s_sub_u32 s8, 0, s12 +; GFX8-NEXT: s_subb_u32 s9, 0, s13 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -996,10 +956,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1007,257 +967,228 @@ define 
amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[6:7], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] -; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 ; GFX8-NEXT: s_sub_u32 s2, 0, s14 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: s_subb_u32 s3, 0, s15 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX8-NEXT: s_subb_u32 s3, 0, s15 +; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX8-NEXT: v_mov_b32_e32 v5, s13 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v4, s9, v1 -; 
GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v6, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v7, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s8, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v6, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v4, s13 -; GFX8-NEXT: v_subb_u32_e64 v0, s[0:1], v3, v1, vcc -; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s9, v1 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v3, s9, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v7, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v3, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v8, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s13, v7, v[1:2] +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s8, v0 +; GFX8-NEXT: v_subb_u32_e64 v0, s[0:1], v4, v2, vcc +; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s9, v2 ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v9, v2, v3, s[0:1] -; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s15 -; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v4, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s14 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v8 -; 
GFX8-NEXT: v_add_f32_e32 v1, v2, v1 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v5, vcc -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v6 -; GFX8-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 -; GFX8-NEXT: v_trunc_f32_e32 v14, v2 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v14 -; GFX8-NEXT: v_add_f32_e32 v1, v2, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v1 -; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v7, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v3, v4, s[0:1] +; GFX8-NEXT: v_cvt_f32_u32_e32 v3, s15 +; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v2, v5, vcc +; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v1 +; GFX8-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v6, vcc +; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v7 +; GFX8-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; GFX8-NEXT: v_trunc_f32_e32 v14, v3 +; GFX8-NEXT: v_mul_f32_e32 v3, 0xcf800000, v14 +; GFX8-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v2 +; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v8, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v15, 0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v15, 0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v16, v3, v16, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v14, v[2:3] +; GFX8-NEXT: v_cndmask_b32_e64 v16, v4, v16, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v14, v[3:4] ; GFX8-NEXT: v_add_u32_e64 v17, s[0:1], 1, v12 ; GFX8-NEXT: 
v_addc_u32_e64 v18, s[0:1], 0, v13, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v15, v[2:3] -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v5, v4, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, v14, v1 -; GFX8-NEXT: v_mul_lo_u32 v5, v15, v2 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v15, v[3:4] +; GFX8-NEXT: v_subb_u32_e32 v4, vcc, v6, v5, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, v14, v2 +; GFX8-NEXT: v_mul_lo_u32 v6, v15, v3 ; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, s12, v10 -; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v3, vcc -; GFX8-NEXT: v_mul_hi_u32 v3, v15, v1 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, v14, v2 -; GFX8-NEXT: v_mul_hi_u32 v1, v14, v1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v15, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v4, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v4, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, v15, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 ; GFX8-NEXT: v_mul_hi_u32 v2, v14, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v15, vcc, v15, v1 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0 -; GFX8-NEXT: v_addc_u32_e32 v14, vcc, v14, v2, vcc +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v5, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v4, v14, v3 +; GFX8-NEXT: v_mul_hi_u32 v6, v15, v3 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2 +; GFX8-NEXT: v_mul_hi_u32 v3, v14, v3 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[0:1], 0, 0, s[0:1] 
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 +; GFX8-NEXT: v_add_u32_e32 v15, vcc, v15, v2 +; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v15, 0 +; GFX8-NEXT: v_addc_u32_e32 v14, vcc, v14, v3, vcc ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v12, v17, vcc -; GFX8-NEXT: v_mov_b32_e32 v1, v4 -; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v14, v[1:2] +; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v17, vcc +; GFX8-NEXT: v_mov_b32_e32 v2, v5 +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v14, v[2:3] ; GFX8-NEXT: v_cndmask_b32_e32 v12, v13, v18, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v15, v[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v12, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v5, v10, v19, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v14, v3 -; GFX8-NEXT: v_mul_lo_u32 v9, v15, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1] -; GFX8-NEXT: v_mul_hi_u32 v8, v15, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v20, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s3, v15, v[5:6] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v3, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v12, s[0:1] +; GFX8-NEXT: v_mul_lo_u32 v7, v14, v4 +; GFX8-NEXT: v_mul_lo_u32 v8, v15, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v19, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, v15, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v20, vcc ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v8, v14, v4 -; GFX8-NEXT: v_mul_hi_u32 v3, v14, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v15, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v8, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, 
vcc, v3, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GFX8-NEXT: v_mul_lo_u32 v7, v14, v5 ; GFX8-NEXT: v_mul_hi_u32 v4, v14, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v7 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v15, v3 -; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v14, v4, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, s11, v3 -; GFX8-NEXT: v_mul_lo_u32 v8, s10, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1] -; GFX8-NEXT: v_mul_hi_u32 v0, s10, v3 -; GFX8-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, v15, v5 +; GFX8-NEXT: v_addc_u32_e64 v8, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 +; GFX8-NEXT: v_mul_hi_u32 v5, v14, v5 +; GFX8-NEXT: v_addc_u32_e64 v4, s[2:3], v4, v10, vcc +; GFX8-NEXT: v_addc_u32_e64 v7, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v15, v4 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v14, v5, vcc ; GFX8-NEXT: v_mul_lo_u32 v7, s11, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v8, v0 -; GFX8-NEXT: v_mul_hi_u32 v8, s10, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v8, s10, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v6, s[0:1] +; GFX8-NEXT: v_mul_hi_u32 v1, s10, v4 +; GFX8-NEXT: v_mul_hi_u32 v4, s11, v4 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v3, v0 -; GFX8-NEXT: v_mul_hi_u32 v8, 
s11, v4 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v9, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v8, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, v4 -; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v10, v[0:1] -; GFX8-NEXT: v_mov_b32_e32 v4, s11 -; GFX8-NEXT: v_mov_b32_e32 v0, s15 -; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v9, v[7:8] -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s10, v3 -; GFX8-NEXT: v_subb_u32_e64 v11, s[0:1], v4, v7, vcc -; GFX8-NEXT: v_sub_u32_e64 v3, s[0:1], s11, v7 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v11 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc +; GFX8-NEXT: v_mul_lo_u32 v1, s11, v5 +; GFX8-NEXT: v_mul_hi_u32 v8, s10, v5 +; GFX8-NEXT: v_addc_u32_e64 v7, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 +; GFX8-NEXT: v_addc_u32_e64 v1, s[2:3], v1, v8, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v8, vcc, v1, v7, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, s11, v5 +; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s14, v8, 0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, v0, v9, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v9, vcc, v10, v1 +; GFX8-NEXT: v_mov_b32_e32 v0, v5 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v9, v[0:1] +; GFX8-NEXT: v_mov_b32_e32 v10, s11 +; GFX8-NEXT: v_mov_b32_e32 v5, s15 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s15, v8, v[0:1] +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s10, v4 +; GFX8-NEXT: v_subb_u32_e64 v10, s[0:1], v10, v0, vcc +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s11, v0 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v11 -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 
s14, v8 -; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v3, vcc -; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v9 -; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v10, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s14, v1 +; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v8 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v7 -; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v3, v0, vcc +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v12 -; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, s14, v7 +; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, s14, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v16, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v16, s[0:1], 1, v13 ; GFX8-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GFX8-NEXT: v_addc_u32_e64 v17, s[0:1], 0, v14, s[0:1] ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v13, v16, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v16, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v13, s[0:1] -; GFX8-NEXT: v_mov_b32_e32 v10, s5 -; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v18, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v18, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; GFX8-NEXT: v_mov_b32_e32 v9, s4 -; GFX8-NEXT: 
v_cndmask_b32_e64 v7, v8, v7, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v8, v11, v0, s[0:1] -; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[1:4] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v13, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v8, v1, v8, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v0, s[0:1] +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX8-NEXT: v_mov_b32_e32 v0, s6 ; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[5:8] +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[6:9] ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: udivrem_v2i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 +; GFX9-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s13 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX9-NEXT: s_sub_u32 s2, 0, s12 -; GFX9-NEXT: s_subb_u32 s3, 0, s13 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s17 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s16 +; GFX9-NEXT: s_sub_u32 s8, 0, s16 +; GFX9-NEXT: s_subb_u32 s9, 0, s17 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 @@ -1265,10 +1196,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX9-NEXT: 
v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1276,220 +1207,195 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[6:7], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: s_sub_u32 s2, 0, s18 +; GFX9-NEXT: s_subb_u32 s3, 0, s19 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: s_sub_u32 s2, 0, s14 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX9-NEXT: 
s_subb_u32 s3, 0, s15 +; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 -; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mul_hi_u32 v5, s9, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, s13, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s12, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s13, v0 +; GFX9-NEXT: v_mov_b32_e32 v5, s13 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 -; 
GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_mov_b32_e32 v6, s13 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v8, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v9, v3, v0, v5 -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s12, v9, v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s13, v8, v[2:3] -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v1 -; GFX9-NEXT: v_subb_co_u32_e64 v1, s[0:1], v5, v3, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s13, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s12, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v3, s13, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v8, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v9, v3, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v9, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v6, s17 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s17, v8, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s12, v0 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v5, v3, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s17, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v2 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s16, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: 
v_cmp_eq_u32_e64 s[0:1], s13, v1 -; GFX9-NEXT: v_sub_u32_e32 v3, s9, v3 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s17, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, s13, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v10, v4, v5, s[0:1] -; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s19 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v6, vcc -; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s14 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s18 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s12, v2 +; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s16, v1 ; GFX9-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 ; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v7, vcc ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v8 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; GFX9-NEXT: v_trunc_f32_e32 v15, v4 -; GFX9-NEXT: v_mul_f32_e32 v4, 0xcf800000, v15 +; GFX9-NEXT: v_trunc_f32_e32 v5, v4 +; GFX9-NEXT: v_mul_f32_e32 v4, 0xcf800000, v5 ; GFX9-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v16, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v3 ; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v16, 0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v15 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v17, v5, v17, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v15, v[4:5] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s17, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v5 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s16, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[0:1] +; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v6, vcc +; 
GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v17, v[4:5] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s17, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v18, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v15, v[4:5] +; GFX9-NEXT: v_mul_lo_u32 v5, v17, v3 +; GFX9-NEXT: v_mul_hi_u32 v6, v15, v3 +; GFX9-NEXT: v_mul_lo_u32 v20, v15, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v17, v3 ; GFX9-NEXT: v_add_co_u32_e64 v18, s[0:1], 1, v13 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v20 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, v17, v4 +; GFX9-NEXT: v_mul_hi_u32 v20, v15, v4 ; GFX9-NEXT: v_addc_co_u32_e64 v19, s[0:1], 0, v14, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v16, v[4:5] -; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v15, v3 -; GFX9-NEXT: v_mul_lo_u32 v7, v16, v4 -; GFX9-NEXT: v_subrev_co_u32_e32 v20, vcc, s12, v11 -; GFX9-NEXT: v_subbrev_co_u32_e32 v21, vcc, 0, v5, vcc -; GFX9-NEXT: v_mul_hi_u32 v5, v16, v3 -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v15, v4 -; GFX9-NEXT: v_mul_hi_u32 v3, v15, v3 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_mul_hi_u32 v7, v16, v4 -; GFX9-NEXT: v_mul_hi_u32 v4, v15, v4 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v6, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v5 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v16, vcc, v16, v3 -; GFX9-NEXT: v_add3_u32 v4, v6, v5, v4 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v16, 0 -; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v15, v4, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v13, v18, vcc +; 
GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v5, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v20, vcc +; GFX9-NEXT: v_mul_hi_u32 v4, v17, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v15, vcc, v15, v3 +; GFX9-NEXT: v_add_u32_e32 v4, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v15, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v4, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v15, v[3:4] -; GFX9-NEXT: v_cndmask_b32_e32 v13, v14, v19, vcc +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v17, v[3:4] +; GFX9-NEXT: v_subrev_co_u32_e32 v20, vcc, s16, v11 +; GFX9-NEXT: v_subbrev_co_u32_e32 v21, vcc, 0, v7, vcc +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v15, v[3:4] +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v18, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[2:3], s3, v16, v[6:7] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v13, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v8, v15, v5 -; GFX9-NEXT: v_mul_lo_u32 v9, v16, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v20, vcc -; GFX9-NEXT: v_mul_hi_u32 v11, v16, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v13, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v7, v17, v5 +; GFX9-NEXT: v_mul_lo_u32 v8, v15, v6 +; GFX9-NEXT: v_mul_hi_u32 v10, v15, v5 +; GFX9-NEXT: v_mul_hi_u32 v5, v17, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v19, vcc +; GFX9-NEXT: v_add_co_u32_e64 v7, s[2:3], v7, v8 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], v7, v10, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v7, v17, v6 +; GFX9-NEXT: v_mul_hi_u32 v10, v15, v6 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_add_co_u32_e64 v5, s[2:3], v7, v5 +; GFX9-NEXT: v_mul_hi_u32 v6, v17, v6 +; 
GFX9-NEXT: v_addc_co_u32_e64 v5, s[4:5], v5, v10, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], v5, v8, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], 0, v7, s[2:3] +; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 +; GFX9-NEXT: v_add_co_u32_e64 v5, s[2:3], v15, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[2:3], v17, v6, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v7, s15, v5 +; GFX9-NEXT: v_mul_lo_u32 v8, s14, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v20, vcc +; GFX9-NEXT: v_mul_hi_u32 v11, s14, v5 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v12, v21, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v11, v15, v6 -; GFX9-NEXT: v_mul_hi_u32 v5, v15, v5 -; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 -; GFX9-NEXT: v_mul_hi_u32 v9, v16, v6 -; GFX9-NEXT: v_mul_hi_u32 v6, v15, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v11, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8 -; GFX9-NEXT: v_add_u32_e32 v9, v11, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v6, v9, v8, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v16, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v15, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v8, s11, v5 -; GFX9-NEXT: v_mul_lo_u32 v9, s10, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v2, v7, s[0:1] -; GFX9-NEXT: v_mul_hi_u32 v2, s10, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, s11, v5 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v8, s11, v6 -; GFX9-NEXT: v_add_u32_e32 v2, v9, v2 -; GFX9-NEXT: v_mul_hi_u32 v9, 
s10, v6 -; GFX9-NEXT: v_mul_hi_u32 v13, s11, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v8, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v5, v2 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s14, v12, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v10, s[0:1] -; GFX9-NEXT: v_add_u32_e32 v1, v11, v9 -; GFX9-NEXT: v_add3_u32 v9, v1, v2, v13 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v11, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, s15, v6 +; GFX9-NEXT: v_mul_hi_u32 v5, s15, v5 +; GFX9-NEXT: v_mul_hi_u32 v11, s14, v6 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], v5, v11, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v5, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_mul_hi_u32 v13, s15, v6 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s18, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v1, v9, s[0:1] +; GFX9-NEXT: v_add_u32_e32 v9, v13, v12 ; GFX9-NEXT: v_mov_b32_e32 v1, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v9, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v10, s11 -; GFX9-NEXT: v_mov_b32_e32 v6, s15 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v12, v[1:2] -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s10, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v2, v10, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v9, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v10, s15 +; GFX9-NEXT: v_mov_b32_e32 v6, s19 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v11, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s14, v5 ; GFX9-NEXT: v_subb_co_u32_e64 v10, s[0:1], v10, v1, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10 -; GFX9-NEXT: v_sub_u32_e32 v1, s11, v1 +; GFX9-NEXT: v_cmp_le_u32_e64 
s[0:1], s19, v10 +; GFX9-NEXT: v_sub_u32_e32 v1, s15, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v10 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s14, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s18, v2 ; GFX9-NEXT: v_subbrev_co_u32_e64 v13, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v12 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v13 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v13 ; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v12 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v13 -; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s14, v11 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v13 +; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s18, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v17, s[0:1], 1, v14 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1498,28 +1404,31 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v14, v17, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v18, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v12, v6, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v6, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 
v9, v14, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v19, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v19, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[0:1] -; GFX9-NEXT: global_store_dwordx4 v0, v[3:6], s[4:5] -; GFX9-NEXT: global_store_dwordx4 v0, v[7:10], s[6:7] +; GFX9-NEXT: global_store_dwordx4 v0, v[3:6], s[8:9] +; GFX9-NEXT: global_store_dwordx4 v0, v[7:10], s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_v2i64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 +; GFX10-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s13 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 -; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s12 -; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s14 -; GFX10-NEXT: s_sub_u32 s0, 0, s12 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s17 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s19 +; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s16 +; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s18 +; GFX10-NEXT: s_sub_u32 s7, 0, s16 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; GFX10-NEXT: s_subb_u32 s1, 0, s13 +; GFX10-NEXT: s_subb_u32 s20, 0, s17 +; GFX10-NEXT: s_sub_u32 s8, 0, s18 +; GFX10-NEXT: s_subb_u32 s21, 0, s19 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -1538,17 +1447,15 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v7, 0 -; GFX10-NEXT: s_sub_u32 s2, 0, s14 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s3, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v7, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s8, v8, 0 ; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], 
s3, s0, v9, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s3, s2, v10, v[3:4] +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4] ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0 -; GFX10-NEXT: s_subb_u32 s3, 0, s15 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s6, s1, v7, v[4:5] +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5] ; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s3, v8, v[5:6] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6] ; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2 ; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2 ; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2 @@ -1560,46 +1467,38 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 ; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0 -; GFX10-NEXT: v_add_co_u32 v6, s6, v6, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v11, s6, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v1, s6, v1, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v2, s6, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v4, s6, v6, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v6, s6, v11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v1, s6, v1, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v2, s6, v2, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v4, s6, v6, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v4 -; GFX10-NEXT: v_add_co_u32 v1, s6, v2, v1 -; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3 -; 
GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s6 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v12 +; GFX10-NEXT: v_add_co_u32 v1, s2, v1, v15 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v4, s0, v13, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, v1, v5, s2 +; GFX10-NEXT: v_add_co_u32 v1, s3, v16, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, v4, v14, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s9, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s6, v1, v17, s3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v11, s1, 0, 0, s6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo -; GFX10-NEXT: v_add3_u32 v2, v5, v2, v0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v7, 0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v10, v2, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s6, s2, v8, 0 -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v0, v5 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v7, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s8, v8, 0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v10, v4, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s0, v9, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s2, v10, v[3:4] +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4] ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s1, v7, v[4:5] +; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX10-NEXT: 
v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5] ; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[5:6] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6] ; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2 ; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2 ; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2 @@ -1611,156 +1510,142 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 ; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v11, s0, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1 -; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3 -; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v7, v4 -; GFX10-NEXT: v_add3_u32 v0, v5, v2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v9, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v12 +; GFX10-NEXT: v_add_co_u32 v1, s2, v1, v15 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v4, s0, 
v13, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, v1, v5, s2 +; GFX10-NEXT: v_add_co_u32 v1, s3, v16, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, v4, v14, s0 +; GFX10-NEXT: s_mov_b32 null, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s4, v1, v17, s3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v11, s1, 0, 0, s4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v7, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v3, vcc_lo ; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v8, v1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v0, vcc_lo, v10, v0, vcc_lo ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mul_lo_u32 v3, s9, v4 -; GFX10-NEXT: v_mul_lo_u32 v8, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v5, s8, v4 -; GFX10-NEXT: v_mul_hi_u32 v4, s9, v4 -; GFX10-NEXT: v_mul_lo_u32 v9, s9, v2 -; GFX10-NEXT: v_mul_lo_u32 v6, s11, v1 -; GFX10-NEXT: v_mul_hi_u32 v10, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v11, s9, v2 -; GFX10-NEXT: v_mul_lo_u32 v2, s10, v0 -; GFX10-NEXT: v_mul_hi_u32 v7, s10, v1 -; GFX10-NEXT: v_mul_hi_u32 v1, s11, v1 -; GFX10-NEXT: v_mul_lo_u32 v12, s11, v0 -; GFX10-NEXT: v_mul_hi_u32 v13, s10, v0 -; GFX10-NEXT: v_mul_hi_u32 v14, s11, v0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v3, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v9, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v12, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v5 
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v8, v5 -; GFX10-NEXT: v_add_co_u32 v8, s0, v4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v10, s0, v1, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s12, v8, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s14, v10, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v9, v7 -; GFX10-NEXT: v_add3_u32 v9, v5, v4, v11 -; GFX10-NEXT: v_add_co_u32 v12, vcc_lo, v8, 1 +; GFX10-NEXT: v_mul_lo_u32 v4, s13, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v10, v0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v0, s12, v3 +; GFX10-NEXT: v_mul_hi_u32 v5, s12, v2 +; GFX10-NEXT: v_mul_hi_u32 v2, s13, v2 +; GFX10-NEXT: v_mul_lo_u32 v7, s15, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, s14, v1 +; GFX10-NEXT: v_mul_hi_u32 v9, s15, v1 +; GFX10-NEXT: v_mul_lo_u32 v1, s13, v3 +; GFX10-NEXT: v_mul_hi_u32 v10, s12, v3 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v4, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, s14, v6 +; GFX10-NEXT: v_mul_hi_u32 v3, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v0, vcc_lo, v0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v0, s0, v1, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v2, s15, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v10, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_hi_u32 v5, s14, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v10, vcc_lo, v0, v1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v11 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s16, v10, 0 +; GFX10-NEXT: 
v_add_co_u32 v2, s0, v2, v9 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v8, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v7, v3, v4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, v2, v5, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s16, v7, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e64 v8, vcc_lo, v5, v3, s0 +; GFX10-NEXT: v_mul_hi_u32 v6, s15, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s18, v8, 0 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 -; GFX10-NEXT: v_add3_u32 v7, v7, v6, v14 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s12, v9, v[1:2] -; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v9, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s14, v7, v[3:4] -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s13, v8, v[4:5] -; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v12, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v13, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s8, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s15, v10, v[5:6] -; GFX10-NEXT: v_sub_co_ci_u32_e64 v5, s0, s9, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s12, v14 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s9, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, -1, s0 -; GFX10-NEXT: v_sub_co_u32 v15, s0, s10, v2 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo -; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s1, s11, v0, s0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v15 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s11, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v17, vcc_lo, v14, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v1, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s13, v5 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v23, s0, s15, v0, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v18 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 
s1, s12, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s13, v18 -; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v21, v20, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, -1, s1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0 -; GFX10-NEXT: v_sub_co_u32 v0, s0, v17, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v19, s0, 0, v1, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v6, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v6, s1, v15, s14 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v17, v0, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s2, 0, v23, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v1, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v18, v19, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v14, v4, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v22, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s15, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc_lo -; GFX10-NEXT: v_add_co_u32 v13, vcc_lo, v10, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s15, v12 -; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v13, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v14, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, vcc_lo, s15, v23, s1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX10-NEXT: v_sub_co_u32 v8, s1, v6, s14 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v18, s1 -; GFX10-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc_lo -; GFX10-NEXT: 
v_cndmask_b32_e32 v13, v14, v17, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v2 -; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v8, v12, v18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v9, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v13, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v15, v6, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v8, s1 -; GFX10-NEXT: global_store_dwordx4 v11, v[0:3], s[4:5] -; GFX10-NEXT: global_store_dwordx4 v11, v[4:7], s[6:7] +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s17, v10, v[1:2] +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v10, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v1, v3 +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, s12, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, s13, v4 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v13, s0, s13, v4, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s17, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s16, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, v12, s16 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v15, s0, 0, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s17, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s16, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s17, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s18, v6, v[1:2] +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s17, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v16, v4, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s17, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, v17, s0 +; GFX10-NEXT: v_add_co_u32 v16, s0, v5, 1 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v18, vcc_lo, s17, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v9, s0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s19, v8, v[0:1] +; GFX10-NEXT: v_sub_co_u32 v4, s0, 
v14, s16 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v5, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v17, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v9, s1, s14, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, s15, v3 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s2, s15, v3, s1 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s0, 0, v18, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s1, s19, v2, s1 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX10-NEXT: v_cmp_le_u32_e64 s1, s18, v9 +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s19, v16 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v14, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v10, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, s1 +; GFX10-NEXT: v_sub_co_u32 v10, s1, v9, s18 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, s2 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s2, 0, v2, s1 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s19, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v3, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v5, v7, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s19, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s18, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo +; GFX10-NEXT: v_add_co_u32 v12, vcc_lo, v8, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v6, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s19, v14 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v17, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, vcc_lo, s19, v2, s1 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; GFX10-NEXT: v_sub_co_u32 v5, s1, v10, s18 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s1, 0, v2, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v12, v17, v18, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v5, vcc_lo +; 
GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v15, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v6, v12, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v10, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v14, s1 +; GFX10-NEXT: global_store_dwordx4 v11, v[0:3], s[8:9] +; GFX10-NEXT: global_store_dwordx4 v11, v[4:7], s[10:11] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i64> %x, %y store <2 x i64> %div, ptr addrspace(1) %out0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index cc0f7e2ca5a54..4810540d4453b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -16,10 +16,10 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB0_4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] @@ -50,18 +50,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; 
CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -79,18 +74,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc @@ -102,26 +92,21 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, 
v5, v1 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v6, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v6, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v2, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v0, vcc ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v0 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 @@ -151,7 +136,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: ; implicit-def: $vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; 
CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 @@ -194,10 +179,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 -; CHECK-NEXT: s_sub_u32 s4, 0, s2 +; CHECK-NEXT: s_sub_u32 s10, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 -; CHECK-NEXT: s_subb_u32 s5, 0, s3 +; CHECK-NEXT: s_subb_u32 s11, 0, s3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 @@ -205,10 +190,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s10, v1 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 @@ -219,25 +204,20 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, 
v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s10, v4 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 @@ -248,18 +228,13 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 
v5, vcc, v5, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -271,26 +246,21 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 -; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v1, vcc, v1, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 ; CHECK-NEXT: 
v_cmp_le_u32_e64 s[4:5], s2, v4 @@ -362,229 +332,199 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v7 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v7, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: 
v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: 
v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, 
s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; 
GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; 
GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v8 ; GISEL-NEXT: 
v_mul_hi_u32 v8, v1, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_lo_u32 v19, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v16, v19 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v19 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v16, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v5, v10 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; 
GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, vcc, 0, v15, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v6, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v7, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v11 +; GISEL-NEXT: v_mul_lo_u32 v17, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v6, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v13 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v16 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v9 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v6 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v4 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v8, s[14:15], v15, v8 +; GISEL-NEXT: v_add_i32_e64 v9, s[14:15], v17, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v6 ; GISEL-NEXT: v_sub_i32_e64 v4, s[14:15], v12, v4 ; GISEL-NEXT: v_sub_i32_e64 v6, s[16:17], v13, v6 -; GISEL-NEXT: v_add_i32_e64 v8, s[18:19], v17, v8 -; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9 -; GISEL-NEXT: 
v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v10 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 -; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[18:19], v8, v10 +; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[8:9] +; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v1, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v8 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], v3, v9, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v16, v5 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v7 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v5 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v8, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11] ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[10:11] ; GISEL-NEXT: v_subbrev_u32_e64 v19, vcc, 0, v3, s[12:13] ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v7, s[12:13] ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v14, v17, v15, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v5 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v19, v7 @@ -594,9 +534,9 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[8:9] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7] @@ -605,7 +545,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -622,7 +562,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 @@ -650,18 +590,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: 
v_addc_u32_e32 v14, vcc, v14, v16, vcc +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v14, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v13, vcc, v13, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc @@ -679,18 +614,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v15, vcc +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v13, v12 +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, vcc, v3, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -702,26 +632,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; 
CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v13, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v3 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v2 ; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v11, v0, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v0 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 @@ -752,7 +677,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 ; CGP-NEXT: .LBB2_2: ; %Flow1 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -780,10 +705,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 
v4, v6 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB2_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB2_8 ; CGP-NEXT: .LBB2_6: ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] @@ -814,18 +739,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc @@ -843,18 +763,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, 
v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v10 +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -866,26 +781,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 
0, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v6, v3 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v2, vcc ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v9, v2 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 @@ -915,7 +825,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr6 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_6 ; CGP-NEXT: .LBB2_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -994,18 +904,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v12, v3, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; 
CHECK-NEXT: v_addc_u32_e64 v8, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -1022,18 +927,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc @@ -1045,24 +945,19 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: 
v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v8, v3 +; CHECK-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v3, vcc, v3, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5] ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 @@ -1097,26 +992,26 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; GISEL-NEXT: s_sub_u32 s4, 0, 0x12d8fb +; GISEL-NEXT: s_sub_u32 s16, 0, 0x12d8fb ; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: s_subb_u32 s5, 0, 0 +; GISEL-NEXT: s_subb_u32 s17, 0, 0 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb +; GISEL-NEXT: s_sub_u32 s18, 0, 0x12d8fb ; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: s_subb_u32 s7, 0, 0 +; GISEL-NEXT: s_subb_u32 s19, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 ; GISEL-NEXT: v_trunc_f32_e32 v6, v6 ; GISEL-NEXT: 
v_mac_f32_e32 v5, 0xcf800000, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v7, s4, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v6 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s5, v5 -; GISEL-NEXT: v_mul_hi_u32 v11, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v13, s7, v5 -; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s16, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, s18, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s16, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, s17, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, s16, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, s19, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, s18, v5 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v9 ; GISEL-NEXT: v_mul_hi_u32 v15, v5, v9 @@ -1136,100 +1031,80 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v19, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v16 -; 
GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v15, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v17, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v13, v18 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v16, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v19, v12 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v20, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v11, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v11, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v13, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v5, v9 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, s4, v9 -; GISEL-NEXT: v_mul_lo_u32 v11, s5, v9 -; GISEL-NEXT: v_mul_hi_u32 v13, s4, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_mul_lo_u32 v11, s16, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, s17, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, s16, v9 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, s7, v5 -; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v15, s4, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10 -; GISEL-NEXT: 
v_mul_hi_u32 v17, v9, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, s6, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, s19, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, s16, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 +; GISEL-NEXT: v_mul_lo_u32 v18, s18, v6 ; GISEL-NEXT: v_mul_lo_u32 v19, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v15, v9, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 -; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12 -; GISEL-NEXT: v_mul_lo_u32 v21, v6, v12 -; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v6, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v21, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: 
v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v21, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v22, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v9 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v9 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v15, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v19, v18 +; GISEL-NEXT: v_addc_u32_e64 v13, s[8:9], v13, v20, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[10:11], v21, v8 +; GISEL-NEXT: v_addc_u32_e64 v8, s[12:13], v8, v22, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v13, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v8, v15, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; 
GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 ; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7 ; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7 @@ -1239,48 +1114,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v17, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v18, v2, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: 
v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v15, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v16 +; GISEL-NEXT: v_addc_u32_e64 v8, s[8:9], v8, v10, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v5, s[10:11], v17, v5 +; GISEL-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v9, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, v5, v11, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v4 ; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v9 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12 +; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v10 ; 
GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 @@ -1350,18 +1215,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v14, v5, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CGP-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[8:9], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -1378,18 +1238,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v13, v5, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v11 -; CGP-NEXT: 
v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v12, v8 +; CGP-NEXT: v_addc_u32_e64 v8, s[6:7], v8, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v9, s[8:9], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v8, vcc, v8, v9, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc @@ -1408,47 +1263,37 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v18, v2, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v14 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v18 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc +; CGP-NEXT: v_add_i32_e64 
v7, s[4:5], v13, v9 +; CGP-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v14, s[4:5] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v10, v16 +; CGP-NEXT: v_addc_u32_e64 v8, s[8:9], v8, v11, s[8:9] +; CGP-NEXT: v_add_i32_e64 v5, s[10:11], v17, v5 +; CGP-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11] +; CGP-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[8:9] +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[12:13] +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[10:11] +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 ; CGP-NEXT: v_mul_hi_u32 v7, v7, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v9, v5, v4 ; CGP-NEXT: v_mul_hi_u32 v5, v5, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_mul_lo_u32 v8, v8, v4 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v9 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v10 ; CGP-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v11 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v9 ; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 @@ -1508,10 +1353,10 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: 
s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB7_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB7_4 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] @@ -1542,18 +1387,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc @@ -1571,18 +1411,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; 
CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -1594,26 +1429,21 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, 
vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v5, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v2 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v0, vcc ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 @@ -1643,7 +1473,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB7_2 ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -1681,229 +1511,199 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v5 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v8, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v5, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: 
v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; 
GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; 
GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, 
v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v6 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v6 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 
1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; 
GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v12, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_lo_u32 v19, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v16, v19 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 -; GISEL-NEXT: v_add_i32_e64 
v13, s[6:7], v13, v19 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, vcc, 0, v15, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v4, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v17, v5, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v4, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, 
v13 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v16 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v7 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v4 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v7 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 +; GISEL-NEXT: v_add_i32_e64 v6, s[14:15], v15, v6 +; GISEL-NEXT: v_add_i32_e64 v9, s[14:15], v17, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v7 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v4 ; GISEL-NEXT: v_sub_i32_e64 v7, s[14:15], v12, v7 ; GISEL-NEXT: v_sub_i32_e64 v4, s[16:17], v13, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[18:19], v17, v6 -; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 -; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v6, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[18:19], v6, v10 +; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[8:9] +; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v1, v6, vcc ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v6 ; GISEL-NEXT: v_subb_u32_e64 v6, s[6:7], v3, v9, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v16, v8 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v5 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, 
s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v8 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11] ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v8, s[10:11] ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e64 v14, vcc, 0, v3, s[12:13] ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v5, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v8 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5 @@ -1913,9 +1713,9 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v17, s[8:9] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v12, v7, s[6:7] @@ -1924,7 +1724,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v3, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1944,7 +1744,7 @@ define <2 x i64> 
@v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 @@ -1972,18 +1772,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v15, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v14, vcc, v14, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v16, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc @@ -2001,18 +1796,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v16, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v13 +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v12, vcc, v12, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -2024,26 +1814,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v14, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; 
CGP-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v12 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 @@ -2074,7 +1859,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_4 @@ -2104,10 +1889,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB8_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB8_8 ; CGP-NEXT: .LBB8_6: ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] @@ -2138,18 +1923,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc @@ -2167,18 +1947,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v11, v8 +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: 
v_addc_u32_e64 v6, vcc, v6, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -2190,26 +1965,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v9, v2 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v6, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v9, v3 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v2, vcc ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v7, v2 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9 @@ -2239,7 +2009,7 @@ 
define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB8_6 ; CGP-NEXT: .LBB8_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -2376,39 +2146,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v20, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], 
v19, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v23 +; GISEL-NEXT: v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[10:11], v24, v17 +; GISEL-NEXT: v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v19, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 @@ -2434,39 +2194,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: 
v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v12 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v15 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v20, v13 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v14, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_add_i32_e32 
v7, vcc, v7, v10 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 @@ -2484,39 +2234,37 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 -; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v11, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v7, v0, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 ; GISEL-NEXT: v_mul_lo_u32 v5, v0, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: 
v_add_i32_e32 v5, vcc, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v9 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index 674d7b68bfae6..2e61d8771988a 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -270,6 +270,7 @@ body: | ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] @@ -277,33 +278,67 @@ body: | ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]] - ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD 
[[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]] + ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; MIPS32-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] ; MIPS32-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] ; MIPS32-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] - ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] - ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[MUL5]] - ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]] ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] - ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH1]] - ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[MUL5]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = 
G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]] ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] - ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH2]] - ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] - ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32) - ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP5]] - ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[ADD2]] - ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] - ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32) - ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP6]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]] + ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]] + ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]] + ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]] + ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32) + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]] + ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]] + ; MIPS32-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32) + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[COPY10]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD12]](s32), [[COPY10]] + ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD13]](s32), [[C]] + ; MIPS32-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[ICMP3]] + ; MIPS32-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ICMP8]], [[AND6]] + ; MIPS32-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD13]](s32) + ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[COPY11]], [[C]] + ; MIPS32-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C1]] + ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[AND7]] + ; MIPS32-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ADD15]](s32) + ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[COPY12]], [[COPY6]] + ; MIPS32-NEXT: [[ICMP10:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD16]](s32), [[COPY12]] + ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP11:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD17]](s32), [[C]] + ; MIPS32-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP11]], [[ICMP3]] + ; MIPS32-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ICMP10]], [[AND8]] + ; MIPS32-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ADD17]](s32) + ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[COPY13]], [[C]] + ; MIPS32-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C1]] + ; MIPS32-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[AND9]] + ; MIPS32-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[ADD19]](s32) ; MIPS32-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] ; MIPS32-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] ; MIPS32-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] @@ -311,17 +346,17 @@ body: | ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] ; MIPS32-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH 
[[LOAD1]], [[COPY1]] ; MIPS32-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] - ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] - ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] - ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] - ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] - ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] - ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] - ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] + ; MIPS32-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] + ; MIPS32-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ADD20]], [[MUL8]] + ; MIPS32-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[MUL9]] + ; MIPS32-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[UMULH3]] + ; MIPS32-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[ADD23]], [[UMULH4]] + ; MIPS32-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH5]] + ; MIPS32-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ADD25]], [[COPY15]] ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) ; MIPS32-NEXT: $v1 = COPY [[COPY5]](s32) - ; MIPS32-NEXT: $a0 = COPY [[COPY10]](s32) - ; MIPS32-NEXT: $a1 = COPY [[ADD18]](s32) + ; MIPS32-NEXT: $a0 = COPY [[COPY14]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD26]](s32) ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 @@ -361,6 +396,7 @@ body: | ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] @@ -368,27 +404,47 @@ body: | ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), 
[[ADD]](s32), [[MUL1]] ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]] - ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]] + ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]] ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] - ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] - ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] - ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[UMULH2]] - ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH1]] ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] - ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD 
[[COPY7]], [[ADD2]] - ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]] ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]] + ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY6]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]] + ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]] + ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]] + ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32) + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]] + ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]] + ; MIPS32-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32) ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] - ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] - ; MIPS32-NEXT: $v0 = COPY [[COPY8]](s32) - ; MIPS32-NEXT: $v1 = COPY [[ADD8]](s32) + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[COPY11]] + ; MIPS32-NEXT: $v0 = COPY 
[[COPY10]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD12]](s32) ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index c8c66fabf202b..9ef2228c2424c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -117,35 +117,64 @@ define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: mul $2, $6, $14 -; MIPS32-NEXT: mul $3, $7, $14 -; MIPS32-NEXT: mul $4, $6, $13 +; MIPS32-NEXT: ori $5, $zero, 0 +; MIPS32-NEXT: mul $4, $7, $14 +; MIPS32-NEXT: mul $10, $6, $13 ; MIPS32-NEXT: multu $6, $14 -; MIPS32-NEXT: mfhi $5 -; MIPS32-NEXT: addu $3, $3, $4 +; MIPS32-NEXT: mfhi $3 +; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: sltu $11, $4, $10 +; MIPS32-NEXT: addu $3, $4, $3 ; MIPS32-NEXT: sltu $4, $3, $4 -; MIPS32-NEXT: addu $3, $3, $5 -; MIPS32-NEXT: sltu $5, $3, $5 -; MIPS32-NEXT: addu $10, $4, $5 -; MIPS32-NEXT: mul $4, $8, $14 -; MIPS32-NEXT: mul $5, $7, $13 +; MIPS32-NEXT: addu $3, $3, $11 +; MIPS32-NEXT: sltiu $10, $3, 1 +; MIPS32-NEXT: and $10, $10, $11 +; MIPS32-NEXT: or $4, $4, $10 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $4, $5, $4 +; MIPS32-NEXT: mul $25, $8, $14 +; MIPS32-NEXT: mul $15, $7, $13 ; MIPS32-NEXT: mul $24, $6, $12 ; MIPS32-NEXT: multu $7, $14 -; MIPS32-NEXT: mfhi $15 -; MIPS32-NEXT: multu $6, $13 ; MIPS32-NEXT: mfhi $11 -; MIPS32-NEXT: addu $4, $4, $5 -; MIPS32-NEXT: sltu $5, $4, $5 -; MIPS32-NEXT: addu $4, $4, $24 -; MIPS32-NEXT: sltu $24, $4, $24 +; MIPS32-NEXT: multu $6, $13 +; MIPS32-NEXT: mfhi $10 +; MIPS32-NEXT: addu $25, $25, $15 +; MIPS32-NEXT: sltu $15, $25, $15 +; MIPS32-NEXT: addu $24, $25, $24 +; MIPS32-NEXT: sltu $25, $24, $25 +; MIPS32-NEXT: addu $24, $24, $15 +; MIPS32-NEXT: sltiu $gp, $24, 1 +; MIPS32-NEXT: and $gp, $gp, $15 +; MIPS32-NEXT: or $25, $25, $gp +; 
MIPS32-NEXT: andi $25, $25, 1 +; MIPS32-NEXT: addu $5, $5, $25 +; MIPS32-NEXT: addu $11, $24, $11 +; MIPS32-NEXT: sltu $24, $11, $24 +; MIPS32-NEXT: addu $11, $11, $15 +; MIPS32-NEXT: sltiu $25, $11, 1 +; MIPS32-NEXT: and $25, $25, $15 +; MIPS32-NEXT: or $24, $24, $25 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $24, $24, 1 ; MIPS32-NEXT: addu $5, $5, $24 -; MIPS32-NEXT: addu $4, $4, $15 -; MIPS32-NEXT: sltu $15, $4, $15 -; MIPS32-NEXT: addu $5, $5, $15 -; MIPS32-NEXT: addu $4, $4, $11 -; MIPS32-NEXT: sltu $11, $4, $11 +; MIPS32-NEXT: addu $10, $11, $10 +; MIPS32-NEXT: sltu $11, $10, $11 +; MIPS32-NEXT: addu $10, $10, $15 +; MIPS32-NEXT: sltiu $24, $10, 1 +; MIPS32-NEXT: and $24, $24, $15 +; MIPS32-NEXT: or $11, $11, $24 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $11, $11, 1 ; MIPS32-NEXT: addu $5, $5, $11 -; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: addu $4, $10, $4 ; MIPS32-NEXT: sltu $10, $4, $10 +; MIPS32-NEXT: addu $4, $4, $15 +; MIPS32-NEXT: sltiu $11, $4, 1 +; MIPS32-NEXT: and $11, $11, $15 +; MIPS32-NEXT: or $10, $10, $11 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $10, $10, 1 ; MIPS32-NEXT: addu $5, $5, $10 ; MIPS32-NEXT: mul $1, $1, $14 ; MIPS32-NEXT: mul $11, $8, $13 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir index ec2dc568a5ec3..d63d03e185b82 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir @@ -164,27 +164,39 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMULH]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; CHECK-NEXT: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[OR]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL %lo1, %hi2 ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH %mid1, %lo2 ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH %lo1, %mid2 - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[MUL5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[UMULH1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD5]](s32) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[ADD2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[MUL5]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH1]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP3]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[UMULH2]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP3]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY2]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ADD11]], [[ICMP3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD12]](s32) ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) ; CHECK-NEXT: $x11 = COPY [[COPY1]](s32) - ; CHECK-NEXT: $x12 = COPY [[COPY6]](s32) + ; CHECK-NEXT: $x12 = COPY [[COPY7]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12 %lo1:_(s32) = COPY $x10 %mid1:_(s32) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir index 39d9c5b7dfd1e..931e3f5c279af 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir @@ -196,27 +196,45 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), [[MUL2]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[ADD]](s64) ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[UMULH]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[UMULH]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD1]](s64) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[COPY]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = 
G_ICMP intpred(eq), [[ADD2]](s64), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[C1]], [[AND1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64) ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s64) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s64) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s64) = G_MUL %lo1, %hi2 ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s64) = G_UMULH %mid1, %lo2 ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s64) = G_UMULH %lo1, %mid2 - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[COPY2]], [[MUL5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD4]](s64), [[MUL4]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ADD4]](s64) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[UMULH1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD5]](s64) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[ADD2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD7]](s64) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[MUL5]] + ; CHECK-NEXT: 
[[ADD6:%[0-9]+]]:_(s64) = G_ADD [[ADD5]], [[ICMP3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH1]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s64) = G_ADD [[ADD7]], [[ICMP3]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD8]](s64) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[UMULH2]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s64) = G_ADD [[ADD9]], [[ICMP3]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD10]](s64) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s64) = G_ADD [[COPY6]], [[COPY2]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s64) = G_ADD [[ADD11]], [[ICMP3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY [[ADD12]](s64) ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) ; CHECK-NEXT: $x11 = COPY [[COPY1]](s64) - ; CHECK-NEXT: $x12 = COPY [[COPY6]](s64) + ; CHECK-NEXT: $x12 = COPY [[COPY7]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12 %lo1:_(s64) = COPY $x10 %mid1:_(s64) = COPY $x11