Skip to content

AMDGPU instruction selection failure #67574

Closed
@upsj

Description

@upsj

Compiling the following file

#include <hip/hip_runtime.h>


__global__ void block_transpose(const int* __restrict__ blocks,
                                const int* __restrict__ block_ptrs,
                                int* __restrict__ out_blocks)
{
    const auto block_size = block_ptrs[1] - block_ptrs[0];

    const auto block_stride = 1;
    const auto rank =
        unsigned(threadIdx.x +
                 blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z)) %
        2;
    for (int i = 0; i < block_size; ++i) {
        auto val = blocks[i * block_stride + rank];
        out_blocks[i + rank * block_stride] = val;
    }
}


__global__ void block_transpose(const short* __restrict__ blocks,
                                const int* __restrict__ block_ptrs,
                                short* __restrict__ out_blocks)
{
    const auto block_size = block_ptrs[1] - block_ptrs[0];

    const auto block_stride = 1;
    const auto rank =
        unsigned(threadIdx.x +
                 blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z)) %
        2;
    for (int i = 0; i < block_size; ++i) {
        auto val = blocks[i * block_stride + rank];
        out_blocks[i + rank * block_stride] = val;
    }
}

using clang++ -x hip -O3 -c file.cpp causes a fatal error when lowering to device code:

fatal error: error in backend: Cannot select: 0xc0314f0: i1 = mul # D:1 0xba424c0, 0xc0315d0
  0xba424c0: i1 = truncate 0xc031a30
    0xc031a30: i32,ch = load<(dereferenceable invariant load (s32) from %ir.13, addrspace 4)> 0xca6fa58, 0xba41ea0, undef:i64
      0xba41ea0: i64 = add 0xba41d50, Constant:i64<4>
        0xba41d50: i64 = AssertAlign<4> 0xba426f0
          0xba426f0: i64,ch = CopyFromReg 0xca6fa58, Register:i64 %13
            0xc031640: i64 = Register %13
        0xc031020: i64 = Constant<4>
      0xc030f40: i64 = undef
  0xc0315d0: i1 = truncate # D:1 0xc031b10
    0xc031b10: i32 = add # D:1 0xc031720, 0xba42680
      0xc031720: i32 = MUL_U24 # D:1 0xba425a0, 0xc031db0
        0xba425a0: i32 = AssertZext # D:1 0xba42530, ValueType:ch:i10
          0xba42530: i32,ch = CopyFromReg # D:1 0xca6fa58, Register:i32 %11
            0xc031b80: i32 = Register %11
        0xc031db0: i32 = srl 0xc031a30, Constant:i32<16>
          0xc031a30: i32,ch = load<(dereferenceable invariant load (s32) from %ir.13, addrspace 4)> 0xca6fa58, 0xba41ea0, undef:i64
            0xba41ea0: i64 = add 0xba41d50, Constant:i64<4>
              0xba41d50: i64 = AssertAlign<4> 0xba426f0
                0xba426f0: i64,ch = CopyFromReg 0xca6fa58, Register:i64 %13
                  0xc031640: i64 = Register %13
              0xc031020: i64 = Constant<4>
            0xc030f40: i64 = undef
          0xc031aa0: i32 = Constant<16>
      0xba42680: i32 = AssertZext # D:1 0xba42610, ValueType:ch:i10
        0xba42610: i32,ch = CopyFromReg # D:1 0xca6fa58, Register:i32 %10
          0xc0313a0: i32 = Register %10
In function: _Z15block_transposePKiS0_Pi
clang-16: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 16.0.0 (https://github.com/llvm/llvm-project dee4bc4a4ecc56623d511ea571355d1e1ad02159)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/tribizel/llvm-project/build/bin
clang-16: note: diagnostic msg: Error generating preprocessed source(s).

When compiling only the short overload, the IR is a bit more manageable

fatal error: error in backend: Cannot select: 0x9cd80f0: i1 = mul # D:1 0x9cd8320, 0x9cd8550
  0x9cd8320: i1 = truncate 0xa608960
    0xa608960: i16,ch = CopyFromReg 0xacb1ac8, Register:i16 %2
      0xa608810: i16 = Register %2
  0x9cd8550: i1 = truncate # D:1 0xa608c00
    0xa608c00: i32 = AssertZext # D:1 0x9cd88d0, ValueType:ch:i27
      0x9cd88d0: i32,ch = CopyFromReg # D:1 0xacb1ac8, Register:i32 %3
        0x9cd8470: i32 = Register %3
In function: _Z15block_transposePKsPKiPs

I bisected this down to dee4bc4 (https://reviews.llvm.org/D134596)

cc @changpeng

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions