Skip to content

[AMDGPU] Emit AMDHSA kernel descriptors to .amdhsa.kd section #122930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

Streamer.pushSection();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function seems to be called at parse time. Otherwise, if it were only called at finish time, pushSection/popSection would be unnecessary.

Streamer.switchSection(Context.getELFSection(
".amdhsa.kd", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_GNU_RETAIN));
Streamer.emitLabel(KernelDescriptorSymbol);
Streamer.emitValueToAlignment(Align(alignof(amdhsa::kernel_descriptor_t)), 0,
1, 0);
Streamer.emitValue(
KernelDescriptor.group_segment_fixed_size,
sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
Expand Down Expand Up @@ -1020,4 +1025,5 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
Streamer.emitInt8(0u);
Streamer.popSection();
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .amdhsa.kd - | FileCheck --check-prefix=OBJDUMP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s

; OBJDUMP: Contents of section .rodata:
; OBJDUMP: Contents of section .amdhsa.kd:
; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000 ................
; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ................
; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ................
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/code-object-v3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
; OSABI-AMDHSA-ELF: .text PROGBITS {{[0-9]+}} {{[0-9]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
; OSABI-AMDHSA-ELF: .rodata PROGBITS {{[0-9]+}} {{[0-9]+}} {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64

; OSABI-AMDHSA-ELF: Relocation section '.rela.rodata' at offset
; OSABI-AMDHSA-ELF: Relocation section '.rela.amdhsa.kd' at offset
; OSABI-AMDHSA-ELF: R_AMDGPU_REL64 0000000000000000 fadd + 10
; OSABI-AMDHSA-ELF: R_AMDGPU_REL64 0000000000000100 fsub + 10
; OSABI-AMDHSA-ELF: R_AMDGPU_REL64 0000000000000200 empty + 10
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-objdump -s -j .amdhsa.kd - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s

; TODO: Update to check for granulated sgpr count directive once one is added.
Expand All @@ -10,7 +10,7 @@ define amdgpu_kernel void @kern() #0 {
; ASM: .amdhsa_reserve_xnack_mask 1

; Verify that an extra SGPR block is reserved with XNACK "any" tid setting.
; OBJ: Contents of section .rodata:
; OBJ: Contents of section .amdhsa.kd:
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-objdump -s -j .amdhsa.kd - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s

; TODO: Update to check for granulated sgpr count directive once one is added.
Expand All @@ -10,7 +10,7 @@ define amdgpu_kernel void @kern() #0 {
; ASM: .amdhsa_reserve_xnack_mask 0

; Verify that an extra SGPR block is not reserved with XNACK "off" tid setting.
; OBJ: Contents of section .rodata:
; OBJ: Contents of section .amdhsa.kd:
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-objdump -s -j .amdhsa.kd - | FileCheck --check-prefixes=OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s

; TODO: Update to check for granulated sgpr count directive once one is added.
Expand All @@ -10,7 +10,7 @@ define amdgpu_kernel void @kern() #0 {
; ASM: .amdhsa_reserve_xnack_mask 1

; Verify that an extra SGPR block is reserved with XNACK "on" tid setting.
; OBJ: Contents of section .rodata:
; OBJ: Contents of section .amdhsa.kd:
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/amdhsa-kd-kernarg-preload.s
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s -o - | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s -o - | llvm-objdump -s -j .amdhsa.kd - | FileCheck --check-prefix=OBJDUMP %s

.amdgcn_target "amdgcn-amd-amdhsa--gfx940"

.rodata

// Account for preload kernarg SGPRs in KD field GRANULATED_WAVEFRONT_SGPR_COUNT.

// OBJDUMP: Contents of section .rodata:
// OBJDUMP: Contents of section .amdhsa.kd:
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 ................
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ................
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ................
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// OBJDUMP: 0000 00000000 0f000000 00000000 00000000

Expand Down
17 changes: 9 additions & 8 deletions llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .amdhsa.kd PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} AR {{[0-9]+}} {{[0-9]+}} 8

// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: Relocation section '.rela.amdhsa.kd' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
Expand All @@ -18,12 +19,12 @@
// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 4 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 4 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 4 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 4 disabled_user_sgpr.kd

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sgpr-init-bug-v3.s
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// Check that SGPR init bug on gfx803 is corrected by the assembler, setting
// GRANULATED_WAVEFRONT_SGPR_COUNT to 11.

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefix=ASM %s

// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
// When going from asm -> obj, the expressions should get resolved (through fixups),

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// expr_defined_later
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-tg-split.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
Expand Down
17 changes: 9 additions & 8 deletions llvm/test/MC/AMDGPU/hsa-v4.s
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .amdhsa.kd PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} AR {{[0-9]+}} {{[0-9]+}} 8

// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: Relocation section '.rela.amdhsa.kd' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
Expand All @@ -18,12 +19,12 @@
// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 4 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 4 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 4 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 4 disabled_user_sgpr.kd

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
Expand Down
19 changes: 10 additions & 9 deletions llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// RUN: llvm-objdump -s -j .amdhsa.kd %t | FileCheck --check-prefix=OBJDUMP %s

// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: .amdhsa.kd PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} AR {{[0-9]+}} {{[0-9]+}} 8

// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: Relocation section '.rela.amdhsa.kd' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
Expand All @@ -23,12 +24,12 @@
// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 4 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 4 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 4 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 4 disabled_user_sgpr.kd

// OBJDUMP: Contents of section .rodata
// OBJDUMP: Contents of section .amdhsa.kd
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
Expand Down
Loading
Loading