Skip to content

[AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions #100485

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",

let OtherPredicates = [HasDot7Insts] in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]

let OtherPredicates = [HasDot1Insts] in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]

def DOT2_BF16_Profile
Expand All @@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,

multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
let IsDOT = 1 in
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
null_frag, 1>;
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1454,12 +1454,13 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
bit IsMAI = MAI;
}

def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;

// Packed is misleading, but it enables the appropriate op_sel
// modifiers.
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
Expand Down Expand Up @@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN: V_DOT
Expand All @@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN: V_DOT
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
Expand Down Expand Up @@ -1265,7 +1265,7 @@ name: dot_write_vgpr_different_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN: V_DOT
Expand All @@ -1275,7 +1275,7 @@ name: dot_write_vgpr_different_dot_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN: V_DOT
Expand Down
192 changes: 0 additions & 192 deletions llvm/test/MC/AMDGPU/dl-insts.s
Original file line number Diff line number Diff line change
Expand Up @@ -536,198 +536,6 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
// CHECK: encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
// CHECK: encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
// CHECK: encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
// CHECK: encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
// CHECK: encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
// CHECK: encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
// CHECK: encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
// CHECK: encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
// CHECK: encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
// CHECK: encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
// CHECK: encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
// CHECK: encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]

//
// Test clamp.
Expand Down
Loading
Loading