Skip to content

Commit c556e40

Browse files
committed
[RFC][WIP][AMDGPU] Use bf16 instead of i16 for bfloat
Currently we generally use `i16` to represent `bf16` in the AMDGPU TableGen files. I'm not sure of the reason behind this; my guess is that the `bf16` type was not yet available when the support was first added. This patch tries to use `bf16` directly in those TableGen files, with the aim of fixing #79369. A possible workaround for #79369 would be to treat all `INT16` variants as `BFloat` in `getOpFltSemantics`, but that doesn't look good IMHO. Since I'm fairly new to the AMDGPU backend, I'd appreciate it if you could point out anything I have misunderstood.
1 parent ac97562 commit c556e40

16 files changed

+292
-56
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -5912,8 +5912,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59125912
}
59135913
}
59145914

5915-
assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
5916-
"Must be able to losslessly bit cast to param");
59175915
// Cast vector type (e.g., v256i32) to x86_amx; this only happens
59185916
// in amx intrinsics.
59195917
if (PTy->isX86_AMXTy())
@@ -5943,8 +5941,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59435941
}
59445942
}
59455943

5946-
assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
5947-
"Must be able to losslessly bit cast result type");
59485944
// Cast x86_amx to vector type (e.g., v256i32); this only happens
59495945
// in amx intrinsics.
59505946
if (V->getType()->isX86_AMXTy())

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+4-4
Original file line numberDiff line numberDiff line change
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
28192819
def int_amdgcn_fdot2_bf16_bf16 :
28202820
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
28212821
DefaultAttrsIntrinsic<
2822-
[llvm_i16_ty], // %r
2822+
[llvm_bfloat_ty], // %r
28232823
[
2824-
llvm_v2i16_ty, // %a
2825-
llvm_v2i16_ty, // %b
2826-
llvm_i16_ty // %c
2824+
llvm_v2bf16_ty, // %a
2825+
llvm_v2bf16_ty, // %b
2826+
llvm_bfloat_ty // %c
28272827
],
28282828
[IntrNoMem, IntrSpeculatable]
28292829
>;

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User &U,
15621562

15631563
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
15641564
MachineIRBuilder &MIRBuilder) {
1565-
if (U.getType()->getScalarType()->isBFloatTy() ||
1566-
U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
1565+
if (Opcode != TargetOpcode::G_BITCAST &&
1566+
(U.getType()->getScalarType()->isBFloatTy() ||
1567+
U.getOperand(0)->getType()->getScalarType()->isBFloatTy()))
15671568
return false;
15681569
Register Op = getOrCreateVReg(*U.getOperand(0));
15691570
Register Res = getOrCreateVReg(U);

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+71
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
475475

476476
bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
477477

478+
bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
479+
478480
bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
479481

480482
bool isSSrcV2F16() const {
@@ -541,22 +543,40 @@ class AMDGPUOperand : public MCParsedAsmOperand {
541543
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
542544
}
543545

546+
bool isVCSrcTBF16() const {
547+
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
548+
}
549+
544550
bool isVCSrcTF16() const {
545551
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
546552
}
547553

554+
bool isVCSrcTBF16_Lo128() const {
555+
return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
556+
}
557+
548558
bool isVCSrcTF16_Lo128() const {
549559
return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
550560
}
551561

562+
bool isVCSrcFake16BF16_Lo128() const {
563+
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
564+
}
565+
552566
bool isVCSrcFake16F16_Lo128() const {
553567
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
554568
}
555569

570+
bool isVCSrc_bf16() const {
571+
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
572+
}
573+
556574
bool isVCSrc_f16() const {
557575
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
558576
}
559577

578+
bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
579+
560580
bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
561581

562582
bool isVSrc_b32() const {
@@ -597,18 +617,34 @@ class AMDGPUOperand : public MCParsedAsmOperand {
597617

598618
bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
599619

620+
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
621+
600622
bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
601623

624+
bool isVSrcT_bf16_Lo128() const {
625+
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
626+
}
627+
602628
bool isVSrcT_f16_Lo128() const {
603629
return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
604630
}
605631

632+
bool isVSrcFake16_bf16_Lo128() const {
633+
return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
634+
}
635+
606636
bool isVSrcFake16_f16_Lo128() const {
607637
return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
608638
}
609639

640+
bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
641+
610642
bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
611643

644+
bool isVSrc_v2bf16() const {
645+
return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
646+
}
647+
612648
bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
613649

614650
bool isVISrcB32() const {
@@ -635,6 +671,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
635671
return isVISrcF16() || isVISrcB32();
636672
}
637673

674+
bool isVISrc_64_bf16() const {
675+
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
676+
}
677+
638678
bool isVISrc_64_f16() const {
639679
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
640680
}
@@ -803,6 +843,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
803843
return isAISrc_128F16() || isAISrc_128_b32();
804844
}
805845

846+
bool isVISrc_128_bf16() const {
847+
return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
848+
}
849+
806850
bool isVISrc_128_f16() const {
807851
return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
808852
}
@@ -1890,6 +1934,14 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
18901934
case AMDGPU::OPERAND_REG_IMM_V2FP16:
18911935
case AMDGPU::OPERAND_KIMM16:
18921936
return &APFloat::IEEEhalf();
1937+
case AMDGPU::OPERAND_REG_IMM_BF16:
1938+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1939+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1940+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1941+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1942+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1943+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
1944+
return &APFloat::BFloat();
18931945
default:
18941946
llvm_unreachable("unsupported fp type");
18951947
}
@@ -2186,17 +2238,24 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
21862238
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
21872239
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
21882240
case AMDGPU::OPERAND_REG_IMM_INT16:
2241+
case AMDGPU::OPERAND_REG_IMM_BF16:
21892242
case AMDGPU::OPERAND_REG_IMM_FP16:
2243+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
21902244
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
21912245
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2246+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
21922247
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
21932248
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2249+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
21942250
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
21952251
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2252+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
21962253
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
21972254
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2255+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
21982256
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
21992257
case AMDGPU::OPERAND_REG_IMM_V2INT16:
2258+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
22002259
case AMDGPU::OPERAND_REG_IMM_V2FP16:
22012260
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
22022261
case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -2240,6 +2299,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22402299
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
22412300
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
22422301
case AMDGPU::OPERAND_REG_IMM_V2INT16:
2302+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
22432303
case AMDGPU::OPERAND_REG_IMM_V2FP16:
22442304
case AMDGPU::OPERAND_REG_IMM_V2FP32:
22452305
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
@@ -2277,11 +2337,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22772337
return;
22782338

22792339
case AMDGPU::OPERAND_REG_IMM_INT16:
2340+
case AMDGPU::OPERAND_REG_IMM_BF16:
22802341
case AMDGPU::OPERAND_REG_IMM_FP16:
2342+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
22812343
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
22822344
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2345+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
22832346
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
22842347
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2348+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
22852349
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
22862350
if (isSafeTruncation(Val, 16) &&
22872351
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
@@ -2296,8 +2360,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22962360
return;
22972361

22982362
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2363+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
22992364
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
23002365
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2366+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
23012367
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23022368
assert(isSafeTruncation(Val, 16));
23032369
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
@@ -3429,6 +3495,11 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
34293495
OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
34303496
return AMDGPU::isInlinableLiteralV2F16(Val);
34313497

3498+
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3499+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 ||
3500+
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3501+
return AMDGPU::isInlinableLiteralV2BF16(Val);
3502+
34323503
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
34333504
}
34343505
default:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

+59
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,49 @@ static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
488488
return true;
489489
}
490490

491+
static bool printImmediateBFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
492+
raw_ostream &O) {
493+
if (Imm == 0x3F80)
494+
O << "1.0";
495+
else if (Imm == 0xBF80)
496+
O << "-1.0";
497+
else if (Imm == 0x3F00)
498+
O << "0.5";
499+
else if (Imm == 0xBF00)
500+
O << "-0.5";
501+
else if (Imm == 0x4000)
502+
O << "2.0";
503+
else if (Imm == 0xC000)
504+
O << "-2.0";
505+
else if (Imm == 0x4080)
506+
O << "4.0";
507+
else if (Imm == 0xC080)
508+
O << "-4.0";
509+
else if (Imm == 0x3E22 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
510+
O << "0.15915494";
511+
else
512+
return false;
513+
514+
return true;
515+
}
516+
517+
void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
518+
const MCSubtargetInfo &STI,
519+
raw_ostream &O) {
520+
int16_t SImm = static_cast<int16_t>(Imm);
521+
if (isInlinableIntLiteral(SImm)) {
522+
O << SImm;
523+
return;
524+
}
525+
526+
uint16_t HImm = static_cast<uint16_t>(Imm);
527+
if (printImmediateBFloat16(HImm, STI, O))
528+
return;
529+
530+
uint64_t Imm16 = static_cast<uint16_t>(Imm);
531+
O << formatHex(Imm16);
532+
}
533+
491534
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
492535
const MCSubtargetInfo &STI,
493536
raw_ostream &O) {
@@ -528,6 +571,13 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
528571
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
529572
return;
530573
break;
574+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
575+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
576+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
577+
if (isUInt<16>(Imm) &&
578+
printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
579+
return;
580+
break;
531581
default:
532582
llvm_unreachable("bad operand type");
533583
}
@@ -799,11 +849,20 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
799849
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
800850
printImmediate16(Op.getImm(), STI, O);
801851
break;
852+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
853+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
854+
case AMDGPU::OPERAND_REG_IMM_BF16:
855+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
856+
printImmediateBF16(Op.getImm(), STI, O);
857+
break;
802858
case AMDGPU::OPERAND_REG_IMM_V2INT16:
859+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
803860
case AMDGPU::OPERAND_REG_IMM_V2FP16:
804861
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
805862
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
863+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
806864
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
865+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
807866
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
808867
printImmediateV216(Op.getImm(), OpTy, STI, O);
809868
break;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

+2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8888
raw_ostream &O);
8989
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
9090
raw_ostream &O);
91+
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
92+
raw_ostream &O);
9193
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9294
const MCSubtargetInfo &STI, raw_ostream &O);
9395
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,13 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
276276
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
277277
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
278278
return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
279+
case AMDGPU::OPERAND_REG_IMM_BF16:
279280
case AMDGPU::OPERAND_REG_IMM_FP16:
281+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
280282
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
283+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
281284
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
285+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
282286
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
283287
// FIXME Is this correct? What do inline immediates do on SI for f16 src
284288
// which does not have f16 support?
@@ -288,8 +292,11 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
288292
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
289293
return AMDGPU::getInlineEncodingV2I16(static_cast<uint32_t>(Imm))
290294
.value_or(255);
295+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
291296
case AMDGPU::OPERAND_REG_IMM_V2FP16:
297+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
292298
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
299+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
293300
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
294301
return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
295302
.value_or(255);

llvm/lib/Target/AMDGPU/SIDefines.h

+7
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,12 @@ enum OperandType : unsigned {
202202
OPERAND_REG_IMM_INT16,
203203
OPERAND_REG_IMM_FP32,
204204
OPERAND_REG_IMM_FP64,
205+
OPERAND_REG_IMM_BF16,
205206
OPERAND_REG_IMM_FP16,
207+
OPERAND_REG_IMM_BF16_DEFERRED,
206208
OPERAND_REG_IMM_FP16_DEFERRED,
207209
OPERAND_REG_IMM_FP32_DEFERRED,
210+
OPERAND_REG_IMM_V2BF16,
208211
OPERAND_REG_IMM_V2FP16,
209212
OPERAND_REG_IMM_V2INT16,
210213
OPERAND_REG_IMM_V2INT32,
@@ -214,10 +217,12 @@ enum OperandType : unsigned {
214217
OPERAND_REG_INLINE_C_INT16,
215218
OPERAND_REG_INLINE_C_INT32,
216219
OPERAND_REG_INLINE_C_INT64,
220+
OPERAND_REG_INLINE_C_BF16,
217221
OPERAND_REG_INLINE_C_FP16,
218222
OPERAND_REG_INLINE_C_FP32,
219223
OPERAND_REG_INLINE_C_FP64,
220224
OPERAND_REG_INLINE_C_V2INT16,
225+
OPERAND_REG_INLINE_C_V2BF16,
221226
OPERAND_REG_INLINE_C_V2FP16,
222227
OPERAND_REG_INLINE_C_V2INT32,
223228
OPERAND_REG_INLINE_C_V2FP32,
@@ -232,10 +237,12 @@ enum OperandType : unsigned {
232237
/// Operands with an AccVGPR register or inline constant
233238
OPERAND_REG_INLINE_AC_INT16,
234239
OPERAND_REG_INLINE_AC_INT32,
240+
OPERAND_REG_INLINE_AC_BF16,
235241
OPERAND_REG_INLINE_AC_FP16,
236242
OPERAND_REG_INLINE_AC_FP32,
237243
OPERAND_REG_INLINE_AC_FP64,
238244
OPERAND_REG_INLINE_AC_V2INT16,
245+
OPERAND_REG_INLINE_AC_V2BF16,
239246
OPERAND_REG_INLINE_AC_V2FP16,
240247
OPERAND_REG_INLINE_AC_V2INT32,
241248
OPERAND_REG_INLINE_AC_V2FP32,

0 commit comments

Comments
 (0)