diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b20383e72e66a..2c98b2f2e26b1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -33,7 +33,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -247,10 +247,10 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; @@ -265,7 +265,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } multiclass StructLoad { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -314,11 +314,11 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; // Load one vector (vector base + scalar offset) - def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; - def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; // Load one vector (scalar base + vector offset) - def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; + def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; // Load N-element structure into N vectors (scalar base) defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">; @@ -341,7 +341,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -350,7 +350,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -435,7 +435,7 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v } // let SVETargetGuard = "sve" multiclass StructStore { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -451,10 +451,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">; defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; @@ -470,12 +470,12 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) - def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; - def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; // Store one vector (scalar base + vector offset) - def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; - def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; // Store N vectors into N-element structure (scalar base) defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">; @@ -2042,20 +2042,20 @@ def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo } multiclass MultiVecLoad { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -2067,20 +2067,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { } multiclass MultiVecStore { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 361e4c4bf2e2e..b04becf006569 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10199,6 +10199,7 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { default: llvm_unreachable("Invalid SVETypeFlag!"); + case SVETypeFlags::EltTyMFloat8: case SVETypeFlags::EltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::EltTyInt16: @@ -10627,7 +10628,7 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is returned may be different from the @@ -10674,7 +10675,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is stored may be different from the diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 405242e97e75c..bd625052cb5ed 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -107,6 +107,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { MT->getNumRows() * MT->getNumColumns()); } + if (T->isMFloat8Type()) + return llvm::Type::getInt8Ty(getLLVMContext()); + llvm::Type *R = ConvertType(T); // Check for the boolean vector case. diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c index 2f3994df03784..0b355db4b2073 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c @@ -49,8 +49,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c index 425e6a57ffe3c..0daeeec9e7dd7 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c @@ -49,8 +49,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalb_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalb_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalt_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalt_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -169,8 +169,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -179,8 +179,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -211,8 +211,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -221,8 +221,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -253,8 +253,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -263,8 +263,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -295,8 +295,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -305,8 +305,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c index 276ef64736bc3..40dcd65f6c609 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c @@ -205,6 +205,21 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svld1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svmfloat8_t test_svld1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,,)(pg, base, vnum); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svld1_gather_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c index 3097cb9cbcaab..abe1c87b6f2c3 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c @@ -206,6 +206,21 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld2_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , } [[TMP1]] +// +svmfloat8x2_t test_svld2_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c index 2deb5a1d4930c..5ff7ad9de483b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c @@ -205,6 +205,21 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld3_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] +// +svmfloat8x3_t test_svld3_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c index 30796a4f46a72..650fd5986be27 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c @@ -205,6 +205,21 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld4_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] +// +svmfloat8x4_t test_svld4_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c index d343c124fe6a7..b96bf0cb23d12 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c @@ -206,6 +206,21 @@ svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svldnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svldnt1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svldnt1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svldnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svldnt1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c index 29afdaf3eb0c7..21350007da86f 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c @@ -205,6 +205,21 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data); } +// CHECK-LABEL: @test_svst1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8,,)(pg, base, vnum, data); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svst1_scatter_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c index d1511b4c363d0..9e73e4464c6f9 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c @@ -293,6 +293,29 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst2_mf8u10__SVBool_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 @@ -585,3 +608,28 @@ void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2 { return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c index 4198a325f5fb6..b693b693b1ebb 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c @@ -337,6 +337,33 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst3_mf8u10__SVBool_tPu6__mfp813svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -673,3 +700,32 @@ void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3 { return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c index 160a21d93e416..f8c3b60682573 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c @@ -381,6 +381,37 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst4_mf8u10__SVBool_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -761,3 +792,36 @@ void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4 { return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c index 5e0869557c8d7..f739ea5dca641 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c @@ -206,6 +206,21 @@ void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svstnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svstnt1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svstnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64 { return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c index 93cb653032df7..ee5c2c592c61d 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -309,6 +309,21 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svld1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -354,6 +369,20 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x4,)(pn, base); +} // == VNUM variants == @@ -795,6 +824,29 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svld1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -863,3 +915,26 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_ { return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svld1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c index 8254c6aec5dc1..692af131e69de 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -307,6 +307,21 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svldnt1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svldnt1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -352,6 +367,20 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,_x4,)(pn, base); +} // == VNUM variants == @@ -793,6 +822,29 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svldnt1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -861,3 +913,26 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6 { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svldnt1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c index 233c9b29e707a..7adb3d4940e7f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c @@ -214,6 +214,21 @@ svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld3q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -709,6 +747,21 @@ svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1005,6 +1058,29 @@ svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svmfloat8x3_t test_svld3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld4q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -1190,6 +1266,21 @@ svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1485,6 +1576,28 @@ svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_mf8,)(pg, base, vnum); +} // Gather for 128 bits // vector base + scalar offset @@ -1692,6 +1805,23 @@ svbfloat16_t test_svld1q_gather_u64base_offset_bf16(svbool_t pg, svuint64_t base return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_bf16,)(pg, base, offset); } +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_mf8,)(pg, base, offset); +} + // Vector base and no offset // CHECK-LABEL: @test_svld1q_gather_u64base_u64( // CHECK-NEXT: entry: @@ -1897,6 +2027,23 @@ svbfloat16_t test_svld1q_gather_u64base_bf16(svbool_t pg, svuint64_t base) return SVE_ACLE_FUNC(svld1q_gather,_u64base,_bf16,)(pg, base); } +// CHECK-LABEL: @test_svld1q_gather_u64base_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_mf8u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_mf8(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld1q_gather_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ -2428,3 +2575,19 @@ svfloat32_t test_svdl1q_gather_u64offset_f32(svbool_t pg, const float32_t *base, svfloat64_t test_svdl1q_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t off) { return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f64)(pg, base, off); } + +// CHECK-LABEL: @test_svld1q_gather_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_mf8u10__SVBool_tPKu6__mfp8u12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64offset_mf8(svbool_t pg, mfloat8_t const *base, svuint64_t off) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_mf8)(pg, base, off); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c index 9db3e5e98975a..e71e68114a5af 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c @@ -306,6 +306,21 @@ void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svst1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -351,6 +366,21 @@ void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -798,6 +828,29 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo return SVE_ACLE_FUNC(svst1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: @test_svst1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} + // CHECK-LABEL: @test_svst1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 @@ -872,3 +925,26 @@ void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svflo { return SVE_ACLE_FUNC(svst1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svst1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c index ed1959327a611..1544260377a20 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -325,6 +325,21 @@ void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svstnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svstnt1_f16_x4( // CHECK-NEXT: entry: @@ -373,6 +388,21 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: @test_svstnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -837,6 +867,28 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: @test_svstnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} // CHECK-LABEL: @test_svstnt1_vnum_f16_x4( // CHECK-NEXT: entry: @@ -914,3 +966,26 @@ void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svf { return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c index b91780304dacb..517d5f244a46f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c @@ -213,6 +213,21 @@ void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt) SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST3Q // CHECK-LABEL: @test_svst3q_u8( @@ -710,6 +748,21 @@ void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt) SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1006,6 +1059,29 @@ void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST4Q // CHECK-LABEL: @test_svst4q_u8( @@ -1207,6 +1283,21 @@ void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1503,6 +1594,29 @@ void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // Scatter for 128 bits // vector base + scalar offset // CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64( @@ -1710,6 +1824,23 @@ void test_svst1q_scatter_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64 SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _bf16)(pg, base, offset, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tlu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _mf8)(pg, base, offset, data); +} + // Vector Base and no Offset // CHECK-LABEL: @test_svst1q_scatter_u64base_u64( // CHECK-NEXT: entry: @@ -1915,6 +2046,23 @@ void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, svbfloat16_t SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_bf16)(pg, base, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_mf8u10__SVBool_tu12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_mf8(svbool_t pg, svuint64_t base, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_mf8)(pg, base, data); +} + // CHECK-LABEL: @test_svst1q_scatter_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ -2798,3 +2946,35 @@ void test_svst1q_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t void test_svst1q_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t off, svfloat64_t data) { SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_f64)(pg, base, off, data); } + +// CHECK-LABEL: @test_svst1q_scatter_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_mf8u10__SVBool_tPu6__mfp8u12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64offset_mf8(svbool_t pg, mfloat8_t *base, svuint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_mf8)(pg, base, off, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_s64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_s64offset_mf8u10__SVBool_tPu6__mfp8u11__SVInt64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_s64offset_mf8(svbool_t pg, mfloat8_t *base, svint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_mf8)(pg, base, off, data); +} diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 9385b537f18b3..d9e7b5d4707d8 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -38,22 +38,34 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) { // CHECK-C-LABEL: define dso_local <1 x i8> @func1n( // CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-C-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-C-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: ret <1 x i8> [[TMP2]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-CXX-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-CXX-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-CXX-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: ret <1 x i8> [[TMP2]] // __mfp8 func1n(__mfp8 mfp8) { __mfp8 f1n[10]; @@ -86,14 +98,18 @@ mfloat8_t test_extract_element(mfloat8x16_t x, int i) { // CHECK-C-LABEL: define dso_local <16 x i8> @test_insert_element( // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-C-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-C-NEXT: ret <16 x i8> [[VECINS]] // // CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_insert_element14__Mfloat8x16_tiu6__mfp8( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-CXX-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-CXX-NEXT: ret <16 x i8> [[VECINS]] //