[AArch64] Codegen for 16/32/64-bit floating-point atomicrmw ops #125686
Merged
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Jonathan Thackray (jthackray)

Changes

Add support for AArch64 16/32/64-bit floating-point atomic read-modify-write operations (FADD, FMAX, FMIN) using LDFADD, LDFMAX, LDFMIN atomic instructions.

Patch is 78.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125686.diff

6 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84f6d421b70f961..a9a7f31441d932c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -974,6 +974,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
#undef LCALLNAME5
}
+ if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f64, LibCall);
+
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f64, LibCall);
+
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f64, LibCall);
+ }
+
if (Subtarget->hasLSE128()) {
// Custom lowering because i128 is not legal. Must be replaced by 2x64
// values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
@@ -27825,6 +27839,14 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (CanUseLSE128)
return AtomicExpansionKind::None;
+ // Add support for LDFADD and friends
+ bool CanUseAtomicFP = Subtarget->hasLSFE() &&
+ (AI->getOperation() == AtomicRMWInst::FAdd ||
+ AI->getOperation() == AtomicRMWInst::FMax ||
+ AI->getOperation() == AtomicRMWInst::FMin);
+ if (CanUseAtomicFP)
+ return AtomicExpansionKind::None;
+
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5e6db9d007a5557..045bebf15a701ab 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -543,6 +543,16 @@ let Predicates = [HasLSE] in {
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
}
+defm atomic_load_fadd : binary_atomic_op_fp<atomic_load_fadd>;
+defm atomic_load_fmin : binary_atomic_op_fp<atomic_load_fmin>;
+defm atomic_load_fmax : binary_atomic_op_fp<atomic_load_fmax>;
+
+let Predicates = [HasLSFE] in {
+ defm : LDFPOPregister_patterns<"LDFADD", "atomic_load_fadd">;
+ defm : LDFPOPregister_patterns<"LDFMAX", "atomic_load_fmax">;
+ defm : LDFPOPregister_patterns<"LDFMIN", "atomic_load_fmin">;
+}
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09def..810255eabfded8b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12474,6 +12474,32 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
(i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
}
+let Predicates = [HasLSFE] in
+multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
+ ValueType vt, dag SrcRHS, dag DstRHS> {
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") FPR64:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # suffix) DstRHS, FPR64:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") FPR64:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "A" # suffix) DstRHS, FPR64:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") FPR64:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "L" # suffix) DstRHS, FPR64:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") FPR64:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, FPR64:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") FPR64:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, FPR64:$Rn)>;
+}
+
+multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
+ ValueType vt, dag RHS> {
+ defm : LDFPOPregister_patterns_ord_dag<inst, suffix, op, vt, RHS, RHS>;
+}
+
+multiclass LDFPOPregister_patterns<string inst, string op> {
+ defm : LDFPOPregister_patterns_ord<inst, "H", op, f16, (f16 FPR16:$Rm)>;
+ defm : LDFPOPregister_patterns_ord<inst, "S", op, f32, (f32 FPR32:$Rm)>;
+ defm : LDFPOPregister_patterns_ord<inst, "D", op, f64, (f64 FPR64:$Rm)>;
+}
+
let Predicates = [HasLSE] in
multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
ValueType vt, dag OLD, dag NEW> {
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
new file mode 100644
index 000000000000000..3d6ad81ea674cc5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -0,0 +1,949 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+
+define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK: ldfadd h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK: ldfadda h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK: ldfaddl h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK: ldfaddal h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK: ldfaddal h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK: ldfadd s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK: ldfadda s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK: ldfaddl s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value release, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK: ldfaddal s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK: ldfaddal s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK: ldfadd d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK: ldfadda d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK: ldfaddl d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value release, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK: ldfaddal d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK: ldfaddal d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+ ret double %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value acquire, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 1
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value acquire, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value release, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 1
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value acquire, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value release, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 1
+ ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O1: ldxrh w8, [x0]
+; -O1: stxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O1: ldaxrh w8, [x0]
+; -O1: stxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value acquire, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O1: ldxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value release, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O1: ldaxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O1: ldaxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 2
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O1: ldxr w8, [x0]
+; -O1: stxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O1: ldaxr w8, [x0]
+; -O1: stxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value acquire, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O1: ldxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value release, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O1: ldaxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O1: ldaxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O1: ldxr x8, [x0]
+; -O1: stxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O1: ldaxr x8, [x0]
+; -O1: stxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value acquire, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O1: ldxr x8, [x0]
+; -O1: stlxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value release, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O1: ldaxr x8, [x0]
+; -O1: stlxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrm...
[truncated]
Force-pushed from 80e63d2 to 74f93aa (compare)
Force-pushed from 02d9090 to 912238c (compare)
Stylie777 reviewed on Feb 10, 2025.
jthackray added a commit to jthackray/llvm-project that referenced this pull request on Feb 11, 2025:
Update the `generate-tests.py` script to create new tests for `atomicrmw {fadd,fmin,fmax}` and test these with `half`, `float`, `bfloat` and `double`. Generate auto-tests to check both with and without `+lsfe`, so that when llvm#125686 is merged, `+lsfe` will use a single atomic floating-point instruction.
jthackray added a commit that referenced this pull request on Feb 12, 2025:
Update the `generate-tests.py` script to create new tests for `atomicrmw {fadd,fmin,fmax}` and test these with `half`, `float`, `bfloat` and `double`. Generate fp auto-tests to check both with and without `+lsfe`, so that when #125686 is merged, `+lsfe` will use a single atomic floating-point instruction.
…ify-write ops
Add support for 16/32/64-bit floating-point atomic read-modify-write operations (FADD, FMAX, FMIN) using LDFADD, LDFMAX, LDFMIN atomic instructions.
flovent pushed a commit to flovent/llvm-project that referenced this pull request on Feb 13, 2025.
joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request on Feb 14, 2025.
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request on Feb 24, 2025.
kmclaughlin-arm approved these changes on Mar 4, 2025.
jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request on Mar 21, 2025.
Codegen for AArch64 16/32/64-bit floating-point atomic read-modify-write operations (`atomicrmw {fadd,fmin,fmax}`) using LD{B}FADD, LD{B}FMAX and LD{B}FMIN atomic instructions.
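As a rough illustration of the source-level effect (not part of the patch; the function name and flag spellings below are illustrative), C++20's `std::atomic<float>::fetch_add` lowers to an `atomicrmw fadd`, which this change selects as a single LDFADD-family instruction when FEAT_LSFE is available:

```cpp
// Sketch only: assumes a target with FEAT_LSFE enabled
// (the tests above drive this with `llc -mattr=+lsfe`).
#include <atomic>

float accumulate(std::atomic<float> &acc, float v) {
  // Default seq_cst ordering; per the fadd tests above, this corresponds
  // to the AL-suffixed form (ldfaddal s0, s0, [x0]).
  return acc.fetch_add(v); // returns the previous value, like atomicrmw fadd
}
```

Operations with no matching LSFE instruction (for example `fsub` in the tests above) keep the existing LL/SC or CAS expansion.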