[AArch64] Verify ldp/stp alignment stricter #84124

Merged 1 commit on Mar 6, 2024
44 changes: 25 additions & 19 deletions llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -62,6 +62,8 @@ STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
+                                   "that did not pass the alignment check");
 
 DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
               "Controls which pairs are considered for renaming");
@@ -2337,9 +2339,6 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   MachineBasicBlock::iterator Paired =
       findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
   if (Paired != E) {
-    ++NumPairCreated;
-    if (TII->hasUnscaledLdStOffset(MI))
-      ++NumUnscaledPairCreated;
     // Keeping the iterator straight is a pain, so we let the merge routine tell
     // us what the next instruction is after it's done mucking about.
     auto Prev = std::prev(MBBI);
@@ -2349,24 +2348,31 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
     MachineMemOperand *MemOp =
         MI.memoperands_empty() ? nullptr : MI.memoperands().front();
 
-    // Get the needed alignments to check them if
-    // ldp-aligned-only/stp-aligned-only features are opted.
-    uint64_t MemAlignment = MemOp ? MemOp->getAlign().value() : -1;
-    uint64_t TypeAlignment = MemOp ? Align(MemOp->getSize()).value() : -1;
+    // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
+    // that the alignment of the source pointer is at least double the alignment
+    // of the type.
+    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
+        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
+      // If there is no size/align information, cancel the transformation.
+      if (!MemOp || !MemOp->getMemoryType().isValid()) {
+        NumFailedAlignmentCheck++;
+        return false;
+      }
 
-    // If a load arrives and ldp-aligned-only feature is opted, check that the
-    // alignment of the source pointer is at least double the alignment of the
-    // type.
-    if (MI.mayLoad() && Subtarget->hasLdpAlignedOnly() && MemOp &&
-        MemAlignment < 2 * TypeAlignment)
-      return false;
+      // Get the needed alignments to check them if
+      // ldp-aligned-only/stp-aligned-only features are opted.
+      uint64_t MemAlignment = MemOp->getAlign().value();
+      uint64_t TypeAlignment = Align(MemOp->getSize()).value();
 
-    // If a store arrives and stp-aligned-only feature is opted, check that the
-    // alignment of the source pointer is at least double the alignment of the
-    // type.
-    if (MI.mayStore() && Subtarget->hasStpAlignedOnly() && MemOp &&
-        MemAlignment < 2 * TypeAlignment)
-      return false;
+      if (MemAlignment < 2 * TypeAlignment) {
+        NumFailedAlignmentCheck++;
+        return false;
+      }
+    }
+
+    ++NumPairCreated;
+    if (TII->hasUnscaledLdStOffset(MI))
+      ++NumUnscaledPairCreated;
 
     MBBI = mergePairedInsns(MBBI, Paired, Flags);
     // Collect liveness info for instructions between Prev and the new position
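
Net effect of this hunk: on subtargets with ldp-aligned-only/stp-aligned-only, a pair is formed only when the memory operand's alignment is known and at least double the type alignment; missing size/alignment information now cancels the transformation explicitly, and the NumPairCreated counters are bumped only after the check passes. A standalone restatement of the decision rule; the function and parameter names are illustrative, not the pass's API:

#include <cstdint>
#include <optional>

// Pair only if the alignment is known and at least double the type alignment.
static bool mayFormPair(std::optional<uint64_t> MemAlign,
                        std::optional<uint64_t> TypeAlign) {
  if (!MemAlign || !TypeAlign)
    return false; // unknown size/alignment: reject, as the patch now does
  return *MemAlign >= 2 * *TypeAlign;
}

// mayFormPair(8, 4)            -> true:  two 4-byte loads at an 8-byte-aligned
//                                        address may become one ldp
// mayFormPair(4, 4)            -> false: not aligned enough for the feature
// mayFormPair(std::nullopt, 4) -> false: no information, no pairing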
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AArch64/ldp-stp-unknown-size.mir
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -O2 -mtriple=aarch64 -mcpu=ampere1 -simplify-mir -o - %s -run-pass=aarch64-ldst-opt | FileCheck %s --check-prefixes=CHECK
# RUN: llc -O2 -mtriple=aarch64 -simplify-mir -o - %s -run-pass=aarch64-ldst-opt | FileCheck %s --check-prefixes=CHECK-DEFAULT

--- |
define i32 @ldp_no_size_info(ptr %0) #0 {
%2 = ptrtoint ptr %0 to i64
%3 = and i64 %2, -64
%4 = inttoptr i64 %3 to ptr
%5 = load i32, ptr %4, align 4
%6 = getelementptr inbounds i32, ptr %4, i64 1
%7 = load i32, ptr %6, align 4
%8 = add nsw i32 %7, %5
ret i32 %8
}

...
---
name: ldp_no_size_info
alignment: 64
tracksRegLiveness: true
tracksDebugUserValues: true
liveins:
- { reg: '$x0' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0 (%ir-block.1):
liveins: $x0

; CHECK-LABEL: name: ldp_no_size_info
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = ANDXri killed renamable $x0, 7865
; CHECK-NEXT: renamable $w9 = LDRWui renamable $x8, 0 :: (load unknown-size from %ir.4, align 1)
; CHECK-NEXT: renamable $w8 = LDRWui killed renamable $x8, 1 :: (load unknown-size from %ir.6, align 1)
; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
;
; CHECK-DEFAULT-LABEL: name: ldp_no_size_info
; CHECK-DEFAULT: liveins: $x0
; CHECK-DEFAULT-NEXT: {{ $}}
; CHECK-DEFAULT-NEXT: renamable $x8 = ANDXri killed renamable $x0, 7865
; CHECK-DEFAULT-NEXT: renamable $w9, renamable $w8 = LDPWi renamable $x8, 0 :: (load unknown-size from %ir.4, align 1), (load unknown-size from %ir.6, align 1)
; CHECK-DEFAULT-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0
; CHECK-DEFAULT-NEXT: RET undef $lr, implicit $w0
renamable $x8 = ANDXri killed renamable $x0, 7865
renamable $w9 = LDRWui renamable $x8, 0 :: (load unknown-size from %ir.4)
renamable $w8 = LDRWui killed renamable $x8, 1 :: (load unknown-size from %ir.6)
$w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0
RET undef $lr, implicit $w0

...
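
A note on the test (a reading aid, not part of the patch): the IR computes its pointer with "and i64 %2, -64", rounding the address down to a 64-byte boundary (the immediate 7865 in ANDXri should be the AArch64 logical-immediate encoding of that -64 mask), yet the MIR memory operands deliberately carry "load unknown-size". With -mcpu=ampere1, which enables ldp-aligned-only, the optimizer must therefore leave the two LDRWui instructions alone, while the default subtarget still pairs them into an LDPWi. The masking idiom restated in C++, assuming a plain integer address:

#include <cstdint>

// Round an address down to a 64-byte boundary, as "and i64 %2, -64" does.
static uint64_t alignDown64(uint64_t Addr) {
  return Addr & ~uint64_t{63}; // mask 0xFFFFFFFFFFFFFFC0, i.e. (uint64_t)-64
}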