diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4b81185c6e311..3fa8a3578b4fb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -28113,7 +28113,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { #endif if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && - Size0.hasValue() && Size1.hasValue()) { + Size0.hasValue() && Size1.hasValue() && + // Can't represent a scalable size + fixed offset in LocationSize + (!Size0.isScalable() || SrcValOffset0 == 0) && + (!Size1.isScalable() || SrcValOffset1 == 0)) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = diff --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll index 93a251286cf73..8d330b12055ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll @@ -1,19 +1,43 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s -; FIXME: The i32 load and store pair isn't dead and shouldn't be omitted. +; This showcases a miscompile that was fixed in #90573: +; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores. +; - the load and store of q aliases the upper 128 bits store of p. +; - The aliasing 128 bit store will be between the chain of the scalar +; load/store: +; +; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... +; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... +; +; t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ... +; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... +; t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ... +; +; Previously, the scalar load/store was incorrectly combined away: +; +; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... +; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... +; +; // MISSING +; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... +; // MISSING +; See also pr83017.ll: This is the same code, but relies on vscale_range instead +; of -riscv-v-vector-bits-max=128. define void @f(ptr %p) vscale_range(2,2) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vs4r.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 80 +; CHECK-NEXT: lw a1, 84(a0) +; CHECK-NEXT: addi a2, a0, 80 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vs1r.v v8, (a1) -; CHECK-NEXT: addi a0, a0, 64 -; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: vs1r.v v8, (a2) +; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vs4r.v v12, (a0) +; CHECK-NEXT: addi a2, a0, 64 +; CHECK-NEXT: vs1r.v v8, (a2) +; CHECK-NEXT: sw a1, 84(a0) ; CHECK-NEXT: ret %q = getelementptr inbounds i8, ptr %p, i64 84 %x = load i32, ptr %q