diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 75cddfa16d6db..aea2e06af6aa4 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3276,6 +3276,40 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     // memmove with memcpy, and we don't need to worry about all manner of
     // downsides to splitting and transforming the operations.
 
+    // The tbaa.struct is only being explicit about byte padding. Here we assume
+    // that if the derived type used for the NewAI maps to a single scalar type,
+    // as given by the tbaa.struct, then it is safe to assume that we can use
+    // that type when doing the copying even if it include bit padding. If there
+    // for example would be a union of "_BitInt(3)" and "char" types the
+    // tbaa.struct would have multiple entries indicating the different types
+    // (or there wouldn't be any tbaa.struct)..
+    auto IsSingleTypeAccordingToTBAA = [&]() -> bool {
+      // Only consider the case when we have a tbaa.struct.
+      if (!(AATags && AATags.TBAAStruct))
+        return false;
+      MDNode *MD = AATags.TBAAStruct;
+      uint64_t Offset = NewBeginOffset - BeginOffset;
+      unsigned Count = 0;
+      for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
+        uint64_t InnerOffset =
+            mdconst::extract<ConstantInt>(MD->getOperand(i))->getZExtValue();
+        uint64_t InnerSize =
+            mdconst::extract<ConstantInt>(MD->getOperand(i + 1))
+                ->getZExtValue();
+        // Ignore entries that aren't overlapping with our slice.
+        if (InnerOffset + InnerSize <= Offset ||
+            InnerOffset >= Offset + SliceSize)
+          continue;
+        // Only allow a single match (no unions).
+        if (++Count > 1)
+          return false;
+        // Size/offset must match up.
+        if (InnerSize != SliceSize || Offset != InnerOffset)
+          return false;
+      }
+      return Count == 1;
+    };
+
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memcpy.
     bool EmitMemCpy =
@@ -3283,8 +3317,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
         (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
          SliceSize !=
              DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
-         !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
-         !NewAI.getAllocatedType()->isSingleValueType());
+         !NewAI.getAllocatedType()->isSingleValueType() ||
+         (!DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) &&
+          !IsSingleTypeAccordingToTBAA()));
 
     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
diff --git a/llvm/test/Transforms/SROA/pr64081.ll b/llvm/test/Transforms/SROA/pr64081.ll
index 4b89384213826..ba83e495f56c2 100644
--- a/llvm/test/Transforms/SROA/pr64081.ll
+++ b/llvm/test/Transforms/SROA/pr64081.ll
@@ -30,3 +30,77 @@ bb:
 declare void @use(ptr)
 
 declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+
+; No unions or overlaps in the tbaa.struct. So we can rely on the types
+define void @test2(i3 %x) {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i3 [[X:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[RES:%.*]] = alloca [[B:%.*]], align 8
+; CHECK-NEXT:    store i1 true, ptr [[RES]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
+; CHECK-NEXT:    [[TMP_SROA_2_0_RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 1
+; CHECK-NEXT:    store i3 [[X]], ptr [[TMP_SROA_2_0_RES_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8 @use(ptr [[RES]])
+; CHECK-NEXT:    ret void
+;
+bb:
+  %res = alloca %B
+  %tmp = alloca %B
+  %tmp.1 = getelementptr i8, ptr %tmp, i64 1
+  store i1 1, ptr %tmp
+  store i3 %x, ptr %tmp.1
+  call void @llvm.memcpy.p0.p0.i64(ptr %res, ptr %tmp, i64 2, i1 false), !tbaa.struct !6
+  call i8 @use(ptr %res)
+  ret void
+}
+
+; Union preventing SROA from removing the memcpy for the first byte.
+define void @test3(i3 %x) {
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: i3 [[X:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[RES:%.*]] = alloca [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP_SROA_0:%.*]] = alloca i1, align 8
+; CHECK-NEXT:    store i1 true, ptr [[TMP_SROA_0]], align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RES]], ptr align 8 [[TMP_SROA_0]], i64 1, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT:    [[TMP_SROA_2_0_RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 1
+; CHECK-NEXT:    store i3 [[X]], ptr [[TMP_SROA_2_0_RES_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT7]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8 @use(ptr [[RES]])
+; CHECK-NEXT:    ret void
+;
+bb:
+  %res = alloca %B
+  %tmp = alloca %B
+  %tmp.1 = getelementptr i8, ptr %tmp, i64 1
+  store i1 1, ptr %tmp
+  store i3 %x, ptr %tmp.1
+  call void @llvm.memcpy.p0.p0.i64(ptr %res, ptr %tmp, i64 2, i1 false), !tbaa.struct !9
+  call i8 @use(ptr %res)
+  ret void
+}
+
+!1 = !{!"_BitInt(7)", !4, i64 0}
+!2 = !{!"_BitInt(1)", !4, i64 0}
+!3 = !{!"_BitInt(3)", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{i64 0, i64 1, !7, i64 1, i64 1, !8}
+!7 = !{!2, !2, i64 0}
+!8 = !{!3, !3, i64 0}
+!9 = !{i64 0, i64 1, !10, i64 0, i64 1, !7, i64 1, i64 1, !8}
+!10 = !{!1, !1, i64 0}
+
+;.
+; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 1, [[META1:![0-9]+]], i64 1, i64 1, [[META5:![0-9]+]]}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"_BitInt(1)", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"_BitInt(3)", [[META3]], i64 0}
+; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 1, [[META5]]}
+; CHECK: [[TBAA_STRUCT8]] = !{i64 0, i64 1, [[META9:![0-9]+]], i64 0, i64 1, [[META1]], i64 1, i64 1, [[META5]]}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; CHECK: [[META10]] = !{!"_BitInt(7)", [[META3]], i64 0}
+;.