Skip to content

Commit d7f72c5

Browse files
Nicoshev and facebook-github-bot
authored and committed
Pull ARM's matrix transpose PR (#3660)
Summary: X-link: facebookresearch/FBGEMM#736 Bring ARM's PR: #3510 Differential Revision: D67396309
1 parent 7017af2 commit d7f72c5

File tree

5 files changed

+822
-4
lines changed

5 files changed

+822
-4
lines changed

defs.bzl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
23
# All rights reserved.
34
# This source code is licensed under the BSD-style license found in the
45
# LICENSE file in the root directory of this source tree.
@@ -180,6 +181,19 @@ def get_fbgemm_inline_sve_srcs(msvc = False, buck = False):
180181
})
181182
return asm_srcs if not msvc else intrinsics_srcs
182183

184+
def get_fbgemm_inline_neon_srcs(msvc = False, buck = False):
185+
intrinsics_srcs = ["src/UtilsNeon.cc"]
186+
187+
#FP16 kernels contain inline assembly and inline assembly syntax for MSVC is different.
188+
asm_srcs = ["src/UtilsNeon.cc"]
189+
if buck:
190+
return select({
191+
"DEFAULT": asm_srcs,
192+
"ovr_config//compiler:cl": intrinsics_srcs,
193+
"ovr_config//cpu:arm64": intrinsics_srcs,
194+
})
195+
return asm_srcs if not msvc else intrinsics_srcs
196+
183197
def get_fbgemm_autovec_srcs():
184198
return [
185199
"src/EmbeddingSpMDMAutovec.cc",

src/TransposeUtils.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
34
* All rights reserved.
45
*
56
* This source code is licensed under the BSD-style license found in the
@@ -47,9 +48,9 @@ void transpose_simd(
4748
return;
4849
}
4950

50-
#if HAVE_SVE
51+
#ifdef __aarch64__
5152
if constexpr (std::is_same<T, float>::value) {
52-
internal::transpose_sve<T>(M, N, src, ld_src, dst, ld_dst);
53+
internal::transpose_neon<T>(M, N, src, ld_src, dst, ld_dst);
5354
} else {
5455
transpose_ref<T>(M, N, src, ld_src, dst, ld_dst);
5556
}

src/TransposeUtils.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
34
* All rights reserved.
45
*
56
* This source code is licensed under the BSD-style license found in the
@@ -64,9 +65,9 @@ void transpose_avx512(
6465

6566
#ifdef __aarch64__
6667
/**
67-
* @brief Transpose a matrix using Intel AVX2.
68+
* @brief Transpose a matrix using SVE.
6869
*
69-
* This is called if the code is running on a CPU with Intel AVX2 support.
70+
* This is called if the code is running on a CPU with SVE support.
7071
*/
7172
template <typename T>
7273
void transpose_sve(
@@ -76,6 +77,20 @@ void transpose_sve(
7677
int64_t ld_src,
7778
T* dst,
7879
int64_t ld_dst);
80+
81+
/**
82+
* @brief Transpose a matrix using NEON.
83+
*
84+
* This is called if the code is running on a CPU with NEON support.
85+
*/
86+
template <typename T>
87+
void transpose_neon(
88+
int64_t M,
89+
int64_t N,
90+
const T* src,
91+
int64_t ld_src,
92+
T* dst,
93+
int64_t ld_dst);
7994
#endif // __aarch64__
8095

8196
} // namespace internal

0 commit comments

Comments
 (0)