Skip to content

Commit a5e7c0e

Browse files
authored
Merge pull request #5396 from abhishek-iitmadras/abhishekk_bfloat16
ARM64: Enable bfloat16 kernels by default
2 parents 33b5054 + 6356190 commit a5e7c0e

File tree

7 files changed

+20
-3
lines changed

7 files changed

+20
-3
lines changed

.github/workflows/apple_m.yml

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,10 +87,16 @@ jobs:
8787
echo "max_size = 300M" > ~/.ccache/ccache.conf
8888
echo "compression = true" >> ~/.ccache/ccache.conf
8989
ccache -s
90+
91+
- name: Add gfortran runtime to link path
92+
if: matrix.build == 'make' && runner.os == 'macOS'
93+
run: |
94+
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
95+
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
96+
echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV
9097
9198
- name: Build OpenBLAS
9299
run: |
93-
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
94100
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
95101
export CC="/opt/homebrew/opt/llvm/bin/clang"
96102
case "${{ matrix.build }}" in

.github/workflows/dynamic_arch.yml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,14 @@ jobs:
8989
echo "max_size = 300M" > ~/.ccache/ccache.conf
9090
echo "compression = true" >> ~/.ccache/ccache.conf
9191
ccache -s
92+
93+
- name: Add gfortran runtime to link path
94+
if: matrix.build == 'make' && runner.os == 'macOS'
95+
run: |
96+
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
97+
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
98+
# Preserve whatever LDFLAGS may already contain
99+
echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"
92100
93101
- name: Build OpenBLAS
94102
run: |

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,7 @@ In chronological order:
255255

256256
* Abhishek Kumar <https://github.com/abhishek-iitmadras>
257257
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
258+
* [2025-07-23] ARM64-Enable bfloat16 kernels by default
258259

259260
* Sharif Inamdar <[email protected]>
260261
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel

Makefile.system

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -270,6 +270,7 @@ SMALL_MATRIX_OPT = 1
270270
BUILD_BFLOAT16 = 1
271271
else ifeq ($(ARCH), arm64)
272272
SMALL_MATRIX_OPT = 1
273+
BUILD_BFLOAT16 = 1
273274
endif
274275
ifeq ($(ARCH), loongarch64)
275276
SMALL_MATRIX_OPT = 1
@@ -425,10 +426,8 @@ ifeq ($(OSNAME), Darwin)
425426
ifndef MACOSX_DEPLOYMENT_TARGET
426427
ifeq ($(ARCH), arm64)
427428
export MACOSX_DEPLOYMENT_TARGET=11.0
428-
ifeq ($(C_COMPILER), GCC)
429429
export NO_SVE = 1
430430
export NO_SME = 1
431-
endif
432431
else
433432
export MACOSX_DEPLOYMENT_TARGET=10.8
434433
endif

kernel/arm64/bgemm_kernel_4x4_neoversev1.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
* *****************************************************************************/
2828

2929
#include <arm_sve.h>
30+
#include <arm_neon.h>
3031

3132
#include "common.h"
3233

kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
* *****************************************************************************/
2828

2929
#include <arm_sve.h>
30+
#include <arm_neon.h>
3031

3132
#include "common.h"
3233

kernel/arm64/bgemv_n_sve_v3x4.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
#include "common.h"
2929

3030
#include <arm_sve.h>
31+
#include <arm_neon.h>
3132

3233
#define UPDATE_PTRSx2 \
3334
a_ptr1 = a_ptr0 + lda;

0 commit comments

Comments
 (0)