Skip to content

Commit 55d4d47

Browse files
authored
Merge pull request #83 from xianyi/develop
rebase
2 parents c8f029a + a270894 commit 55d4d47

File tree

9 files changed, with 81 additions and 67 deletions.

.travis.yml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,18 @@ matrix:
4343
- TARGET_BOX=IBMZ_LINUX
4444
- BTYPE="BINARY=64 USE_OPENMP=1"
4545

46+
- <<: *test-ubuntu
47+
os: linux
48+
dist: focal
49+
arch: s390x
50+
compiler: clang
51+
before_script:
52+
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
53+
env:
54+
# for matrix annotation only
55+
- TARGET_BOX=IBMZ_LINUX
56+
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
57+
4658
- <<: *test-ubuntu
4759
env:
4860
- TARGET_BOX=LINUX64

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,7 @@ In chronological order:
187187
* Marius Hillenbrand <https://github.com/mhillenibm>
188188
* [2020-05-12] Revise dynamic architecture detection for IBM z
189189
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
190+
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
190191

191192
* Danfeng Zhang <https://github.com/craft-zhang>
192193
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

Makefile.system

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,6 @@ endif
295295
ifeq ($(C_COMPILER), GCC)
296296
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
297297
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
298-
GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
299298
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
300299
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
301300
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
@@ -594,34 +593,36 @@ endif
594593
ifeq ($(ARCH), zarch)
595594
DYNAMIC_CORE = ZARCH_GENERIC
596595

597-
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
598-
ifeq ($(GCCVERSIONGT5), 1)
599-
ZARCH_SUPPORT_Z13 := 1
600-
else ifeq ($(GCCVERSIONEQ5), 1)
601-
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
602-
ZARCH_SUPPORT_Z13 := 1
603-
endif
604-
endif
605-
606-
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
607-
ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
608-
ZARCH_SUPPORT_Z13 := 1
609-
endif
610-
endif
611-
612-
ifeq ($(ZARCH_SUPPORT_Z13), 1)
596+
# if the compiler accepts -march=arch11 or -march=z13 and can compile a file
597+
# with z13-specific inline assembly, then we can include support for Z13.
598+
# note: -march=z13 is equivalent to -march=arch11 yet some compiler releases
599+
# only support one or the other.
600+
# note: LLVM version 6.x supported -march=z13 yet could not handle vector
601+
# registers in inline assembly, so the check for supporting the -march flag is
602+
# not enough.
603+
ZARCH_TEST_COMPILE=-c $(TOPDIR)/kernel/zarch/damin_z13.c -I$(TOPDIR) -o /dev/null > /dev/null 2> /dev/null
604+
ZARCH_CC_SUPPORTS_ARCH11=$(shell $(CC) -march=arch11 $(ZARCH_TEST_COMPILE) && echo 1)
605+
ZARCH_CC_SUPPORTS_Z13=$(shell $(CC) -march=z13 $(ZARCH_TEST_COMPILE) && echo 1)
606+
607+
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH11), $(ZARCH_CC_SUPPORTS_Z13)), 1)
613608
DYNAMIC_CORE += Z13
609+
CCOMMON_OPT += -DDYN_Z13
614610
else
615-
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
611+
$(info OpenBLAS: Not building Z13 kernels because the compiler $(CC) does not support it)
616612
endif
617613

618-
ifeq ($(GCCVERSIONGTEQ7), 1)
614+
# as above for z13, check for -march=arch12 and z14 support in the compiler.
615+
ZARCH_CC_SUPPORTS_ARCH12=$(shell $(CC) -march=arch12 $(ZARCH_TEST_COMPILE) && echo 1)
616+
ZARCH_CC_SUPPORTS_Z14=$(shell $(CC) -march=z14 $(ZARCH_TEST_COMPILE) && echo 1)
617+
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH12), $(ZARCH_CC_SUPPORTS_Z14)), 1)
619618
DYNAMIC_CORE += Z14
619+
CCOMMON_OPT += -DDYN_Z14
620620
else
621-
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
622-
endif
621+
$(info OpenBLAS: Not building Z14 kernels because the compiler $(CC) does not support it)
623622
endif
624623

624+
endif # ARCH zarch
625+
625626
ifeq ($(ARCH), power)
626627
DYNAMIC_CORE = POWER6
627628
DYNAMIC_CORE += POWER8

driver/others/dynamic_zarch.c

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,6 @@
11
#include "common.h"
22
#include <stdbool.h>
33

4-
// Gate kernels for z13 and z14 on gcc version
5-
#if (__GNUC__ == 5 && __GNUC_MINOR__ >= 2) || __GNUC__ >= 6 || \
6-
/* RHEL 7 since 7.3: */ \
7-
(__GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 5 && \
8-
__GNUC_RH_RELEASE__ >= 11)
9-
#define HAVE_Z13_SUPPORT
10-
#endif
11-
12-
#if __GNUC__ >= 7
13-
#define HAVE_Z14_SUPPORT
14-
#endif
15-
164
// Guard the use of getauxval() on glibc version >= 2.16
175
#ifdef __GLIBC__
186
#include <features.h>
@@ -47,10 +35,10 @@ static unsigned long get_hwcap(void) {
4735
#endif // __GLIBC__
4836

4937
extern gotoblas_t gotoblas_ZARCH_GENERIC;
50-
#ifdef HAVE_Z13_SUPPORT
38+
#ifdef DYN_Z13
5139
extern gotoblas_t gotoblas_Z13;
5240
#endif
53-
#ifdef HAVE_Z14_SUPPORT
41+
#ifdef DYN_Z14
5442
extern gotoblas_t gotoblas_Z14;
5543
#endif
5644

@@ -66,17 +54,21 @@ static char* corename[] = {
6654
};
6755

6856
char* gotoblas_corename(void) {
69-
#ifdef HAVE_Z13_SUPPORT
57+
#ifdef DYN_Z13
7058
if (gotoblas == &gotoblas_Z13) return corename[1];
7159
#endif
72-
#ifdef HAVE_Z14_SUPPORT
60+
#ifdef DYN_Z14
7361
if (gotoblas == &gotoblas_Z14) return corename[2];
7462
#endif
7563
if (gotoblas == &gotoblas_ZARCH_GENERIC) return corename[3];
7664

7765
return corename[0];
7866
}
7967

68+
#ifndef HWCAP_S390_VXE
69+
#define HWCAP_S390_VXE 8192
70+
#endif
71+
8072
/**
8173
* Detect the fitting set of kernels by retrieving the CPU features supported by
8274
* OS from the auxiliary value AT_HWCAP and choosing the set of kernels
@@ -89,15 +81,15 @@ static gotoblas_t* get_coretype(void) {
8981

9082
unsigned long hwcap __attribute__((unused)) = get_hwcap();
9183

84+
#ifdef DYN_Z14
9285
// z14 and z15 systems: exploit Vector Facility (SIMD) and
9386
// Vector-Enhancements Facility 1 (float SIMD instructions), if present.
94-
#ifdef HAVE_Z14_SUPPORT
9587
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
9688
return &gotoblas_Z14;
9789
#endif
9890

91+
#ifdef DYN_Z13
9992
// z13: Vector Facility (SIMD for double)
100-
#ifdef HAVE_Z13_SUPPORT
10193
if (hwcap & HWCAP_S390_VX)
10294
return &gotoblas_Z13;
10395
#endif
@@ -123,19 +115,27 @@ static gotoblas_t* force_coretype(char* coretype) {
123115
}
124116
}
125117

126-
switch (found)
127-
{
128-
#ifdef HAVE_Z13_SUPPORT
129-
case 1: return (&gotoblas_Z13);
118+
if (found == 1) {
119+
#ifdef DYN_Z13
120+
return &gotoblas_Z13;
121+
#else
122+
openblas_warning(1, "Z13 support not compiled in");
123+
return NULL;
130124
#endif
131-
#ifdef HAVE_Z14_SUPPORT
132-
case 2: return (&gotoblas_Z14);
125+
} else if (found == 2) {
126+
#ifdef DYN_Z14
127+
return &gotoblas_Z14;
128+
#else
129+
openblas_warning(1, "Z14 support not compiled in");
130+
return NULL;
133131
#endif
134-
case 3: return (&gotoblas_ZARCH_GENERIC);
135-
default: return NULL;
132+
} else if (found == 3) {
133+
return &gotoblas_ZARCH_GENERIC;
136134
}
135+
137136
snprintf(message, 128, "Core not found: %s\n", coretype);
138137
openblas_warning(1, message);
138+
return NULL;
139139
}
140140

141141
void gotoblas_dynamic_init(void) {

lapack-netlib/TESTING/EIG/cchkst2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1014,8 +1014,8 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
10141014
* the one from above. Compare it with D1 computed
10151015
* using the 1-stage.
10161016
*
1017-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1018-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1017+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1018+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10191019
CALL CLACPY( 'U', N, N, A, LDA, V, LDU )
10201020
LH = MAX(1, 4*N)
10211021
LW = LWORK - LH
@@ -1048,8 +1048,8 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
10481048
* the one from above. Compare it with D1 computed
10491049
* using the 1-stage.
10501050
*
1051-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1052-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1051+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1052+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10531053
CALL CLACPY( 'L', N, N, A, LDA, V, LDU )
10541054
CALL CHETRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU,
10551055
$ WORK, LH, WORK( LH+1 ), LW, IINFO )

lapack-netlib/TESTING/EIG/dchksb2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -670,8 +670,8 @@ SUBROUTINE DCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
670670
* the one from above. Compare it with D1 computed
671671
* using the DSBTRD.
672672
*
673-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
674-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
673+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
674+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
675675
CALL DLACPY( ' ', K+1, N, A, LDA, U, LDU )
676676
LH = MAX(1, 4*N)
677677
LW = LWORK - LH
@@ -743,8 +743,8 @@ SUBROUTINE DCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
743743
* the one from above. Compare it with D1 computed
744744
* using the DSBTRD.
745745
*
746-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
747-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
746+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
747+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
748748
CALL DLACPY( ' ', K+1, N, A, LDA, U, LDU )
749749
LH = MAX(1, 4*N)
750750
LW = LWORK - LH

lapack-netlib/TESTING/EIG/dchkst2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -999,8 +999,8 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
999999
* the one from above. Compare it with D1 computed
10001000
* using the 1-stage.
10011001
*
1002-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1003-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1002+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1003+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10041004
CALL DLACPY( "U", N, N, A, LDA, V, LDU )
10051005
LH = MAX(1, 4*N)
10061006
LW = LWORK - LH
@@ -1032,8 +1032,8 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
10321032
* the one from above. Compare it with D1 computed
10331033
* using the 1-stage.
10341034
*
1035-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1036-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1035+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1036+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10371037
CALL DLACPY( "L", N, N, A, LDA, V, LDU )
10381038
CALL DSYTRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU,
10391039
$ WORK, LH, WORK( LH+1 ), LW, IINFO )

lapack-netlib/TESTING/EIG/zchkhb2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -680,8 +680,8 @@ SUBROUTINE ZCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
680680
* the one from above. Compare it with D1 computed
681681
* using the DSBTRD.
682682
*
683-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
684-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
683+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
684+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
685685
CALL ZLACPY( ' ', K+1, N, A, LDA, U, LDU )
686686
LH = MAX(1, 4*N)
687687
LW = LWORK - LH
@@ -753,8 +753,8 @@ SUBROUTINE ZCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
753753
* the one from above. Compare it with D1 computed
754754
* using the DSBTRD.
755755
*
756-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
757-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
756+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
757+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
758758
CALL ZLACPY( ' ', K+1, N, A, LDA, U, LDU )
759759
LH = MAX(1, 4*N)
760760
LW = LWORK - LH

lapack-netlib/TESTING/EIG/zchkst2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1014,8 +1014,8 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
10141014
* the one from above. Compare it with D1 computed
10151015
* using the 1-stage.
10161016
*
1017-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1018-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1017+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1018+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10191019
CALL ZLACPY( 'U', N, N, A, LDA, V, LDU )
10201020
LH = MAX(1, 4*N)
10211021
LW = LWORK - LH
@@ -1048,8 +1048,8 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
10481048
* the one from above. Compare it with D1 computed
10491049
* using the 1-stage.
10501050
*
1051-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1052-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1051+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1052+
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10531053
CALL ZLACPY( 'L', N, N, A, LDA, V, LDU )
10541054
CALL ZHETRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU,
10551055
$ WORK, LH, WORK( LH+1 ), LW, IINFO )

0 commit comments

Comments
 (0)