Skip to content

Commit 6208c98

Browse files
authored
Merge pull request #104 from xianyi/develop
rebase
2 parents 9cac379 + 8e20ab2 commit 6208c98

File tree

7 files changed, with 71 additions (+71) and 41 deletions (-41).

Makefile.power

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ USE_OPENMP = 1
1010
endif
1111

1212
ifeq ($(CORE), POWER10)
13-
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
13+
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
1414
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
1515
endif
1616

exports/gensymbol

Lines changed: 70 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@
5151
zgeadd, dzsum);
5252

5353
@blasobjs = (lsame, xerbla);
54-
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
54+
@bfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
5555
@cblasobjsc = (
5656
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
5757
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
@@ -72,7 +72,7 @@
7272
);
7373

7474
@cblasobjss = (
75-
cblas_sasum, cblas_saxpy,
75+
cblas_sasum, cblas_saxpy, cblas_saxpby,
7676
cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
7777
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
7878
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
@@ -92,9 +92,9 @@
9292
cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy
9393
);
9494

95-
@cblasobjs = ( cblas_xerbla );
95+
@cblasobjs = ( cblas_xerbla );
9696

97-
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
97+
@bfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
9898

9999
@exblasobjs = (
100100
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
@@ -415,7 +415,7 @@ zpotri,
415415
cgeqrt, cgeqrt2, cgeqrt3, cgemqrt,
416416
ctpqrt, ctpqrt2, ctpmqrt, ctprfb,
417417
);
418-
@lapack2objszc = (
418+
@lapackobjs2zc = (
419419
# ZCLASRC -- Double-single mixed precision complex routines called from
420420
# single, single-extra and double precision complex LAPACK
421421
# routines (i.e. from CLASRC, CXLASRC, ZLASRC).
@@ -425,7 +425,7 @@ zpotri,
425425
cpotrs,
426426
);
427427

428-
@lapack2objsd = (
428+
@lapackobjs2d = (
429429
# DLASRC -- Double precision real LAPACK routines
430430
# already provided by @lapackobjs:
431431
# dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri,
@@ -568,7 +568,7 @@ zpotri,
568568
);
569569
# functions added for lapack-3.6.0
570570

571-
@lapack2objsc = ( @lapack2objsc,
571+
@lapackobjs2c = ( @lapackobjs2c,
572572
cgejsv,
573573
cgesvdx,
574574
cgesvj,
@@ -604,7 +604,7 @@ zpotri,
604604
csyr2,
605605
cunm22,
606606
);
607-
@lapackobjs2d = (@lapack2objsd,
607+
@lapackobjs2d = (@lapackobjs2d,
608608
dbdsvdx,
609609
dgesvdx,
610610
dgetrf2,
@@ -637,7 +637,7 @@ zpotri,
637637
dpotrf2,
638638
dsecnd,
639639
);
640-
@lapack2objss = (@lapack2objss,
640+
@lapackobjs2s = (@lapackobjs2s,
641641
sbdsvdx,
642642
second,
643643
sgesvdx,
@@ -670,7 +670,7 @@ zpotri,
670670
sorm22,
671671
spotrf2,
672672
);
673-
@lapack2objsz = (@lapack2objsz,
673+
@lapackobjs2z = (@lapackobjs2z,
674674
zgejsv,
675675
zgesvdx,
676676
zgesvj,
@@ -707,7 +707,7 @@ zpotri,
707707
zunm22,
708708
);
709709
# functions added for lapack-3.7.0
710-
@lapack2objss = (@lapack2objss,
710+
@lapackobjs2s = (@lapackobjs2s,
711711
slarfy,
712712
strevc3,
713713
sgelqt,
@@ -726,7 +726,7 @@ zpotri,
726726
stplqt2,
727727
stpmlqt,
728728
);
729-
@lapack2objsd = (@lapack2objsd,
729+
@lapackobjs2d = (@lapackobjs2d,
730730
dlarfy,
731731
dsyconvf,
732732
dtrevc3,
@@ -746,7 +746,7 @@ zpotri,
746746
dtplqt2,
747747
dtpmlqt,
748748
);
749-
@lapack2objsc = (@lapack2objsc,
749+
@lapackobjs2c = (@lapackobjs2c,
750750
clarfy,
751751
csyconvf,
752752
ctrevc3,
@@ -766,7 +766,7 @@ zpotri,
766766
ctplqt2,
767767
ctpmlqt,
768768
);
769-
@lapack2objsz = (@lapack2objsz,
769+
@lapackobjs2z = (@lapackobjs2z,
770770
zlarfy,
771771
zsyconvf,
772772
ztrevc3,
@@ -786,7 +786,7 @@ zpotri,
786786
zlamswlq,
787787
zgemlq,
788788
);
789-
@lapack2objs = (@lapack2objs,
789+
@lapackobjs2 = (@lapackobjs2,
790790
sladiv1,
791791
dladiv1,
792792
iparam2stage,
@@ -796,21 +796,21 @@ zpotri,
796796
ilaenv2stage,
797797
);
798798
# functions added for lapack-3.9.0
799-
@lapack2objsc = (@lapack2objsc,
799+
@lapackobjs2c = (@lapackobjs2c,
800800
cgesvdq,
801801
cungtsqr,
802802
dcombssq,
803803
);
804-
@lapack2objsd = (@lapack2objsd,
804+
@lapackobjs2d = (@lapackobjs2d,
805805
dgesvdq,
806806
dorgtsqr,
807807
);
808-
@lapack2objss = (@lapack2objss,
808+
@lapackobjs2s = (@lapackobjs2s,
809809
scombssq,
810810
sgesvdq,
811811
sorgtsqr,
812812
);
813-
@lapack2objsz = (@lapack2objsz,
813+
@lapackobjs2z = (@lapackobjs2z,
814814
zgesvdq,
815815
zungtsqr
816816
);
@@ -835,10 +835,29 @@ zpotri,
835835
dlatzm, dtzrqf);
836836

837837
@lapack_deprecated_objss = (
838+
sgelsx,
838839
sgegs,
839-
sgegv,
840+
sgegv,
841+
sgeqpf,
842+
sggsvd,
843+
sggsvp,
844+
slahrd,
845+
slatzm,
846+
stzrqf
840847
);
841-
848+
849+
@lapack_deprecated_objsz = (
850+
zgegs,
851+
zgegv,
852+
zgelsx,
853+
zgeqpf,
854+
zggsvd,
855+
zggsvp,
856+
zlahrd,
857+
zlatzm,
858+
ztzrqf
859+
);
860+
842861
@lapacke_deprecated_objsc = (
843862
LAPACKE_cggsvp,
844863
LAPACKE_cggsvp_work,
@@ -3590,48 +3609,66 @@ use File::Basename;
35903609
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
35913610

35923611
if ($ARGV[12] == 1) {
3593-
@blasobjs = (@blasobjs, @halfblasobjs);
3594-
@cblasobjs = (@cblasobjs, @halfcblasobjs);
3612+
@blasobjs = (@blasobjs, @bfblasobjs);
3613+
@cblasobjs = (@cblasobjs, @bfcblasobjs);
35953614
}
35963615
if ($ARGV[13] == 1) {
35973616
@blasobjs = (@blasobjs, @blasobjss);
35983617
@cblasobjs = (@cblasobjs, @cblasobjss);
35993618
@lapackobjs = (@lapackobjs, @lapackobjss);
3600-
@lapack2objs = (@lapack2objs, @lapack2objss);
3619+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
3620+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
3621+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
3622+
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objss);
3623+
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objss);
36013624
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
36023625
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
3603-
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
36043626
}
36053627
if ($ARGV[14] == 1) {
36063628
@blasobjs = (@blasobjs, @blasobjsd);
36073629
@cblasobjs = (@cblasobjs, @cblasobjsd);
36083630
@lapackobjs = (@lapackobjs, @lapackobjsd);
3609-
@lapack2objs = (@lapack2objs, @lapack2objsd);
3631+
if ($ARGV[13] == 0) {
3632+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
3633+
}
3634+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d, @lapackobjs2dz);
3635+
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsd);
3636+
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsd);
36103637
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
36113638
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
3612-
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
36133639
}
36143640
if ($ARGV[15] == 1) {
36153641
@blasobjs = (@blasobjs, @blasobjsc);
36163642
@cblasobjs = (@cblasobjs, @cblasobjsc);
36173643
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc);
3618-
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsc);
3644+
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsc);
36193645
@lapackobjs = (@lapackobjs, @lapackobjsc);
3620-
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
3646+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2c, @lapackobjs2zc);
3647+
if ($ARGV[13] == 0) {
3648+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
3649+
}
3650+
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsc);
3651+
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsc);
36213652
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
36223653
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
3623-
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
36243654
}
36253655
if ($ARGV[16] == 1) {
36263656
@blasobjs = (@blasobjs, @blasobjsz);
36273657
@cblasobjs = (@cblasobjs, @cblasobjsz);
36283658
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz);
3629-
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsz);
3659+
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsz);
36303660
@lapackobjs = (@lapackobjs, @lapackobjsz);
3631-
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
3661+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2z);
3662+
if ($ARGV[15] == 0) {
3663+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2zc);
3664+
}
3665+
if ($ARGV[14] == 0) {
3666+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz);
3667+
}
3668+
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsz);
3669+
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsz);
36323670
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
36333671
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
3634-
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
36353672
}
36363673
if ($ARGV[8] == 1) {
36373674
#ONLY_CBLAS=1

kernel/power/ctrmm_kernel_8x4_power8.S

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,12 +82,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 400
8685
#define STACKSIZE 592
8786
#define ALPHA_R_SP 304+192(SP)
8887
#define ALPHA_I_SP 312+192(SP)
8988
#else
90-
#define STACKSIZE 256
9189
#define STACKSIZE 452
9290
#define ALPHA_R_SP 224+196(SP)
9391
#define ALPHA_I_SP 232+196(SP)

kernel/power/dgemm_kernel_16x4_power8.S

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,12 +82,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 320
8685
#define STACKSIZE 512
8786
#define ALPHA_SP 296+192(SP)
8887
#define FZERO 304+192(SP)
8988
#else
90-
#define STACKSIZE 240
9189
#define STACKSIZE 440
9290
#define ALPHA_SP 224+200(SP)
9391
#define FZERO 232+200(SP)

kernel/power/dtrmm_kernel_16x4_power8.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 320
8685
#define STACKSIZE 520
8786
#define ALPHA_SP 296+200(SP)
8887
#define FZERO 304+200(SP)

kernel/power/dtrsm_kernel_LT_16x4_power8.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@
4747
#endif
4848

4949
#ifdef __64BIT__
50-
#define STACKSIZE 320
5150
#define STACKSIZE 520
5251
#define ALPHA 296+200(SP)
5352
#define FZERO 304+200(SP)

kernel/power/strmm_kernel_16x8_power8.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 340
8685
#define STACKSIZE 540
8786
#define ALPHA_SP 296+200(SP)
8887
#define FZERO 304+200(SP)

0 commit comments

Comments
 (0)