import atlas-3.10.3-8.el8

This commit is contained in:
CentOS Sources 2020-11-03 06:50:33 -05:00 committed by Andrew Lukoshko
parent 15a17e6da2
commit fdf3ef1703
11 changed files with 1024 additions and 180 deletions

View File

@ -1,6 +1,9 @@
0e11ec19a521973eaa551954debd112c21479e9c SOURCES/ARMa732.tar.bz2
9398518fe55b4a544278237bc639656e04543c50 SOURCES/ARMv732NEON.tar.bz2
d2f7a62aacdc5091aaa673a311a23f521e5c6486 SOURCES/IBMz1264.tar.bz2
042c0b9df85a9a469e20cf0801f83b03ec40425d SOURCES/IBMz1364VXZ.tar.bz2
352e057319fa7503cd74a0ab81055dc286cc1c45 SOURCES/IBMz1464VXZ2.tar.bz2
0abb8f638b8ffdc13994d533d8a4febcab364f2f SOURCES/IBMz1564VXZ2.tar.bz2
b3ee9bca1510b11c6aa671ba5ba7dff8918ce0cf SOURCES/IBMz932.tar.bz2
43f8d8eaf8cc62bc4665df3550b77e95f3dced22 SOURCES/IBMz964.tar.bz2
c47ac6f00d7bf4ab882e71fa1ab894cc551c77b7 SOURCES/POWER332.tar.bz2

3
.gitignore vendored
View File

@ -1,6 +1,9 @@
SOURCES/ARMa732.tar.bz2
SOURCES/ARMv732NEON.tar.bz2
SOURCES/IBMz1264.tar.bz2
SOURCES/IBMz1364VXZ.tar.bz2
SOURCES/IBMz1464VXZ2.tar.bz2
SOURCES/IBMz1564VXZ2.tar.bz2
SOURCES/IBMz932.tar.bz2
SOURCES/IBMz964.tar.bz2
SOURCES/POWER332.tar.bz2

View File

@ -0,0 +1,30 @@
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:03:52 +0100
Subject: [PATCH 1/8] Avoid c99 standard compiler
When probing for a usable GCC, the existing code already dropped path
names that contained "c89" or "c90", because these compilers don't have
the GCC extensions enabled. This patch also drops names with "c99" in
them.
---
CONFIG/src/atlconf_misc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
index 63cb1ef..fb62214 100644
--- a/CONFIG/src/atlconf_misc.c
+++ b/CONFIG/src/atlconf_misc.c
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
int i;
cmpname = NameWithoutPath(comp);
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
+ strstr(cmpname, "c99"))
{
free(cmpname);
return(0);
--
2.23.0

View File

@ -0,0 +1,38 @@
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:20:19 +0100
Subject: [PATCH 2/8] Fix -rpath-link command line options
The "-rpath-link" command line options were written in the wrong syntax,
causing errors in the build. This is fixed.
---
makes/Make.lib | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/makes/Make.lib b/makes/Make.lib
index 4ceff02..b322a32 100644
--- a/makes/Make.lib
+++ b/makes/Make.lib
@@ -47,11 +47,11 @@ cshared : fat_cshared
#
LDTRY_WIN:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
GCCTRY_norp_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
@@ -113,7 +113,7 @@ TRYALL_WIN :
#
LDTRY:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) \
+ -rpath-link=$(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
$(GOODGCC) -shared -o $(outso).$(so_ver) \

View File

@ -0,0 +1,55 @@
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 5 Dec 2018 18:59:15 +0100
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
The header file atlas_simd.h contained a syntax error and a few functional
errors that affected IBM z13. It prevented any SIMD kernels from being
compiled successfully for that platform. This is fixed. The macro
vec_madd is avoided, because some GCC versions don't implement it
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
instead.
---
include/atlas_simd.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index baee6b1..68daf79 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -69,7 +69,7 @@
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_VXZ)
- #if ATL_VLEN != 2;
+ #if ATL_VLEN != 2
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_NEON)
@@ -390,19 +390,19 @@
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
#endif
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
#define ATL_vcopy(d_, s_) d_ = s_
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
{ ATL_VTYPE t_;\
t_ = vec_splat(s0_, 1); \
- s0 += t_; \
+ s0_ += t_; \
}
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
--
2.23.0

View File

@ -0,0 +1,46 @@
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 19:44:32 +0100
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
The probing of the L1 data cache size is sometimes not reliable. This can
cause the tuning to yield varying, sub-obtimal results. But on Linux the
L1 data cache size can usually be retrieved with sysconf instead, which is
faster and more reliable. Do this whenever possible.
---
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
index e62a273..dffa76e 100644
--- a/tune/sysinfo/L1CacheSize.c
+++ b/tune/sysinfo/L1CacheSize.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#define REPS 4096
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
exit(-1);
}
if (nargs > 1) MaxSize = atoi(args[1]);
- L1Size = GetL1Size(MaxSize, 1.08);
+
+#ifdef _SC_LEVEL1_DCACHE_SIZE
+ {
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
+ }
+#endif
+
+ if (!L1Size)
+ L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
--
2.23.0

View File

@ -0,0 +1,68 @@
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 20:06:27 +0100
Subject: [PATCH 5/8] Optimizations for IBM z13
Perform some optimizations for IBM z13:
- Compile with -O2 instead of -O.
- Streamline vector loads/stores.
- Define the vvrsum2 macro.
Also, use the compile option -march=z13 instead of -march=native.
---
CONFIG/src/atlcomp.txt | 8 +++-----
include/atlas_simd.h | 11 +++++------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index aa31604..2ac71cf 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
'gfortran' '-O3 -funroll-loops'
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-O3 -funroll-loops'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
- 'gcc' '-march=native -O -mvx -mzvector'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
- 'gfortran' '-march=native -O -mvx -mzvector'
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index 68daf79..f171933 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -384,8 +384,8 @@
#endif
#define ATL_VTYPE vector double
#if (defined(DREAL) || defined(DCPLX))
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
#else
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
@@ -400,10 +400,9 @@
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
- { ATL_VTYPE t_;\
- t_ = vec_splat(s0_, 1); \
- s0_ += t_; \
- }
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
--
2.23.0

View File

@ -0,0 +1,276 @@
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 25 Mar 2020 20:11:19 +0100
Subject: [PATCH 6/8] Add IBM z14 support
Add general support for IBM z14. Also detect and handle the vector
enhancements facility 1, which specifically adds single-precision FP
arithmetic for vectors.
---
CONFIG/include/atlconf.h | 14 ++++----
CONFIG/src/Makefile | 6 ++++
CONFIG/src/atlcomp.txt | 4 +++
CONFIG/src/backend/Make.ext | 4 ++-
CONFIG/src/backend/archinfo_linux.c | 3 +-
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
CONFIG/src/probe_comp.c | 3 +-
include/atlas_prefetch.h | 3 +-
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
9 files changed, 91 insertions(+), 11 deletions(-)
create mode 100644 CONFIG/src/backend/probe_vxz2.c
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index e51d56d..3828fdb 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 62
+#define NMACH 63
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
-#define NISA 15
+#define NISA 16
static char *ISAXNAM[NISA] =
- {"", "VSX", "VXZ", "AltiVec",
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
enum ISAEXT
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
index 212b9d7..782a4cf 100644
--- a/CONFIG/src/Makefile
+++ b/CONFIG/src/Makefile
@@ -158,6 +158,12 @@ IRun_NEON :
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
redir=config0.out
- cat config0.out
+IRun_VXZ2 :
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
+ redir=config0.out
+ - cat config0.out
IRun_VXZ :
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2ac71cf..2cfacc2 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z13 -mtune=z13 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
index 4743353..794babf 100644
--- a/CONFIG/src/backend/Make.ext
+++ b/CONFIG/src/backend/Make.ext
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
- probe_vxz.c
+ probe_vxz2.c probe_vxz.c
all : $(files)
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
probe_arm32_FPABI.c : $(basf)
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
+probe_vxz2.c : $(basf)
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
probe_vxz.c : $(basf)
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
probe_aff_SETAFFNP.c : $(basf)
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index cdcee92..ed6f476 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
break;
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
new file mode 100644
index 0000000..a69d92d
--- /dev/null
+++ b/CONFIG/src/backend/probe_vxz2.c
@@ -0,0 +1,12 @@
+#include <vecintrin.h>
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
+{
+ vector float vx, vy;
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
+ vy += vx;
+ z[0] = vy[0];
+ z[1] = vy[1];
+ z[2] = vy[2];
+ z[3] = vy[3];
+}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 1652e24..857ea82 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
vp = "-mavx2 -mfma";
else if (vecexts & (1<<ISA_VSX))
vp = "-mvsx";
- else if (vecexts & (1<<ISA_VXZ))
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
vp = "-mvx -mzvector";
else if (vecexts & (1<<ISA_AV))
vp = "-maltivec";
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
{
case IbmZ12:
case IbmZ13:
+ case IbmZ14:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index e7988a7..fa426ac 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -155,7 +155,8 @@
#define ATL_L1LS 32
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
+ defined(ATL_ARCH_IbmZ14)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index f171933..eb75577 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -68,6 +68,11 @@
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
#define ATL_FRCGNUVEC
#endif
+ #elif defined(ATL_VXZ2)
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
+ #define ATL_FRCGNUVEC
+ #endif
#elif defined(ATL_VXZ)
#if ATL_VLEN != 2
#define ATL_FRCGNUVEC
@@ -113,6 +118,12 @@
#else
#define ATL_VLEN 2
#endif
+ #elif defined(ATL_VXZ2)
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VLEN 4
+ #else
+ #define ATL_VLEN 2
+ #endif
#elif defined(ATL_VXZ)
#define ATL_VLEN 2
#elif defined(ATL_NEON)
@@ -376,6 +387,48 @@
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#endif
+#elif defined(ATL_VXZ2)
+ #include <vecintrin.h>
+
+ #define ATL_VPERMI(s_, t_, i_) \
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
+
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VTYPE vector float
+ #if ATL_VLEN != 4
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
+ #endif
+ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
+ { ATL_VTYPE t0_, t1_; \
+ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
+ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
+ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
+ }
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
+ #else /* double precision */
+ #define ATL_VTYPE vector double
+ #if ATL_VLEN != 2
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
+ #endif
+ #define ATL_vvrsum1(s0_) \
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
+ #endif
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vcopy(d_, s_) d_ = s_
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_VXZ)
#include <vecintrin.h>
--
2.23.0

View File

@ -0,0 +1,265 @@
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 29 May 2019 17:51:34 +0200
Subject: [PATCH 7/8] Enable "cross-compile"
This adds support for building ATLAS without running any target code. In
order for this to work, the archdefs must contain some additional files
that would otherwise be built during various tuning steps; see the new
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
Even if the archdefs contain these additional files, cross compilation
is *not* automatically enabled. To activate it and disable tuning at
build time, add the option "-Si archdef 2" when running "configure".
---
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
bin/atlas_install.c | 2 ++
makes/Make.aux | 10 +++++-----
makes/Make.bin | 22 ++++++++++++++++++++++
makes/Make.l3tune | 6 ++++++
makes/Make.sysinfo | 8 +++++++-
6 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
index 321e05c..e61b5a0 100644
--- a/CONFIG/ARCHS/Makefile
+++ b/CONFIG/ARCHS/Makefile
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
rm -f xnegflt
archput : sys_put kern_put gemm_put la_put
+
+ifdef ATL_NOTUNE
+
+# To avoid tuning, some extra files are needed.
+
+extra_get :
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
+
+extra_put :
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
+
+ArchNew : extra_get
+
+archput : extra_put
+
+endif
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
index de3eb3a..3c811e6 100644
--- a/bin/atlas_install.c
+++ b/bin/atlas_install.c
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
ATL_Cassert(system("make IBozoL1.grd\n")==0,
"USING BOZO L1 DEFAULTS", NULL);
}
+ if (ARCHDEF >= 2)
+ setenv("ATL_NOTUNE", "1", 1);
if (ARCHDEF)
DefInstall = !system("make IArchDef.grd\n");
diff --git a/makes/Make.aux b/makes/Make.aux
index 1f769c8..c793028 100644
--- a/makes/Make.aux
+++ b/makes/Make.aux
@@ -113,23 +113,23 @@ clean :
$(ATLFWAIT) :
cd $(BINdir) ; $(MAKE) xatlas_waitfile
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
diff --git a/makes/Make.bin b/makes/Make.bin
index 1035cb9..acad578 100644
--- a/makes/Make.bin
+++ b/makes/Make.bin
@@ -163,7 +163,9 @@ IRunMADef :
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
IRunMMDef :
+ifndef ATL_NOTUNE
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
+endif
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
IKillL1 : force_build
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
+endif
$(R1Tdir)/res/$(pre)R1K.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
$(R1Tdir)/res/$(pre)R2K.sum
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
+ifndef ATL_NOTUNE
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
+else
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
+endif
$(MVTdir)/res/$(pre)MVNK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
$(MVTdir)/res/$(pre)MVTK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
index eaf7d7d..cd7f5f1 100644
--- a/makes/Make.l3tune
+++ b/makes/Make.l3tune
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
cp $(strsmXover) res/.
stsmfc :
+ifndef ATL_NOTUNE
rm -f $(strsmXover)
cd $(L3Bdir) ; $(MAKE) slib
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
@@ -128,6 +129,7 @@ stsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) slib
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
cp $(dtrsmXover) res/.
dtsmfc :
+ifndef ATL_NOTUNE
rm -f $(dtrsmXover)
cd $(L3Bdir) ; $(MAKE) dlib
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
@@ -148,6 +151,7 @@ dtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) dlib
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
cp $(qtrsmXover) res/.
qtsmfc :
+ifndef ATL_NOTUNE
rm -f $(qtrsmXover)
cd $(L3Bdir) ; $(MAKE) qlib
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
@@ -168,6 +173,7 @@ qtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) qlib
$(pre)tsmfc.o : force_build
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
index 2b7dfdc..8e5dab2 100644
--- a/makes/Make.sysinfo
+++ b/makes/Make.sysinfo
@@ -5,6 +5,7 @@ maxlat=6
mflop=200
flags=
+ifndef ATL_NOTUNE
sTestFlags : force_build
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
cp res/atlas_?lamch.h $(INCAdir)/.
RunTyp: xemit_typ
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
+endif
xemit_buildinfo : emit_buildinfo.o
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
xsyssum : GetSysSum.o
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
+ifndef ATL_NOTUNE
xL1 : time.o L1CacheSize.o
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
xmasrch : $(mySRCdir)/masrch.c
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
+endif
ATL_cputime.c :
cp $(mySRCdir)/ATL_cputime.c .
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
+
+ifndef ATL_NOTUNE
time.o : $(mySRCdir)/time.c
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
emit_lamch.o : $(mySRCdir)/emit_lamch.c
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
tlb.o : $(mySRCdir)/tlb.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
-
+endif
force_build :
--
2.23.0

View File

@ -0,0 +1,105 @@
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu, 26 Mar 2020 17:14:49 +0100
Subject: [PATCH 8/8] Add IBM z15 support
Add support for specifying "IBMz15" as target architecture.
---
CONFIG/include/atlconf.h | 8 ++++----
CONFIG/src/atlcomp.txt | 4 ++++
CONFIG/src/backend/archinfo_linux.c | 1 +
CONFIG/src/probe_comp.c | 1 +
include/atlas_prefetch.h | 2 +-
5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index 3828fdb..382601f 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 63
+#define NMACH 64
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2cfacc2..acb2c83 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z14 -mtune=z14 -O2'
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index ed6f476..934a005 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 857ea82..88bb25e 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
case IbmZ12:
case IbmZ13:
case IbmZ14:
+ case IbmZ15:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index fa426ac..583f19d 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -156,7 +156,7 @@
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
- defined(ATL_ARCH_IbmZ14)
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
--
2.23.0

View File

@ -5,7 +5,7 @@ Version: 3.10.3
%if "%{?enable_native_atlas}" != "0"
%define dist .native
%endif
Release: 7%{?dist}
Release: 8%{?dist}
Summary: Automatically Tuned Linear Algebra Software
Group: System Environment/Libraries
@ -31,6 +31,11 @@ Source14: ARMv732NEON.tar.bz2
Source15: IBMz1264.tar.bz2
Source16: ARMa732.tar.bz2
#provided by IBM (3.10.3-8)
Source17: IBMz1364VXZ.tar.bz2
Source18: IBMz1464VXZ2.tar.bz2
Source19: IBMz1564VXZ2.tar.bz2
# Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552
# https://sourceforge.net/tracker/?func=detail&atid=379484&aid=3555789&group_id=23725
Patch3: atlas-melf.patch
@ -47,7 +52,18 @@ Patch9: atlas.3.10.1-unbundle.patch
# Atlas getri patch (covscan)
Patch10: atlas-getri.patch
BuildRequires: gcc-gfortran, lapack-static, gcc
# Atlas patches dealing with z{13,14,15} support and crosscompilation provided by IBM
Patch11: 0001-Avoid-c99-standard-compiler.patch
Patch12: 0002-Fix-rpath-link-command-line-options.patch
Patch13: 0003-Fix-SIMD-support-on-IBM-z13.patch
Patch14: 0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch
Patch15: 0005-Optimizations-for-IBM-z13.patch
Patch16: 0006-Add-IBM-z14-support.patch
Patch17: 0007-Enable-cross-compile.patch
Patch18: 0008-Add-IBM-z15-support.patch
BuildRequires: gcc-gfortran, lapack-static, gcc, lapack-devel
%ifarch x86_64
Obsoletes: atlas-sse3 < 3.10.3-1
@ -61,8 +77,8 @@ Obsoletes: atlas-sse3 < 3.10.3-1
%endif
%ifarch s390 s390x
#Obsoletes: atlas-z10 < 3.10
#Obsoletes: atlas-z196 < 3.10
Obsoletes: atlas-z10 < 3.10.3-8
Obsoletes: atlas-z196 < 3.10.3-8
%endif
@ -100,6 +116,13 @@ Obsoletes: atlas-sse-devel < 3.10.3-1
Obsoletes: atlas-sse2-devel < 3.10.3-1
Obsoletes: atlas-sse3-devel < 3.10.3-1
%endif
%ifarch s390 s390x
Obsoletes: atlas-z10-devel < 3.10.3-8
Obsoletes: atlas-z196-devel < 3.10.3-8
%endif
%description devel
This package contains headers for development with ATLAS
(Automatically Tuned Linear Algebra Software).
@ -121,6 +144,11 @@ Obsoletes: atlas-sse-static < 3.10.3-1
Obsoletes: atlas-sse2-static < 3.10.3-1
Obsoletes: atlas-sse3-static < 3.10.3-1
%endif
%ifarch s390 s390x
Obsoletes: atlas-z10-static < 3.10.3-8
Obsoletes: atlas-z196-static < 3.10.3-8
%endif
%description static
This package contains static version of ATLAS (Automatically Tuned
Linear Algebra Software).
@ -133,8 +161,6 @@ Linear Algebra Software).
#
%ifarch x86_64
%define types base corei2
#corei4
# sse3
%package corei2-static
Summary: ATLAS libraries for Corei2 (Ivy/Sandy bridge) CPUs
@ -169,114 +195,49 @@ optimizations for the corei2 (Ivy/Sandy bridge) CPUs.
%endif
%ifarch %{ix86}
%define types base
#corei1
#%package corei1
#Summary: ATLAS libraries for Corei1 (Nehalem/Westmere) CPUs
#Group: System Environment/Libraries
#%description corei1
#This package contains ATLAS (Automatically Tuned Linear Algebra Software)
#shared libraries compiled with optimizations for the Corei1 (Nehalem/Westmere) CPUs.
#The base ATLAS builds for the ix86 architecture are made for PIII CPUs.
#%package corei1-devel
#Summary: Development libraries for ATLAS for Corei1 (Nehalem/Westmere) CPUs
#Group: Development/Libraries
#Requires: %{name}-corei1 = %{version}-%{release}
#Obsoletes: %name-header <= %version-%release
#Requires(posttrans): chkconfig
#Requires(postun): chkconfig
#%description corei1-devel
#This package contains shared and static versions of the ATLAS
#(Automatically Tuned Linear Algebra Software) libraries compiled with
#optimizations for the corei1 (Nehalem/Westmere) CPUs.
#%package corei1-static
#Summary: Static libraries for ATLAS for Corei1 (/Nehalem/Westmere) CPUs
#Group: Development/Libraries
#Requires: %{name}-corei1-devel = %{version}-%{release}
#Requires(posttrans): chkconfig
#Requires(postun): chkconfig
#%description corei1-static
#This package contains the ATLAS (Automatically Tuned Linear Algebra
#Software) static libraries compiled with optimizations for the Corei1 (Nehalem/Westemere)
#CPUs. The base ATLAS builds for the ix86 architecture are made for the PIII CPUs.
%define types base
%endif
%ifarch s390 s390x
%define types base z196 z10
%define types base z14
%package z196
Summary: ATLAS libraries for z196
#z14
%package z14
Summary: ATLAS libraries for z14
Group: System Environment/Libraries
%description z196
This package contains the ATLAS (Automatically Tuned Linear Algebra
Software) libraries compiled with optimizations for the z196.
%description z14
This package contains ATLAS (Automatically Tuned Linear Algebra Software)
shared libraries compiled with optimizations for the z14 CPUs.
%package z196-devel
Summary: Development libraries for ATLAS for z196
%package z14-devel
Summary: Development libraries for ATLAS for z14
Group: Development/Libraries
Requires: %{name}-z196 = %{version}-%{release}
Obsoletes: %name-z196-header <= %version-%release
Requires(posttrans): chkconfig
Requires(postun): chkconfig
%description z196-devel
This package contains headers and shared versions of the ATLAS
(Automatically Tuned Linear Algebra Software) libraries compiled with
optimizations for the z196 architecture.
%package z196-static
Summary: Static libraries for ATLAS
Group: Development/Libraries
Requires: %{name}-z196-devel = %{version}-%{release}
Requires(posttrans): chkconfig
Requires(postun): chkconfig
%description z196-static
This package contains static version of ATLAS (Automatically Tuned
Linear Algebra Software) for the z196 architecture.
%package z10
Summary: ATLAS libraries for z10
Group: System Environment/Libraries
%description z10
This package contains the ATLAS (Automatically Tuned Linear Algebra
Software) libraries compiled with optimizations for the z10.
%package z10-devel
Summary: Development libraries for ATLAS for z10
Group: Development/Libraries
Requires: %{name}-z10 = %{version}-%{release}
Requires: %{name}-z14 = %{version}-%{release}
Obsoletes: %name-header <= %version-%release
Requires(posttrans): chkconfig
Requires(postun): chkconfig
%description z10-devel
This package contains headers and shared versions of the ATLAS
%description z14-devel
This package contains shared and static versions of the ATLAS
(Automatically Tuned Linear Algebra Software) libraries compiled with
optimizations for the z10 architecture.
optimizations for the z14 CPUs.
%package z10-static
Summary: Static libraries for ATLAS
%package z14-static
Summary: Static libraries for ATLAS for z14
Group: Development/Libraries
Requires: %{name}-devel = %{version}-%{release}
Requires: %{name}-z14-devel = %{version}-%{release}
Requires(posttrans): chkconfig
Requires(postun): chkconfig
%description z10-static
This package contains static version of ATLAS (Automatically Tuned
Linear Algebra Software) for the z10 architecture.
%description z14-static
This package contains the ATLAS (Automatically Tuned Linear Algebra
Software) static libraries compiled with optimizations for the z14
CPUs.
#z15
%endif
@ -356,7 +317,6 @@ CPUs. The base ATLAS builds for the ppc64 architecture are made for the Power 5
%endif
%prep
#cat /proc/cpuinfo
%setup -q -n ATLAS
#patch0 -p0 -b .shared
#arm patch not applicable, probably not needed
@ -373,6 +333,19 @@ CPUs. The base ATLAS builds for the ppc64 architecture are made for the Power 5
%patch9 -p1 -b .unbundle
%patch10 -p1 -b .getri
#IBM patches
%ifarch s390x s390
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
%patch15 -p1
%patch16 -p1
%patch17 -p1
#As of rhel 8.3, z15 is not supported by the gcc
#%patch18 -p1
%endif
cp %{SOURCE1} CONFIG/ARCHS/
#cp %{SOURCE2} CONFIG/ARCHS/
cp %{SOURCE3} doc
@ -382,6 +355,13 @@ cp %{SOURCE13} CONFIG/ARCHS/
cp %{SOURCE14} CONFIG/ARCHS/
cp %{SOURCE15} CONFIG/ARCHS/
cp %{SOURCE16} CONFIG/ARCHS/
#z13
cp %{SOURCE17} CONFIG/ARCHS/
#z14
cp %{SOURCE18} CONFIG/ARCHS/
#z15
cp %{SOURCE19} CONFIG/ARCHS/
#cp %{SOURCE8} CONFIG/ARCHS/
#cp %{SOURCE9} CONFIG/ARCHS/
@ -400,7 +380,7 @@ mkdir lapacklib
cd lapacklib
ar x %{_libdir}/liblapack_pic.a
# Remove functions that have ATLAS implementations
rm -f cgelqf.o cgels.o cgeqlf.o cgeqrf.o cgerqf.o cgesv.o cgetrf.o cgetri.o cgetrs.o clarfb.o clarft.o clauum.o cposv.o cpotrf.o cpotri.o cpotrs.o ctrtri.o dgelqf.o dgels.o dgeqlf.o dgeqrf.o dgerqf.o dgesv.o dgetrf.o dgetri.o dgetrs.o dlamch.o dlarfb.o dlarft.o dlauum.o dposv.o dpotrf.o dpotri.o dpotrs.o dtrtri.o ieeeck.o ilaenv.o lsame.o sgelqf.o sgels.o sgeqlf.o sgeqrf.o sgerqf.o sgesv.o sgetrf.o sgetri.o sgetrs.o slamch.o slarfb.o slarft.o slauum.o sposv.o spotrf.o spotri.o spotrs.o strtri.o xerbla.o zgelqf.o zgels.o zgeqlf.o zgeqrf.o zgerqf.o zgesv.o zgetrf.o zgetri.o zgetrs.o zlarfb.o zlarft.o zlauum.o zposv.o zpotrf.o zpotri.o zpotrs.o ztrtri.o
rm -f cgelqf.o cgels.o cgeqlf.o cgeqrf.o cgerqf.o cgesv.o cgetrf.o cgetri.o cgetrs.o clarfb.o clarft.o clauum.o cposv.o cpotrf.o cpotri.o cpotrs.o ctrtri.o dgelqf.o dgels.o dgeqlf.o dgeqrf.o dgerqf.o dgesv.o dgetrf.o dgetri.o dgetrs.o dlamch.o dlarfb.o dlarft.o dlauum.o dposv.o dpotrf.o dpotri.o dpotrs.o dtrtri.o ieeeck.o ilaenv.o lsame.o sgelqf.o sgels.o sgeqlf.o sgeqrf.o sgerqf.o sgesv.o sgetrf.o sgetri.o sgetrs.o slamch.o slarfb.o slarft.o slauum.o sposv.o spotrf.o spotri.o spotrs.o strtri.o xerbla.o zgelqf.o zgels.o zgeqlf.o zgeqrf.o zgerqf.o zgesv.o zgetrf.o zgetri.o zgetrs.o zlarfb.o zlarft.o zlauum.o zposv.o zpotrf.o zpotri.o zpotrs.o ztrtri.o
# Create new library
ar rcs ../liblapack_pic_pruned.a *.o
cd ..
@ -420,9 +400,9 @@ p=$(pwd)
%define threads_option "-t 2"
#Target architectures for the 'base' versions
%ifarch s390x
%ifarch s390x
%define flags %{nil}
%define base_options "-A IBMz9 -V 1"
%define base_options "-A IBMz13 -V 8 -Si archdef 2"
%endif
%ifarch x86_64
@ -467,10 +447,11 @@ p=$(pwd)
%endif
for type in %{types}; do
if [ "$type" = "base" ]; then
libname=atlas
arg_options=%{base_options}
thread_options=%{threads_option}
thread_options=%{threads_option}
%define pr_base %(echo $((%{__isa_bits}+0)))
else
libname=atlas-${type}
@ -481,12 +462,15 @@ for type in %{types}; do
elif [ "$type" = "corei1" ]; then
arg_options="-A Corei1 -V 896"
%define pr_corei1 %(echo $((%{__isa_bits}+2)))
elif [ "$type" = "z10" ]; then
arg_options="-A IBMz10 -V 1"
%define pr_z10 %(echo $((%{__isa_bits}+2)))
elif [ "$type" = "z196" ]; then
arg_options="-A IBMz196 -V 1"
%define pr_z196 %(echo $((%{__isa_bits}+4)))
elif [ "$type" = "z14" ]; then
thread_options="-t 4"
arg_options="-A IBMz14 -V 4 -Si archdef 2"
%define pr_z14 %(echo $((%{__isa_bits}+2)))
#gcc in rhel 8.3 does not support z15, z15 subpackage is thus not being build/shipped
elif [ "$type" = "z15" ]; then
thread_options="-t 0"
arg_options="-A IBMz15 -V 4 -Si archdef 2"
%define pr_z15 %(echo $((%{__isa_bits}+4)))
elif [ "$type" = "power7" ]; then
thread_options="-t 4"
arg_options="-A POWER7 -V 1"
@ -497,12 +481,13 @@ for type in %{types}; do
%define pr_power8 %(echo $((%{__isa_bits}+4)))
fi
fi
mkdir -p %{_arch}_${type}
pushd %{_arch}_${type}
../configure %{mode} $thread_options $arg_options -D c -DWALL -Fa alg '%{flags} -D_FORTIFY_SOURCE=2 -g -Wa,--noexecstack,--generate-missing-build-notes=yes -fstack-protector-strong -fstack-clash-protection -fPIC -fplugin=annobin -Wl,-z,now'\
../configure %{mode} $thread_options $arg_options -D c -DWALL -F xc ' ' -Fa alg '%{flags} -D_FORTIFY_SOURCE=2 -g -Wa,--noexecstack,--generate-missing-build-notes=yes -fstack-protector-strong -fstack-clash-protection -fPIC -fplugin=annobin -Wl,-z,now'\
--prefix=%{buildroot}%{_prefix} \
--incdir=%{buildroot}%{_includedir} \
--libdir=%{buildroot}%{_libdir}/${libname}
--libdir=%{buildroot}%{_libdir}/${libname}
#--with-netlib-lapack-tarfile=%{SOURCE10}
#matches both SLAPACK and SSLAPACK
@ -526,7 +511,7 @@ for type in %{types}; do
popd
done
%install
%install
for type in %{types}; do
pushd %{_arch}_${type}
make DESTDIR=%{buildroot} install
@ -567,20 +552,18 @@ mkdir -p %{buildroot}%{_includedir}/atlas
%check
# Run make check but don't fail the build on these arches
#%ifarch s390 aarch64 ppc64
#for type in %{types}; do
# pushd %{_arch}_${type}
# make check ptcheck
# popd
#done
#%else
# Run the check only for the z13. z14/z15 may fail due to illegal instrucitons...
for type in %{types}; do
pushd %{_arch}_${type}
make check ptcheck
popd
if [ "$type" = "z14" ] || [ "$type" = "z15" ]; then
# skip the tests (may fail due to illegal instructions).
echo "Skipping tests for the $type subpackage"
else
pushd %{_arch}_${type}
make check ptcheck
popd
fi
done
#%endif
%post -p /sbin/ldconfig
@ -632,31 +615,19 @@ fi
%ifarch s390 s390x
%post -n atlas-z10 -p /sbin/ldconfig
%post -n atlas-z14 -p /sbin/ldconfig
%postun -n atlas-z10 -p /sbin/ldconfig
%postun -n atlas-z14 -p /sbin/ldconfig
%posttrans z10-devel
%posttrans z14-devel
/usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \
%{_includedir}/atlas-%{_arch}-z10 %{pr_z10}
%{_includedir}/atlas-%{_arch}-z14 %{pr_z14}
%postun z10-devel
%postun z14-devel
if [ $1 -ge 0 ] ; then
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z10
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z14
fi
%post -n atlas-z196 -p /sbin/ldconfig
%postun -n atlas-z196 -p /sbin/ldconfig
%posttrans z196-devel
/usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \
%{_includedir}/atlas-%{_arch}-z196 %{pr_z196}
%postun z196-devel
if [ $1 -ge 0 ] ; then
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z196
fi
%endif
@ -674,7 +645,7 @@ fi
if [ $1 -ge 0 ] ; then
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-power7
fi
%post -n atlas-ppc8 -p /sbin/ldconfig
%postun -n atlas-ppc8 -p /sbin/ldconfig
@ -767,61 +738,45 @@ fi
%ifarch %{ix86}
#%files corei1
#%doc doc/README.dist
#%dir %{_libdir}/atlas-corei1
#%{_libdir}/atlas-corei1/*.so.*
#%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-corei1.conf
#%files corei1-devel
#%doc doc
#%{_libdir}/atlas-corei1/*.so
#%{_includedir}/atlas-%{_arch}-corei1/
#%{_includedir}/*.h
#%ghost %{_includedir}/atlas
#%files corei1-static
#%{_libdir}/atlas-corei1/*.a
%endif
%ifarch s390 s390x
%files z10
%doc doc/README.dist
%dir %{_libdir}/atlas-z10
%{_libdir}/atlas-z10/*.so.*
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z10.conf
%files z10-devel
%files z14
%doc doc/README.dist
%dir %{_libdir}/atlas-z14
%{_libdir}/atlas-z14/*.so.*
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z14.conf
%files z14-devel
%doc doc
%{_libdir}/atlas-z10/*.so
%{_includedir}/atlas-%{_arch}-z10/
%{_libdir}/atlas-z14/*.so
%{_includedir}/atlas-%{_arch}-z14/
%{_includedir}/*.h
%ghost %{_includedir}/atlas
%files z10-static
%{_libdir}/atlas-z10/*.a
%files z196
%doc doc/README.dist
%dir %{_libdir}/atlas-z196
%{_libdir}/atlas-z196/*.so.*
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z196.conf
%files z196-devel
%doc doc
%{_libdir}/atlas-z196/*.so
%{_includedir}/atlas-%{_arch}-z196/
%{_includedir}/*.h
%ghost %{_includedir}/atlas
%files z196-static
%{_libdir}/atlas-z196/*.a
%files z14-static
%{_libdir}/atlas-z14/*.a
%endif
#enable_native_atlas if
%endif
%changelog
* Tue Jun 2 2020 Jakub Martisko <jamartis@redhat.com> - 3.10.3-8
* Update the s390 subpackages:
- The base pakcage is now optimized to z13
- New subpackage is introduced: z14
- All remaining subpackages for s390 are being deprecated
- Clean up of the spec file
- Some commented out subpackages were removed form the spec
- These subpackages were not built/shipped -> no change from the users point of view
Resolves: #1782560
Resolves: #1780286
Resolves: #1782561
* Mon Jun 10 2019 Jakub Martisko <jamartis@redhat.com> - 3.10.3-7
* Fix covscan related issues (getri function)
Related: #1602445