import atlas-3.10.3-8.el8
This commit is contained in:
commit
7fd11a7fe7
11
.atlas.metadata
Normal file
11
.atlas.metadata
Normal file
@ -0,0 +1,11 @@
|
||||
0e11ec19a521973eaa551954debd112c21479e9c SOURCES/ARMa732.tar.bz2
|
||||
9398518fe55b4a544278237bc639656e04543c50 SOURCES/ARMv732NEON.tar.bz2
|
||||
d2f7a62aacdc5091aaa673a311a23f521e5c6486 SOURCES/IBMz1264.tar.bz2
|
||||
042c0b9df85a9a469e20cf0801f83b03ec40425d SOURCES/IBMz1364VXZ.tar.bz2
|
||||
352e057319fa7503cd74a0ab81055dc286cc1c45 SOURCES/IBMz1464VXZ2.tar.bz2
|
||||
0abb8f638b8ffdc13994d533d8a4febcab364f2f SOURCES/IBMz1564VXZ2.tar.bz2
|
||||
b3ee9bca1510b11c6aa671ba5ba7dff8918ce0cf SOURCES/IBMz932.tar.bz2
|
||||
43f8d8eaf8cc62bc4665df3550b77e95f3dced22 SOURCES/IBMz964.tar.bz2
|
||||
c47ac6f00d7bf4ab882e71fa1ab894cc551c77b7 SOURCES/POWER332.tar.bz2
|
||||
85c00d3190abbe250d46472824b17d9164e3dfc2 SOURCES/PPRO32.tgz
|
||||
337eef1167030a9440ea645ce0037abfd4b0be4e SOURCES/atlas3.10.3.tar.bz2
|
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
SOURCES/ARMa732.tar.bz2
|
||||
SOURCES/ARMv732NEON.tar.bz2
|
||||
SOURCES/IBMz1264.tar.bz2
|
||||
SOURCES/IBMz1364VXZ.tar.bz2
|
||||
SOURCES/IBMz1464VXZ2.tar.bz2
|
||||
SOURCES/IBMz1564VXZ2.tar.bz2
|
||||
SOURCES/IBMz932.tar.bz2
|
||||
SOURCES/IBMz964.tar.bz2
|
||||
SOURCES/POWER332.tar.bz2
|
||||
SOURCES/PPRO32.tgz
|
||||
SOURCES/atlas3.10.3.tar.bz2
|
30
SOURCES/0001-Avoid-c99-standard-compiler.patch
Normal file
30
SOURCES/0001-Avoid-c99-standard-compiler.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Tue, 19 Feb 2019 19:03:52 +0100
|
||||
Subject: [PATCH 1/8] Avoid c99 standard compiler
|
||||
|
||||
When probing for a usable GCC, the existing code already dropped path
|
||||
names that contained "c89" or "c90", because these compilers don't have
|
||||
the GCC extensions enabled. This patch also drops names with "c99" in
|
||||
them.
|
||||
---
|
||||
CONFIG/src/atlconf_misc.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
|
||||
index 63cb1ef..fb62214 100644
|
||||
--- a/CONFIG/src/atlconf_misc.c
|
||||
+++ b/CONFIG/src/atlconf_misc.c
|
||||
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
|
||||
int i;
|
||||
|
||||
cmpname = NameWithoutPath(comp);
|
||||
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
|
||||
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
|
||||
+ strstr(cmpname, "c99"))
|
||||
{
|
||||
free(cmpname);
|
||||
return(0);
|
||||
--
|
||||
2.23.0
|
||||
|
38
SOURCES/0002-Fix-rpath-link-command-line-options.patch
Normal file
38
SOURCES/0002-Fix-rpath-link-command-line-options.patch
Normal file
@ -0,0 +1,38 @@
|
||||
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Tue, 19 Feb 2019 19:20:19 +0100
|
||||
Subject: [PATCH 2/8] Fix -rpath-link command line options
|
||||
|
||||
The "-rpath-link" command line options were written in the wrong syntax,
|
||||
causing errors in the build. This is fixed.
|
||||
---
|
||||
makes/Make.lib | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/makes/Make.lib b/makes/Make.lib
|
||||
index 4ceff02..b322a32 100644
|
||||
--- a/makes/Make.lib
|
||||
+++ b/makes/Make.lib
|
||||
@@ -47,11 +47,11 @@ cshared : fat_cshared
|
||||
#
|
||||
LDTRY_WIN:
|
||||
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
|
||||
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
|
||||
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
|
||||
--whole-archive $(libas) --no-whole-archive $(LIBS)
|
||||
GCCTRY_WIN:
|
||||
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
|
||||
- -Wl,"-rpath-link $(LIBINSTdir)" \
|
||||
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
|
||||
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
|
||||
GCCTRY_norp_WIN:
|
||||
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
|
||||
@@ -113,7 +113,7 @@ TRYALL_WIN :
|
||||
#
|
||||
LDTRY:
|
||||
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
|
||||
- -rpath-link $(LIBINSTdir) \
|
||||
+ -rpath-link=$(LIBINSTdir) \
|
||||
--whole-archive $(libas) --no-whole-archive $(LIBS)
|
||||
GCCTRY:
|
||||
$(GOODGCC) -shared -o $(outso).$(so_ver) \
|
55
SOURCES/0003-Fix-SIMD-support-on-IBM-z13.patch
Normal file
55
SOURCES/0003-Fix-SIMD-support-on-IBM-z13.patch
Normal file
@ -0,0 +1,55 @@
|
||||
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 5 Dec 2018 18:59:15 +0100
|
||||
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
|
||||
|
||||
The header file atlas_simd.h contained a syntax error and a few functional
|
||||
errors that affected IBM z13. It prevented any SIMD kernels from being
|
||||
compiled successfully for that platform. This is fixed. The macro
|
||||
vec_madd is avoided, because some GCC versions don't implement it
|
||||
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
|
||||
instead.
|
||||
---
|
||||
include/atlas_simd.h | 10 +++++-----
|
||||
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index baee6b1..68daf79 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -69,7 +69,7 @@
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
#elif defined(ATL_VXZ)
|
||||
- #if ATL_VLEN != 2;
|
||||
+ #if ATL_VLEN != 2
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
#elif defined(ATL_NEON)
|
||||
@@ -390,19 +390,19 @@
|
||||
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
|
||||
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
|
||||
#endif
|
||||
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
|
||||
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
|
||||
#define ATL_vcopy(d_, s_) d_ = s_
|
||||
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
|
||||
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
|
||||
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
|
||||
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
|
||||
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
|
||||
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
|
||||
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
|
||||
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
#define ATL_vvrsum1(s0_) \
|
||||
{ ATL_VTYPE t_;\
|
||||
t_ = vec_splat(s0_, 1); \
|
||||
- s0 += t_; \
|
||||
+ s0_ += t_; \
|
||||
}
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
--
|
||||
2.23.0
|
||||
|
@ -0,0 +1,46 @@
|
||||
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 12 Dec 2018 19:44:32 +0100
|
||||
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
|
||||
|
||||
The probing of the L1 data cache size is sometimes not reliable. This can
|
||||
cause the tuning to yield varying, sub-optimal results. But on Linux the
|
||||
L1 data cache size can usually be retrieved with sysconf instead, which is
|
||||
faster and more reliable. Do this whenever possible.
|
||||
---
|
||||
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
|
||||
1 file changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
|
||||
index e62a273..dffa76e 100644
|
||||
--- a/tune/sysinfo/L1CacheSize.c
|
||||
+++ b/tune/sysinfo/L1CacheSize.c
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
|
||||
#define REPS 4096
|
||||
|
||||
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
|
||||
exit(-1);
|
||||
}
|
||||
if (nargs > 1) MaxSize = atoi(args[1]);
|
||||
- L1Size = GetL1Size(MaxSize, 1.08);
|
||||
+
|
||||
+#ifdef _SC_LEVEL1_DCACHE_SIZE
|
||||
+ {
|
||||
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
|
||||
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ if (!L1Size)
|
||||
+ L1Size = GetL1Size(MaxSize, 1.08);
|
||||
if (!L1Size)
|
||||
L1Size = GetL1Size(MaxSize, 1.08);
|
||||
if (!L1Size)
|
||||
--
|
||||
2.23.0
|
||||
|
68
SOURCES/0005-Optimizations-for-IBM-z13.patch
Normal file
68
SOURCES/0005-Optimizations-for-IBM-z13.patch
Normal file
@ -0,0 +1,68 @@
|
||||
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 12 Dec 2018 20:06:27 +0100
|
||||
Subject: [PATCH 5/8] Optimizations for IBM z13
|
||||
|
||||
Perform some optimizations for IBM z13:
|
||||
- Compile with -O2 instead of -O.
|
||||
- Streamline vector loads/stores.
|
||||
- Define the vvrsum2 macro.
|
||||
|
||||
Also, use the compile option -march=z13 instead of -march=native.
|
||||
---
|
||||
CONFIG/src/atlcomp.txt | 8 +++-----
|
||||
include/atlas_simd.h | 11 +++++------
|
||||
2 files changed, 8 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index aa31604..2ac71cf 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
|
||||
'gfortran' '-O3 -funroll-loops'
|
||||
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-O3 -funroll-loops'
|
||||
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
|
||||
- 'gcc' '-march=native -O -mvx -mzvector'
|
||||
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
|
||||
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
|
||||
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=z13 -mtune=z13 -O2'
|
||||
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
|
||||
- 'gfortran' '-march=native -O -mvx -mzvector'
|
||||
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index 68daf79..f171933 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -384,8 +384,8 @@
|
||||
#endif
|
||||
#define ATL_VTYPE vector double
|
||||
#if (defined(DREAL) || defined(DCPLX))
|
||||
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
|
||||
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
|
||||
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
|
||||
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
|
||||
#else
|
||||
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
|
||||
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
|
||||
@@ -400,10 +400,9 @@
|
||||
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
#define ATL_vvrsum1(s0_) \
|
||||
- { ATL_VTYPE t_;\
|
||||
- t_ = vec_splat(s0_, 1); \
|
||||
- s0_ += t_; \
|
||||
- }
|
||||
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
|
||||
+ #define ATL_vvrsum2(s0_, s1_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
|
||||
--
|
||||
2.23.0
|
||||
|
276
SOURCES/0006-Add-IBM-z14-support.patch
Normal file
276
SOURCES/0006-Add-IBM-z14-support.patch
Normal file
@ -0,0 +1,276 @@
|
||||
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 25 Mar 2020 20:11:19 +0100
|
||||
Subject: [PATCH 6/8] Add IBM z14 support
|
||||
|
||||
Add general support for IBM z14. Also detect and handle the vector
|
||||
enhancements facility 1, which specifically adds single-precision FP
|
||||
arithmetic for vectors.
|
||||
---
|
||||
CONFIG/include/atlconf.h | 14 ++++----
|
||||
CONFIG/src/Makefile | 6 ++++
|
||||
CONFIG/src/atlcomp.txt | 4 +++
|
||||
CONFIG/src/backend/Make.ext | 4 ++-
|
||||
CONFIG/src/backend/archinfo_linux.c | 3 +-
|
||||
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
|
||||
CONFIG/src/probe_comp.c | 3 +-
|
||||
include/atlas_prefetch.h | 3 +-
|
||||
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
|
||||
9 files changed, 91 insertions(+), 11 deletions(-)
|
||||
create mode 100644 CONFIG/src/backend/probe_vxz2.c
|
||||
|
||||
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
|
||||
index e51d56d..3828fdb 100644
|
||||
--- a/CONFIG/include/atlconf.h
|
||||
+++ b/CONFIG/include/atlconf.h
|
||||
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
* Corei3EP: v3 Haswell, E5-26XX
|
||||
* Corei4: skylake
|
||||
*/
|
||||
-#define NMACH 62
|
||||
+#define NMACH 63
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
|
||||
"POWER6", "POWER7", "POWER8", "POWERe6500",
|
||||
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
|
||||
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
|
||||
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
|
||||
"ARM64xgene1", "ARM64a53", "ARM64a57"};
|
||||
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
|
||||
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
|
||||
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
|
||||
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
#define MachIsARM64(mach_) \
|
||||
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
|
||||
#define MachIsS390(mach_) \
|
||||
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
|
||||
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
|
||||
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
|
||||
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
|
||||
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
|
||||
|
||||
-#define NISA 15
|
||||
+#define NISA 16
|
||||
static char *ISAXNAM[NISA] =
|
||||
- {"", "VSX", "VXZ", "AltiVec",
|
||||
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
|
||||
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
|
||||
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
|
||||
enum ISAEXT
|
||||
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
|
||||
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
|
||||
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
|
||||
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
|
||||
|
||||
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
|
||||
index 212b9d7..782a4cf 100644
|
||||
--- a/CONFIG/src/Makefile
|
||||
+++ b/CONFIG/src/Makefile
|
||||
@@ -158,6 +158,12 @@ IRun_NEON :
|
||||
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
|
||||
redir=config0.out
|
||||
- cat config0.out
|
||||
+IRun_VXZ2 :
|
||||
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
|
||||
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
|
||||
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
|
||||
+ redir=config0.out
|
||||
+ - cat config0.out
|
||||
IRun_VXZ :
|
||||
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
|
||||
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index 2ac71cf..2cfacc2 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-march=z13 -mtune=z13 -O2'
|
||||
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
|
||||
'gfortran' '-march=z13 -mtune=z13 -O2'
|
||||
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=z14 -mtune=z14 -O2'
|
||||
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
|
||||
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
|
||||
index 4743353..794babf 100644
|
||||
--- a/CONFIG/src/backend/Make.ext
|
||||
+++ b/CONFIG/src/backend/Make.ext
|
||||
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
|
||||
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
|
||||
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
|
||||
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
|
||||
- probe_vxz.c
|
||||
+ probe_vxz2.c probe_vxz.c
|
||||
|
||||
all : $(files)
|
||||
|
||||
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
|
||||
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
|
||||
probe_arm32_FPABI.c : $(basf)
|
||||
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
|
||||
+probe_vxz2.c : $(basf)
|
||||
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
|
||||
probe_vxz.c : $(basf)
|
||||
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
|
||||
probe_aff_SETAFFNP.c : $(basf)
|
||||
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
|
||||
index cdcee92..ed6f476 100644
|
||||
--- a/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ b/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
|
||||
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
|
||||
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
|
||||
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
|
||||
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
|
||||
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
|
||||
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
|
||||
free(res);
|
||||
}
|
||||
break;
|
||||
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
|
||||
new file mode 100644
|
||||
index 0000000..a69d92d
|
||||
--- /dev/null
|
||||
+++ b/CONFIG/src/backend/probe_vxz2.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+#include <vecintrin.h>
|
||||
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
|
||||
+{
|
||||
+ vector float vx, vy;
|
||||
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
|
||||
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
|
||||
+ vy += vx;
|
||||
+ z[0] = vy[0];
|
||||
+ z[1] = vy[1];
|
||||
+ z[2] = vy[2];
|
||||
+ z[3] = vy[3];
|
||||
+}
|
||||
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
|
||||
index 1652e24..857ea82 100644
|
||||
--- a/CONFIG/src/probe_comp.c
|
||||
+++ b/CONFIG/src/probe_comp.c
|
||||
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
|
||||
vp = "-mavx2 -mfma";
|
||||
else if (vecexts & (1<<ISA_VSX))
|
||||
vp = "-mvsx";
|
||||
- else if (vecexts & (1<<ISA_VXZ))
|
||||
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
|
||||
vp = "-mvx -mzvector";
|
||||
else if (vecexts & (1<<ISA_AV))
|
||||
vp = "-maltivec";
|
||||
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
|
||||
{
|
||||
case IbmZ12:
|
||||
case IbmZ13:
|
||||
+ case IbmZ14:
|
||||
case IntCorei3:
|
||||
case IntCorei4:
|
||||
case IntCorei2:
|
||||
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
|
||||
index e7988a7..fa426ac 100644
|
||||
--- a/include/atlas_prefetch.h
|
||||
+++ b/include/atlas_prefetch.h
|
||||
@@ -155,7 +155,8 @@
|
||||
#define ATL_L1LS 32
|
||||
#define ATL_L2LS 64
|
||||
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
|
||||
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
|
||||
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
|
||||
+ defined(ATL_ARCH_IbmZ14)
|
||||
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
|
||||
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
|
||||
#define ATL_GOT_L1PREFETCH
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index f171933..eb75577 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -68,6 +68,11 @@
|
||||
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
+ #elif defined(ATL_VXZ2)
|
||||
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
|
||||
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
|
||||
+ #define ATL_FRCGNUVEC
|
||||
+ #endif
|
||||
#elif defined(ATL_VXZ)
|
||||
#if ATL_VLEN != 2
|
||||
#define ATL_FRCGNUVEC
|
||||
@@ -113,6 +118,12 @@
|
||||
#else
|
||||
#define ATL_VLEN 2
|
||||
#endif
|
||||
+ #elif defined(ATL_VXZ2)
|
||||
+ #if defined(SREAL) || defined(SCPLX)
|
||||
+ #define ATL_VLEN 4
|
||||
+ #else
|
||||
+ #define ATL_VLEN 2
|
||||
+ #endif
|
||||
#elif defined(ATL_VXZ)
|
||||
#define ATL_VLEN 2
|
||||
#elif defined(ATL_NEON)
|
||||
@@ -376,6 +387,48 @@
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#endif
|
||||
+#elif defined(ATL_VXZ2)
|
||||
+ #include <vecintrin.h>
|
||||
+
|
||||
+ #define ATL_VPERMI(s_, t_, i_) \
|
||||
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
|
||||
+
|
||||
+ #if defined(SREAL) || defined(SCPLX)
|
||||
+ #define ATL_VTYPE vector float
|
||||
+ #if ATL_VLEN != 4
|
||||
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
|
||||
+ #endif
|
||||
+ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
|
||||
+ { ATL_VTYPE t0_, t1_; \
|
||||
+ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
|
||||
+ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
|
||||
+ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
|
||||
+ }
|
||||
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
|
||||
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
|
||||
+ #else /* double precision */
|
||||
+ #define ATL_VTYPE vector double
|
||||
+ #if ATL_VLEN != 2
|
||||
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
|
||||
+ #endif
|
||||
+ #define ATL_vvrsum1(s0_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
|
||||
+ #define ATL_vvrsum2(s0_, s1_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
|
||||
+ #endif
|
||||
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
|
||||
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
|
||||
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
|
||||
+ #define ATL_vcopy(d_, s_) d_ = s_
|
||||
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
|
||||
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
|
||||
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
|
||||
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
|
||||
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
|
||||
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#elif defined(ATL_VXZ)
|
||||
#include <vecintrin.h>
|
||||
|
||||
--
|
||||
2.23.0
|
||||
|
265
SOURCES/0007-Enable-cross-compile.patch
Normal file
265
SOURCES/0007-Enable-cross-compile.patch
Normal file
@ -0,0 +1,265 @@
|
||||
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 29 May 2019 17:51:34 +0200
|
||||
Subject: [PATCH 7/8] Enable "cross-compile"
|
||||
|
||||
This adds support for building ATLAS without running any target code. In
|
||||
order for this to work, the archdefs must contain some additional files
|
||||
that would otherwise be built during various tuning steps; see the new
|
||||
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
|
||||
|
||||
Even if the archdefs contain these additional files, cross compilation
|
||||
is *not* automatically enabled. To activate it and disable tuning at
|
||||
build time, add the option "-Si archdef 2" when running "configure".
|
||||
---
|
||||
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
|
||||
bin/atlas_install.c | 2 ++
|
||||
makes/Make.aux | 10 +++++-----
|
||||
makes/Make.bin | 22 ++++++++++++++++++++++
|
||||
makes/Make.l3tune | 6 ++++++
|
||||
makes/Make.sysinfo | 8 +++++++-
|
||||
6 files changed, 66 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
|
||||
index 321e05c..e61b5a0 100644
|
||||
--- a/CONFIG/ARCHS/Makefile
|
||||
+++ b/CONFIG/ARCHS/Makefile
|
||||
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
|
||||
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
|
||||
rm -f xnegflt
|
||||
archput : sys_put kern_put gemm_put la_put
|
||||
+
|
||||
+ifdef ATL_NOTUNE
|
||||
+
|
||||
+# To avoid tuning, some extra files are needed.
|
||||
+
|
||||
+extra_get :
|
||||
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
|
||||
+
|
||||
+extra_put :
|
||||
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
|
||||
+
|
||||
+ArchNew : extra_get
|
||||
+
|
||||
+archput : extra_put
|
||||
+
|
||||
+endif
|
||||
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
|
||||
index de3eb3a..3c811e6 100644
|
||||
--- a/bin/atlas_install.c
|
||||
+++ b/bin/atlas_install.c
|
||||
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
|
||||
ATL_Cassert(system("make IBozoL1.grd\n")==0,
|
||||
"USING BOZO L1 DEFAULTS", NULL);
|
||||
}
|
||||
+ if (ARCHDEF >= 2)
|
||||
+ setenv("ATL_NOTUNE", "1", 1);
|
||||
if (ARCHDEF)
|
||||
DefInstall = !system("make IArchDef.grd\n");
|
||||
|
||||
diff --git a/makes/Make.aux b/makes/Make.aux
|
||||
index 1f769c8..c793028 100644
|
||||
--- a/makes/Make.aux
|
||||
+++ b/makes/Make.aux
|
||||
@@ -113,23 +113,23 @@ clean :
|
||||
|
||||
$(ATLFWAIT) :
|
||||
cd $(BINdir) ; $(MAKE) xatlas_waitfile
|
||||
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
|
||||
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
|
||||
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
|
||||
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
|
||||
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
|
||||
|
||||
diff --git a/makes/Make.bin b/makes/Make.bin
|
||||
index 1035cb9..acad578 100644
|
||||
--- a/makes/Make.bin
|
||||
+++ b/makes/Make.bin
|
||||
@@ -163,7 +163,9 @@ IRunMADef :
|
||||
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
|
||||
|
||||
IRunMMDef :
|
||||
+ifndef ATL_NOTUNE
|
||||
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
|
||||
+endif
|
||||
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
|
||||
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
|
||||
IKillL1 : force_build
|
||||
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
|
||||
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
|
||||
|
||||
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
|
||||
+else
|
||||
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
|
||||
+endif
|
||||
$(R1Tdir)/res/$(pre)R1K.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
|
||||
+else
|
||||
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
|
||||
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
|
||||
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
|
||||
$(R1Tdir)/res/$(pre)R2K.sum
|
||||
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
|
||||
+ifndef ATL_NOTUNE
|
||||
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
|
||||
+else
|
||||
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
|
||||
+endif
|
||||
|
||||
$(MVTdir)/res/$(pre)MVNK.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
|
||||
+else
|
||||
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
|
||||
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
|
||||
$(MVTdir)/res/$(pre)MVTK.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
|
||||
+else
|
||||
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
|
||||
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
|
||||
|
||||
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
|
||||
index eaf7d7d..cd7f5f1 100644
|
||||
--- a/makes/Make.l3tune
|
||||
+++ b/makes/Make.l3tune
|
||||
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
|
||||
cp $(strsmXover) res/.
|
||||
|
||||
stsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(strsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) slib
|
||||
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
|
||||
@@ -128,6 +129,7 @@ stsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) slib
|
||||
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
|
||||
|
||||
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
|
||||
cp $(dtrsmXover) res/.
|
||||
|
||||
dtsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(dtrsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) dlib
|
||||
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
|
||||
@@ -148,6 +151,7 @@ dtsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) dlib
|
||||
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
|
||||
|
||||
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
|
||||
cp $(qtrsmXover) res/.
|
||||
|
||||
qtsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(qtrsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) qlib
|
||||
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
|
||||
@@ -168,6 +173,7 @@ qtsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) qlib
|
||||
|
||||
$(pre)tsmfc.o : force_build
|
||||
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
|
||||
index 2b7dfdc..8e5dab2 100644
|
||||
--- a/makes/Make.sysinfo
|
||||
+++ b/makes/Make.sysinfo
|
||||
@@ -5,6 +5,7 @@ maxlat=6
|
||||
mflop=200
|
||||
flags=
|
||||
|
||||
+ifndef ATL_NOTUNE
|
||||
sTestFlags : force_build
|
||||
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
|
||||
|
||||
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
|
||||
cp res/atlas_?lamch.h $(INCAdir)/.
|
||||
RunTyp: xemit_typ
|
||||
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
|
||||
+endif
|
||||
|
||||
xemit_buildinfo : emit_buildinfo.o
|
||||
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
|
||||
xsyssum : GetSysSum.o
|
||||
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
|
||||
|
||||
+ifndef ATL_NOTUNE
|
||||
xL1 : time.o L1CacheSize.o
|
||||
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
|
||||
|
||||
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
|
||||
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
|
||||
xmasrch : $(mySRCdir)/masrch.c
|
||||
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
|
||||
+endif
|
||||
|
||||
ATL_cputime.c :
|
||||
cp $(mySRCdir)/ATL_cputime.c .
|
||||
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
|
||||
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
|
||||
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
|
||||
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
|
||||
+
|
||||
+ifndef ATL_NOTUNE
|
||||
time.o : $(mySRCdir)/time.c
|
||||
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
|
||||
emit_lamch.o : $(mySRCdir)/emit_lamch.c
|
||||
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
|
||||
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
|
||||
tlb.o : $(mySRCdir)/tlb.c
|
||||
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
|
||||
-
|
||||
+endif
|
||||
|
||||
|
||||
force_build :
|
||||
--
|
||||
2.23.0
|
||||
|
105
SOURCES/0008-Add-IBM-z15-support.patch
Normal file
105
SOURCES/0008-Add-IBM-z15-support.patch
Normal file
@ -0,0 +1,105 @@
|
||||
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Thu, 26 Mar 2020 17:14:49 +0100
|
||||
Subject: [PATCH 8/8] Add IBM z15 support
|
||||
|
||||
Add support for specifying "IBMz15" as target architecture.
|
||||
---
|
||||
CONFIG/include/atlconf.h | 8 ++++----
|
||||
CONFIG/src/atlcomp.txt | 4 ++++
|
||||
CONFIG/src/backend/archinfo_linux.c | 1 +
|
||||
CONFIG/src/probe_comp.c | 1 +
|
||||
include/atlas_prefetch.h | 2 +-
|
||||
5 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
|
||||
index 3828fdb..382601f 100644
|
||||
--- a/CONFIG/include/atlconf.h
|
||||
+++ b/CONFIG/include/atlconf.h
|
||||
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
* Corei3EP: v3 Haswell, E5-26XX
|
||||
* Corei4: skylake
|
||||
*/
|
||||
-#define NMACH 63
|
||||
+#define NMACH 64
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
|
||||
"POWER6", "POWER7", "POWER8", "POWERe6500",
|
||||
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
|
||||
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
|
||||
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
|
||||
"ARM64xgene1", "ARM64a53", "ARM64a57"};
|
||||
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
|
||||
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
|
||||
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
|
||||
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
#define MachIsARM64(mach_) \
|
||||
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
|
||||
#define MachIsS390(mach_) \
|
||||
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
|
||||
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
|
||||
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index 2cfacc2..acb2c83 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-march=z14 -mtune=z14 -O2'
|
||||
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
|
||||
'gfortran' '-march=z14 -mtune=z14 -O2'
|
||||
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
|
||||
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
|
||||
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
|
||||
index ed6f476..934a005 100644
|
||||
--- a/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ b/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
|
||||
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
|
||||
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
|
||||
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
|
||||
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
|
||||
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
|
||||
free(res);
|
||||
}
|
||||
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
|
||||
index 857ea82..88bb25e 100644
|
||||
--- a/CONFIG/src/probe_comp.c
|
||||
+++ b/CONFIG/src/probe_comp.c
|
||||
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
|
||||
case IbmZ12:
|
||||
case IbmZ13:
|
||||
case IbmZ14:
|
||||
+ case IbmZ15:
|
||||
case IntCorei3:
|
||||
case IntCorei4:
|
||||
case IntCorei2:
|
||||
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
|
||||
index fa426ac..583f19d 100644
|
||||
--- a/include/atlas_prefetch.h
|
||||
+++ b/include/atlas_prefetch.h
|
||||
@@ -156,7 +156,7 @@
|
||||
#define ATL_L2LS 64
|
||||
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
|
||||
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
|
||||
- defined(ATL_ARCH_IbmZ14)
|
||||
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
|
||||
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
|
||||
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
|
||||
#define ATL_GOT_L1PREFETCH
|
||||
--
|
||||
2.23.0
|
||||
|
47
SOURCES/README.dist
Normal file
47
SOURCES/README.dist
Normal file
@ -0,0 +1,47 @@
|
||||
Notes on the packaged version of ATLAS
|
||||
|
||||
by Quentin Spencer
|
||||
updated: October 4, 2005
|
||||
|
||||
updated by Deji Akingunola
|
||||
October 15, 2008
|
||||
|
||||
updated by Deji Akingunola
|
||||
June 15, 2011
|
||||
|
||||
updated by Frantisek Kluknavsky
|
||||
Nov 20, 2012
|
||||
|
||||
Because ATLAS relies on compile-time optimizations to obtain improved
|
||||
performance over BLAS and LAPACK, the resulting binaries are closely
|
||||
tied to the hardware on which they are compiled, and can likely result
|
||||
in very poor performance on other hardware. For this reason,
|
||||
including a package like ATLAS in Fedora requires some compromises.
|
||||
Optimizing ATLAS for the most modern hardware can result in
|
||||
significant performance penalties for users using the same package on
|
||||
older hardware. A binary ATLAS package must perform reasonably well on the
|
||||
entire range of hardware on which it could potentially be installed.
|
||||
|
||||
The result is a set of libraries that will not
|
||||
necessarily achieve optimal performance on any given hardware but
|
||||
should still offer significant performance gains over the reference
|
||||
BLAS and LAPACK libraries on most hardware.
|
||||
|
||||
In addition to the base 32-bit build, subpackages are built for SSE, SSE2,
|
||||
and SSE3 ix86 extensions.
|
||||
|
||||
On 64-bit x86 systems the default atlas package is built with SSE3
|
||||
optimization.
|
||||
|
||||
This packaging allows installation of multiple different atlas sub-packages
|
||||
at the same time. The alternatives system (read 'man alternatives' for usage)
|
||||
is used in the -devel subpackages to select the appropriate location for the
|
||||
architecture-dependent header files.
|
||||
|
||||
For users who want optimal performance on
|
||||
particular hardware, custom RPMs can be built from the source package
|
||||
by setting the RPM macro "enable_native_atlas" to a value of 1. This
|
||||
can be done from the command line as in the following example:
|
||||
|
||||
rpmbuild -D "enable_native_atlas 1" --rebuild atlas-3.8.3-1.src.rpm
|
||||
|
14
SOURCES/atlas-genparse.patch
Normal file
14
SOURCES/atlas-genparse.patch
Normal file
@ -0,0 +1,14 @@
|
||||
diff --git a/include/atlas_genparse.h b/include/atlas_genparse.h
|
||||
index 909a38e..1e6d153 100644
|
||||
--- a/include/atlas_genparse.h
|
||||
+++ b/include/atlas_genparse.h
|
||||
@@ -163,7 +163,8 @@ static int GetDoubleArr(char *str, int N, double *d)
|
||||
if (!str)
|
||||
break;
|
||||
str++;
|
||||
- assert(sscanf(str, "%le", d+i) == 1);
|
||||
+ if (sscanf(str, "%le", d+i) != 1)
|
||||
+ break;
|
||||
i++;
|
||||
}
|
||||
return(i);
|
12
SOURCES/atlas-getri.patch
Normal file
12
SOURCES/atlas-getri.patch
Normal file
@ -0,0 +1,12 @@
|
||||
diff --git a/src/testing/ATL_f77getri.c b/src/testing/ATL_f77getri.c
|
||||
index 2cc576c..7ff8eba 100644
|
||||
--- a/src/testing/ATL_f77getri.c
|
||||
+++ b/src/testing/ATL_f77getri.c
|
||||
@@ -97,7 +97,6 @@ int f77getri(const enum ATLAS_ORDER Order, const int N, TYPE *A, const int lda,
|
||||
#ifdef ATL_FunkyInts
|
||||
*lwork = F77lwork;
|
||||
for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] + 1;
|
||||
- free(F77ipiv);
|
||||
#else
|
||||
for (i=0; i < MN; i++) ipiv[i]++;
|
||||
#endif
|
16
SOURCES/atlas-melf.patch
Normal file
16
SOURCES/atlas-melf.patch
Normal file
@ -0,0 +1,16 @@
|
||||
diff --git a/CONFIG/src/SpewMakeInc.c b/CONFIG/src/SpewMakeInc.c
|
||||
index eed259e..65d68a1 100644
|
||||
--- a/CONFIG/src/SpewMakeInc.c
|
||||
+++ b/CONFIG/src/SpewMakeInc.c
|
||||
@@ -764,9 +764,9 @@ int main(int nargs, char **args)
|
||||
else
|
||||
{
|
||||
if (ptrbits == 32)
|
||||
- fprintf(fpout, " -melf_i386");
|
||||
+ fprintf(fpout, " -Wl,-melf_i386");
|
||||
else if (ptrbits == 64)
|
||||
- fprintf(fpout, " -melf_x86_64");
|
||||
+ fprintf(fpout, " -Wl,-melf_x86_64");
|
||||
if (OS == OSFreeBSD)
|
||||
fprintf(fpout, "_fbsd");
|
||||
}
|
40
SOURCES/atlas-shared_libraries.patch
Normal file
40
SOURCES/atlas-shared_libraries.patch
Normal file
@ -0,0 +1,40 @@
|
||||
From 3119c671c566761a79ac98405cb619892acde3e8 Mon Sep 17 00:00:00 2001
|
||||
From: Lukas Slebodnik <lslebodn@redhat.com>
|
||||
Date: Fri, 20 Sep 2013 09:26:58 +0200
|
||||
Subject: [PATCH] atlas-shared_libraries
|
||||
|
||||
---
|
||||
ATLAS/makes/Make.lib | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/ATLAS/makes/Make.lib b/ATLAS/makes/Make.lib
|
||||
index ab1eb9963d36678972a0a410905169aaa563dc64..27c6e316b442e09b0f46afac7940aaa11e25e45c 100644
|
||||
--- a/ATLAS/makes/Make.lib
|
||||
+++ b/ATLAS/makes/Make.lib
|
||||
@@ -4,6 +4,8 @@ mySRCdir = $(SRCdir)/lib
|
||||
#
|
||||
# override with libatlas.so only when atlas is built to one lib
|
||||
#
|
||||
+so_ver_major=3
|
||||
+so_ver = $(so_ver_major).10
|
||||
DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so
|
||||
PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so
|
||||
CDYNlibs = liblapack.so libcblas.so libatlas.so
|
||||
@@ -116,9 +116,12 @@ LDTRY:
|
||||
-rpath-link $(LIBINSTdir) \
|
||||
--whole-archive $(libas) --no-whole-archive $(LIBS)
|
||||
GCCTRY:
|
||||
- $(GOODGCC) -shared -o $(outso) \
|
||||
- -Wl,"-rpath-link $(LIBINSTdir)" \
|
||||
+ $(GOODGCC) -shared -o $(outso).$(so_ver) \
|
||||
+ \
|
||||
+ -Wl,-soname,"$(outso).$(so_ver_major)" \
|
||||
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
|
||||
+ ln -s $(outso).$(so_ver) $(outso).$(so_ver_major)
|
||||
+ ln -s $(outso).$(so_ver) $(outso)
|
||||
GCCTRY_norp:
|
||||
$(GOODGCC) -shared -o $(outso) \
|
||||
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
|
||||
--
|
||||
1.8.3.1
|
||||
|
12
SOURCES/atlas-throttling.patch
Normal file
12
SOURCES/atlas-throttling.patch
Normal file
@ -0,0 +1,12 @@
|
||||
diff -up ATLAS/CONFIG/src/config.c.zaloha ATLAS/CONFIG/src/config.c
|
||||
--- ATLAS/CONFIG/src/config.c.zaloha 2012-10-25 11:29:02.495425989 +0200
|
||||
+++ ATLAS/CONFIG/src/config.c 2012-10-25 11:42:10.218216957 +0200
|
||||
@@ -711,6 +711,8 @@ int ProbePtrbits(int verb, char *targarg
|
||||
|
||||
int ProbeCPUThrottle(int verb, char *targarg, enum OSTYPE OS, enum ASMDIA asmb)
|
||||
{
|
||||
+ return 0; /* impossible to turn off cpu throttling => ignore */
|
||||
+ /* this undermines performance of compiled library */
|
||||
int i, iret;
|
||||
char *ln;
|
||||
i = strlen(targarg) + 22 + 12;
|
17
SOURCES/atlas.3.10.1-unbundle.patch
Normal file
17
SOURCES/atlas.3.10.1-unbundle.patch
Normal file
@ -0,0 +1,17 @@
|
||||
diff -up wrk/makes/Make.lib.wrk wrk/makes/Make.lib
|
||||
--- wrk/makes/Make.lib.wrk 2015-01-23 21:14:46.465494411 +0100
|
||||
+++ wrk/makes/Make.lib 2015-01-23 22:48:39.632479588 +0100
|
||||
@@ -185,11 +185,11 @@ TRYALL :
|
||||
#
|
||||
fat_ptshared : # threaded target
|
||||
$(MAKE) TRYALL outso=libtatlas.so \
|
||||
- libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a" \
|
||||
+ libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a $(SLAPACKlib)" \
|
||||
LIBINSTdir="$(LIBINSTdir)"
|
||||
fat_shared : # serial target
|
||||
$(MAKE) TRYALL outso=libsatlas.so \
|
||||
- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \
|
||||
+ libas="liblapack.a libf77blas.a libcblas.a libatlas.a $(SLAPACKlib)" \
|
||||
LIBINSTdir="$(LIBINSTdir)"
|
||||
#
|
||||
# Builds shared lib, not include fortran codes from LAPACK
|
1106
SPECS/atlas.spec
Normal file
1106
SPECS/atlas.spec
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user