RHEL 9.0.0 Alpha bootstrap

The content of this branch was automatically imported from Fedora ELN
with the following as its source:
https://src.fedoraproject.org/rpms/atlas#bb257311175483869e229d5333830461708a4e0c
This commit is contained in:
Petr Šabata 2020-10-14 22:02:37 +02:00
parent 7aa81a9112
commit 12ed595d08
19 changed files with 2257 additions and 0 deletions

15
.gitignore vendored
View File

@ -0,0 +1,15 @@
atlas3.8.3.tar.bz2
PPRO32.tgz
K7323DNow.tgz
/atlas3.10.0.tar.bz2
/atlas3.10.1.tar.bz2
/IBMz932.tar.bz2
/IBMz964.tar.bz2
/POWER332.tar.bz2
/ARMv732NEON.tar.bz2
/lapack-3.5.0.tgz
/atlas3.10.2.tar.bz2
/POWER864LEVSXp4.tar.bz2
/IBMz1364VXZ.tar.bz2
/IBMz1464VXZ2.tar.bz2
/IBMz1564VXZ2.tar.bz2

View File

@ -0,0 +1,30 @@
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:03:52 +0100
Subject: [PATCH 1/8] Avoid c99 standard compiler
When probing for a usable GCC, the existing code already dropped path
names that contained "c89" or "c90", because these compilers don't have
the GCC extensions enabled. This patch also drops names with "c99" in
them.
---
CONFIG/src/atlconf_misc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
index 63cb1ef..fb62214 100644
--- a/CONFIG/src/atlconf_misc.c
+++ b/CONFIG/src/atlconf_misc.c
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
int i;
cmpname = NameWithoutPath(comp);
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
+ strstr(cmpname, "c99"))
{
free(cmpname);
return(0);
--
2.23.0

View File

@ -0,0 +1,38 @@
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:20:19 +0100
Subject: [PATCH 2/8] Fix -rpath-link command line options
The "-rpath-link" command line options were written in the wrong syntax,
causing errors in the build. This is fixed.
---
makes/Make.lib | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/makes/Make.lib b/makes/Make.lib
index 4ceff02..b322a32 100644
--- a/makes/Make.lib
+++ b/makes/Make.lib
@@ -47,11 +47,11 @@ cshared : fat_cshared
#
LDTRY_WIN:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
GCCTRY_norp_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
@@ -113,7 +113,7 @@ TRYALL_WIN :
#
LDTRY:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) \
+ -rpath-link=$(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
$(GOODGCC) -shared -o $(outso).$(so_ver) \

View File

@ -0,0 +1,55 @@
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 5 Dec 2018 18:59:15 +0100
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
The header file atlas_simd.h contained a syntax error and a few functional
errors that affected IBM z13. It prevented any SIMD kernels from being
compiled successfully for that platform. This is fixed. The macro
vec_madd is avoided, because some GCC versions don't implement it
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
instead.
---
include/atlas_simd.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index baee6b1..68daf79 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -69,7 +69,7 @@
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_VXZ)
- #if ATL_VLEN != 2;
+ #if ATL_VLEN != 2
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_NEON)
@@ -390,19 +390,19 @@
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
#endif
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
#define ATL_vcopy(d_, s_) d_ = s_
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
{ ATL_VTYPE t_;\
t_ = vec_splat(s0_, 1); \
- s0 += t_; \
+ s0_ += t_; \
}
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
--
2.23.0

View File

@ -0,0 +1,46 @@
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 19:44:32 +0100
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
The probing of the L1 data cache size is sometimes not reliable. This can
cause the tuning to yield varying, sub-optimal results. But on Linux the
L1 data cache size can usually be retrieved with sysconf instead, which is
faster and more reliable. Do this whenever possible.
---
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
index e62a273..dffa76e 100644
--- a/tune/sysinfo/L1CacheSize.c
+++ b/tune/sysinfo/L1CacheSize.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#define REPS 4096
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
exit(-1);
}
if (nargs > 1) MaxSize = atoi(args[1]);
- L1Size = GetL1Size(MaxSize, 1.08);
+
+#ifdef _SC_LEVEL1_DCACHE_SIZE
+ {
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
+ }
+#endif
+
+ if (!L1Size)
+ L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
--
2.23.0

View File

@ -0,0 +1,68 @@
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 20:06:27 +0100
Subject: [PATCH 5/8] Optimizations for IBM z13
Perform some optimizations for IBM z13:
- Compile with -O2 instead of -O.
- Streamline vector loads/stores.
- Define the vvrsum2 macro.
Also, use the compile option -march=z13 instead of -march=native.
---
CONFIG/src/atlcomp.txt | 8 +++-----
include/atlas_simd.h | 11 +++++------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index aa31604..2ac71cf 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
'gfortran' '-O3 -funroll-loops'
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-O3 -funroll-loops'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
- 'gcc' '-march=native -O -mvx -mzvector'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
- 'gfortran' '-march=native -O -mvx -mzvector'
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index 68daf79..f171933 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -384,8 +384,8 @@
#endif
#define ATL_VTYPE vector double
#if (defined(DREAL) || defined(DCPLX))
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
#else
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
@@ -400,10 +400,9 @@
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
- { ATL_VTYPE t_;\
- t_ = vec_splat(s0_, 1); \
- s0_ += t_; \
- }
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
--
2.23.0

View File

@ -0,0 +1,276 @@
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 25 Mar 2020 20:11:19 +0100
Subject: [PATCH 6/8] Add IBM z14 support
Add general support for IBM z14. Also detect and handle the vector
enhancements facility 1, which specifically adds single-precision FP
arithmetic for vectors.
---
CONFIG/include/atlconf.h | 14 ++++----
CONFIG/src/Makefile | 6 ++++
CONFIG/src/atlcomp.txt | 4 +++
CONFIG/src/backend/Make.ext | 4 ++-
CONFIG/src/backend/archinfo_linux.c | 3 +-
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
CONFIG/src/probe_comp.c | 3 +-
include/atlas_prefetch.h | 3 +-
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
9 files changed, 91 insertions(+), 11 deletions(-)
create mode 100644 CONFIG/src/backend/probe_vxz2.c
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index e51d56d..3828fdb 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 62
+#define NMACH 63
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
-#define NISA 15
+#define NISA 16
static char *ISAXNAM[NISA] =
- {"", "VSX", "VXZ", "AltiVec",
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
enum ISAEXT
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
index 212b9d7..782a4cf 100644
--- a/CONFIG/src/Makefile
+++ b/CONFIG/src/Makefile
@@ -158,6 +158,12 @@ IRun_NEON :
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
redir=config0.out
- cat config0.out
+IRun_VXZ2 :
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
+ redir=config0.out
+ - cat config0.out
IRun_VXZ :
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2ac71cf..2cfacc2 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z13 -mtune=z13 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
index 4743353..794babf 100644
--- a/CONFIG/src/backend/Make.ext
+++ b/CONFIG/src/backend/Make.ext
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
- probe_vxz.c
+ probe_vxz2.c probe_vxz.c
all : $(files)
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
probe_arm32_FPABI.c : $(basf)
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
+probe_vxz2.c : $(basf)
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
probe_vxz.c : $(basf)
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
probe_aff_SETAFFNP.c : $(basf)
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index cdcee92..ed6f476 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
break;
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
new file mode 100644
index 0000000..a69d92d
--- /dev/null
+++ b/CONFIG/src/backend/probe_vxz2.c
@@ -0,0 +1,12 @@
+#include <vecintrin.h>
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
+{
+ vector float vx, vy;
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
+ vy += vx;
+ z[0] = vy[0];
+ z[1] = vy[1];
+ z[2] = vy[2];
+ z[3] = vy[3];
+}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 1652e24..857ea82 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
vp = "-mavx2 -mfma";
else if (vecexts & (1<<ISA_VSX))
vp = "-mvsx";
- else if (vecexts & (1<<ISA_VXZ))
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
vp = "-mvx -mzvector";
else if (vecexts & (1<<ISA_AV))
vp = "-maltivec";
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
{
case IbmZ12:
case IbmZ13:
+ case IbmZ14:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index e7988a7..fa426ac 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -155,7 +155,8 @@
#define ATL_L1LS 32
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
+ defined(ATL_ARCH_IbmZ14)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index f171933..eb75577 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -68,6 +68,11 @@
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
#define ATL_FRCGNUVEC
#endif
+ #elif defined(ATL_VXZ2)
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
+ #define ATL_FRCGNUVEC
+ #endif
#elif defined(ATL_VXZ)
#if ATL_VLEN != 2
#define ATL_FRCGNUVEC
@@ -113,6 +118,12 @@
#else
#define ATL_VLEN 2
#endif
+ #elif defined(ATL_VXZ2)
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VLEN 4
+ #else
+ #define ATL_VLEN 2
+ #endif
#elif defined(ATL_VXZ)
#define ATL_VLEN 2
#elif defined(ATL_NEON)
@@ -376,6 +387,48 @@
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#endif
+#elif defined(ATL_VXZ2)
+ #include <vecintrin.h>
+
+ #define ATL_VPERMI(s_, t_, i_) \
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
+
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VTYPE vector float
+ #if ATL_VLEN != 4
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
+ #endif
+ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
+ { ATL_VTYPE t0_, t1_; \
+ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
+ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
+ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
+ }
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
+ #else /* double precision */
+ #define ATL_VTYPE vector double
+ #if ATL_VLEN != 2
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
+ #endif
+ #define ATL_vvrsum1(s0_) \
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
+ #endif
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vcopy(d_, s_) d_ = s_
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_VXZ)
#include <vecintrin.h>
--
2.23.0

View File

@ -0,0 +1,265 @@
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 29 May 2019 17:51:34 +0200
Subject: [PATCH 7/8] Enable "cross-compile"
This adds support for building ATLAS without running any target code. In
order for this to work, the archdefs must contain some additional files
that would otherwise be built during various tuning steps; see the new
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
Even if the archdefs contain these additional files, cross compilation
is *not* automatically enabled. To activate it and disable tuning at
build time, add the option "-Si archdef 2" when running "configure".
---
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
bin/atlas_install.c | 2 ++
makes/Make.aux | 10 +++++-----
makes/Make.bin | 22 ++++++++++++++++++++++
makes/Make.l3tune | 6 ++++++
makes/Make.sysinfo | 8 +++++++-
6 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
index 321e05c..e61b5a0 100644
--- a/CONFIG/ARCHS/Makefile
+++ b/CONFIG/ARCHS/Makefile
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
rm -f xnegflt
archput : sys_put kern_put gemm_put la_put
+
+ifdef ATL_NOTUNE
+
+# To avoid tuning, some extra files are needed.
+
+extra_get :
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
+
+extra_put :
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
+
+ArchNew : extra_get
+
+archput : extra_put
+
+endif
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
index de3eb3a..3c811e6 100644
--- a/bin/atlas_install.c
+++ b/bin/atlas_install.c
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
ATL_Cassert(system("make IBozoL1.grd\n")==0,
"USING BOZO L1 DEFAULTS", NULL);
}
+ if (ARCHDEF >= 2)
+ setenv("ATL_NOTUNE", "1", 1);
if (ARCHDEF)
DefInstall = !system("make IArchDef.grd\n");
diff --git a/makes/Make.aux b/makes/Make.aux
index 1f769c8..c793028 100644
--- a/makes/Make.aux
+++ b/makes/Make.aux
@@ -113,23 +113,23 @@ clean :
$(ATLFWAIT) :
cd $(BINdir) ; $(MAKE) xatlas_waitfile
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
diff --git a/makes/Make.bin b/makes/Make.bin
index 1035cb9..acad578 100644
--- a/makes/Make.bin
+++ b/makes/Make.bin
@@ -163,7 +163,9 @@ IRunMADef :
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
IRunMMDef :
+ifndef ATL_NOTUNE
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
+endif
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
IKillL1 : force_build
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
+endif
$(R1Tdir)/res/$(pre)R1K.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
$(R1Tdir)/res/$(pre)R2K.sum
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
+ifndef ATL_NOTUNE
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
+else
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
+endif
$(MVTdir)/res/$(pre)MVNK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
$(MVTdir)/res/$(pre)MVTK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
index eaf7d7d..cd7f5f1 100644
--- a/makes/Make.l3tune
+++ b/makes/Make.l3tune
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
cp $(strsmXover) res/.
stsmfc :
+ifndef ATL_NOTUNE
rm -f $(strsmXover)
cd $(L3Bdir) ; $(MAKE) slib
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
@@ -128,6 +129,7 @@ stsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) slib
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
cp $(dtrsmXover) res/.
dtsmfc :
+ifndef ATL_NOTUNE
rm -f $(dtrsmXover)
cd $(L3Bdir) ; $(MAKE) dlib
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
@@ -148,6 +151,7 @@ dtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) dlib
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
cp $(qtrsmXover) res/.
qtsmfc :
+ifndef ATL_NOTUNE
rm -f $(qtrsmXover)
cd $(L3Bdir) ; $(MAKE) qlib
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
@@ -168,6 +173,7 @@ qtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) qlib
$(pre)tsmfc.o : force_build
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
index 2b7dfdc..8e5dab2 100644
--- a/makes/Make.sysinfo
+++ b/makes/Make.sysinfo
@@ -5,6 +5,7 @@ maxlat=6
mflop=200
flags=
+ifndef ATL_NOTUNE
sTestFlags : force_build
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
cp res/atlas_?lamch.h $(INCAdir)/.
RunTyp: xemit_typ
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
+endif
xemit_buildinfo : emit_buildinfo.o
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
xsyssum : GetSysSum.o
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
+ifndef ATL_NOTUNE
xL1 : time.o L1CacheSize.o
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
xmasrch : $(mySRCdir)/masrch.c
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
+endif
ATL_cputime.c :
cp $(mySRCdir)/ATL_cputime.c .
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
+
+ifndef ATL_NOTUNE
time.o : $(mySRCdir)/time.c
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
emit_lamch.o : $(mySRCdir)/emit_lamch.c
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
tlb.o : $(mySRCdir)/tlb.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
-
+endif
force_build :
--
2.23.0

View File

@ -0,0 +1,105 @@
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu, 26 Mar 2020 17:14:49 +0100
Subject: [PATCH 8/8] Add IBM z15 support
Add support for specifying "IBMz15" as target architecture.
---
CONFIG/include/atlconf.h | 8 ++++----
CONFIG/src/atlcomp.txt | 4 ++++
CONFIG/src/backend/archinfo_linux.c | 1 +
CONFIG/src/probe_comp.c | 1 +
include/atlas_prefetch.h | 2 +-
5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index 3828fdb..382601f 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 63
+#define NMACH 64
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2cfacc2..acb2c83 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z14 -mtune=z14 -O2'
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index ed6f476..934a005 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 857ea82..88bb25e 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
case IbmZ12:
case IbmZ13:
case IbmZ14:
+ case IbmZ15:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index fa426ac..583f19d 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -156,7 +156,7 @@
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
- defined(ATL_ARCH_IbmZ14)
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
--
2.23.0

47
README.dist Normal file
View File

@ -0,0 +1,47 @@
Notes on the packaged version of ATLAS
by Quentin Spencer
updated: October 4, 2005
updated by Deji Akingunola
October 15, 2008
updated by Deji Akingunola
June 15, 2011
updated by Frantisek Kluknavsky
Nov 20, 2012
Because ATLAS relies on compile-time optimizations to obtain improved
performance over the reference BLAS and LAPACK, the resulting binaries are
closely tied to the hardware on which they are compiled and are likely to
perform very poorly on other hardware. For this reason,
including a package like ATLAS in Fedora requires some compromises.
Optimizing ATLAS for the most modern hardware can result in
significant performance penalties for users using the same package on
older hardware. A binary ATLAS package must perform reasonably well on the
entire range of hardware on which it could potentially be installed.
The result is a set of libraries that will not
necessarily achieve optimal performance on any given hardware but
should still offer significant performance gains over the reference
BLAS and LAPACK libraries on most hardware.
In addition to the base 32-bit build, subpackages are built for the SSE, SSE2,
and SSE3 ix86 extensions.
On 64-bit x86 systems the default atlas package is built with SSE3
optimization.
This packaging allows several different atlas sub-packages to be installed
at the same time. The alternatives system (read 'man alternatives' for usage)
is used in the -devel subpackages to select the appropriate location for the
architecture-dependent header files.
For users who want optimal performance on
particular hardware, custom RPMs can be built from the source package
by setting the RPM macro "enable_native_atlas" to a value of 1. This
can be done from the command line as in the following example:
rpmbuild -D "enable_native_atlas 1" --rebuild atlas-3.8.3-1.src.rpm

49
atlas-gcc10.patch Normal file
View File

@ -0,0 +1,49 @@
From 9a3e640a517926c47b5655ba0033d4f56df4a66e Mon Sep 17 00:00:00 2001
From: Jakub Martisko <jamartis@redhat.com>
Date: Wed, 22 Jan 2020 14:24:46 +0100
Subject: [PATCH] test
---
interfaces/blas/C/testing/c_dblat1.f | 4 ++--
interfaces/blas/C/testing/c_sblat1.f | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/interfaces/blas/C/testing/c_dblat1.f b/interfaces/blas/C/testing/c_dblat1.f
index 55ea989..7269601 100644
--- a/interfaces/blas/C/testing/c_dblat1.f
+++ b/interfaces/blas/C/testing/c_dblat1.f
@@ -247,11 +247,11 @@
IF (ICASE.EQ.7) THEN
* .. DNRM2TEST ..
STEMP(1) = DTRUE1(NP1)
- CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
+ CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
ELSE IF (ICASE.EQ.8) THEN
* .. DASUMTEST ..
STEMP(1) = DTRUE3(NP1)
- CALL STEST1(DASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
+ CALL STEST1(DASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
ELSE IF (ICASE.EQ.9) THEN
* .. DSCALTEST ..
CALL DSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
diff --git a/interfaces/blas/C/testing/c_sblat1.f b/interfaces/blas/C/testing/c_sblat1.f
index 1fc6dce..b97ed0b 100644
--- a/interfaces/blas/C/testing/c_sblat1.f
+++ b/interfaces/blas/C/testing/c_sblat1.f
@@ -247,11 +247,11 @@
IF (ICASE.EQ.7) THEN
* .. SNRM2TEST ..
STEMP(1) = DTRUE1(NP1)
- CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
+ CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
ELSE IF (ICASE.EQ.8) THEN
* .. SASUMTEST ..
STEMP(1) = DTRUE3(NP1)
- CALL STEST1(SASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
+ CALL STEST1(SASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
ELSE IF (ICASE.EQ.9) THEN
* .. SSCALTEST ..
CALL SSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
--
2.24.1

14
atlas-genparse.patch Normal file
View File

@ -0,0 +1,14 @@
diff --git a/include/atlas_genparse.h b/include/atlas_genparse.h
index 909a38e..1e6d153 100644
--- a/include/atlas_genparse.h
+++ b/include/atlas_genparse.h
@@ -163,7 +163,8 @@ static int GetDoubleArr(char *str, int N, double *d)
if (!str)
break;
str++;
- assert(sscanf(str, "%le", d+i) == 1);
+ if (sscanf(str, "%le", d+i) != 1)
+ break;
i++;
}
return(i);

12
atlas-getri.patch Normal file
View File

@ -0,0 +1,12 @@
diff --git a/src/testing/ATL_f77getri.c b/src/testing/ATL_f77getri.c
index 2cc576c..7ff8eba 100644
--- a/src/testing/ATL_f77getri.c
+++ b/src/testing/ATL_f77getri.c
@@ -97,7 +97,6 @@ int f77getri(const enum ATLAS_ORDER Order, const int N, TYPE *A, const int lda,
#ifdef ATL_FunkyInts
*lwork = F77lwork;
for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] + 1;
- free(F77ipiv);
#else
for (i=0; i < MN; i++) ipiv[i]++;
#endif

16
atlas-melf.patch Normal file
View File

@ -0,0 +1,16 @@
diff --git a/CONFIG/src/SpewMakeInc.c b/CONFIG/src/SpewMakeInc.c
index eed259e..65d68a1 100644
--- a/CONFIG/src/SpewMakeInc.c
+++ b/CONFIG/src/SpewMakeInc.c
@@ -764,9 +764,9 @@ int main(int nargs, char **args)
else
{
if (ptrbits == 32)
- fprintf(fpout, " -melf_i386");
+ fprintf(fpout, " -Wl,-melf_i386");
else if (ptrbits == 64)
- fprintf(fpout, " -melf_x86_64");
+ fprintf(fpout, " -Wl,-melf_x86_64");
if (OS == OSFreeBSD)
fprintf(fpout, "_fbsd");
}

View File

@ -0,0 +1,40 @@
From 3119c671c566761a79ac98405cb619892acde3e8 Mon Sep 17 00:00:00 2001
From: Lukas Slebodnik <lslebodn@redhat.com>
Date: Fri, 20 Sep 2013 09:26:58 +0200
Subject: [PATCH] atlas-shared_libraries
---
ATLAS/makes/Make.lib | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/ATLAS/makes/Make.lib b/ATLAS/makes/Make.lib
index ab1eb9963d36678972a0a410905169aaa563dc64..27c6e316b442e09b0f46afac7940aaa11e25e45c 100644
--- a/ATLAS/makes/Make.lib
+++ b/ATLAS/makes/Make.lib
@@ -4,6 +4,8 @@ mySRCdir = $(SRCdir)/lib
#
# override with libatlas.so only when atlas is built to one lib
#
+so_ver_major=3
+so_ver = $(so_ver_major).10
DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so
PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so
CDYNlibs = liblapack.so libcblas.so libatlas.so
@@ -116,9 +116,12 @@ LDTRY:
-rpath-link $(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
- $(GOODGCC) -shared -o $(outso) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ $(GOODGCC) -shared -o $(outso).$(so_ver) \
+ \
+ -Wl,-soname,"$(outso).$(so_ver_major)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
+ ln -s $(outso).$(so_ver) $(outso).$(so_ver_major)
+ ln -s $(outso).$(so_ver) $(outso)
GCCTRY_norp:
$(GOODGCC) -shared -o $(outso) \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
--
1.8.3.1

12
atlas-throttling.patch Normal file
View File

@ -0,0 +1,12 @@
diff -up ATLAS/CONFIG/src/config.c.zaloha ATLAS/CONFIG/src/config.c
--- ATLAS/CONFIG/src/config.c.zaloha 2012-10-25 11:29:02.495425989 +0200
+++ ATLAS/CONFIG/src/config.c 2012-10-25 11:42:10.218216957 +0200
@@ -711,6 +711,8 @@ int ProbePtrbits(int verb, char *targarg
int ProbeCPUThrottle(int verb, char *targarg, enum OSTYPE OS, enum ASMDIA asmb)
{
+ return 0; /* impossible to turn off cpu throttling => ignore */
+ /* this undermines performance of compiled library */
int i, iret;
char *ln;
i = strlen(targarg) + 22 + 12;

View File

@ -0,0 +1,17 @@
diff -up wrk/makes/Make.lib.wrk wrk/makes/Make.lib
--- wrk/makes/Make.lib.wrk 2015-01-23 21:14:46.465494411 +0100
+++ wrk/makes/Make.lib 2015-01-23 22:48:39.632479588 +0100
@@ -185,11 +185,11 @@ TRYALL :
#
fat_ptshared : # threaded target
$(MAKE) TRYALL outso=libtatlas.so \
- libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a" \
+ libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a $(SLAPACKlib)" \
LIBINSTdir="$(LIBINSTdir)"
fat_shared : # serial target
$(MAKE) TRYALL outso=libsatlas.so \
- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \
+ libas="liblapack.a libf77blas.a libcblas.a libatlas.a $(SLAPACKlib)" \
LIBINSTdir="$(LIBINSTdir)"
#
# Builds shared lib, not include fortran codes from LAPACK

1132
atlas.spec Normal file

File diff suppressed because it is too large Load Diff

20
sources Normal file
View File

@ -0,0 +1,20 @@
SHA512 (atlas3.10.3.tar.bz2) = bf17306f09f2aa973cb776e2c9eacfb2409ad4d95d19802e1c4e0597d0a099fccdb5eaafe273c2682a41e41a3c6fabc8bbba4ce03180cffea40ede5df1d1f56e
SHA512 (IBMz1032.tgz) = f745187d75073de461d6948489dad3abea9a67ad10ec63e021160d3f61ae5be36e94768faa0e7e6e3158b1401bf954eae1e7e6416857b652415030836c6aba3d
SHA512 (IBMz1064.tgz) = 14fbc584a8711a0292c8be0dce962bd7ac12347b2d203c2a7b0cc66ea68ac57d5b88afc6778df39efea43077fcc70c6c63db365b5b4badb879ab6900b5296094
SHA512 (IBMz1264.tar.bz2) = 54bab951a818feb08fe5c671213db80d17bfefe75a3993d80655161219f018e87125c4ccc09c701cde45fd672a9856f4fff557ffc378c5b2fe7e9c6ebc3bd1de
SHA512 (IBMz19632.tgz) = aa10213265866b3176efe1d9d204da170844573f7ab26a36551a174eab3951ccd5f54a5149f1351affc38c510162cce9e10eb2a830af32992cb3febe9e1ecafe
SHA512 (IBMz19664.tgz) = 5837d5dfd04c31c304e1f454d0148bd412ec8853c50a7c3dcee9a61529bd04c30d68a0c7aae2bfa2c393fde3582fe36f98e6f5891b271b19562491298ba600b2
SHA512 (IBMz932.tar.bz2) = 8f71140d1b30d00ed44faea71e42ab3ff24917a62670f7becdb0d861bf4e7c3c972f9601d161439a518dcc87405c74af31cdd4e2996999a5da8452cc8d2a52df
SHA512 (IBMz964.tar.bz2) = b7356e5b47615c64c9b2dd6a497f071e39d4d90f6dd42478fea1d7597cf21ea08123c480fde002aba181a2ad0eeb21acb61469c7e4b2e8961e4d72e5f86e14cc
SHA512 (PPRO32.tgz) = a30069e79f95a36b2c7125e7861218e9612bd92913db929ea98800201e7ca7d55c9a1480063c7d5a4c50fcb2b271907ce43cd9b229c694a5ee3b56561a7820e6
SHA512 (USII32.tgz) = e9d3b1f5ccd38fc364666205e33f7a927e96c3cebc35d9692cafe3b536697224f20702641f875421b200ff78774831fd5790174ef55c899e0cdb905e3ac2371f
SHA512 (USII64.tgz) = 5bd654f8b45306a18f3ad2b593ba23012909ba5ad91614de5024b80998bde832d0ddc84d2c0c0e75dd28915f3c07ec40ac9351213b24e54028fbad4d385ebcc6
SHA512 (POWER332.tar.bz2) = 95a7281dbb7a2d0897a58599577afdedba66e6e5edb73223efdeecd93b6706031139b9b58b14345449dccbf1abfa8275bc261f826c692282d14dc30320728c75
SHA512 (ARMv732NEON.tar.bz2) = 92acbdd8f7aebd841a10a13df85baa00c518dae388e1ee8dd4bc35fc461d732d2df2cfeae0a3614cea251b80a9ee6a5b49ad71ab8b36b98b70bda6d1c215c78d
SHA512 (ARMa732.tar) = 47d6564b5a439bc3778ccc79242220b236c7dc8d36e12ce6850c7e9a02e2379618322c003ba4490573c40b78227c2c3925222da4f4e5f87aab48eae192b45bb9
SHA512 (ARMa732.tar.bz2) = 8b83b59a32f18d2cd432c205efd4358b0000ce1685799f2f38a60532bc925e9cd871371d2dfd226ab8e30e830bf608f022d63bcd26f26f9fe74acab067bd4d4f
SHA512 (POWER864LEVSXp4.tar.bz2) = e2fa637061a4a4806bc091009c37ccd719c4c4051baf36ed451917e255375881fa168caa5ca296ae9c89bb28523d9015fda42a5dbc51aef4c66efbf6efd966d2
SHA512 (K7323DNow.tgz) = e1d5e4208ce454b5f5daa68663d2dd28a2bd3cc97496e4e1515df880b9ccd00bcc75bd820402c3b2bf8409f98500e43f2481fbf5dd480f7d0ba60fe2f82a1ac1
SHA512 (IBMz1364VXZ.tar.bz2) = abcd32e4e92eaee702bb9583179d7019b551c5cdc45733ef683a62627d52d002425f7eb9515c15c700160920a6cda9bdd9586a748e6bea0425958346c341481f
SHA512 (IBMz1464VXZ2.tar.bz2) = 6783ec5658d59f8a1f1270fa9845c5aad3a320b03e7b0bee7e16c0357679118bec0bf99f0ada8815620d2df17ab10c3bd91dff2454dbd4f0e6401de69944317f
SHA512 (IBMz1564VXZ2.tar.bz2) = ff522d80d758c508c71989a189442121e4be2f0309c7c9dcb87174bacef6a3c6caf2debc069311335a9c14930450e84a81f9be171e46f4a0c2da5ae0771a9b1c