diff --git a/.gitignore b/.gitignore index d55f61c..464fe71 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/v0.3.3.tar.gz +SOURCES/v0.3.10.tar.gz diff --git a/.openblas.metadata b/.openblas.metadata index ac9f8a7..d457f79 100644 --- a/.openblas.metadata +++ b/.openblas.metadata @@ -1 +1 @@ -bff159c528c1a860cee4976114d224da32d302a2 SOURCES/v0.3.3.tar.gz +cbe3fdd0e6ee235debc611d76976dac62f3ddc1c SOURCES/v0.3.10.tar.gz diff --git a/SOURCES/openblas-0.2.15-constructor.patch b/SOURCES/openblas-0.2.15-constructor.patch deleted file mode 100644 index 6b6a092..0000000 --- a/SOURCES/openblas-0.2.15-constructor.patch +++ /dev/null @@ -1,19 +0,0 @@ -diff -up OpenBLAS-0.2.15/driver/others/memory.c.priority OpenBLAS-0.2.15/driver/others/memory.c ---- OpenBLAS-0.2.15/driver/others/memory.c.priority 2015-10-27 21:44:50.000000000 +0100 -+++ OpenBLAS-0.2.15/driver/others/memory.c 2016-01-13 21:12:01.862225898 +0100 -@@ -146,8 +146,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF - #define CONSTRUCTOR __attribute__ ((constructor)) - #define DESTRUCTOR __attribute__ ((destructor)) - #else -+#if __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) - #define CONSTRUCTOR __attribute__ ((constructor(101))) - #define DESTRUCTOR __attribute__ ((destructor(101))) -+#elif __GNUC__ && INIT_PRIORITY -+#define CONSTRUCTOR __attribute__ ((constructor)) -+#define DESTRUCTOR __attribute__ ((destructor)) -+#else -+#define CONSTRUCTOR -+#define DESTRUCTOR - #endif - - #ifdef DYNAMIC_ARCH diff --git a/SOURCES/openblas-0.2.20-asmflags.patch b/SOURCES/openblas-0.3.10-asmflags.patch similarity index 91% rename from SOURCES/openblas-0.2.20-asmflags.patch rename to SOURCES/openblas-0.3.10-asmflags.patch index 239812f..16dd8db 100644 --- a/SOURCES/openblas-0.2.20-asmflags.patch +++ b/SOURCES/openblas-0.3.10-asmflags.patch @@ -1,8 +1,8 @@ diff --git a/kernel/Makefile b/kernel/Makefile -index a0a8fcd..df0669b 100644 +index 9b468a6..87df5fa 100644 --- a/kernel/Makefile +++ b/kernel/Makefile -@@ -73,22 +73,28 @@ endif +@@ -113,25 +113,32 @@ endif all : libs scabs1.$(SUFFIX): $(KERNELDIR)/$(SCABS_KERNEL) @@ -31,13 +31,18 @@ index a0a8fcd..df0669b 100644 + $(CC) -c $$CFLAGS -DF_INTERFACE $< -o $(@F) setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h + ifeq ($(USE_GEMM3M), 1) +- $(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -DUSE_GEMM3M $< -o $@ + else - $(CC) -c $(CFLAGS) $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS $< -o $@ + endif setparam$(TSUFFIX).c : setparam-ref.c - sed 's/TS/$(TSUFFIX)/g' $< > $(@F) -@@ -98,25 +104,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) +@@ -142,25 +149,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S @@ -78,10 +83,10 @@ index a0a8fcd..df0669b 100644 #ifdef DYNAMIC_ARCH clean :: diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 -index a8f9cf0..5ebfc9c 100644 +index 9707032..7835f0d 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 -@@ -495,319 +495,417 @@ XBLASOBJS += \ +@@ -522,339 +522,443 @@ XBLASOBJS += \ $(KDIR)samax_k$(TSUFFIX).$(SUFFIX) $(KDIR)samax_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAMAXKERNEL) @@ -279,7 +284,7 @@ index a8f9cf0..5ebfc9c 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE -UUSE_ABS -DUSE_MIN $< -o $@ - + ### ASUM ### $(KDIR)sasum_k$(TSUFFIX).$(SUFFIX) $(KDIR)sasum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SASUMKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -310,6 +315,38 @@ index a8f9cf0..5ebfc9c 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE $< -o $@ + ### SUM ### + $(KDIR)ssum_k$(TSUFFIX).$(SUFFIX) $(KDIR)ssum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSUMKERNEL) +- $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -UCOMPLEX -UDOUBLE $< -o $@ + + $(KDIR)dsum_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSUMKERNEL) +- $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -UCOMPLEX -DDOUBLE $< -o $@ + + $(KDIR)qsum_k$(TSUFFIX).$(SUFFIX) $(KDIR)qsum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSUMKERNEL) +- $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE $< -o $@ + + $(KDIR)csum_k$(TSUFFIX).$(SUFFIX) $(KDIR)csum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSUMKERNEL) +- $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -DCOMPLEX -UDOUBLE $< -o $@ + + $(KDIR)zsum_k$(TSUFFIX).$(SUFFIX) $(KDIR)zsum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSUMKERNEL) +- $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -DCOMPLEX -DDOUBLE $< -o $@ + + $(KDIR)xsum_k$(TSUFFIX).$(SUFFIX) $(KDIR)xsum_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSUMKERNEL) +- $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE $< -o $@ + + ### AXPY ### $(KDIR)saxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)saxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAXPYKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -950,11 +987,17 @@ index 2aeb8f0..e7c49d2 100644 + $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 -index 0664263..de28ab3 100644 +index da6c5fd..c3f92fb 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 -@@ -403,2897 +403,3837 @@ XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) - XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) +@@ -449,119 +449,150 @@ XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) + + ifeq ($(BUILD_HALF),1) + $(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + endif $(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ @@ -986,15 +1029,78 @@ index 0664263..de28ab3 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@ + + ifeq ($(BUILD_HALF), 1) + + $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + $(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) + + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s + m4 shgemmotcopy.s > shgemmotcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ + rm shgemmotcopy.s shgemmotcopy_nomacros.s + else +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + endif + + ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) + + $(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY) +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + $(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s + m4 shgemmitcopy.s > shgemmitcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ + rm shgemmitcopy.s shgemmitcopy_nomacros.s + else +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + endif + + endif + endif + $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s + m4 sgemmotcopy.s > sgemmotcopy_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ + rm sgemmotcopy.s sgemmotcopy_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ + endif + ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) @@ -1004,16 +1110,38 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) -- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ - ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s + m4 sgemmitcopy.s > sgemmitcopy_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ + rm sgemmitcopy.s sgemmitcopy_nomacros.s + else +- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ + endif + endif $(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s + m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ + rm dgemm_ncopy.s dgemm_ncopy_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ + endif $(KDIR)$(DGEMMOTCOPYOBJ) : $(KERNELDIR)/$(DGEMMOTCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ @@ -1028,12 +1156,23 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s + m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ + rm dgemm_itcopy.s dgemm_itcopy_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ - endif + endif +@@ -569,65 +600,81 @@ endif ifdef EXPRECISION $(KDIR)$(QGEMMONCOPYOBJ) : $(KERNELDIR)/$(QGEMMONCOPY) @@ -1080,9 +1219,20 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s + m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ + rm cgemm_itcopy.s cgemm_itcopy_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ + endif endif @@ -1104,12 +1254,23 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s + m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ + rm zgemm_itcopy.s zgemm_itcopy_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ - endif + endif +@@ -635,18 +682,22 @@ endif ifdef EXPRECISION $(KDIR)$(XGEMMONCOPYOBJ) : $(KERNELDIR)/$(XGEMMONCOPY) @@ -1136,17 +1297,58 @@ index 0664263..de28ab3 100644 endif - endif +@@ -654,1780 +705,2344 @@ endif $(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s + m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ + endif + + ifeq ($(BUILD_HALF), 1) + + $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s + m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s + else +- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + endif + endif $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s + m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@ + endif $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ @@ -1154,44 +1356,132 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -DXDOUBLE -UCOMPLEX $< -o $@ $(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s + m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ + rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ + endif $(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s + m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ + rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ + endif $(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s + m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ + endif $(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s + m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s +- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ + rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s + else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ + endif $(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s + m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ + rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ + endif $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s + m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ + rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ + endif $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s + m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ + rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ + endif $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s + m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s +- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ + rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s + else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ + endif $(KDIR)xgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ @@ -1216,44 +1506,132 @@ index 0664263..de28ab3 100644 ifdef USE_TRMM $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s + m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ + rm strmmkernel_ln.s strmmkernel_ln_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + endif $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s + m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ + rm strmmkernel_lt.s strmmkernel_lt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + endif $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s + m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ + rm strmmkernel_rn.s strmmkernel_rn_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + endif $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s + m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ + rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s + m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ + rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s + m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ + rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s + m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ + rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + endif $(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -1276,84 +1654,261 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s + m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ + rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s + m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ + rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s + m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ + rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + endif $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s + m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ + rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + endif $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s + m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ + rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s + m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ + rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s + m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ + rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + endif $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s + m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ + rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + endif $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s + m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ + rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s + m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ + rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s + m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ + rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + endif $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s + m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ + rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + endif $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s + m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ + rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s + m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ + rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + endif $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s + m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ + rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + endif $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s + m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ + rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + endif + else $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -1371,9 +1926,20 @@ index 0664263..de28ab3 100644 + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s + else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -1580,9 +2146,20 @@ index 0664263..de28ab3 100644 + $(CC) -c $$CFLAGS -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ $(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s + m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s +- $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) -c $$CFLAGS -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ + rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s + else - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ + endif $(KDIR)dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RN) $(DTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ @@ -3421,6 +3998,13 @@ index 0664263..de28ab3 100644 + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ + $(CC) $$PFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ + ifeq ($(BUILD_HALF),1) + $(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) +- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + endif + $(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ @@ -3446,6 +4030,32 @@ index 0664263..de28ab3 100644 + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ + $(CC) $$PFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@ + + ifeq ($(BUILD_HALF), 1) + $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) +- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + $(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY) +- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) + $(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY) +- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY) +- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + + endif + endif + $(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ @@ -3470,7 +4080,7 @@ index 0664263..de28ab3 100644 endif - $(DGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMONCOPY) + $(D cgemm_kernel_r_nomacros.s +- $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ ++ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ ++ $(CC) $$PFLAGS -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s + else +- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ + endif $(KDIR)cgemm_kernel_b$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ @@ -3689,9 +4317,20 @@ index 0664263..de28ab3 100644 + $(CC) $$PFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ $(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) -- $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + ifeq ($(OS), AIX) +- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s +- $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ ++ $(CC) $$PFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s + else +- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ ++ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ ++ $(CC) $$CFLAGS -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + endif $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -5731,7 +6370,7 @@ index 0664263..de28ab3 100644 ##### BLAS extensions ###### -@@ -3303,112 +4243,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c +@@ -3740,112 +4786,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c endif $(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN) @@ -5876,7 +6515,7 @@ index 0664263..de28ab3 100644 ifndef COMATCOPY_CN -@@ -3416,112 +4372,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c +@@ -3853,112 +4915,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c endif $(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN) @@ -6021,7 +6660,7 @@ index 0664263..de28ab3 100644 -@@ -3530,112 +4502,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c +@@ -3967,112 +5045,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c endif $(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN) @@ -6166,7 +6805,7 @@ index 0664263..de28ab3 100644 ifndef SGEADD_K -@@ -3643,26 +4631,30 @@ SGEADD_K = ../generic/geadd.c +@@ -4080,26 +5174,30 @@ SGEADD_K = ../generic/geadd.c endif $(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) diff --git a/SOURCES/openblas-0.3.10-concurrency.patch b/SOURCES/openblas-0.3.10-concurrency.patch new file mode 100644 index 0000000..124ec0e --- /dev/null +++ b/SOURCES/openblas-0.3.10-concurrency.patch @@ -0,0 +1,54 @@ +diff --git a/cpp_thread_test/cpp_thread_safety_common.h b/cpp_thread_test/cpp_thread_safety_common.h +index 60ab5bb..8005369 100644 +--- a/cpp_thread_test/cpp_thread_safety_common.h ++++ b/cpp_thread_test/cpp_thread_safety_common.h +@@ -5,6 +5,14 @@ inline void pauser(){ + std::getline(std::cin, dummy); + } + ++void FailIfThreadsAreZero(uint32_t numConcurrentThreads) { ++ if(numConcurrentThreads == 0) { ++ std::cout<<"ERROR: Invalid parameter 0 for number of concurrent calls into OpenBLAS!"<>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){ + for(uint32_t i=0; i(randomMatSize*randomMatSize); j++){ +diff --git a/cpp_thread_test/dgemm_thread_safety.cpp b/cpp_thread_test/dgemm_thread_safety.cpp +index 1c52875..104c64f 100644 +--- a/cpp_thread_test/dgemm_thread_safety.cpp ++++ b/cpp_thread_test/dgemm_thread_safety.cpp +@@ -46,6 +46,8 @@ int main(int argc, char* argv[]){ + std::cout<<"Number of concurrent calls into OpenBLAS : "<(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast(1024*1024)<<" MiB of RAM\n"< 4){ + std::cout<<"ERROR: too many arguments for thread safety tester"<(randomMatSize*randomMatSize)*numConcurrentThreads*8)+(static_cast(randomMatSize)*numConcurrentThreads*8*2))/static_cast(1024*1024)<<" MiB of RAM\n"< $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc +-ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) +- -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -- -@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc +- -@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc --ifeq ($(FC), gfortran) +-ifeq ($(F_COMPILER), GFORTRAN) - -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc -ifdef SMP +-ifeq ($(OSNAME), WINNT) +- -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +-else ifeq ($(OSNAME), Haiku) +- -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +-else - -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc +-endif -else - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -endif @@ -67,17 +89,22 @@ diff -up OpenBLAS-0.2.15/Makefile.system_lapack OpenBLAS-0.2.15/Makefile - -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -endif +-ifeq ($(BUILD_LAPACK_DEPRECATED), 1) +- -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +-endif +- -@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc - -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc -endif + @$(MAKE) -C $(NETLIB_LAPACK_DIR) large.tgz : - ifndef NOFORTRAN -diff -up OpenBLAS-0.2.15/Makefile.system.system_lapack OpenBLAS-0.2.15/Makefile.system ---- OpenBLAS-0.2.15/Makefile.system.system_lapack 2015-10-27 13:44:50.000000000 -0700 -+++ OpenBLAS-0.2.15/Makefile.system 2015-10-28 09:14:39.994350500 -0700 -@@ -9,7 +9,7 @@ ifndef TOPDIR - TOPDIR = . + ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) +diff --git a/Makefile.system b/Makefile.system +index 8d78b42..47d8eec 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -31,7 +31,7 @@ else ifeq ($(ARCH), zarch) + override ARCH=zarch endif -NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib diff --git a/SOURCES/openblas-0.3.3-tests.patch b/SOURCES/openblas-0.3.10-tests.patch similarity index 69% rename from SOURCES/openblas-0.3.3-tests.patch rename to SOURCES/openblas-0.3.10-tests.patch index ff3bd37..22ed290 100644 --- a/SOURCES/openblas-0.3.3-tests.patch +++ b/SOURCES/openblas-0.3.10-tests.patch @@ -1,19 +1,22 @@ diff --git a/Makefile b/Makefile -index d99521b..01bba2f 100644 +index ae8f7de..61f325c 100644 --- a/Makefile +++ b/Makefile -@@ -122,11 +122,11 @@ tests : +@@ -133,13 +133,13 @@ tests : ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) touch $(LIBNAME) ifndef NO_FBLAS - $(MAKE) -C test all -- $(MAKE) -C utest all + $(MAKE) -C test FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all -+ $(MAKE) -C utest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all endif +- $(MAKE) -C utest all ++ $(MAKE) -C utest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all ifndef NO_CBLAS - $(MAKE) -C ctest all + $(MAKE) -C ctest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all + ifeq ($(CPP_THREAD_SAFETY_TEST), 1) +- $(MAKE) -C cpp_thread_test all ++ $(MAKE) -C cpp_thread_test FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all + endif endif endif - diff --git a/SOURCES/openblas-0.3.10-zarch-gcc-version-detection.patch b/SOURCES/openblas-0.3.10-zarch-gcc-version-detection.patch new file mode 100644 index 0000000..e3ad2e0 --- /dev/null +++ b/SOURCES/openblas-0.3.10-zarch-gcc-version-detection.patch @@ -0,0 +1,57 @@ +diff --git a/Makefile.system b/Makefile.system +index c947a19..cbf419a 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -282,9 +282,11 @@ endif + ifeq ($(C_COMPILER), GCC) + GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) + GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) ++GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5) + GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) + GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) + GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) ++GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2) + GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) + endif + +@@ -570,20 +572,27 @@ ifeq ($(ARCH), zarch) + DYNAMIC_CORE = ZARCH_GENERIC + + # Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer +-GCC_GE_52 := $(subst 0,,$(shell expr `$(CC) -dumpversion` \>= "5.2")) ++ifeq ($(GCCVERSIONGT5), 1) ++ ZARCH_SUPPORT_Z13 := 1 ++else ifeq ($(GCCVERSIONEQ5), 1) ++ifeq ($(GCCMINORVERSIONGTEQ2), 1) ++ ZARCH_SUPPORT_Z13 := 1 ++endif ++endif + + ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release) +-RHEL_WITH_Z13 := $(subst 0,,$(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3")) ++ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1) ++ ZARCH_SUPPORT_Z13 := 1 ++endif + endif + +-ifeq ($(or $(GCC_GE_52),$(RHEL_WITH_Z13)), 1) ++ifeq ($(ZARCH_SUPPORT_Z13), 1) + DYNAMIC_CORE += Z13 + else + $(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x) + endif + +-GCC_MAJOR_GE_7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) +-ifeq ($(GCC_MAJOR_GE_7), 1) ++ifeq ($(GCCVERSIONGTEQ7), 1) + DYNAMIC_CORE += Z14 + else + $(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x) +@@ -597,7 +606,6 @@ ifneq ($(C_COMPILER), GCC) + DYNAMIC_CORE += POWER9 + endif + ifeq ($(C_COMPILER), GCC) +-GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) + ifeq ($(GCCVERSIONGT5), 1) + DYNAMIC_CORE += POWER9 + else diff --git a/SOURCES/openblas-0.3.3-izamax-s390x.patch b/SOURCES/openblas-0.3.3-izamax-s390x.patch deleted file mode 100644 index b41cee9..0000000 --- a/SOURCES/openblas-0.3.3-izamax-s390x.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/kernel/zarch/izamax.c b/kernel/zarch/izamax.c -index 216c341..6cde691 100644 ---- a/kernel/zarch/izamax.c -+++ b/kernel/zarch/izamax.c -@@ -185,7 +185,7 @@ static BLASLONG ziamax_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *maxf) { - "vsteg %%v6,%[maxf],0 \n\t" - "vmnlg %%v1,%%v5,%%v7 \n\t" - "vlgvg %[index],%%v1,0 \n\t" -- "j 3 \n\t" -+ "j 3f \n\t" - "2: \n\t" - "wfchdb %%v16,%%v26,%%v6 \n\t" - "vsel %%v1,%%v5,%%v7,%%v16 \n\t" diff --git a/SOURCES/openblas-0.3.3-noopt.patch b/SOURCES/openblas-0.3.3-noopt.patch deleted file mode 100644 index ec0b143..0000000 --- a/SOURCES/openblas-0.3.3-noopt.patch +++ /dev/null @@ -1,37 +0,0 @@ -diff --git a/lapack-netlib/INSTALL/Makefile b/lapack-netlib/INSTALL/Makefile -index 150a061..abf0843 100644 ---- a/lapack-netlib/INSTALL/Makefile -+++ b/lapack-netlib/INSTALL/Makefile -@@ -45,6 +45,3 @@ cleantest: - .SUFFIXES: .o .f - .f.o: - $(FORTRAN) $(OPTS) -c -o $@ $< -- --slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< -diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile -index 531cb51..94051a1 100644 ---- a/lapack-netlib/SRC/Makefile -+++ b/lapack-netlib/SRC/Makefile -@@ -603,10 +603,3 @@ clean: - - .F.o: - $(FORTRAN) $(OPTS) -c $< -o $@ -- --slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< -diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile -index e20004c..f168821 100644 ---- a/lapack-netlib/TESTING/MATGEN/Makefile -+++ b/lapack-netlib/TESTING/MATGEN/Makefile -@@ -95,6 +95,3 @@ cleanlib: - - .f.o: - $(FORTRAN) $(OPTS) -c -o $@ $< -- --slaran.o: slaran.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< --dlaran.o: dlaran.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< diff --git a/SOURCES/openblas-0.3.3-power9.patch b/SOURCES/openblas-0.3.3-power9.patch deleted file mode 100644 index 4b3d046..0000000 --- a/SOURCES/openblas-0.3.3-power9.patch +++ /dev/null @@ -1,57 +0,0 @@ -diff --git a/cpuid_power.c b/cpuid_power.c -index 6c7baef..388ea18 100644 ---- a/cpuid_power.c -+++ b/cpuid_power.c -@@ -56,6 +56,7 @@ - #define CPUTYPE_CELL 6 - #define CPUTYPE_PPCG4 7 - #define CPUTYPE_POWER8 8 -+#define CPUTYPE_POWER9 9 - - char *cpuname[] = { - "UNKNOWN", -@@ -66,7 +67,8 @@ char *cpuname[] = { - "POWER6", - "CELL", - "PPCG4", -- "POWER8" -+ "POWER8", -+ "POWER9" - }; - - char *lowercpuname[] = { -@@ -78,7 +80,8 @@ char *lowercpuname[] = { - "power6", - "cell", - "ppcg4", -- "power8" -+ "power8", -+ "power9" - }; - - char *corename[] = { -@@ -90,6 +93,7 @@ char *corename[] = { - "POWER6", - "CELL", - "PPCG4", -+ "POWER8", - "POWER8" - }; - -@@ -120,6 +124,7 @@ int detect(void){ - if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; - if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; - if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; -+ if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8; - if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; - if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; - -@@ -148,7 +153,7 @@ int id; - id = __asm __volatile("mfpvr %0" : "=r"(id)); - switch ( id >> 16 ) { - case 0x4e: // POWER9 -- return return CPUTYPE_POWER8; -+ return CPUTYPE_POWER8; - break; - case 0x4d: - case 0x4b: // POWER8/8E diff --git a/SPECS/openblas.spec b/SPECS/openblas.spec index ff30d9d..3e03bad 100644 --- a/SPECS/openblas.spec +++ b/SPECS/openblas.spec @@ -1,6 +1,6 @@ %bcond_with system_lapack # Version of bundled lapack -%global lapackver 3.7.0 +%global lapackver 3.9.0 # DO NOT "CLEAN UP" OR MODIFY THIS SPEC FILE WITHOUT ASKING THE # MAINTAINER FIRST! @@ -14,31 +14,29 @@ # "obsoleted" features are still kept in the spec. Name: openblas -Version: 0.3.3 -Release: 5%{?dist} +Version: 0.3.10 +Release: 1%{?dist} Summary: An optimized BLAS library based on GotoBLAS2 Group: Development/Libraries License: BSD URL: https://github.com/xianyi/OpenBLAS/ Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz # Use system lapack -Patch0: openblas-0.2.15-system_lapack.patch +Patch0: openblas-0.3.10-system-lapack.patch # Drop extra p from threaded library name -Patch1: openblas-0.2.5-libname.patch -# Don't use constructor priorities on too old architectures -Patch2: openblas-0.2.15-constructor.patch +Patch1: openblas-0.3.10-libname.patch # Supply the proper flags to the test makefile -Patch3: openblas-0.3.3-tests.patch +Patch2: openblas-0.3.10-tests.patch # Enable optimizations for all LAPACK sources -Patch4: openblas-0.3.3-noopt.patch +Patch3: openblas-0.3.10-noopt.patch # Pass ASMFLAGS to assembler compiler -Patch5: openblas-0.2.20-asmflags.patch +Patch4: openblas-0.3.10-asmflags.patch # Remove optimization pragmas on ppc64le -Patch6: openblas-0.2.20-power-optimize.patch -# Fix izamax on s390x -Patch7: openblas-0.3.3-izamax-s390x.patch -# Detect POWER9 as POWER8 -Patch8: openblas-0.3.3-power9.patch +Patch5: openblas-0.3.10-power-optimize.patch +# https://github.com/xianyi/OpenBLAS/pull/2669 +Patch6: openblas-0.3.10-zarch-gcc-version-detection.patch +# https://github.com/xianyi/OpenBLAS/pull/2672 +Patch7: openblas-0.3.10-concurrency.patch BuildRequires: gcc BuildRequires: gcc-gfortran @@ -232,18 +230,15 @@ This package contains the static libraries. tar zxf %{SOURCE0} cd OpenBLAS-%{version} %if %{with system_lapack} -%patch0 -p1 -b .system_lapack +%patch0 -p1 -b .system-lapack %endif %patch1 -p1 -b .libname -%if 0%{?rhel} == 5 -%patch2 -p1 -b .constructor -%endif -%patch3 -p1 -b .tests -%patch4 -p1 -b .noopt -%patch5 -p1 -b .asmflags -%patch6 -p1 -b .power-optimize -%patch7 -p1 -b .izamax-s390x -%patch8 -p1 -b .power9 +%patch2 -p1 -b .tests +%patch3 -p1 -b .noopt +%patch4 -p1 -b .asmflags +%patch5 -p1 -b .power-optimize +%patch6 -p1 -b .zarch-gcc-version-detection +%patch7 -p1 -b .concurrency # Fix source permissions find -name \*.f -exec chmod 644 {} \; @@ -382,7 +377,7 @@ TARGET="TARGET=POWER8 DYNAMIC_ARCH=0" TARGET="TARGET=ARMV8 DYNAMIC_ARCH=0" %endif %ifarch s390x -TARGET="TARGET=Z13 DYNAMIC_ARCH=0" +TARGET="TARGET=ZARCH_GENERIC DYNAMIC_ARCH=1" %endif %if 0%{?rhel} == 5 @@ -461,9 +456,6 @@ suffix="_power8" %ifarch aarch64 suffix="_armv8" %endif -%ifarch s390x -suffix="_z13" -%endif slibname=`basename %{buildroot}%{_libdir}/libopenblas${suffix}-*.so .so` mv %{buildroot}%{_libdir}/${slibname}.a %{buildroot}%{_libdir}/lib%{name}.a if [[ "$suffix" != "" ]]; then @@ -689,6 +681,10 @@ rm -rf %{buildroot}%{_libdir}/pkgconfig %endif %changelog +* Tue Oct 20 2020 Nikola Forró - 0.3.10-1 +- Rebase to version 0.3.10 + resolves: #1847435 + * Fri Nov 22 2019 Nikola Forró - 0.3.3-5 - Detect POWER9 as POWER8 related: #1752241