diff --git a/.gitignore b/.gitignore index 464fe71..f697ae0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/v0.3.10.tar.gz +SOURCES/v0.3.12.tar.gz diff --git a/.openblas.metadata b/.openblas.metadata index d457f79..058300c 100644 --- a/.openblas.metadata +++ b/.openblas.metadata @@ -1 +1 @@ -cbe3fdd0e6ee235debc611d76976dac62f3ddc1c SOURCES/v0.3.10.tar.gz +ae647fed597ae891a7f122b9ddc6b15d4b7e0656 SOURCES/v0.3.12.tar.gz diff --git a/SOURCES/openblas-0.3.10-concurrency.patch b/SOURCES/openblas-0.3.10-concurrency.patch deleted file mode 100644 index 124ec0e..0000000 --- a/SOURCES/openblas-0.3.10-concurrency.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff --git a/cpp_thread_test/cpp_thread_safety_common.h b/cpp_thread_test/cpp_thread_safety_common.h -index 60ab5bb..8005369 100644 ---- a/cpp_thread_test/cpp_thread_safety_common.h -+++ b/cpp_thread_test/cpp_thread_safety_common.h -@@ -5,6 +5,14 @@ inline void pauser(){ - std::getline(std::cin, dummy); - } - -+void FailIfThreadsAreZero(uint32_t numConcurrentThreads) { -+ if(numConcurrentThreads == 0) { -+ std::cout<<"ERROR: Invalid parameter 0 for number of concurrent calls into OpenBLAS!"<>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){ - for(uint32_t i=0; i(randomMatSize*randomMatSize); j++){ -diff --git a/cpp_thread_test/dgemm_thread_safety.cpp b/cpp_thread_test/dgemm_thread_safety.cpp -index 1c52875..104c64f 100644 ---- a/cpp_thread_test/dgemm_thread_safety.cpp -+++ b/cpp_thread_test/dgemm_thread_safety.cpp -@@ -46,6 +46,8 @@ int main(int argc, char* argv[]){ - std::cout<<"Number of concurrent calls into OpenBLAS : "<(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast(1024*1024)<<" MiB of RAM\n"< 4){ - std::cout<<"ERROR: too many arguments for thread safety tester"<(randomMatSize*randomMatSize)*numConcurrentThreads*8)+(static_cast(randomMatSize)*numConcurrentThreads*8*2))/static_cast(1024*1024)<<" MiB of RAM\n"<= 4) - GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) -+GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5) - GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) - GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) - GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) -+GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2) - GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) - endif - -@@ -570,20 +572,27 @@ ifeq ($(ARCH), zarch) - DYNAMIC_CORE = ZARCH_GENERIC - - # Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer --GCC_GE_52 := $(subst 0,,$(shell expr `$(CC) -dumpversion` \>= "5.2")) -+ifeq ($(GCCVERSIONGT5), 1) -+ ZARCH_SUPPORT_Z13 := 1 -+else ifeq ($(GCCVERSIONEQ5), 1) -+ifeq ($(GCCMINORVERSIONGTEQ2), 1) -+ ZARCH_SUPPORT_Z13 := 1 -+endif -+endif - - ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release) --RHEL_WITH_Z13 := $(subst 0,,$(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3")) -+ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1) -+ ZARCH_SUPPORT_Z13 := 1 -+endif - endif - --ifeq ($(or $(GCC_GE_52),$(RHEL_WITH_Z13)), 1) -+ifeq ($(ZARCH_SUPPORT_Z13), 1) - DYNAMIC_CORE += Z13 - else - $(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x) - endif - --GCC_MAJOR_GE_7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) --ifeq ($(GCC_MAJOR_GE_7), 1) -+ifeq ($(GCCVERSIONGTEQ7), 1) - DYNAMIC_CORE += Z14 - else - $(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x) -@@ -597,7 +606,6 @@ ifneq ($(C_COMPILER), GCC) - DYNAMIC_CORE += POWER9 - endif - ifeq ($(C_COMPILER), GCC) --GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) - ifeq ($(GCCVERSIONGT5), 1) - DYNAMIC_CORE += POWER9 - else diff --git a/SOURCES/openblas-0.3.10-asmflags.patch b/SOURCES/openblas-0.3.12-asmflags.patch similarity index 97% rename from SOURCES/openblas-0.3.10-asmflags.patch rename to SOURCES/openblas-0.3.12-asmflags.patch index 16dd8db..1f767a7 100644 --- a/SOURCES/openblas-0.3.10-asmflags.patch +++ b/SOURCES/openblas-0.3.12-asmflags.patch @@ -1,8 +1,8 @@ diff --git a/kernel/Makefile b/kernel/Makefile -index 9b468a6..87df5fa 100644 +index e52781c6..c925837a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile -@@ -113,25 +113,32 @@ endif +@@ -151,25 +151,32 @@ endif all : libs scabs1.$(SUFFIX): $(KERNELDIR)/$(SCABS_KERNEL) @@ -42,7 +42,7 @@ index 9b468a6..87df5fa 100644 endif setparam$(TSUFFIX).c : setparam-ref.c -@@ -142,25 +149,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) +@@ -180,25 +187,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S @@ -83,10 +83,10 @@ index 9b468a6..87df5fa 100644 #ifdef DYNAMIC_ARCH clean :: diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 -index 9707032..7835f0d 100644 +index 7ad94118..3487a9e3 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 -@@ -522,339 +522,443 @@ XBLASOBJS += \ +@@ -545,217 +545,282 @@ endif $(KDIR)samax_k$(TSUFFIX).$(SUFFIX) $(KDIR)samax_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAMAXKERNEL) @@ -432,6 +432,11 @@ index 9707032..7835f0d 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE $< -o $@ + ifeq ($(BUILD_BFLOAT16),1) + $(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL) +@@ -771,126 +836,165 @@ $(KDIR)dbf16tod_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BF16TOKERNEL) + endif + $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -635,12 +640,12 @@ index 9707032..7835f0d 100644 diff --git a/kernel/Makefile.L2 b/kernel/Makefile.L2 -index 2aeb8f0..e7c49d2 100644 +index 79399c34..6d605f82 100644 --- a/kernel/Makefile.L2 +++ b/kernel/Makefile.L2 -@@ -220,209 +220,278 @@ XBLASOBJS += \ - xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) +@@ -236,251 +236,320 @@ XBLASOBJS += \ + ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" $(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) - $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -650,7 +655,9 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ + endif + ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" $(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) - $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -660,6 +667,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ + endif $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL) - $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@ @@ -671,6 +679,8 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@ + + ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" $(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) - $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -710,6 +720,10 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + endif + + + ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) - $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ @@ -750,6 +764,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + endif $(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL) - $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ @@ -791,6 +806,9 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + + ifeq ($(BUILD_SINGLE),1) + $(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -800,6 +818,10 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ + endif + + + ifeq ($(BUILD_DOUBLE),1) $(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM) - $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@ @@ -810,6 +832,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ + endif $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@ @@ -821,6 +844,8 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@ + ifeq ($(BUILD_COMPLEX),1) + $(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM) - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -830,6 +855,9 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ + endif + + ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM) - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@ @@ -840,6 +868,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ + endif $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL) - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@ @@ -851,21 +880,29 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@ + ifeq ($(BUILD_SINGLE),1) + $(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM) - $(CC) -c $(CFLAGS) -UDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UDOUBLE $< -o $@ + endif + + ifeq ($(BUILD_DOUBLE),1) $(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM) - $(CC) -c $(CFLAGS) -DDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DDOUBLE $< -o $@ + endif $(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM) - $(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DXDOUBLE $< -o $@ + ifeq ($(BUILD_COMPLEX),1) + $(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM) - $(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -885,6 +922,9 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -UDOUBLE -DCONJ -DXCONJ $< -o $@ + endif + + ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM) - $(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@ @@ -905,6 +945,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DDOUBLE -DCONJ -DXCONJ $< -o $@ + endif $(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM) - $(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@ @@ -926,6 +967,8 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DXDOUBLE -DCONJ -DXCONJ $< -o $@ + ifeq ($(BUILD_COMPLEX),1) + $(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM) - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -945,6 +988,9 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ + endif + + ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM) - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@ @@ -965,7 +1011,7 @@ index 2aeb8f0..e7c49d2 100644 - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ - + endif $(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL) - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -986,17 +1032,18 @@ index 2aeb8f0..e7c49d2 100644 + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ + diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 -index da6c5fd..c3f92fb 100644 +index 2ba593c2..110674b2 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 -@@ -449,119 +449,150 @@ XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) +@@ -526,119 +526,150 @@ XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) - ifeq ($(BUILD_HALF),1) - $(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + ifeq ($(BUILD_BFLOAT16),1) + $(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif $(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) @@ -1030,51 +1077,55 @@ index da6c5fd..c3f92fb 100644 + $(CC) $$CFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@ - ifeq ($(BUILD_HALF), 1) + ifeq ($(BUILD_BFLOAT16), 1) - $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - $(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) + $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s +- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s +- m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@ +- rm sbgemmotcopy.s sbgemmotcopy_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s - m4 shgemmotcopy.s > shgemmotcopy_nomacros.s -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ ++ $(CC) $$CFLAGS -E -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s ++ m4 shgemmotcopy.s > shgemmotcopy_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ - rm shgemmotcopy.s shgemmotcopy_nomacros.s ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ ++ rm shgemmotcopy.s shgemmotcopy_nomacros.s else -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif - ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) + ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) - $(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY) -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - $(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY) + $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s +- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s +- m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@ +- rm sbgemmitcopy.s sbgemmitcopy_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s - m4 shgemmitcopy.s > shgemmitcopy_nomacros.s -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ ++ $(CC) $$CFLAGS -E -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s ++ m4 shgemmitcopy.s > shgemmitcopy_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ - rm shgemmitcopy.s shgemmitcopy_nomacros.s ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ ++ rm shgemmitcopy.s shgemmitcopy_nomacros.s else -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif endif @@ -1087,9 +1138,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s +- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s m4 sgemmotcopy.s > sgemmotcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1111,9 +1162,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s +- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s m4 sgemmitcopy.s > sgemmitcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1129,9 +1180,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s +- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1157,9 +1208,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s +- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1172,7 +1223,7 @@ index da6c5fd..c3f92fb 100644 endif endif -@@ -569,65 +600,81 @@ endif +@@ -646,65 +677,81 @@ endif ifdef EXPRECISION $(KDIR)$(QGEMMONCOPYOBJ) : $(KERNELDIR)/$(QGEMMONCOPY) @@ -1220,9 +1271,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s +- $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s ++ $(CC) $$CFLAGS -UDOUBLE -UCOMPLEX -E $< -o - > cgemm_itcopy.s m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1255,9 +1306,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s +- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1270,7 +1321,7 @@ index da6c5fd..c3f92fb 100644 endif endif -@@ -635,18 +682,22 @@ endif +@@ -712,18 +759,22 @@ endif ifdef EXPRECISION $(KDIR)$(XGEMMONCOPYOBJ) : $(KERNELDIR)/$(XGEMMONCOPY) @@ -1297,13 +1348,13 @@ index da6c5fd..c3f92fb 100644 endif -@@ -654,1780 +705,2344 @@ endif +@@ -731,12 +782,15 @@ endif $(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s +- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s ++ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1315,30 +1366,33 @@ index da6c5fd..c3f92fb 100644 + $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ endif - ifeq ($(BUILD_HALF), 1) + ifdef USE_DIRECT_SGEMM +@@ -750,1768 +804,2329 @@ ifeq ($(BUILD_BFLOAT16), 1) - $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) + $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s +- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s +- m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@ +- rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s - m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@ ++ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s ++ m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ + $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@ - rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s ++ rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s else -- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ +- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif endif $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s +- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s ++ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1357,9 +1411,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s +- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1373,9 +1427,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s +- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1389,9 +1443,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s +- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1405,9 +1459,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s +- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s ++ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1421,9 +1475,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s +- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1437,9 +1491,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s +- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1453,9 +1507,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s +- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1469,9 +1523,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s +- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s ++ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1507,9 +1561,9 @@ index da6c5fd..c3f92fb 100644 ifdef USE_TRMM $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1523,9 +1577,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1539,9 +1593,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1555,9 +1609,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1571,9 +1625,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1587,9 +1641,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1603,9 +1657,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1619,9 +1673,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1655,9 +1709,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1671,9 +1725,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1687,9 +1741,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1703,9 +1757,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1719,9 +1773,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1735,9 +1789,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1751,9 +1805,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1767,9 +1821,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1783,9 +1837,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1799,9 +1853,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1815,9 +1869,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1831,9 +1885,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1847,9 +1901,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1863,9 +1917,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1879,9 +1933,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1895,9 +1949,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -1927,9 +1981,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -2147,9 +2201,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s +- $(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s ++ $(CC) $$CFLAGS -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ @@ -3998,11 +4052,11 @@ index da6c5fd..c3f92fb 100644 + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ + $(CC) $$PFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@ - ifeq ($(BUILD_HALF),1) - $(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) -- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + ifeq ($(BUILD_BFLOAT16),1) + $(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) +- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif $(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) @@ -4031,27 +4085,27 @@ index da6c5fd..c3f92fb 100644 + $(CC) $$PFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@ - ifeq ($(BUILD_HALF), 1) - $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) -- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + ifeq ($(BUILD_BFLOAT16), 1) + $(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY) +- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - $(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY) -- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + $(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY) +- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) - $(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY) -- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) + $(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY) +- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY) -- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ + $(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY) +- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ ++ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif endif @@ -4080,7 +4134,7 @@ index da6c5fd..c3f92fb 100644 endif - $(D cgemm_kernel_r.s + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ -+ $(CC) $$PFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s ++ $(CC) $$PFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ @@ -4318,9 +4372,9 @@ index da6c5fd..c3f92fb 100644 $(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) ifeq ($(OS), AIX) -- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s +- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s + [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \ -+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s ++ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \ @@ -6370,7 +6424,7 @@ index da6c5fd..c3f92fb 100644 ##### BLAS extensions ###### -@@ -3740,112 +4786,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c +@@ -3824,112 +4870,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c endif $(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN) @@ -6515,7 +6569,7 @@ index da6c5fd..c3f92fb 100644 ifndef COMATCOPY_CN -@@ -3853,112 +4915,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c +@@ -3937,112 +4999,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c endif $(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN) @@ -6660,7 +6714,7 @@ index da6c5fd..c3f92fb 100644 -@@ -3967,112 +5045,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c +@@ -4051,112 +5129,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c endif $(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN) @@ -6805,7 +6859,7 @@ index da6c5fd..c3f92fb 100644 ifndef SGEADD_K -@@ -4080,26 +5174,30 @@ SGEADD_K = ../generic/geadd.c +@@ -4164,26 +5258,30 @@ SGEADD_K = ../generic/geadd.c endif $(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) @@ -6841,7 +6895,7 @@ index da6c5fd..c3f92fb 100644 + $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@ diff --git a/kernel/Makefile.LA b/kernel/Makefile.LA -index 8834271..d9c6739 100644 +index 88342718..d9c67390 100644 --- a/kernel/Makefile.LA +++ b/kernel/Makefile.LA @@ -14,38 +14,50 @@ XBLASOBJS += xneg_tcopy$(TSUFFIX).$(SUFFIX) xlaswp_ncopy$(TSUFFIX).$(SUFFIX) diff --git a/SOURCES/openblas-0.3.10-libname.patch b/SOURCES/openblas-0.3.12-libname.patch similarity index 92% rename from SOURCES/openblas-0.3.10-libname.patch rename to SOURCES/openblas-0.3.12-libname.patch index 5787ab6..d75de48 100644 --- a/SOURCES/openblas-0.3.10-libname.patch +++ b/SOURCES/openblas-0.3.12-libname.patch @@ -1,8 +1,8 @@ diff --git a/Makefile.system b/Makefile.system -index 47d8eec..c947a19 100644 +index 30d8f4cc..803219a8 100644 --- a/Makefile.system +++ b/Makefile.system -@@ -1331,16 +1331,16 @@ ifndef SMP +@@ -1425,16 +1425,16 @@ ifndef SMP LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX) LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX) else diff --git a/SOURCES/openblas-0.3.10-noopt.patch b/SOURCES/openblas-0.3.12-noopt.patch similarity index 85% rename from SOURCES/openblas-0.3.10-noopt.patch rename to SOURCES/openblas-0.3.12-noopt.patch index f331b8c..499571b 100644 --- a/SOURCES/openblas-0.3.10-noopt.patch +++ b/SOURCES/openblas-0.3.12-noopt.patch @@ -1,5 +1,5 @@ diff --git a/lapack-netlib/INSTALL/Makefile b/lapack-netlib/INSTALL/Makefile -index 1007c1b..348b8d6 100644 +index 1007c1bc..348b8d60 100644 --- a/lapack-netlib/INSTALL/Makefile +++ b/lapack-netlib/INSTALL/Makefile @@ -46,5 +46,5 @@ cleanexe: @@ -11,10 +11,10 @@ index 1007c1b..348b8d6 100644 +slamch.o: slamch.f ; $(FC) $(FFLAGS) -c -o $@ $< +dlamch.o: dlamch.f ; $(FC) $(FFLAGS) -c -o $@ $< diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile -index 9f79e20..cf47a02 100644 +index 83baac87..bc42e82d 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile -@@ -613,9 +613,9 @@ cleanobj: +@@ -643,9 +643,9 @@ cleanobj: cleanlib: rm -f $(LAPACKLIB) @@ -31,14 +31,17 @@ index 9f79e20..cf47a02 100644 +cla_wwaddw.o: cla_wwaddw.f ; $(FC) $(FFLAGS) -c -o $@ $< +zla_wwaddw.o: zla_wwaddw.f ; $(FC) $(FFLAGS) -c -o $@ $< diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile -index 87432fd..bd484e6 100644 +index e21ebd6c..8582e944 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile -@@ -97,5 +97,5 @@ cleanobj: - cleanlib: +@@ -110,8 +110,8 @@ cleanlib: rm -f $(TMGLIB) + ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) -slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< --dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +slaran.o: slaran.f ; $(FC) $(FFLAGS) -c -o $@ $< + endif + ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) +-dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +dlaran.o: dlaran.f ; $(FC) $(FFLAGS) -c -o $@ $< + endif diff --git a/SOURCES/openblas-0.3.10-power-optimize.patch b/SOURCES/openblas-0.3.12-power-optimize.patch similarity index 67% rename from SOURCES/openblas-0.3.10-power-optimize.patch rename to SOURCES/openblas-0.3.12-power-optimize.patch index b2d9766..5d82b6f 100644 --- a/SOURCES/openblas-0.3.10-power-optimize.patch +++ b/SOURCES/openblas-0.3.12-power-optimize.patch @@ -1,5 +1,5 @@ diff --git a/kernel/power/drot.c b/kernel/power/drot.c -index baeb542..daeae9f 100644 +index 951c2f9c..98788c0f 100644 --- a/kernel/power/drot.c +++ b/kernel/power/drot.c @@ -37,8 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -8,11 +8,11 @@ index baeb542..daeae9f 100644 -#pragma GCC optimize "O1" - - #if defined(POWER8) || defined(POWER9) + #if defined(POWER8) || defined(POWER9) || defined(POWER10) + #if defined(__VEC__) || defined(__ALTIVEC__) #include "drot_microk_power8.c" - #endif diff --git a/kernel/power/srot.c b/kernel/power/srot.c -index 6af813c..0a172d5 100644 +index a53342f6..73b477d9 100644 --- a/kernel/power/srot.c +++ b/kernel/power/srot.c @@ -37,8 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -21,11 +21,11 @@ index 6af813c..0a172d5 100644 -#pragma GCC optimize "O1" - - #if defined(POWER8) || defined(POWER9) + #if defined(POWER8) || defined(POWER9) || defined(POWER10) + #if defined(__VEC__) || defined(__ALTIVEC__) #include "srot_microk_power8.c" - #endif diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c -index a1b441d..3c80ba0 100644 +index 5526f4d6..3c2efbfa 100644 --- a/kernel/power/zscal.c +++ b/kernel/power/zscal.c @@ -36,8 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -34,6 +34,6 @@ index a1b441d..3c80ba0 100644 -#pragma GCC optimize "O1" - - #if defined(POWER8) || defined(POWER9) + #if defined(POWER8) || defined(POWER9) || defined(POWER10) + #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(DOUBLE) - #include "zscal_microk_power8.c" diff --git a/SOURCES/openblas-0.3.10-system-lapack.patch b/SOURCES/openblas-0.3.12-system-lapack.patch similarity index 100% rename from SOURCES/openblas-0.3.10-system-lapack.patch rename to SOURCES/openblas-0.3.12-system-lapack.patch diff --git a/SOURCES/openblas-0.3.10-tests.patch b/SOURCES/openblas-0.3.12-tests.patch similarity index 89% rename from SOURCES/openblas-0.3.10-tests.patch rename to SOURCES/openblas-0.3.12-tests.patch index 22ed290..0bd9864 100644 --- a/SOURCES/openblas-0.3.10-tests.patch +++ b/SOURCES/openblas-0.3.12-tests.patch @@ -1,8 +1,8 @@ diff --git a/Makefile b/Makefile -index ae8f7de..61f325c 100644 +index a9af62a2..2f0f11c0 100644 --- a/Makefile +++ b/Makefile -@@ -133,13 +133,13 @@ tests : +@@ -138,13 +138,13 @@ tests : ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) touch $(LIBNAME) ifndef NO_FBLAS @@ -11,7 +11,7 @@ index ae8f7de..61f325c 100644 endif - $(MAKE) -C utest all + $(MAKE) -C utest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all - ifndef NO_CBLAS + ifneq ($(NO_CBLAS), 1) - $(MAKE) -C ctest all + $(MAKE) -C ctest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all ifeq ($(CPP_THREAD_SAFETY_TEST), 1) diff --git a/SPECS/openblas.spec b/SPECS/openblas.spec index 73e99b8..d1d590f 100644 --- a/SPECS/openblas.spec +++ b/SPECS/openblas.spec @@ -14,31 +14,25 @@ # "obsoleted" features are still kept in the spec. Name: openblas -Version: 0.3.10 -Release: 2%{?dist} +Version: 0.3.12 +Release: 1%{?dist} Summary: An optimized BLAS library based on GotoBLAS2 Group: Development/Libraries License: BSD URL: https://github.com/xianyi/OpenBLAS/ Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz # Use system lapack -Patch0: openblas-0.3.10-system-lapack.patch +Patch0: openblas-0.3.12-system-lapack.patch # Drop extra p from threaded library name -Patch1: openblas-0.3.10-libname.patch +Patch1: openblas-0.3.12-libname.patch # Supply the proper flags to the test makefile -Patch2: openblas-0.3.10-tests.patch +Patch2: openblas-0.3.12-tests.patch # Enable optimizations for all LAPACK sources -Patch3: openblas-0.3.10-noopt.patch +Patch3: openblas-0.3.12-noopt.patch # Pass ASMFLAGS to assembler compiler -Patch4: openblas-0.3.10-asmflags.patch +Patch4: openblas-0.3.12-asmflags.patch # Remove optimization pragmas on ppc64le -Patch5: openblas-0.3.10-power-optimize.patch -# https://github.com/xianyi/OpenBLAS/pull/2669 -Patch6: openblas-0.3.10-zarch-gcc-version-detection.patch -# https://github.com/xianyi/OpenBLAS/pull/2672 -Patch7: openblas-0.3.10-concurrency.patch -# https://github.com/Reference-LAPACK/lapack/pull/458 -Patch8: openblas-0.3.10-lapacke-macro.patch +Patch5: openblas-0.3.12-power-optimize.patch BuildRequires: gcc BuildRequires: gcc-gfortran @@ -239,9 +233,6 @@ cd OpenBLAS-%{version} %patch3 -p1 -b .noopt %patch4 -p1 -b .asmflags %patch5 -p1 -b .power-optimize -%patch6 -p1 -b .zarch-gcc-version-detection -%patch7 -p1 -b .concurrency -%patch8 -p1 -b .lapacke-macro # Fix source permissions find -name \*.f -exec chmod 644 {} \; @@ -684,6 +675,10 @@ rm -rf %{buildroot}%{_libdir}/pkgconfig %endif %changelog +* Wed Oct 28 2020 Nikola Forró - 0.3.12-1 +- Rebase to version 0.3.12 + related: #1847435 + * Wed Oct 21 2020 Nikola Forró - 0.3.10-2 - Fix macro used in LAPACKE_zgesvdq related: #1847435