import openblas-0.3.12-1.el8

This commit is contained in:
CentOS Sources 2020-10-30 06:11:18 +00:00 committed by Andrew Lukoshko
parent 5a735ea468
commit 283fe7f8c7
12 changed files with 262 additions and 334 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/v0.3.10.tar.gz
SOURCES/v0.3.12.tar.gz

View File

@ -1 +1 @@
cbe3fdd0e6ee235debc611d76976dac62f3ddc1c SOURCES/v0.3.10.tar.gz
ae647fed597ae891a7f122b9ddc6b15d4b7e0656 SOURCES/v0.3.12.tar.gz

View File

@ -1,54 +0,0 @@
diff --git a/cpp_thread_test/cpp_thread_safety_common.h b/cpp_thread_test/cpp_thread_safety_common.h
index 60ab5bb..8005369 100644
--- a/cpp_thread_test/cpp_thread_safety_common.h
+++ b/cpp_thread_test/cpp_thread_safety_common.h
@@ -5,6 +5,14 @@ inline void pauser(){
std::getline(std::cin, dummy);
}
+void FailIfThreadsAreZero(uint32_t numConcurrentThreads) {
+ if(numConcurrentThreads == 0) {
+ std::cout<<"ERROR: Invalid parameter 0 for number of concurrent calls into OpenBLAS!"<<std::endl;
+ std::cout<<"CBLAS DGEMV thread safety test FAILED!"<<std::endl;
+ exit(-1);
+ }
+}
+
void FillMatrices(std::vector<std::vector<double>>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){
for(uint32_t i=0; i<numMat; i++){
for(uint32_t j = 0; j < static_cast<uint32_t>(randomMatSize*randomMatSize); j++){
diff --git a/cpp_thread_test/dgemm_thread_safety.cpp b/cpp_thread_test/dgemm_thread_safety.cpp
index 1c52875..104c64f 100644
--- a/cpp_thread_test/dgemm_thread_safety.cpp
+++ b/cpp_thread_test/dgemm_thread_safety.cpp
@@ -46,6 +46,8 @@ int main(int argc, char* argv[]){
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
std::cout<<"This test will need "<<(static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
+
+ FailIfThreadsAreZero(numConcurrentThreads);
std::cout<<"Initializing random number generator..."<<std::flush;
std::mt19937_64 PRNG = InitPRNG();
diff --git a/cpp_thread_test/dgemv_thread_safety.cpp b/cpp_thread_test/dgemv_thread_safety.cpp
index 5411fec..20ea381 100644
--- a/cpp_thread_test/dgemv_thread_safety.cpp
+++ b/cpp_thread_test/dgemv_thread_safety.cpp
@@ -18,7 +18,7 @@ int main(int argc, char* argv[]){
uint32_t maxHwThreads = omp_get_max_threads();
if (maxHwThreads < 52)
- numConcurrentThreads = maxHwThreads -4;
+ numConcurrentThreads = maxHwThreads;
if (argc > 4){
std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
@@ -47,6 +47,8 @@ int main(int argc, char* argv[]){
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
std::cout<<"This test will need "<<((static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*8)+(static_cast<uint64_t>(randomMatSize)*numConcurrentThreads*8*2))/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
+
+ FailIfThreadsAreZero(numConcurrentThreads);
std::cout<<"Initializing random number generator..."<<std::flush;
std::mt19937_64 PRNG = InitPRNG();

View File

@ -1,13 +0,0 @@
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
index f58a5c4..4928b1b 100644
--- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
@@ -71,7 +71,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
goto exit_level_0;
}
liwork = iwork_query;
- lcwork = LAPACK_C2INT(cwork_query);
+ lcwork = LAPACK_Z2INT(cwork_query);
lrwork = (lapack_int)rwork_query;
/* Allocate memory for work arrays */
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );

View File

@ -1,57 +0,0 @@
diff --git a/Makefile.system b/Makefile.system
index c947a19..cbf419a 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -282,9 +282,11 @@ endif
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
+GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
+GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
endif
@@ -570,20 +572,27 @@ ifeq ($(ARCH), zarch)
DYNAMIC_CORE = ZARCH_GENERIC
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
-GCC_GE_52 := $(subst 0,,$(shell expr `$(CC) -dumpversion` \>= "5.2"))
+ifeq ($(GCCVERSIONGT5), 1)
+ ZARCH_SUPPORT_Z13 := 1
+else ifeq ($(GCCVERSIONEQ5), 1)
+ifeq ($(GCCMINORVERSIONGTEQ2), 1)
+ ZARCH_SUPPORT_Z13 := 1
+endif
+endif
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
-RHEL_WITH_Z13 := $(subst 0,,$(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"))
+ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
+ ZARCH_SUPPORT_Z13 := 1
+endif
endif
-ifeq ($(or $(GCC_GE_52),$(RHEL_WITH_Z13)), 1)
+ifeq ($(ZARCH_SUPPORT_Z13), 1)
DYNAMIC_CORE += Z13
else
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
endif
-GCC_MAJOR_GE_7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
-ifeq ($(GCC_MAJOR_GE_7), 1)
+ifeq ($(GCCVERSIONGTEQ7), 1)
DYNAMIC_CORE += Z14
else
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
@@ -597,7 +606,6 @@ ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
endif
ifeq ($(C_COMPILER), GCC)
-GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
ifeq ($(GCCVERSIONGT5), 1)
DYNAMIC_CORE += POWER9
else

View File

@ -1,8 +1,8 @@
diff --git a/kernel/Makefile b/kernel/Makefile
index 9b468a6..87df5fa 100644
index e52781c6..c925837a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -113,25 +113,32 @@ endif
@@ -151,25 +151,32 @@ endif
all : libs
scabs1.$(SUFFIX): $(KERNELDIR)/$(SCABS_KERNEL)
@ -42,7 +42,7 @@ index 9b468a6..87df5fa 100644
endif
setparam$(TSUFFIX).c : setparam-ref.c
@@ -142,25 +149,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
@@ -180,25 +187,32 @@ kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
@ -83,10 +83,10 @@ index 9b468a6..87df5fa 100644
#ifdef DYNAMIC_ARCH
clean ::
diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1
index 9707032..7835f0d 100644
index 7ad94118..3487a9e3 100644
--- a/kernel/Makefile.L1
+++ b/kernel/Makefile.L1
@@ -522,339 +522,443 @@ XBLASOBJS += \
@@ -545,217 +545,282 @@ endif
$(KDIR)samax_k$(TSUFFIX).$(SUFFIX) $(KDIR)samax_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAMAXKERNEL)
@ -432,6 +432,11 @@ index 9707032..7835f0d 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE $< -o $@
ifeq ($(BUILD_BFLOAT16),1)
$(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL)
@@ -771,126 +836,165 @@ $(KDIR)dbf16tod_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BF16TOKERNEL)
endif
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
- $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -635,12 +640,12 @@ index 9707032..7835f0d 100644
diff --git a/kernel/Makefile.L2 b/kernel/Makefile.L2
index 2aeb8f0..e7c49d2 100644
index 79399c34..6d605f82 100644
--- a/kernel/Makefile.L2
+++ b/kernel/Makefile.L2
@@ -220,209 +220,278 @@ XBLASOBJS += \
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
@@ -236,251 +236,320 @@ XBLASOBJS += \
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
- $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -650,7 +655,9 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
$(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
- $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -660,6 +667,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif
$(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL)
- $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@
@ -671,6 +679,8 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
$(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
- $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -710,6 +720,10 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
- $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@ -750,6 +764,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif
$(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL)
- $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@ -791,6 +806,9 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_SINGLE),1)
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
- $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -800,6 +818,10 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif
ifeq ($(BUILD_DOUBLE),1)
$(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM)
- $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@
@ -810,6 +832,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif
$(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL)
- $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@ -821,6 +844,8 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM)
- $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -830,6 +855,9 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM)
- $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@
@ -840,6 +868,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif
$(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL)
- $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@ -851,21 +880,29 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@
ifeq ($(BUILD_SINGLE),1)
$(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM)
- $(CC) -c $(CFLAGS) -UDOUBLE $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UDOUBLE $< -o $@
endif
ifeq ($(BUILD_DOUBLE),1)
$(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM)
- $(CC) -c $(CFLAGS) -DDOUBLE $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DDOUBLE $< -o $@
endif
$(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM)
- $(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DXDOUBLE $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM)
- $(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -885,6 +922,9 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -UDOUBLE -DCONJ -DXCONJ $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM)
- $(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@
@ -905,6 +945,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DDOUBLE -DCONJ -DXCONJ $< -o $@
endif
$(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM)
- $(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@
@ -926,6 +967,8 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DXDOUBLE -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM)
- $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -945,6 +988,9 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM)
- $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@
@ -965,7 +1011,7 @@ index 2aeb8f0..e7c49d2 100644
- $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif
$(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL)
- $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -986,17 +1032,18 @@ index 2aeb8f0..e7c49d2 100644
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) -c $$CFLAGS -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index da6c5fd..c3f92fb 100644
index 2ba593c2..110674b2 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -449,119 +449,150 @@ XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
@@ -526,119 +526,150 @@ XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
ifeq ($(BUILD_HALF),1)
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
ifeq ($(BUILD_BFLOAT16),1)
$(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
@ -1030,51 +1077,55 @@ index da6c5fd..c3f92fb 100644
+ $(CC) $$CFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@
ifeq ($(BUILD_HALF), 1)
ifeq ($(BUILD_BFLOAT16), 1)
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
- m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
- rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
+ $(CC) $$CFLAGS -E -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
+ m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
rm shgemmotcopy.s shgemmotcopy_nomacros.s
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
+ rm shgemmotcopy.s shgemmotcopy_nomacros.s
else
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
$(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s
- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
- m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
- rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s
m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
+ $(CC) $$CFLAGS -E -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s
+ m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
rm shgemmitcopy.s shgemmitcopy_nomacros.s
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
+ rm shgemmitcopy.s shgemmitcopy_nomacros.s
else
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
endif
@ -1087,9 +1138,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s
- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s
m4 sgemmotcopy.s > sgemmotcopy_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1111,9 +1162,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s
- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s
m4 sgemmitcopy.s > sgemmitcopy_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1129,9 +1180,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s
- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s
m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1157,9 +1208,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s
- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s
m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1172,7 +1223,7 @@ index da6c5fd..c3f92fb 100644
endif
endif
@@ -569,65 +600,81 @@ endif
@@ -646,65 +677,81 @@ endif
ifdef EXPRECISION
$(KDIR)$(QGEMMONCOPYOBJ) : $(KERNELDIR)/$(QGEMMONCOPY)
@ -1220,9 +1271,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s
- $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s
+ $(CC) $$CFLAGS -UDOUBLE -UCOMPLEX -E $< -o - > cgemm_itcopy.s
m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1255,9 +1306,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s
- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s
m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1270,7 +1321,7 @@ index da6c5fd..c3f92fb 100644
endif
endif
@@ -635,18 +682,22 @@ endif
@@ -712,18 +759,22 @@ endif
ifdef EXPRECISION
$(KDIR)$(XGEMMONCOPYOBJ) : $(KERNELDIR)/$(XGEMMONCOPY)
@ -1297,13 +1348,13 @@ index da6c5fd..c3f92fb 100644
endif
@@ -654,1780 +705,2344 @@ endif
@@ -731,12 +782,15 @@ endif
$(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s
- $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s
+ $(CC) $$CFLAGS -E -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s
m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1315,30 +1366,33 @@ index da6c5fd..c3f92fb 100644
+ $(CC) $$CFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@
endif
ifeq ($(BUILD_HALF), 1)
ifdef USE_DIRECT_SGEMM
@@ -750,1768 +804,2329 @@ ifeq ($(BUILD_BFLOAT16), 1)
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s
- $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
- m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
- rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s
m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
+ $(CC) $$CFLAGS -E -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s
+ m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
+ rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
else
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
- $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$CFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
endif
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s
- $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s
+ $(CC) $$CFLAGS -E -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s
m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1357,9 +1411,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s
- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s
m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1373,9 +1427,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s
- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s
m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1389,9 +1443,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1405,9 +1459,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s
- $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s
+ $(CC) $$CFLAGS -E -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s
m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
- $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1421,9 +1475,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s
- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s
m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1437,9 +1491,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s
- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s
m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1453,9 +1507,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s
- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s
m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1469,9 +1523,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s
- $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s
+ $(CC) $$CFLAGS -E -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s
m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
- $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1507,9 +1561,9 @@ index da6c5fd..c3f92fb 100644
ifdef USE_TRMM
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s
m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1523,9 +1577,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s
m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1539,9 +1593,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s
m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1555,9 +1609,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1571,9 +1625,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s
m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1587,9 +1641,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s
m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1603,9 +1657,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s
m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1619,9 +1673,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s
m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1655,9 +1709,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s
m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1671,9 +1725,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s
m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1687,9 +1741,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s
m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1703,9 +1757,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s
m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1719,9 +1773,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s
m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1735,9 +1789,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s
m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1751,9 +1805,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s
m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1767,9 +1821,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s
m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1783,9 +1837,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1799,9 +1853,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1815,9 +1869,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1831,9 +1885,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1847,9 +1901,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1863,9 +1917,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1879,9 +1933,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1895,9 +1949,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -1927,9 +1981,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
- $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -2147,9 +2201,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s
- $(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s
+ $(CC) $$CFLAGS -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s
m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
- $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
@ -3998,11 +4052,11 @@ index da6c5fd..c3f92fb 100644
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -UDOUBLE -UCOMPLEX $< -o $@
ifeq ($(BUILD_HALF),1)
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
ifeq ($(BUILD_BFLOAT16),1)
$(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
@ -4031,27 +4085,27 @@ index da6c5fd..c3f92fb 100644
+ $(CC) $$PFLAGS -c -DXDOUBLE -DCOMPLEX $< -o $@
ifeq ($(BUILD_HALF), 1)
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
ifeq ($(BUILD_BFLOAT16), 1)
$(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
$(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
$(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
$(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
$(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
endif
@ -4080,7 +4134,7 @@ index da6c5fd..c3f92fb 100644
endif
$(D<GEMMONCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMONCOPY)
$(DGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMONCOPY)
- $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DDOUBLE -UCOMPLEX $< -o $@
@ -4206,13 +4260,13 @@ index da6c5fd..c3f92fb 100644
endif
@@ -2436,1301 +3051,1732 @@ endif
@@ -2520,1301 +3135,1732 @@ endif
ifeq ($(BUILD_HALF), 1)
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
ifeq ($(BUILD_BFLOAT16), 1)
$(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
+ $(CC) $$PFLAGS -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
@ -4242,9 +4296,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
- $(CC) $(PFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
- $(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
+ $(CC) $$PFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
+ $(CC) $$PFLAGS -E -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
- $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
@ -4318,9 +4372,9 @@ index da6c5fd..c3f92fb 100644
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
ifeq ($(OS), AIX)
- $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
- $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
+ [[ "$<" == *.S ]] && CFLAGS="$(CFLAGS) $(ASMFLAGS)" || CFLAGS="$(CFLAGS)"; \
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
+ $(CC) $$CFLAGS -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s
- $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
+ [[ "$<" == *.S ]] && PFLAGS="$(PFLAGS) $(ASMFLAGS)" || PFLAGS="$(PFLAGS)"; \
@ -6370,7 +6424,7 @@ index da6c5fd..c3f92fb 100644
##### BLAS extensions ######
@@ -3740,112 +4786,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c
@@ -3824,112 +4870,128 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c
endif
$(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN)
@ -6515,7 +6569,7 @@ index da6c5fd..c3f92fb 100644
ifndef COMATCOPY_CN
@@ -3853,112 +4915,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c
@@ -3937,112 +4999,128 @@ COMATCOPY_CN = ../arm/zomatcopy_cn.c
endif
$(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN)
@ -6660,7 +6714,7 @@ index da6c5fd..c3f92fb 100644
@@ -3967,112 +5045,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c
@@ -4051,112 +5129,128 @@ ZOMATCOPY_CN = ../arm/zomatcopy_cn.c
endif
$(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN)
@ -6805,7 +6859,7 @@ index da6c5fd..c3f92fb 100644
ifndef SGEADD_K
@@ -4080,26 +5174,30 @@ SGEADD_K = ../generic/geadd.c
@@ -4164,26 +5258,30 @@ SGEADD_K = ../generic/geadd.c
endif
$(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
@ -6841,7 +6895,7 @@ index da6c5fd..c3f92fb 100644
+ $(CC) $$CFLAGS -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@
diff --git a/kernel/Makefile.LA b/kernel/Makefile.LA
index 8834271..d9c6739 100644
index 88342718..d9c67390 100644
--- a/kernel/Makefile.LA
+++ b/kernel/Makefile.LA
@@ -14,38 +14,50 @@ XBLASOBJS += xneg_tcopy$(TSUFFIX).$(SUFFIX) xlaswp_ncopy$(TSUFFIX).$(SUFFIX)

View File

@ -1,8 +1,8 @@
diff --git a/Makefile.system b/Makefile.system
index 47d8eec..c947a19 100644
index 30d8f4cc..803219a8 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -1331,16 +1331,16 @@ ifndef SMP
@@ -1425,16 +1425,16 @@ ifndef SMP
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
else

View File

@ -1,5 +1,5 @@
diff --git a/lapack-netlib/INSTALL/Makefile b/lapack-netlib/INSTALL/Makefile
index 1007c1b..348b8d6 100644
index 1007c1bc..348b8d60 100644
--- a/lapack-netlib/INSTALL/Makefile
+++ b/lapack-netlib/INSTALL/Makefile
@@ -46,5 +46,5 @@ cleanexe:
@ -11,10 +11,10 @@ index 1007c1b..348b8d6 100644
+slamch.o: slamch.f ; $(FC) $(FFLAGS) -c -o $@ $<
+dlamch.o: dlamch.f ; $(FC) $(FFLAGS) -c -o $@ $<
diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile
index 9f79e20..cf47a02 100644
index 83baac87..bc42e82d 100644
--- a/lapack-netlib/SRC/Makefile
+++ b/lapack-netlib/SRC/Makefile
@@ -613,9 +613,9 @@ cleanobj:
@@ -643,9 +643,9 @@ cleanobj:
cleanlib:
rm -f $(LAPACKLIB)
@ -31,14 +31,17 @@ index 9f79e20..cf47a02 100644
+cla_wwaddw.o: cla_wwaddw.f ; $(FC) $(FFLAGS) -c -o $@ $<
+zla_wwaddw.o: zla_wwaddw.f ; $(FC) $(FFLAGS) -c -o $@ $<
diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile
index 87432fd..bd484e6 100644
index e21ebd6c..8582e944 100644
--- a/lapack-netlib/TESTING/MATGEN/Makefile
+++ b/lapack-netlib/TESTING/MATGEN/Makefile
@@ -97,5 +97,5 @@ cleanobj:
cleanlib:
@@ -110,8 +110,8 @@ cleanlib:
rm -f $(TMGLIB)
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
-slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
-dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
+slaran.o: slaran.f ; $(FC) $(FFLAGS) -c -o $@ $<
endif
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
-dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
+dlaran.o: dlaran.f ; $(FC) $(FFLAGS) -c -o $@ $<
endif

View File

@ -1,5 +1,5 @@
diff --git a/kernel/power/drot.c b/kernel/power/drot.c
index baeb542..daeae9f 100644
index 951c2f9c..98788c0f 100644
--- a/kernel/power/drot.c
+++ b/kernel/power/drot.c
@@ -37,8 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -8,11 +8,11 @@ index baeb542..daeae9f 100644
-#pragma GCC optimize "O1"
-
#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(__VEC__) || defined(__ALTIVEC__)
#include "drot_microk_power8.c"
#endif
diff --git a/kernel/power/srot.c b/kernel/power/srot.c
index 6af813c..0a172d5 100644
index a53342f6..73b477d9 100644
--- a/kernel/power/srot.c
+++ b/kernel/power/srot.c
@@ -37,8 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -21,11 +21,11 @@ index 6af813c..0a172d5 100644
-#pragma GCC optimize "O1"
-
#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(__VEC__) || defined(__ALTIVEC__)
#include "srot_microk_power8.c"
#endif
diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c
index a1b441d..3c80ba0 100644
index 5526f4d6..3c2efbfa 100644
--- a/kernel/power/zscal.c
+++ b/kernel/power/zscal.c
@@ -36,8 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -34,6 +34,6 @@ index a1b441d..3c80ba0 100644
-#pragma GCC optimize "O1"
-
#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(DOUBLE)
#include "zscal_microk_power8.c"

View File

@ -1,8 +1,8 @@
diff --git a/Makefile b/Makefile
index ae8f7de..61f325c 100644
index a9af62a2..2f0f11c0 100644
--- a/Makefile
+++ b/Makefile
@@ -133,13 +133,13 @@ tests :
@@ -138,13 +138,13 @@ tests :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME)
ifndef NO_FBLAS
@ -11,7 +11,7 @@ index ae8f7de..61f325c 100644
endif
- $(MAKE) -C utest all
+ $(MAKE) -C utest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all
ifndef NO_CBLAS
ifneq ($(NO_CBLAS), 1)
- $(MAKE) -C ctest all
+ $(MAKE) -C ctest FC="$(FC)" CC="$(CC)" COMMON_OPT="$(COMMON_OPT)" FCOMMON_OPT="$(FCOMMON_OPT)" all
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)

View File

@ -14,31 +14,25 @@
# "obsoleted" features are still kept in the spec.
Name: openblas
Version: 0.3.10
Release: 2%{?dist}
Version: 0.3.12
Release: 1%{?dist}
Summary: An optimized BLAS library based on GotoBLAS2
Group: Development/Libraries
License: BSD
URL: https://github.com/xianyi/OpenBLAS/
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz
# Use system lapack
Patch0: openblas-0.3.10-system-lapack.patch
Patch0: openblas-0.3.12-system-lapack.patch
# Drop extra p from threaded library name
Patch1: openblas-0.3.10-libname.patch
Patch1: openblas-0.3.12-libname.patch
# Supply the proper flags to the test makefile
Patch2: openblas-0.3.10-tests.patch
Patch2: openblas-0.3.12-tests.patch
# Enable optimizations for all LAPACK sources
Patch3: openblas-0.3.10-noopt.patch
Patch3: openblas-0.3.12-noopt.patch
# Pass ASMFLAGS to assembler compiler
Patch4: openblas-0.3.10-asmflags.patch
Patch4: openblas-0.3.12-asmflags.patch
# Remove optimization pragmas on ppc64le
Patch5: openblas-0.3.10-power-optimize.patch
# https://github.com/xianyi/OpenBLAS/pull/2669
Patch6: openblas-0.3.10-zarch-gcc-version-detection.patch
# https://github.com/xianyi/OpenBLAS/pull/2672
Patch7: openblas-0.3.10-concurrency.patch
# https://github.com/Reference-LAPACK/lapack/pull/458
Patch8: openblas-0.3.10-lapacke-macro.patch
Patch5: openblas-0.3.12-power-optimize.patch
BuildRequires: gcc
BuildRequires: gcc-gfortran
@ -239,9 +233,6 @@ cd OpenBLAS-%{version}
%patch3 -p1 -b .noopt
%patch4 -p1 -b .asmflags
%patch5 -p1 -b .power-optimize
%patch6 -p1 -b .zarch-gcc-version-detection
%patch7 -p1 -b .concurrency
%patch8 -p1 -b .lapacke-macro
# Fix source permissions
find -name \*.f -exec chmod 644 {} \;
@ -684,6 +675,10 @@ rm -rf %{buildroot}%{_libdir}/pkgconfig
%endif
%changelog
* Wed Oct 28 2020 Nikola Forró <nforro@redhat.com> - 0.3.12-1
- Rebase to version 0.3.12
related: #1847435
* Wed Oct 21 2020 Nikola Forró <nforro@redhat.com> - 0.3.10-2
- Fix macro used in LAPACKE_zgesvdq
related: #1847435