500 lines
16 KiB
Diff
500 lines
16 KiB
Diff
From dc6ac9eab0c59bcf56c1c512c099723215609fb2 Mon Sep 17 00:00:00 2001
|
|
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Tue, 12 Feb 2019 15:33:48 +0100
|
|
Subject: [PATCH 1/4] Fix declaration of input arguments in the x86_64
|
|
s/dGEMV_T and s/dGEMV_N kernels
|
|
|
|
Arguments 0 and 1 need to be tagged as both input and output, i.e. declared as "+r" operands in the output list instead of "r" operands in the input list
|
|
---
|
|
kernel/x86_64/dgemv_n_4.c | 10 +++++-----
|
|
kernel/x86_64/dgemv_t_4.c | 18 +++++++++---------
|
|
kernel/x86_64/sgemv_n_4.c | 14 +++++++-------
|
|
kernel/x86_64/sgemv_t_4.c | 18 +++++++++---------
|
|
4 files changed, 30 insertions(+), 30 deletions(-)
|
|
|
|
diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c
|
|
index 6d2530e81..6d33641e9 100644
|
|
--- a/kernel/x86_64/dgemv_n_4.c
|
|
+++ b/kernel/x86_64/dgemv_n_4.c
|
|
@@ -111,9 +111,9 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (ap[0]), // 4
|
|
@@ -166,9 +166,9 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *a
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
:
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (ap), // 4
|
|
diff --git a/kernel/x86_64/dgemv_t_4.c b/kernel/x86_64/dgemv_t_4.c
|
|
index a7478e3a8..ed672a757 100644
|
|
--- a/kernel/x86_64/dgemv_t_4.c
|
|
+++ b/kernel/x86_64/dgemv_t_4.c
|
|
@@ -127,9 +127,9 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT *ap0, FLOAT *ap1, FLOAT *x, FLOAT
|
|
"movsd %%xmm11,8(%2) \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (y), // 2
|
|
"r" (ap0), // 3
|
|
"r" (ap1), // 4
|
|
@@ -195,9 +195,9 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
|
|
"movsd %%xmm10, (%2) \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (y), // 2
|
|
"r" (ap), // 3
|
|
"r" (x) // 4
|
|
@@ -259,9 +259,9 @@ static void add_y(BLASLONG n, FLOAT da , FLOAT *src, FLOAT *dest, BLASLONG inc_d
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (&da), // 2
|
|
"r" (src), // 3
|
|
"r" (dest) // 4
|
|
diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c
|
|
index 65305ac59..63697970f 100644
|
|
--- a/kernel/x86_64/sgemv_n_4.c
|
|
+++ b/kernel/x86_64/sgemv_n_4.c
|
|
@@ -149,9 +149,9 @@ static void sgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (ap[0]), // 4
|
|
@@ -223,9 +223,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *a
|
|
|
|
"3: \n\t"
|
|
:
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n1) // 1
|
|
:
|
|
- "r" (i), // 0
|
|
- "r" (n1), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (ap), // 4
|
|
@@ -277,9 +277,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest)
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
:
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
"r" (src), // 2
|
|
"r" (dest) // 3
|
|
: "cc",
|
|
diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c
|
|
index 065e5b385..86ecaf516 100644
|
|
--- a/kernel/x86_64/sgemv_t_4.c
|
|
+++ b/kernel/x86_64/sgemv_t_4.c
|
|
@@ -139,9 +139,9 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT *ap0, FLOAT *ap1, FLOAT *x, FLOAT
|
|
"movss %%xmm11,4(%2) \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (y), // 2
|
|
"r" (ap0), // 3
|
|
"r" (ap1), // 4
|
|
@@ -208,9 +208,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
|
|
"movss %%xmm10, (%2) \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (y), // 2
|
|
"r" (ap), // 3
|
|
"r" (x) // 4
|
|
@@ -272,9 +272,9 @@ static void add_y(BLASLONG n, FLOAT da , FLOAT *src, FLOAT *dest, BLASLONG inc_d
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (&da), // 2
|
|
"r" (src), // 3
|
|
"r" (dest) // 4
|
|
|
|
From 91481a3e4e88b26be920aff7d5c9e72ee82d6abc Mon Sep 17 00:00:00 2001
|
|
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Tue, 12 Feb 2019 15:51:43 +0100
|
|
Subject: [PATCH 2/4] Fix declaration of input arguments in the x86_64 dscal inline assembly
|
|
|
|
Argument 0 (n) is modified because it doubles as a counter, so it must be declared as an input/output ("+r") operand instead of a plain input
|
|
---
|
|
kernel/x86_64/dscal.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c
|
|
index ef9a0a6ba..d0d7801fd 100644
|
|
--- a/kernel/x86_64/dscal.c
|
|
+++ b/kernel/x86_64/dscal.c
|
|
@@ -136,8 +136,8 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
|
|
"jnz 1b \n\t"
|
|
|
|
:
|
|
+ "+r" (n) // 0
|
|
:
|
|
- "r" (n), // 0
|
|
"r" (x), // 1
|
|
"r" (x1), // 2
|
|
"r" (alpha), // 3
|
|
|
|
From b824fa70ebdd0b66ed045dbb17c08519525af782 Mon Sep 17 00:00:00 2001
|
|
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Tue, 12 Feb 2019 16:00:18 +0100
|
|
Subject: [PATCH 3/4] Fix declaration of assembly arguments in SSYMV_U and DSYMV_U
|
|
microkernels
|
|
|
|
Arguments 0 and 1 are both input and output, so declare them as "+r" operands in the output list instead of "r" operands in the input list
|
|
---
|
|
kernel/x86_64/dsymv_U_microk_bulldozer-2.c | 6 +++---
|
|
kernel/x86_64/dsymv_U_microk_haswell-2.c | 6 +++---
|
|
kernel/x86_64/dsymv_U_microk_nehalem-2.c | 6 +++---
|
|
kernel/x86_64/dsymv_U_microk_sandy-2.c | 6 +++---
|
|
kernel/x86_64/ssymv_U_microk_bulldozer-2.c | 6 +++---
|
|
kernel/x86_64/ssymv_U_microk_haswell-2.c | 6 +++---
|
|
kernel/x86_64/ssymv_U_microk_nehalem-2.c | 6 +++---
|
|
kernel/x86_64/ssymv_U_microk_sandy-2.c | 6 +++---
|
|
8 files changed, 24 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/kernel/x86_64/dsymv_U_microk_bulldozer-2.c b/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
|
|
index d7166fe4b..ae287b6d8 100644
|
|
--- a/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
|
|
+++ b/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
|
|
@@ -106,9 +106,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vmovsd %%xmm3 ,24(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/dsymv_U_microk_haswell-2.c b/kernel/x86_64/dsymv_U_microk_haswell-2.c
|
|
index d83d20f8e..4778f644a 100644
|
|
--- a/kernel/x86_64/dsymv_U_microk_haswell-2.c
|
|
+++ b/kernel/x86_64/dsymv_U_microk_haswell-2.c
|
|
@@ -107,9 +107,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/dsymv_U_microk_nehalem-2.c b/kernel/x86_64/dsymv_U_microk_nehalem-2.c
|
|
index 1344c75f7..065182286 100644
|
|
--- a/kernel/x86_64/dsymv_U_microk_nehalem-2.c
|
|
+++ b/kernel/x86_64/dsymv_U_microk_nehalem-2.c
|
|
@@ -101,9 +101,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"movsd %%xmm3 , 24(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/dsymv_U_microk_sandy-2.c b/kernel/x86_64/dsymv_U_microk_sandy-2.c
|
|
index 1ef6fbafd..d84e703bd 100644
|
|
--- a/kernel/x86_64/dsymv_U_microk_sandy-2.c
|
|
+++ b/kernel/x86_64/dsymv_U_microk_sandy-2.c
|
|
@@ -116,9 +116,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/ssymv_U_microk_bulldozer-2.c b/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
|
|
index 8c01ab806..4a4f4d68d 100644
|
|
--- a/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
|
|
+++ b/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
|
|
@@ -90,9 +90,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vmovss %%xmm3 ,12(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/ssymv_U_microk_haswell-2.c b/kernel/x86_64/ssymv_U_microk_haswell-2.c
|
|
index a32e59b44..e6a09ccf8 100644
|
|
--- a/kernel/x86_64/ssymv_U_microk_haswell-2.c
|
|
+++ b/kernel/x86_64/ssymv_U_microk_haswell-2.c
|
|
@@ -112,9 +112,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/ssymv_U_microk_nehalem-2.c b/kernel/x86_64/ssymv_U_microk_nehalem-2.c
|
|
index b8e6ee732..c56ff3b15 100644
|
|
--- a/kernel/x86_64/ssymv_U_microk_nehalem-2.c
|
|
+++ b/kernel/x86_64/ssymv_U_microk_nehalem-2.c
|
|
@@ -106,9 +106,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"movss %%xmm3 , 12(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
diff --git a/kernel/x86_64/ssymv_U_microk_sandy-2.c b/kernel/x86_64/ssymv_U_microk_sandy-2.c
|
|
index e8650650c..c4919a39a 100644
|
|
--- a/kernel/x86_64/ssymv_U_microk_sandy-2.c
|
|
+++ b/kernel/x86_64/ssymv_U_microk_sandy-2.c
|
|
@@ -120,9 +120,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (i), // 0
|
|
- "r" (n), // 1
|
|
+ "+r" (i), // 0
|
|
+ "+r" (n) // 1
|
|
+ :
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
"r" (a0), // 4
|
|
|
|
From ab1630f9fac57245fbbfc20af91a060354e41c71 Mon Sep 17 00:00:00 2001
|
|
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Tue, 12 Feb 2019 16:14:02 +0100
|
|
Subject: [PATCH 4/4] Fix declaration of assembly arguments in SSYMV_L and DSYMV_L microkernels
|
|
|
|
Argument 0 (from) is modified, so it should be declared as both input and output ("+r") instead of as a plain input
|
|
---
|
|
kernel/x86_64/dsymv_L_microk_bulldozer-2.c | 4 ++--
|
|
kernel/x86_64/dsymv_L_microk_haswell-2.c | 4 ++--
|
|
kernel/x86_64/dsymv_L_microk_nehalem-2.c | 4 ++--
|
|
kernel/x86_64/dsymv_L_microk_sandy-2.c | 4 ++--
|
|
kernel/x86_64/ssymv_L_microk_bulldozer-2.c | 4 ++--
|
|
kernel/x86_64/ssymv_L_microk_haswell-2.c | 4 ++--
|
|
kernel/x86_64/ssymv_L_microk_nehalem-2.c | 4 ++--
|
|
kernel/x86_64/ssymv_L_microk_sandy-2.c | 8 ++++----
|
|
8 files changed, 18 insertions(+), 18 deletions(-)
|
|
|
|
diff --git a/kernel/x86_64/dsymv_L_microk_bulldozer-2.c b/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
|
|
index d84470cc4..bfa07b6d0 100644
|
|
--- a/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
|
|
+++ b/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
|
|
@@ -113,8 +113,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vmovsd %%xmm3 ,24(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/dsymv_L_microk_haswell-2.c b/kernel/x86_64/dsymv_L_microk_haswell-2.c
|
|
index 866782ee6..6241879d5 100644
|
|
--- a/kernel/x86_64/dsymv_L_microk_haswell-2.c
|
|
+++ b/kernel/x86_64/dsymv_L_microk_haswell-2.c
|
|
@@ -105,8 +105,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/dsymv_L_microk_nehalem-2.c b/kernel/x86_64/dsymv_L_microk_nehalem-2.c
|
|
index 38479f77a..a161dcd8b 100644
|
|
--- a/kernel/x86_64/dsymv_L_microk_nehalem-2.c
|
|
+++ b/kernel/x86_64/dsymv_L_microk_nehalem-2.c
|
|
@@ -108,8 +108,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"movsd %%xmm3 , 24(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/dsymv_L_microk_sandy-2.c b/kernel/x86_64/dsymv_L_microk_sandy-2.c
|
|
index b4e6ab369..b205b1019 100644
|
|
--- a/kernel/x86_64/dsymv_L_microk_sandy-2.c
|
|
+++ b/kernel/x86_64/dsymv_L_microk_sandy-2.c
|
|
@@ -114,8 +114,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/ssymv_L_microk_bulldozer-2.c b/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
|
|
index 9002228f3..602c3edf2 100644
|
|
--- a/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
|
|
+++ b/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
|
|
@@ -98,8 +98,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vmovss %%xmm3 ,12(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/ssymv_L_microk_haswell-2.c b/kernel/x86_64/ssymv_L_microk_haswell-2.c
|
|
index 69db008b6..fdfe4349a 100644
|
|
--- a/kernel/x86_64/ssymv_L_microk_haswell-2.c
|
|
+++ b/kernel/x86_64/ssymv_L_microk_haswell-2.c
|
|
@@ -99,8 +99,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/ssymv_L_microk_nehalem-2.c b/kernel/x86_64/ssymv_L_microk_nehalem-2.c
|
|
index c0fe5d640..6bb9c02f6 100644
|
|
--- a/kernel/x86_64/ssymv_L_microk_nehalem-2.c
|
|
+++ b/kernel/x86_64/ssymv_L_microk_nehalem-2.c
|
|
@@ -113,8 +113,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, F
|
|
"movss %%xmm3 , 12(%9) \n\t" // save temp2
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
diff --git a/kernel/x86_64/ssymv_L_microk_sandy-2.c b/kernel/x86_64/ssymv_L_microk_sandy-2.c
|
|
index 093ca8073..0c78212e7 100644
|
|
--- a/kernel/x86_64/ssymv_L_microk_sandy-2.c
|
|
+++ b/kernel/x86_64/ssymv_L_microk_sandy-2.c
|
|
@@ -109,8 +109,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|
|
@@ -217,8 +217,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
|
|
"vzeroupper \n\t"
|
|
|
|
:
|
|
- :
|
|
- "r" (from), // 0
|
|
+ "+r" (from) // 0
|
|
+ :
|
|
"r" (to), // 1
|
|
"r" (x), // 2
|
|
"r" (y), // 3
|