2012-05-11 21:49:43 +00:00
|
|
|
diff -rup c/sysdeps/x86_64/fpu/multiarch/e_log.c d/sysdeps/x86_64/fpu/multiarch/e_log.c
|
|
|
|
--- c/sysdeps/x86_64/fpu/multiarch/e_log.c 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/fpu/multiarch/e_log.c 2012-05-11 12:53:39.491061476 -0600
|
|
|
|
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double
|
|
|
|
|
|
|
|
libm_ifunc (__ieee754_log,
|
|
|
|
HAS_FMA4 ? __ieee754_log_fma4
|
|
|
|
- : (HAS_AVX ? __ieee754_log_avx
|
|
|
|
- : __ieee754_log_sse2));
|
|
|
|
+ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
|
|
|
|
strong_alias (__ieee754_log, __log_finite)
|
|
|
|
|
|
|
|
# define __ieee754_log __ieee754_log_sse2
|
|
|
|
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_atan.c d/sysdeps/x86_64/fpu/multiarch/s_atan.c
|
|
|
|
--- c/sysdeps/x86_64/fpu/multiarch/s_atan.c 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/fpu/multiarch/s_atan.c 2012-05-11 12:53:39.491061476 -0600
|
|
|
|
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
|
|
|
|
# define __atan_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
|
|
|
|
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
|
|
|
|
+ HAS_AVX ? __atan_avx : __atan_sse2));
|
|
|
|
|
|
|
|
# define atan __atan_sse2
|
|
|
|
#endif
|
|
|
|
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_sin.c d/sysdeps/x86_64/fpu/multiarch/s_sin.c
|
|
|
|
--- c/sysdeps/x86_64/fpu/multiarch/s_sin.c 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/fpu/multiarch/s_sin.c 2012-05-11 12:53:39.491061476 -0600
|
|
|
|
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
|
|
|
|
# define __sin_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
|
|
|
|
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
|
|
|
|
+ HAS_AVX ? __cos_avx : __cos_sse2));
|
|
|
|
weak_alias (__cos, cos)
|
|
|
|
|
|
|
|
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
|
|
|
|
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
|
|
|
|
+ HAS_AVX ? __sin_avx : __sin_sse2));
|
|
|
|
weak_alias (__sin, sin)
|
|
|
|
|
|
|
|
# define __cos __cos_sse2
|
|
|
|
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_tan.c d/sysdeps/x86_64/fpu/multiarch/s_tan.c
|
|
|
|
--- c/sysdeps/x86_64/fpu/multiarch/s_tan.c 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/fpu/multiarch/s_tan.c 2012-05-11 12:53:39.491061476 -0600
|
|
|
|
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
|
|
|
|
# define __tan_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
|
|
|
|
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
|
|
|
|
+ HAS_AVX ? __tan_avx : __tan_sse2));
|
|
|
|
|
|
|
|
# define tan __tan_sse2
|
|
|
|
#endif
|
|
|
|
diff -rup c/sysdeps/x86_64/multiarch/init-arch.c d/sysdeps/x86_64/multiarch/init-arch.c
|
|
|
|
--- c/sysdeps/x86_64/multiarch/init-arch.c 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/multiarch/init-arch.c 2012-05-11 12:55:29.169490958 -0600
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
/* Initialize CPU feature data.
|
|
|
|
This file is part of the GNU C Library.
|
|
|
|
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
|
|
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
|
|
|
Contributed by Ulrich Drepper <drepper@redhat.com>.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
@@ -14,9 +14,8 @@
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
- License along with the GNU C Library; if not, write to the Free
|
|
|
|
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
|
|
- 02111-1307 USA. */
|
|
|
|
+ License along with the GNU C Library; if not, see
|
|
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
#include <atomic.h>
|
|
|
|
#include <cpuid.h>
|
|
|
|
@@ -144,6 +143,23 @@ __init_cpu_features (void)
|
|
|
|
else
|
|
|
|
kind = arch_kind_other;
|
|
|
|
|
|
|
|
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
|
|
|
|
+ {
|
|
|
|
+ /* Determine if AVX is usable: requires CPUID OSXSAVE plus both the XMM and YMM state bits set in XCR0. */
|
|
|
|
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
|
|
|
|
+ && ({ unsigned int xcrlow;
|
|
|
|
+ unsigned int xcrhigh;
|
|
|
|
+ asm ("xgetbv"
|
|
|
|
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
|
|
|
+ (xcrlow & (bit_YMM_state | bit_XMM_state)) ==
|
|
|
|
+ (bit_YMM_state | bit_XMM_state); }))
|
2012-05-14 17:53:42 +00:00
|
|
|
+ __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
|
2012-05-11 21:49:43 +00:00
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* FMA4 depends on AVX support: test the AVX_Usable bit itself, since other flags share this feature word. */
|
|
|
|
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4) != 0 && (__cpu_features.feature[index_AVX_Usable] & bit_AVX_Usable) != 0)
|
2012-05-14 17:53:42 +00:00
|
|
|
+ __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
|
2012-05-11 21:49:43 +00:00
|
|
|
+
|
|
|
|
__cpu_features.family = family;
|
|
|
|
__cpu_features.model = model;
|
|
|
|
atomic_write_barrier ();
|
|
|
|
diff -rup c/sysdeps/x86_64/multiarch/init-arch.h d/sysdeps/x86_64/multiarch/init-arch.h
|
|
|
|
--- c/sysdeps/x86_64/multiarch/init-arch.h 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/multiarch/init-arch.h 2012-05-11 12:55:29.170490953 -0600
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
/* This file is part of the GNU C Library.
|
|
|
|
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
|
|
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
@@ -12,9 +12,8 @@
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
- License along with the GNU C Library; if not, write to the Free
|
|
|
|
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
|
|
- 02111-1307 USA. */
|
|
|
|
+ License along with the GNU C Library; if not, see
|
|
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
#define bit_Fast_Rep_String (1 << 0)
|
|
|
|
#define bit_Fast_Copy_Backward (1 << 1)
|
|
|
|
@@ -22,16 +21,24 @@
|
|
|
|
#define bit_Prefer_SSE_for_memop (1 << 3)
|
|
|
|
#define bit_Fast_Unaligned_Load (1 << 4)
|
|
|
|
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
|
|
|
|
+#define bit_AVX_Usable (1 << 6)
|
|
|
|
+#define bit_FMA4_Usable (1 << 7)
|
|
|
|
|
|
|
|
+/* CPUID Feature flags. */
|
|
|
|
#define bit_SSE2 (1 << 26)
|
|
|
|
#define bit_SSSE3 (1 << 9)
|
|
|
|
#define bit_SSE4_1 (1 << 19)
|
|
|
|
#define bit_SSE4_2 (1 << 20)
|
|
|
|
+#define bit_OSXSAVE (1 << 27)
|
|
|
|
#define bit_AVX (1 << 28)
|
|
|
|
#define bit_POPCOUNT (1 << 23)
|
|
|
|
#define bit_FMA (1 << 12)
|
|
|
|
#define bit_FMA4 (1 << 16)
|
|
|
|
|
|
|
|
+/* XCR0 Feature flags. */
|
|
|
|
+#define bit_XMM_state (1 << 1)
|
|
|
|
+#define bit_YMM_state (1 << 2)
|
|
|
|
+
|
|
|
|
#ifdef __ASSEMBLER__
|
|
|
|
|
|
|
|
# include <ifunc-defines.h>
|
|
|
|
@@ -48,6 +55,8 @@
|
|
|
|
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
+# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
|
|
|
|
#else /* __ASSEMBLER__ */
|
|
|
|
|
|
|
|
@@ -92,7 +101,7 @@ extern struct cpu_features
|
|
|
|
|
|
|
|
|
|
|
|
extern void __init_cpu_features (void) attribute_hidden;
|
|
|
|
-#define INIT_ARCH()\
|
|
|
|
+# define INIT_ARCH() \
|
|
|
|
do \
|
|
|
|
if (__cpu_features.kind == arch_kind_unknown) \
|
|
|
|
__init_cpu_features (); \
|
|
|
|
@@ -111,37 +120,36 @@ extern const struct cpu_features *__get_
|
|
|
|
|
|
|
|
/* Following are the feature tests used throughout libc. */
|
|
|
|
|
|
|
|
+/* CPUID_* evaluates to true if the feature flag is enabled. */
|
|
|
|
+# define CPUID_OSXSAVE HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
|
|
|
|
+# define CPUID_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
|
|
|
|
+# define CPUID_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
|
|
|
|
+
|
|
|
|
+/* HAS_* evaluates to true if we may use the feature at runtime. */
|
|
|
|
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
|
|
|
|
# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
|
|
|
|
# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
|
|
|
|
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
|
|
|
|
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
|
|
|
|
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
|
|
|
|
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
|
|
|
|
-# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
|
|
|
|
|
|
|
|
# define index_Fast_Rep_String FEATURE_INDEX_1
|
|
|
|
# define index_Fast_Copy_Backward FEATURE_INDEX_1
|
|
|
|
# define index_Slow_BSF FEATURE_INDEX_1
|
|
|
|
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
|
|
|
|
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
|
|
|
|
+# define index_AVX_Usable FEATURE_INDEX_1
|
|
|
|
+# define index_FMA4_Usable FEATURE_INDEX_1
|
|
|
|
|
|
|
|
-#define HAS_ARCH_FEATURE(idx, bit) \
|
|
|
|
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
|
|
|
|
-
|
|
|
|
-#define HAS_FAST_REP_STRING \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
|
|
|
|
-
|
|
|
|
-#define HAS_FAST_COPY_BACKWARD \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
|
|
|
|
-
|
|
|
|
-#define HAS_SLOW_BSF \
|
|
|
|
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
|
|
|
|
-
|
|
|
|
-#define HAS_PREFER_SSE_FOR_MEMOP \
|
|
|
|
- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
|
|
|
|
+# define HAS_ARCH_FEATURE(name) \
|
|
|
|
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
|
|
|
|
|
|
|
|
-#define HAS_FAST_UNALIGNED_LOAD \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
|
|
|
|
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
|
|
|
|
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
|
|
|
|
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
|
|
|
|
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
|
|
|
|
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
|
|
|
|
+# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
|
|
|
|
+# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLER__ */
|
|
|
|
diff -rup c/sysdeps/x86_64/multiarch/strcmp.S d/sysdeps/x86_64/multiarch/strcmp.S
|
|
|
|
--- c/sysdeps/x86_64/multiarch/strcmp.S 2012-01-01 05:16:32.000000000 -0700
|
|
|
|
+++ d/sysdeps/x86_64/multiarch/strcmp.S 2012-05-11 12:55:29.172490943 -0600
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
/* strcmp with SSE4.2
|
|
|
|
- Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
|
|
+ Copyright (C) 2009-2012 Free Software Foundation, Inc.
|
|
|
|
Contributed by Intel Corporation.
|
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
@@ -107,7 +107,7 @@ ENTRY(__strcasecmp)
|
|
|
|
1:
|
|
|
|
# ifdef HAVE_AVX_SUPPORT
|
|
|
|
leaq __strcasecmp_avx(%rip), %rax
|
|
|
|
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
|
|
|
|
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
|
|
|
|
jnz 2f
|
|
|
|
# endif
|
|
|
|
leaq __strcasecmp_sse42(%rip), %rax
|
|
|
|
@@ -130,7 +130,7 @@ ENTRY(__strncasecmp)
|
|
|
|
1:
|
|
|
|
# ifdef HAVE_AVX_SUPPORT
|
|
|
|
leaq __strncasecmp_avx(%rip), %rax
|
|
|
|
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
|
|
|
|
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
|
|
|
|
jnz 2f
|
|
|
|
# endif
|
|
|
|
leaq __strncasecmp_sse42(%rip), %rax
|
|
|
|
Only in d/sysdeps/x86_64/multiarch: strcmp.S.orig
|