From b113737687ca5b10fb3b100104442c3c555625de Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Fri, 9 Mar 2012 15:19:08 -0700 Subject: [PATCH] - Fix AVX checks (#801650) --- glibc-rh794797-2.patch | 4 +- glibc-rh801650-1.patch | 90 ++++++++++++++++++ glibc-rh801650-2.patch | 207 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 glibc-rh801650-1.patch create mode 100644 glibc-rh801650-2.patch diff --git a/glibc-rh794797-2.patch b/glibc-rh794797-2.patch index 1c6eece..c1b2989 100644 --- a/glibc-rh794797-2.patch +++ b/glibc-rh794797-2.patch @@ -1,5 +1,5 @@ ---- vfprintf.c 2012-03-07 12:16:21.000000000 -0700 -+++ /home/law/UPSTREAM/glibc/stdio-common/vfprintf.c 2012-03-07 12:00:28.006630851 -0700 +--- a/stdio-common/vfprintf.c 2012-03-07 12:16:21.000000000 -0700 ++++ b/stdio-common/vfprintf.c 2012-03-07 12:00:28.006630851 -0700 @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-2008, 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 1991-2011, 2012 Free Software Foundation, Inc. diff --git a/glibc-rh801650-1.patch b/glibc-rh801650-1.patch new file mode 100644 index 0000000..b20da75 --- /dev/null +++ b/glibc-rh801650-1.patch @@ -0,0 +1,90 @@ +commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880 +Author: Ulrich Drepper +Date: Thu Jan 26 07:45:14 2012 -0500 + + Reset bit_AVX in __cpu_features is OS support is missing + +diff --git a/ChangeLog b/ChangeLog +index a0b355e..1e2284f 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,10 @@ ++2012-01-26 Ulrich Drepper ++ ++ [BZ #13583] ++ * sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE. ++ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If ++ bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary. ++ + 2012-01-25 Joseph Myers + + * elf/tst-unique3.cc (gets): Remove declaration. +diff --git a/NEWS b/NEWS +index 5e062af..42e09c1 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,4 +1,4 @@ +-GNU C Library NEWS -- history of user-visible changes. 2012-1-10 ++GNU C Library NEWS -- history of user-visible changes. 2012-1-26 + Copyright (C) 1992-2009, 2010, 2011, 2012 Free Software Foundation, Inc. + See the end for copying conditions. + +@@ -10,7 +10,7 @@ Version 2.16 + * The following bugs are resolved with this release: + + 13525, 13526, 13527, 13528, 13529, 13531, 13532, 13533, 13547, 13530, +- 13551, 13552, 13553, 13555, 13559 ++ 13551, 13552, 13553, 13555, 13559, 13583 + + * ISO C11 support: + +diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c +index 65b0ee9..4fabbee 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.c ++++ b/sysdeps/x86_64/multiarch/init-arch.c +@@ -1,6 +1,6 @@ + /* Initialize CPU feature data. + This file is part of the GNU C Library. +- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Contributed by Ulrich Drepper . + + The GNU C Library is free software; you can redistribute it and/or +@@ -144,6 +144,18 @@ __init_cpu_features (void) + else + kind = arch_kind_other; + ++ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) ++ { ++ /* Reset the AVX bit in case OSXSAVE is disabled. 
*/ ++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 ++ || ({ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" ++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ (xcrlow & 6) != 6; })) ++ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; ++ } ++ + __cpu_features.family = family; + __cpu_features.model = model; + atomic_write_barrier (); +diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h +index 2a1df39..408e5ae 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.h ++++ b/sysdeps/x86_64/multiarch/init-arch.h +@@ -1,5 +1,5 @@ + /* This file is part of the GNU C Library. +- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public +@@ -27,6 +27,7 @@ + #define bit_SSSE3 (1 << 9) + #define bit_SSE4_1 (1 << 19) + #define bit_SSE4_2 (1 << 20) ++#define bit_OSXSAVE (1 << 27) + #define bit_AVX (1 << 28) + #define bit_POPCOUNT (1 << 23) + #define bit_FMA (1 << 12) diff --git a/glibc-rh801650-2.patch b/glibc-rh801650-2.patch new file mode 100644 index 0000000..bc8469b --- /dev/null +++ b/glibc-rh801650-2.patch @@ -0,0 +1,207 @@ +commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf +Author: Ulrich Drepper +Date: Thu Jan 26 09:45:54 2012 -0500 + + Really fix AVX tests + + There is no problem with strcmp, it doesn't use the YMM registers. + The math routines might since gcc perhaps generates such code. + Introduce bit_YMM_USBALE and use it in the math routines. + +diff --git a/ChangeLog b/ChangeLog +index 1e2284f..b413d27 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -2,8 +2,17 @@ + + [BZ #13583] + * sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE. ++ Clean up HAS_* macros. + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If +- bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary. ++ bit_AVX is set also check OSXAVE/XCR0 and set bit_YMM_Usable if ++ possible. ++ * sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not ++ HAS_AVX. ++ * sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise. ++ * sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise. ++ * sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise. ++ * sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise. ++ * sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise. + + 2012-01-25 Joseph Myers + +diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +index 6867c6e..3a615fc 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double); + + libm_ifunc (__ieee754_atan2, + HAS_FMA4 ? __ieee754_atan2_fma4 +- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); ++ : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + strong_alias (__ieee754_atan2, __atan2_finite) + + # define __ieee754_atan2 __ieee754_atan2_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c +index 3c65028..7b2320a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c +@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double); + + libm_ifunc (__ieee754_exp, + HAS_FMA4 ? __ieee754_exp_fma4 +- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2)); ++ : (HAS_YMM_USABLE ? 
__ieee754_exp_avx : __ieee754_exp_sse2)); + strong_alias (__ieee754_exp, __exp_finite) + + # define __ieee754_exp __ieee754_exp_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c +index 3b468d0..ab277d6 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c +@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double); + + libm_ifunc (__ieee754_log, + HAS_FMA4 ? __ieee754_log_fma4 +- : (HAS_AVX ? __ieee754_log_avx ++ : (HAS_YMM_USABLE ? __ieee754_log_avx + : __ieee754_log_sse2)); + strong_alias (__ieee754_log, __log_finite) + +diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c +index 3160201..78c7e09 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c +@@ -12,7 +12,8 @@ extern double __atan_fma4 (double); + # define __atan_fma4 ((void *) 0) + # endif + +-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2); ++libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : ++ HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); + + # define atan __atan_sse2 + #endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c +index 1ba9dbc..417acd0 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c +@@ -17,10 +17,12 @@ extern double __sin_fma4 (double); + # define __sin_fma4 ((void *) 0) + # endif + +-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2); ++libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : ++ HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); + weak_alias (__cos, cos) + +-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2); ++libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : ++ HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); + weak_alias (__sin, sin) + + # define __cos __cos_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c +index 8f6601e..3047155 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c +@@ -12,7 +12,8 @@ extern double __tan_fma4 (double); + # define __tan_fma4 ((void *) 0) + # endif + +-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2); ++libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : ++ HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); + + # define tan __tan_sse2 + #endif +diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c +index 4fabbee..76d146c 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.c ++++ b/sysdeps/x86_64/multiarch/init-arch.c +@@ -147,13 +147,13 @@ __init_cpu_features (void) + if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) + { + /* Reset the AVX bit in case OSXSAVE is disabled. 
*/ +- if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 +- || ({ unsigned int xcrlow; +- unsigned int xcrhigh; +- asm ("xgetbv" +- : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); +- (xcrlow & 6) != 6; })) +- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; ++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0 ++ && ({ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" ++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ (xcrlow & 6) == 6; })) ++ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable; + } + + __cpu_features.family = family; +diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h +index 408e5ae..2dc75ab 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.h ++++ b/sysdeps/x86_64/multiarch/init-arch.h +@@ -22,6 +22,7 @@ + #define bit_Prefer_SSE_for_memop (1 << 3) + #define bit_Fast_Unaligned_Load (1 << 4) + #define bit_Prefer_PMINUB_for_stringop (1 << 5) ++#define bit_YMM_Usable (1 << 6) + + #define bit_SSE2 (1 << 26) + #define bit_SSSE3 (1 << 9) +@@ -49,6 +50,7 @@ + # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE + # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE ++# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE + + #else /* __ASSEMBLER__ */ + +@@ -93,7 +95,7 @@ extern struct cpu_features + + + extern void __init_cpu_features (void) attribute_hidden; +-#define INIT_ARCH()\ ++# define INIT_ARCH() \ + do \ + if (__cpu_features.kind == arch_kind_unknown) \ + __init_cpu_features (); \ +@@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void) + # define index_Slow_BSF FEATURE_INDEX_1 + # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 + # define index_Fast_Unaligned_Load FEATURE_INDEX_1 ++# define index_YMM_Usable FEATURE_INDEX_1 + +-#define HAS_ARCH_FEATURE(idx, bit) \ +- ((__get_cpu_features ()->feature[idx] & (bit)) != 0) ++# define HAS_ARCH_FEATURE(name) \ ++ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + +-#define HAS_FAST_REP_STRING \ +- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String) ++# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) + +-#define HAS_FAST_COPY_BACKWARD \ +- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward) ++# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) + +-#define HAS_SLOW_BSF \ +- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) ++# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) + +-#define HAS_PREFER_SSE_FOR_MEMOP \ +- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) ++# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) + +-#define HAS_FAST_UNALIGNED_LOAD \ +- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load) ++# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) ++ ++# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable) + + #endif /* __ASSEMBLER__ */
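
For reference, the usability test that the second patch installs in __init_cpu_features (CPUID.1:ECX.OSXSAVE set, then XGETBV on XCR0 confirming that the OS saves both XMM and YMM state, i.e. (xcr0 & 6) == 6) can be exercised as a standalone program. The sketch below restates that logic only; the helper name ymm_usable, the use of GCC's <cpuid.h>, and the test driver are illustrative assumptions and are not part of either glibc patch.

/* Minimal sketch of the AVX/YMM usability check from the patched
   init-arch.c.  Assumes GCC or Clang on x86-64 (<cpuid.h> provides
   __get_cpuid, bit_AVX and bit_OSXSAVE).  */
#include <cpuid.h>
#include <stdio.h>

static int
ymm_usable (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: feature flags live in ECX.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  /* The CPU must advertise AVX and the OS must have enabled XSAVE.  */
  if ((ecx & bit_AVX) == 0 || (ecx & bit_OSXSAVE) == 0)
    return 0;

  /* XGETBV with ECX=0 reads XCR0; bits 1 (XMM state) and 2 (YMM state)
     must both be set, exactly the (xcrlow & 6) == 6 test in the patch.  */
  unsigned int xcrlow, xcrhigh;
  asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
  (void) xcrhigh;
  return (xcrlow & 6) == 6;
}

int
main (void)
{
  printf ("YMM registers %s usable\n", ymm_usable () ? "are" : "are not");
  return 0;
}

On a kernel without XSAVE/AVX context-switch support this reports "are not usable" even when CPUID advertises AVX, which is the situation the patched math ifuncs now handle by falling back to the SSE2 variants instead of the AVX ones.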