fdc014428b
Related: #2097327 Signed-off-by: Daiki Ueno <dueno@redhat.com>
94 lines
2.6 KiB
Diff
94 lines
2.6 KiB
Diff
From ef8a26638432066d8e683b216142d695fd16d222 Mon Sep 17 00:00:00 2001
|
||
From: Daiki Ueno <ueno@gnu.org>
|
||
Date: Mon, 15 Aug 2022 09:39:18 +0900
|
||
Subject: [PATCH] accelerated: clear AVX bits if it cannot be queried through
|
||
XSAVE
|
||
MIME-Version: 1.0
|
||
Content-Type: text/plain; charset=UTF-8
|
||
Content-Transfer-Encoding: 8bit
|
||
|
||
The algorithm to detect AVX is described in Section 14.3 of "Intel® 64 and IA-32
|
||
Architectures Software Developer’s Manual".
|
||
|
||
GnuTLS previously only followed that algorithm when registering the
|
||
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
|
||
that the extension bits are propagated to _gnutls_x86_cpuid_s.
|
||
|
||
Signed-off-by: Daiki Ueno <ueno@gnu.org>
|
||
---
|
||
lib/accelerated/x86/x86-common.c | 37 +++++++++++++++++++++++++++-----
|
||
1 file changed, 32 insertions(+), 5 deletions(-)
|
||
|
||
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
|
||
index 7ddaa594e6..85e2f93d4d 100644
|
||
--- a/lib/accelerated/x86/x86-common.c
|
||
+++ b/lib/accelerated/x86/x86-common.c
|
||
@@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
|
||
# define bit_AVX 0x10000000
|
||
#endif
|
||
|
||
+#ifndef bit_AVX2
|
||
+# define bit_AVX2 0x00000020
|
||
+#endif
|
||
+
|
||
+#ifndef bit_AVX512F
|
||
+# define bit_AVX512F 0x00010000
|
||
+#endif
|
||
+
|
||
+#ifndef bit_AVX512IFMA
|
||
+# define bit_AVX512IFMA 0x00200000
|
||
+#endif
|
||
+
|
||
+#ifndef bit_AVX512BW
|
||
+# define bit_AVX512BW 0x40000000
|
||
+#endif
|
||
+
|
||
+#ifndef bit_AVX512VL
|
||
+# define bit_AVX512VL 0x80000000
|
||
+#endif
|
||
+
|
||
#ifndef bit_OSXSAVE
|
||
# define bit_OSXSAVE 0x8000000
|
||
#endif
|
||
@@ -148,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
|
||
{
|
||
uint32_t xcr0;
|
||
|
||
- if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
|
||
+ if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
|
||
return 0;
|
||
|
||
#if defined(_MSC_VER) && !defined(__clang__)
|
||
@@ -236,10 +256,7 @@ static unsigned check_sha(void)
|
||
#ifdef ASM_X86_64
|
||
static unsigned check_avx_movbe(void)
|
||
{
|
||
- if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
|
||
- return 0;
|
||
-
|
||
- return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
|
||
+ return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
|
||
}
|
||
|
||
static unsigned check_pclmul(void)
|
||
@@ -895,6 +912,16 @@ void register_x86_intel_crypto(unsigned capabilities)
|
||
_gnutls_x86_cpuid_s[0] &= ~(1 << 30);
|
||
}
|
||
|
||
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
|
||
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
|
||
+
|
||
+ /* Clear AVX2 bits as well, according to what OpenSSL does.
|
||
+ * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
|
||
+ * bit_AVX512CD? */
|
||
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
|
||
+ bit_AVX512BW|bit_AVX512VL);
|
||
+ }
|
||
+
|
||
if (check_ssse3()) {
|
||
_gnutls_debug_log("Intel SSSE3 was detected\n");
|
||
|
||
--
|
||
2.37.2
|
||
|