From 300c6315d2e644ae81b43fa2dd7bbf68b3afb5b2 Mon Sep 17 00:00:00 2001
From: Daiki Ueno
Date: Thu, 18 Nov 2021 19:02:03 +0100
Subject: [PATCH 1/2] accelerated: fix CPU feature detection for Intel CPUs

This fixes read_cpuid_vals to correctly read the CPUID quadruple, as
well as to set the bit the upstream CRYPTOGAMS code uses to identify
Intel CPUs.

Suggested by Rafael Gieschke in:
https://gitlab.com/gnutls/gnutls/-/issues/1282

Signed-off-by: Daiki Ueno
---
 lib/accelerated/x86/x86-common.c | 91 +++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 20 deletions(-)

diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
index 3845c6b4c9..cf615ef24f 100644
--- a/lib/accelerated/x86/x86-common.c
+++ b/lib/accelerated/x86/x86-common.c
@@ -81,15 +81,38 @@ unsigned int _gnutls_x86_cpuid_s[4];
 # define bit_AVX 0x10000000
 #endif
 
-#ifndef OSXSAVE_MASK
-/* OSXSAVE|FMA|MOVBE */
-# define OSXSAVE_MASK (0x8000000|0x1000|0x400000)
+#ifndef bit_AVX2
+# define bit_AVX2 0x00000020
+#endif
+
+#ifndef bit_AVX512F
+# define bit_AVX512F 0x00010000
+#endif
+
+#ifndef bit_AVX512IFMA
+# define bit_AVX512IFMA 0x00200000
+#endif
+
+#ifndef bit_AVX512BW
+# define bit_AVX512BW 0x40000000
+#endif
+
+#ifndef bit_AVX512VL
+# define bit_AVX512VL 0x80000000
+#endif
+
+#ifndef bit_OSXSAVE
+# define bit_OSXSAVE 0x8000000
 #endif
 
 #ifndef bit_MOVBE
 # define bit_MOVBE 0x00400000
 #endif
 
+#ifndef OSXSAVE_MASK
+# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
+#endif
+
 #define via_bit_PADLOCK (0x3 << 6)
 #define via_bit_PADLOCK_PHE (0x3 << 10)
 #define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25)
@@ -127,7 +150,7 @@ static unsigned read_cpuid_vals(unsigned int vals[4])
 	unsigned t1, t2, t3;
 	vals[0] = vals[1] = vals[2] = vals[3] = 0;
 
-	if (!__get_cpuid(1, &t1, &vals[0], &vals[1], &t2))
+	if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
 		return 0;
 	/* suppress AVX512; it works conditionally on certain CPUs on the original code */
 	vals[1] &= 0xfffff7ff;
@@ -145,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
 {
 	uint32_t xcr0;
 
-	if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
+	if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
 		return 0;
 
 #if defined(_MSC_VER) && !defined(__clang__)
@@ -233,10 +256,7 @@ static unsigned check_sha(void)
 #ifdef ASM_X86_64
 static unsigned check_avx_movbe(void)
 {
-	if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
-		return 0;
-
-	return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
+	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
 }
 
 static unsigned check_pclmul(void)
@@ -514,33 +534,47 @@ void register_x86_padlock_crypto(unsigned capabilities)
 }
 #endif
 
-static unsigned check_intel_or_amd(void)
+enum x86_cpu_vendor {
+	X86_CPU_VENDOR_OTHER,
+	X86_CPU_VENDOR_INTEL,
+	X86_CPU_VENDOR_AMD,
+};
+
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
 {
 	unsigned int a, b, c, d;
 
-	if (!__get_cpuid(0, &a, &b, &c, &d))
-		return 0;
+	if (!__get_cpuid(0, &a, &b, &c, &d)) {
+		return X86_CPU_VENDOR_OTHER;
+	}
 
-	if ((memcmp(&b, "Genu", 4) == 0 &&
-	     memcmp(&d, "ineI", 4) == 0 &&
-	     memcmp(&c, "ntel", 4) == 0) ||
-	    (memcmp(&b, "Auth", 4) == 0 &&
-	     memcmp(&d, "enti", 4) == 0 && memcmp(&c, "cAMD", 4) == 0)) {
-		return 1;
+	if (memcmp(&b, "Genu", 4) == 0 &&
+	    memcmp(&d, "ineI", 4) == 0 &&
+	    memcmp(&c, "ntel", 4) == 0) {
+		return X86_CPU_VENDOR_INTEL;
 	}
 
-	return 0;
+	if (memcmp(&b, "Auth", 4) == 0 &&
+	    memcmp(&d, "enti", 4) == 0 &&
+	    memcmp(&c, "cAMD", 4) == 0) {
+		return X86_CPU_VENDOR_AMD;
+	}
+
+	return X86_CPU_VENDOR_OTHER;
 }
 
 static
 void register_x86_intel_crypto(unsigned capabilities)
 {
 	int ret;
+	enum x86_cpu_vendor vendor;
 
 	memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
 
-	if (check_intel_or_amd() == 0)
+	vendor = check_x86_cpu_vendor();
+	if (vendor == X86_CPU_VENDOR_OTHER) {
 		return;
+	}
 
 	if (capabilities == 0) {
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
@@ -549,6 +583,23 @@ void register_x86_intel_crypto(unsigned capabilities)
 		capabilities_to_intel_cpuid(capabilities);
 	}
 
+	/* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
+	if (vendor == X86_CPU_VENDOR_INTEL) {
+		_gnutls_x86_cpuid_s[0] |= 1 << 30;
+	} else {
+		_gnutls_x86_cpuid_s[0] &= ~(1 << 30);
+	}
+
+	if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
+		_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
+
+		/* Clear AVX2 bits as well, according to what OpenSSL does.
+		 * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
+		 * bit_AVX512CD? */
+		_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
+					    bit_AVX512BW|bit_AVX512VL);
+	}
+
 	if (check_ssse3()) {
 		_gnutls_debug_log("Intel SSSE3 was detected\n");
-- 
2.37.3
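
A note on why the read_cpuid_vals change above matters: GCC's
__get_cpuid() fills its four output pointers in EAX, EBX, ECX, EDX
order, while the CRYPTOGAMS/OpenSSL capability-vector convention the
assembly expects keeps EDX of CPUID leaf 1 in word 0 and ECX in word 1,
with the otherwise-unused bit 30 of word 0 serving as the "Intel CPU"
marker. The old call stored EBX into word 0 and discarded EDX entirely.
The following standalone sketch (an illustration written for this note,
not part of either patch) populates a capability vector that way:

    /* Illustrative only: mirrors the register order the fixed
     * read_cpuid_vals relies on.  Build with gcc or clang on x86. */
    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned int a, b, c, d;
        unsigned int cpuid_s[4] = { 0, 0, 0, 0 };

        /* Leaf 1: EDX goes into word 0, ECX into word 1. */
        if (!__get_cpuid(1, &a, &b, &c, &d))
            return 1;
        cpuid_s[0] = d;
        cpuid_s[1] = c;

        /* Leaf 0: the 12-byte vendor string is laid out EBX:EDX:ECX,
         * i.e. "Genu" "ineI" "ntel" on Intel parts. */
        if (__get_cpuid(0, &a, &b, &c, &d) &&
            memcmp(&b, "Genu", 4) == 0 &&
            memcmp(&d, "ineI", 4) == 0 &&
            memcmp(&c, "ntel", 4) == 0)
            cpuid_s[0] |= 1u << 30;  /* CRYPTOGAMS Intel marker */

        printf("word0(EDX)=%08x word1(ECX)=%08x\n",
               cpuid_s[0], cpuid_s[1]);
        return 0;
    }
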
From cd509dac9e6d1bf76fd12c72c1fd61f1708c254a Mon Sep 17 00:00:00 2001
From: Daiki Ueno
Date: Mon, 15 Aug 2022 09:39:18 +0900
Subject: [PATCH 2/2] accelerated: clear AVX bits if they cannot be queried
 through XSAVE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The algorithm to detect AVX is described in section 14.3 of "Intel® 64
and IA-32 Architectures Software Developer’s Manual". GnuTLS previously
followed that algorithm only when registering the crypto backend, while
the CRYPTOGAMS-derived SHA assembly code expects that the extension
bits are propagated to _gnutls_x86_cpuid_s.

Signed-off-by: Daiki Ueno
---
 lib/accelerated/x86/x86-common.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
index cf615ef24f..655d0c65f2 100644
--- a/lib/accelerated/x86/x86-common.c
+++ b/lib/accelerated/x86/x86-common.c
@@ -210,7 +210,8 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
 	}
 
 	if (capabilities & INTEL_AVX) {
-		if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
+		if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
+		    check_4th_gen_intel_features(a[1])) {
 			_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
 		} else {
 			_gnutls_debug_log
@@ -256,7 +257,7 @@ static unsigned check_sha(void)
 #ifdef ASM_X86_64
 static unsigned check_avx_movbe(void)
 {
-	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
+	return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
 }
 
 static unsigned check_pclmul(void)
@@ -579,6 +580,19 @@ void register_x86_intel_crypto(unsigned capabilities)
 	if (capabilities == 0) {
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
 			return;
+		if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
+			_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
+
+			/* Clear AVX2 bits as well, according to what
+			 * OpenSSL does. Should we clear
+			 * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
+			 * and bit_AVX512CD? */
+			_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
+						    bit_AVX512F|
+						    bit_AVX512IFMA|
+						    bit_AVX512BW|
+						    bit_AVX512VL);
+		}
 	} else {
 		capabilities_to_intel_cpuid(capabilities);
 	}
-- 
2.37.3
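
A note on the detection algorithm the second message cites: SDM section
14.3 requires three checks before AVX may be used, because
CPUID.1:ECX.AVX only says the silicon has the instructions, not that
the OS preserves the YMM state across context switches. The sketch
below (an illustration written for this note, not the patch's code; the
bit values mirror the macros defined in patch 1) follows the same
procedure that check_4th_gen_intel_features implements:

    /* Illustrative only: AVX usability per Intel SDM vol. 1, 14.3.
     * 1. CPUID.1:ECX.OSXSAVE (bit 27) - OS uses XSAVE/XGETBV;
     * 2. XCR0 bits 1 and 2 - OS saves XMM and YMM state;
     * 3. CPUID.1:ECX.AVX (bit 28) - CPU implements AVX. */
    #include <cpuid.h>
    #include <stdio.h>

    #define bit_OSXSAVE 0x8000000
    #define bit_AVX     0x10000000

    static unsigned avx_usable(void)
    {
        unsigned int a, b, c, d, lo, hi;

        if (!__get_cpuid(1, &a, &b, &c, &d))
            return 0;
        if ((c & (bit_OSXSAVE | bit_AVX)) != (bit_OSXSAVE | bit_AVX))
            return 0;
        /* XGETBV with ECX=0 reads XCR0; executing it without OSXSAVE
         * set would raise #UD, hence the check above. */
        __asm__ ("xgetbv" : "=a" (lo), "=d" (hi) : "c" (0));
        (void) hi;  /* upper half of XCR0 not needed here */
        return (lo & 0x6) == 0x6;  /* XMM (bit 1) and YMM (bit 2) */
    }

    int main(void)
    {
        printf("AVX %s\n", avx_usable() ? "usable" : "not usable");
        return 0;
    }

This is also why patch 2 propagates the result into _gnutls_x86_cpuid_s:
per the commit message, the CRYPTOGAMS-derived SHA assembly consults
those cached bits directly rather than re-running the XGETBV test
itself.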