gnutls/SOURCES/gnutls-3.6.16-cpuid.patch

248 lines
6.9 KiB
Diff
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 300c6315d2e644ae81b43fa2dd7bbf68b3afb5b2 Mon Sep 17 00:00:00 2001
From: Daiki Ueno <ueno@gnu.org>
Date: Thu, 18 Nov 2021 19:02:03 +0100
Subject: [PATCH 1/2] accelerated: fix CPU feature detection for Intel CPUs
This fixes read_cpuid_vals to correctly read the CPUID quadruple, as
well as to set the bit the ustream CRYPTOGAMS uses to identify Intel
CPUs.
Suggested by Rafael Gieschke in:
https://gitlab.com/gnutls/gnutls/-/issues/1282
Signed-off-by: Daiki Ueno <ueno@gnu.org>
---
lib/accelerated/x86/x86-common.c | 91 +++++++++++++++++++++++++-------
1 file changed, 71 insertions(+), 20 deletions(-)
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
index 3845c6b4c9..cf615ef24f 100644
--- a/lib/accelerated/x86/x86-common.c
+++ b/lib/accelerated/x86/x86-common.c
@@ -81,15 +81,38 @@ unsigned int _gnutls_x86_cpuid_s[4];
# define bit_AVX 0x10000000
#endif
-#ifndef OSXSAVE_MASK
-/* OSXSAVE|FMA|MOVBE */
-# define OSXSAVE_MASK (0x8000000|0x1000|0x400000)
+#ifndef bit_AVX2
+# define bit_AVX2 0x00000020
+#endif
+
+#ifndef bit_AVX512F
+# define bit_AVX512F 0x00010000
+#endif
+
+#ifndef bit_AVX512IFMA
+# define bit_AVX512IFMA 0x00200000
+#endif
+
+#ifndef bit_AVX512BW
+# define bit_AVX512BW 0x40000000
+#endif
+
+#ifndef bit_AVX512VL
+# define bit_AVX512VL 0x80000000
+#endif
+
+#ifndef bit_OSXSAVE
+# define bit_OSXSAVE 0x8000000
#endif
#ifndef bit_MOVBE
# define bit_MOVBE 0x00400000
#endif
+#ifndef OSXSAVE_MASK
+# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
+#endif
+
#define via_bit_PADLOCK (0x3 << 6)
#define via_bit_PADLOCK_PHE (0x3 << 10)
#define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25)
@@ -127,7 +150,7 @@ static unsigned read_cpuid_vals(unsigned int vals[4])
unsigned t1, t2, t3;
vals[0] = vals[1] = vals[2] = vals[3] = 0;
- if (!__get_cpuid(1, &t1, &vals[0], &vals[1], &t2))
+ if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
return 0;
/* suppress AVX512; it works conditionally on certain CPUs on the original code */
vals[1] &= 0xfffff7ff;
@@ -145,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
{
uint32_t xcr0;
- if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
+ if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
return 0;
#if defined(_MSC_VER) && !defined(__clang__)
@@ -233,10 +256,7 @@ static unsigned check_sha(void)
#ifdef ASM_X86_64
static unsigned check_avx_movbe(void)
{
- if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
- return 0;
-
- return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
+ return (_gnutls_x86_cpuid_s[1] & bit_AVX);
}
static unsigned check_pclmul(void)
@@ -514,33 +534,47 @@ void register_x86_padlock_crypto(unsigned capabilities)
}
#endif
-static unsigned check_intel_or_amd(void)
+enum x86_cpu_vendor {
+ X86_CPU_VENDOR_OTHER,
+ X86_CPU_VENDOR_INTEL,
+ X86_CPU_VENDOR_AMD,
+};
+
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
{
unsigned int a, b, c, d;
- if (!__get_cpuid(0, &a, &b, &c, &d))
- return 0;
+ if (!__get_cpuid(0, &a, &b, &c, &d)) {
+ return X86_CPU_VENDOR_OTHER;
+ }
- if ((memcmp(&b, "Genu", 4) == 0 &&
- memcmp(&d, "ineI", 4) == 0 &&
- memcmp(&c, "ntel", 4) == 0) ||
- (memcmp(&b, "Auth", 4) == 0 &&
- memcmp(&d, "enti", 4) == 0 && memcmp(&c, "cAMD", 4) == 0)) {
- return 1;
+ if (memcmp(&b, "Genu", 4) == 0 &&
+ memcmp(&d, "ineI", 4) == 0 &&
+ memcmp(&c, "ntel", 4) == 0) {
+ return X86_CPU_VENDOR_INTEL;
}
- return 0;
+ if (memcmp(&b, "Auth", 4) == 0 &&
+ memcmp(&d, "enti", 4) == 0 &&
+ memcmp(&c, "cAMD", 4) == 0) {
+ return X86_CPU_VENDOR_AMD;
+ }
+
+ return X86_CPU_VENDOR_OTHER;
}
static
void register_x86_intel_crypto(unsigned capabilities)
{
int ret;
+ enum x86_cpu_vendor vendor;
memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
- if (check_intel_or_amd() == 0)
+ vendor = check_x86_cpu_vendor();
+ if (vendor == X86_CPU_VENDOR_OTHER) {
return;
+ }
if (capabilities == 0) {
if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
@@ -549,6 +583,23 @@ void register_x86_intel_crypto(unsigned capabilities)
capabilities_to_intel_cpuid(capabilities);
}
+ /* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
+ if (vendor == X86_CPU_VENDOR_INTEL) {
+ _gnutls_x86_cpuid_s[0] |= 1 << 30;
+ } else {
+ _gnutls_x86_cpuid_s[0] &= ~(1 << 30);
+ }
+
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
+
+ /* Clear AVX2 bits as well, according to what OpenSSL does.
+ * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
+ * bit_AVX512CD? */
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
+ bit_AVX512BW|bit_AVX512BW);
+ }
+
if (check_ssse3()) {
_gnutls_debug_log("Intel SSSE3 was detected\n");
--
2.37.3
From cd509dac9e6d1bf76fd12c72c1fd61f1708c254a Mon Sep 17 00:00:00 2001
From: Daiki Ueno <ueno@gnu.org>
Date: Mon, 15 Aug 2022 09:39:18 +0900
Subject: [PATCH 2/2] accelerated: clear AVX bits if it cannot be queried
through XSAVE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
Architectures Software Developers Manual".
GnuTLS previously only followed that algorithm when registering the
crypto backend, while the CRYPTOGAMS derived SHA code assembly expects
that the extension bits are propagated to _gnutls_x86_cpuid_s.
Signed-off-by: Daiki Ueno <ueno@gnu.org>
---
lib/accelerated/x86/x86-common.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
index cf615ef24f..655d0c65f2 100644
--- a/lib/accelerated/x86/x86-common.c
+++ b/lib/accelerated/x86/x86-common.c
@@ -210,7 +210,8 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
}
if (capabilities & INTEL_AVX) {
- if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
+ if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
+ check_4th_gen_intel_features(a[1])) {
_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
} else {
_gnutls_debug_log
@@ -256,7 +257,7 @@ static unsigned check_sha(void)
#ifdef ASM_X86_64
static unsigned check_avx_movbe(void)
{
- return (_gnutls_x86_cpuid_s[1] & bit_AVX);
+ return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
}
static unsigned check_pclmul(void)
@@ -579,6 +580,19 @@ void register_x86_intel_crypto(unsigned capabilities)
if (capabilities == 0) {
if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
return;
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
+
+ /* Clear AVX2 bits as well, according to what
+ * OpenSSL does. Should we clear
+ * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
+ * and bit_AVX512CD? */
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
+ bit_AVX512F|
+ bit_AVX512IFMA|
+ bit_AVX512BW|
+ bit_AVX512BW);
+ }
} else {
capabilities_to_intel_cpuid(capabilities);
}
--
2.37.3