From fdd1e62fc4917fe1d331d95296aeae1ff2f080d8 Mon Sep 17 00:00:00 2001 From: Dmitry Belyavskiy Date: Wed, 14 Aug 2024 13:03:07 +0200 Subject: [PATCH] Speedup SSL_add_{file,dir}_cert_subjects_to_stack Resolves: RHEL-54232 --- ...eedup-SSL_add_cert_subjects_to_stack.patch | 201 ++++++++++++++++++ openssl.spec | 4 + 2 files changed, 205 insertions(+) create mode 100644 0127-speedup-SSL_add_cert_subjects_to_stack.patch diff --git a/0127-speedup-SSL_add_cert_subjects_to_stack.patch b/0127-speedup-SSL_add_cert_subjects_to_stack.patch new file mode 100644 index 0000000..a6bd503 --- /dev/null +++ b/0127-speedup-SSL_add_cert_subjects_to_stack.patch @@ -0,0 +1,201 @@ +From e2e469593a15681983d16e36d856bf8fb7de8589 Mon Sep 17 00:00:00 2001 +From: Clemens Lang +Date: Wed, 31 Jul 2024 12:45:11 +0200 +Subject: [PATCH] Speed up SSL_add_{file,dir}_cert_subjects_to_stack +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The X509_NAME comparison function converts its arguments to DER using +i2d_X509_NAME before comparing the results using memcmp(). For every +invocation of the comparison function (of which there are many when +loading many certificates), it allocates two buffers of the appropriate +size for the DER encoding. + +Switching to static buffers (possibly of X509_NAME_MAX size as defined +in crypto/x509/x_name.c) would not work with multithreaded use, e.g., +when two threads sort two separate STACK_OF(X509_NAME)s at the same +time. A suitable re-usable buffer could have been added to the +STACK_OF(X509_NAME) if sk_X509_NAME_compfunc did have a void* argument, +or a pointer to the STACK_OF(X509_NAME) – but it does not. + +Instead, copy the solution chosen in SSL_load_client_CA_file() by +filling an LHASH_OF(X509_NAME) with all existing names in the stack and +using that to deduplicate, rather than relying on sk_X509_NAME_find(), +which ends up being very slow. + +Adjust SSL_add_dir_cert_subjects_to_stack() to keep a local +LHASH_OF(X509_NAME)s over the complete directory it is processing. + +In a small benchmark that calls SSL_add_dir_cert_subjects_to_stack() +twice, once on a directory with one entry, and once with a directory +with 1000 certificates, and repeats this in a loop 10 times, this change +yields a speed-up of 5.32: + +| Benchmark 1: ./bench 10 dir-1 dir-1000 +| Time (mean ± σ): 6.685 s ± 0.017 s [User: 6.402 s, System: 0.231 s] +| Range (min … max): 6.658 s … 6.711 s 10 runs +| +| Benchmark 2: LD_LIBRARY_PATH=. ./bench 10 dir-1 dir-1000 +| Time (mean ± σ): 1.256 s ± 0.013 s [User: 1.034 s, System: 0.212 s] +| Range (min … max): 1.244 s … 1.286 s 10 runs +| +| Summary +| LD_LIBRARY_PATH=. ./bench 10 dir-1 dir-1000 ran +| 5.32 ± 0.06 times faster than ./bench 10 dir-1 dir-1000 + +In the worst case scenario where many entries are added to a stack that +is then repeatedly used to add more certificates, and with a larger test +size, the speedup is still very significant. With 15000 certificates, +a single pass to load them, followed by attempting to load a subset of +1000 of these 15000 certificates, followed by a single certificate, the +new approach is ~85 times faster: + +| Benchmark 1: ./bench 1 dir-15000 dir-1000 dir-1 +| Time (mean ± σ): 176.295 s ± 4.147 s [User: 174.593 s, System: 0.448 s] +| Range (min … max): 173.774 s … 185.594 s 10 runs +| +| Benchmark 2: LD_LIBRARY_PATH=. ./bench 1 dir-15000 dir-1000 dir-1 +| Time (mean ± σ): 2.087 s ± 0.034 s [User: 1.679 s, System: 0.393 s] +| Range (min … max): 2.057 s … 2.167 s 10 runs +| +| Summary +| LD_LIBRARY_PATH=. ./bench 1 dir-15000 dir-1000 dir-1 ran +| 84.48 ± 2.42 times faster than ./bench 1 dir-15000 dir-1000 dir-1 + +Signed-off-by: Clemens Lang +--- + ssl/ssl_cert.c | 74 ++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 65 insertions(+), 9 deletions(-) + +diff --git a/ssl/ssl_cert.c b/ssl/ssl_cert.c +index 0ff407bf55edc..5e5ffe39d0655 100644 +--- a/ssl/ssl_cert.c ++++ b/ssl/ssl_cert.c +@@ -813,16 +813,14 @@ STACK_OF(X509_NAME) *SSL_load_client_CA_file(const char *file) + return SSL_load_client_CA_file_ex(file, NULL, NULL); + } + +-int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, +- const char *file) ++static int add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, ++ const char *file, ++ LHASH_OF(X509_NAME) *name_hash) + { + BIO *in; + X509 *x = NULL; + X509_NAME *xn = NULL; + int ret = 1; +- int (*oldcmp) (const X509_NAME *const *a, const X509_NAME *const *b); +- +- oldcmp = sk_X509_NAME_set_cmp_func(stack, xname_sk_cmp); + + in = BIO_new(BIO_s_file()); + +@@ -842,12 +840,15 @@ int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, + xn = X509_NAME_dup(xn); + if (xn == NULL) + goto err; +- if (sk_X509_NAME_find(stack, xn) >= 0) { ++ if (lh_X509_NAME_retrieve(name_hash, xn) != NULL) { + /* Duplicate. */ + X509_NAME_free(xn); + } else if (!sk_X509_NAME_push(stack, xn)) { + X509_NAME_free(xn); + goto err; ++ } else { ++ /* Successful insert, add to hash table */ ++ lh_X509_NAME_insert(name_hash, xn); + } + } + +@@ -859,7 +860,42 @@ int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, + done: + BIO_free(in); + X509_free(x); +- (void)sk_X509_NAME_set_cmp_func(stack, oldcmp); ++ return ret; ++} ++ ++int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, ++ const char *file) ++{ ++ X509_NAME *xn = NULL; ++ int ret = 1; ++ int idx = 0; ++ int num = 0; ++ LHASH_OF(X509_NAME) *name_hash = lh_X509_NAME_new(xname_hash, xname_cmp); ++ ++ if (name_hash == NULL) { ++ ERR_raise(ERR_LIB_SSL, ERR_R_CRYPTO_LIB); ++ goto err; ++ } ++ ++ /* ++ * Pre-populate the lhash with the existing entries of the stack, since ++ * using the LHASH_OF is much faster for duplicate checking. That's because ++ * xname_cmp converts the X509_NAMEs to DER involving a memory allocation ++ * for every single invocation of the comparison function. ++ */ ++ num = sk_X509_NAME_num(stack); ++ for (idx = 0; idx < num; idx++) { ++ xn = sk_X509_NAME_value(stack, idx); ++ lh_X509_NAME_insert(name_hash, xn); ++ } ++ ++ ret = add_file_cert_subjects_to_stack(stack, file, name_hash); ++ goto done; ++ ++ err: ++ ret = 0; ++ done: ++ lh_X509_NAME_free(name_hash); + return ret; + } + +@@ -869,8 +905,27 @@ int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, + OPENSSL_DIR_CTX *d = NULL; + const char *filename; + int ret = 0; ++ X509_NAME *xn = NULL; ++ int idx = 0; ++ int num = 0; ++ LHASH_OF(X509_NAME) *name_hash = lh_X509_NAME_new(xname_hash, xname_cmp); ++ ++ if (name_hash == NULL) { ++ ERR_raise(ERR_LIB_SSL, ERR_R_CRYPTO_LIB); ++ goto err; ++ } + +- /* Note that a side effect is that the CAs will be sorted by name */ ++ /* ++ * Pre-populate the lhash with the existing entries of the stack, since ++ * using the LHASH_OF is much faster for duplicate checking. That's because ++ * xname_cmp converts the X509_NAMEs to DER involving a memory allocation ++ * for every single invocation of the comparison function. ++ */ ++ num = sk_X509_NAME_num(stack); ++ for (idx = 0; idx < num; idx++) { ++ xn = sk_X509_NAME_value(stack, idx); ++ lh_X509_NAME_insert(name_hash, xn); ++ } + + while ((filename = OPENSSL_DIR_read(&d, dir))) { + char buf[1024]; +@@ -899,7 +954,7 @@ int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, + #endif + if (r <= 0 || r >= (int)sizeof(buf)) + goto err; +- if (!SSL_add_file_cert_subjects_to_stack(stack, buf)) ++ if (!add_file_cert_subjects_to_stack(stack, buf, name_hash)) + goto err; + } + +@@ -915,6 +970,7 @@ int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, + err: + if (d) + OPENSSL_DIR_end(&d); ++ lh_X509_NAME_free(name_hash); + + return ret; + } diff --git a/openssl.spec b/openssl.spec index 66baaf6..e053321 100644 --- a/openssl.spec +++ b/openssl.spec @@ -166,6 +166,8 @@ Patch124: 0124-PBMAC1-PKCS12-FIPS-support.patch Patch125: 0125-PBMAC1-PKCS12-FIPS-default.patch # https://github.com/openssl/openssl/issues/25127 Patch126: 0126-pkeyutl-encap.patch +# https://github.com/openssl/openssl/issues/25056 +Patch127: 0127-speedup-SSL_add_cert_subjects_to_stack.patch License: Apache-2.0 URL: http://www.openssl.org/ @@ -520,6 +522,8 @@ ln -s /etc/crypto-policies/back-ends/openssl_fips.config $RPM_BUILD_ROOT%{_sysco Related: RHEL-41261 - Enable KTLS, temporary disable KTLS tests Related: RHEL-47335 +- Speedup SSL_add_{file,dir}_cert_subjects_to_stack + Resolves: RHEL-54232 * Fri Aug 09 2024 Dmitry Belyavskiy - 1:3.2.2-9 - An interface to create PKCS #12 files in FIPS compliant way