From ea75c725eebcca4cc71be3c1fba4d8ad4d6c06ac Mon Sep 17 00:00:00 2001 From: Dmitry Belyavskiy Date: Wed, 22 Jun 2022 12:35:27 +0200 Subject: [PATCH] Fix PPC64 Montgomery multiplication bug Related: rhbz#2098199 --- 0067-fix-ppc64-montgomery.patch | 662 ++++++++++++++++++++++++++++++++ openssl.spec | 4 + 2 files changed, 666 insertions(+) create mode 100644 0067-fix-ppc64-montgomery.patch diff --git a/0067-fix-ppc64-montgomery.patch b/0067-fix-ppc64-montgomery.patch new file mode 100644 index 0000000..a572ef8 --- /dev/null +++ b/0067-fix-ppc64-montgomery.patch @@ -0,0 +1,662 @@ +diff --git a/crypto/bn/asm/ppc64-mont-fixed.pl b/crypto/bn/asm/ppc64-mont-fixed.pl +index 56df89dc27da..e69de29bb2d1 100755 +--- a/crypto/bn/asm/ppc64-mont-fixed.pl ++++ b/crypto/bn/asm/ppc64-mont-fixed.pl +@@ -1,581 +0,0 @@ +-#! /usr/bin/env perl +-# Copyright 2021 The OpenSSL Project Authors. All Rights Reserved. +-# +-# Licensed under the Apache License 2.0 (the "License"). You may not use +-# this file except in compliance with the License. You can obtain a copy +-# in the file LICENSE in the source distribution or at +-# https://www.openssl.org/source/license.html +- +-# ==================================================================== +-# Written by Amitay Isaacs , Martin Schwenke +-# & Alastair D'Silva for +-# the OpenSSL project. +-# ==================================================================== +- +-# +-# Fixed length (n=6), unrolled PPC Montgomery Multiplication +-# +- +-# 2021 +-# +-# Although this is a generic implementation for unrolling Montgomery +-# Multiplication for arbitrary values of n, this is currently only +-# used for n = 6 to improve the performance of ECC p384. +-# +-# Unrolling allows intermediate results to be stored in registers, +-# rather than on the stack, improving performance by ~7% compared to +-# the existing PPC assembly code. +-# +-# The ISA 3.0 implementation uses combination multiply/add +-# instructions (maddld, maddhdu) to improve performance by an +-# additional ~10% on Power 9. +-# +-# Finally, saving non-volatile registers into volatile vector +-# registers instead of onto the stack saves a little more. +-# +-# On a Power 9 machine we see an overall improvement of ~18%. +-# +- +-use strict; +-use warnings; +- +-my ($flavour, $output, $dir, $xlate); +- +-# $output is the last argument if it looks like a file (it has an extension) +-# $flavour is the first argument if it doesn't look like a file +-$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +-$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; +- +-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +-die "can't locate ppc-xlate.pl"; +- +-open STDOUT,"| $^X $xlate $flavour \"$output\"" +- or die "can't call $xlate: $!"; +- +-if ($flavour !~ /64/) { +- die "bad flavour ($flavour) - only ppc64 permitted"; +-} +- +-my $SIZE_T= 8; +- +-# Registers are global so the code is remotely readable +- +-# Parameters for Montgomery multiplication +-my $sp = "r1"; +-my $toc = "r2"; +-my $rp = "r3"; +-my $ap = "r4"; +-my $bp = "r5"; +-my $np = "r6"; +-my $n0 = "r7"; +-my $num = "r8"; +- +-my $i = "r9"; +-my $c0 = "r10"; +-my $bp0 = "r11"; +-my $bpi = "r11"; +-my $bpj = "r11"; +-my $tj = "r12"; +-my $apj = "r12"; +-my $npj = "r12"; +-my $lo = "r14"; +-my $c1 = "r14"; +- +-# Non-volatile registers used for tp[i] +-# +-# 12 registers are available but the limit on unrolling is 10, +-# since registers from $tp[0] to $tp[$n+1] are used. +-my @tp = ("r20" .. "r31"); +- +-# volatile VSRs for saving non-volatile GPRs - faster than stack +-my @vsrs = ("v32" .. "v46"); +- +-package Mont; +- +-sub new($$) +-{ +- my ($class, $n) = @_; +- +- if ($n > 10) { +- die "Can't unroll for BN length ${n} (maximum 10)" +- } +- +- my $self = { +- code => "", +- n => $n, +- }; +- bless $self, $class; +- +- return $self; +-} +- +-sub add_code($$) +-{ +- my ($self, $c) = @_; +- +- $self->{code} .= $c; +-} +- +-sub get_code($) +-{ +- my ($self) = @_; +- +- return $self->{code}; +-} +- +-sub get_function_name($) +-{ +- my ($self) = @_; +- +- return "bn_mul_mont_fixed_n" . $self->{n}; +-} +- +-sub get_label($$) +-{ +- my ($self, $l) = @_; +- +- return "L" . $l . "_" . $self->{n}; +-} +- +-sub get_labels($@) +-{ +- my ($self, @labels) = @_; +- +- my %out = (); +- +- foreach my $l (@labels) { +- $out{"$l"} = $self->get_label("$l"); +- } +- +- return \%out; +-} +- +-sub nl($) +-{ +- my ($self) = @_; +- +- $self->add_code("\n"); +-} +- +-sub copy_result($) +-{ +- my ($self) = @_; +- +- my ($n) = $self->{n}; +- +- for (my $j = 0; $j < $n; $j++) { +- $self->add_code(<<___); +- std $tp[$j],`$j*$SIZE_T`($rp) +-___ +- } +- +-} +- +-sub mul_mont_fixed($) +-{ +- my ($self) = @_; +- +- my ($n) = $self->{n}; +- my $fname = $self->get_function_name(); +- my $label = $self->get_labels("outer", "enter", "sub", "copy", "end"); +- +- $self->add_code(<<___); +- +-.globl .${fname} +-.align 5 +-.${fname}: +- +-___ +- +- $self->save_registers(); +- +- $self->add_code(<<___); +- ld $n0,0($n0) +- +- ld $bp0,0($bp) +- +- ld $apj,0($ap) +-___ +- +- $self->mul_c_0($tp[0], $apj, $bp0, $c0); +- +- for (my $j = 1; $j < $n - 1; $j++) { +- $self->add_code(<<___); +- ld $apj,`$j*$SIZE_T`($ap) +-___ +- $self->mul($tp[$j], $apj, $bp0, $c0); +- } +- +- $self->add_code(<<___); +- ld $apj,`($n-1)*$SIZE_T`($ap) +-___ +- +- $self->mul_last($tp[$n-1], $tp[$n], $apj, $bp0, $c0); +- +- $self->add_code(<<___); +- li $tp[$n+1],0 +- +-___ +- +- $self->add_code(<<___); +- li $i,0 +- mtctr $num +- b $label->{"enter"} +- +-.align 4 +-$label->{"outer"}: +- ldx $bpi,$bp,$i +- +- ld $apj,0($ap) +-___ +- +- $self->mul_add_c_0($tp[0], $tp[0], $apj, $bpi, $c0); +- +- for (my $j = 1; $j < $n; $j++) { +- $self->add_code(<<___); +- ld $apj,`$j*$SIZE_T`($ap) +-___ +- $self->mul_add($tp[$j], $tp[$j], $apj, $bpi, $c0); +- } +- +- $self->add_code(<<___); +- addc $tp[$n],$tp[$n],$c0 +- addze $tp[$n+1],$tp[$n+1] +-___ +- +- $self->add_code(<<___); +-.align 4 +-$label->{"enter"}: +- mulld $bpi,$tp[0],$n0 +- +- ld $npj,0($np) +-___ +- +- $self->mul_add_c_0($lo, $tp[0], $bpi, $npj, $c0); +- +- for (my $j = 1; $j < $n; $j++) { +- $self->add_code(<<___); +- ld $npj,`$j*$SIZE_T`($np) +-___ +- $self->mul_add($tp[$j-1], $tp[$j], $npj, $bpi, $c0); +- } +- +- $self->add_code(<<___); +- addc $tp[$n-1],$tp[$n],$c0 +- addze $tp[$n],$tp[$n+1] +- +- addi $i,$i,$SIZE_T +- bdnz $label->{"outer"} +- +- and. $tp[$n],$tp[$n],$tp[$n] +- bne $label->{"sub"} +- +- cmpld $tp[$n-1],$npj +- blt $label->{"copy"} +- +-$label->{"sub"}: +-___ +- +- # +- # Reduction +- # +- +- $self->add_code(<<___); +- ld $bpj,`0*$SIZE_T`($np) +- subfc $c1,$bpj,$tp[0] +- std $c1,`0*$SIZE_T`($rp) +- +-___ +- for (my $j = 1; $j < $n - 1; $j++) { +- $self->add_code(<<___); +- ld $bpj,`$j*$SIZE_T`($np) +- subfe $c1,$bpj,$tp[$j] +- std $c1,`$j*$SIZE_T`($rp) +- +-___ +- } +- +- $self->add_code(<<___); +- subfe $c1,$npj,$tp[$n-1] +- std $c1,`($n-1)*$SIZE_T`($rp) +- +-___ +- +- $self->add_code(<<___); +- addme. $tp[$n],$tp[$n] +- beq $label->{"end"} +- +-$label->{"copy"}: +-___ +- +- $self->copy_result(); +- +- $self->add_code(<<___); +- +-$label->{"end"}: +-___ +- +- $self->restore_registers(); +- +- $self->add_code(<<___); +- li r3,1 +- blr +-.size .${fname},.-.${fname} +-___ +- +-} +- +-package Mont::GPR; +- +-our @ISA = ('Mont'); +- +-sub new($$) +-{ +- my ($class, $n) = @_; +- +- return $class->SUPER::new($n); +-} +- +-sub save_registers($) +-{ +- my ($self) = @_; +- +- my $n = $self->{n}; +- +- $self->add_code(<<___); +- std $lo,-8($sp) +-___ +- +- for (my $j = 0; $j <= $n+1; $j++) { +- $self->{code}.=<<___; +- std $tp[$j],-`($j+2)*8`($sp) +-___ +- } +- +- $self->add_code(<<___); +- +-___ +-} +- +-sub restore_registers($) +-{ +- my ($self) = @_; +- +- my $n = $self->{n}; +- +- $self->add_code(<<___); +- ld $lo,-8($sp) +-___ +- +- for (my $j = 0; $j <= $n+1; $j++) { +- $self->{code}.=<<___; +- ld $tp[$j],-`($j+2)*8`($sp) +-___ +- } +- +- $self->{code} .=<<___; +- +-___ +-} +- +-# Direct translation of C mul() +-sub mul($$$$$) +-{ +- my ($self, $r, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $lo,$a,$w +- addc $r,$lo,$c +- mulhdu $c,$a,$w +- addze $c,$c +- +-___ +-} +- +-# Like mul() but $c is ignored as an input - an optimisation to save a +-# preliminary instruction that would set input $c to 0 +-sub mul_c_0($$$$$) +-{ +- my ($self, $r, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $r,$a,$w +- mulhdu $c,$a,$w +- +-___ +-} +- +-# Like mul() but does not to the final addition of CA into $c - an +-# optimisation to save an instruction +-sub mul_last($$$$$$) +-{ +- my ($self, $r1, $r2, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $lo,$a,$w +- addc $r1,$lo,$c +- mulhdu $c,$a,$w +- +- addze $r2,$c +-___ +-} +- +-# Like C mul_add() but allow $r_out and $r_in to be different +-sub mul_add($$$$$$) +-{ +- my ($self, $r_out, $r_in, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $lo,$a,$w +- addc $lo,$lo,$c +- mulhdu $c,$a,$w +- addze $c,$c +- addc $r_out,$r_in,$lo +- addze $c,$c +- +-___ +-} +- +-# Like mul_add() but $c is ignored as an input - an optimisation to save a +-# preliminary instruction that would set input $c to 0 +-sub mul_add_c_0($$$$$$) +-{ +- my ($self, $r_out, $r_in, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $lo,$a,$w +- addc $r_out,$r_in,$lo +- mulhdu $c,$a,$w +- addze $c,$c +- +-___ +-} +- +-package Mont::GPR_300; +- +-our @ISA = ('Mont::GPR'); +- +-sub new($$) +-{ +- my ($class, $n) = @_; +- +- my $mont = $class->SUPER::new($n); +- +- return $mont; +-} +- +-sub get_function_name($) +-{ +- my ($self) = @_; +- +- return "bn_mul_mont_300_fixed_n" . $self->{n}; +-} +- +-sub get_label($$) +-{ +- my ($self, $l) = @_; +- +- return "L" . $l . "_300_" . $self->{n}; +-} +- +-# Direct translation of C mul() +-sub mul($$$$$) +-{ +- my ($self, $r, $a, $w, $c, $last) = @_; +- +- $self->add_code(<<___); +- maddld $r,$a,$w,$c +- maddhdu $c,$a,$w,$c +- +-___ +-} +- +-# Save the last carry as the final entry +-sub mul_last($$$$$) +-{ +- my ($self, $r1, $r2, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- maddld $r1,$a,$w,$c +- maddhdu $r2,$a,$w,$c +- +-___ +-} +- +-# Like mul() but $c is ignored as an input - an optimisation to save a +-# preliminary instruction that would set input $c to 0 +-sub mul_c_0($$$$$) +-{ +- my ($self, $r, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- mulld $r,$a,$w +- mulhdu $c,$a,$w +- +-___ +-} +- +-# Like C mul_add() but allow $r_out and $r_in to be different +-sub mul_add($$$$$$) +-{ +- my ($self, $r_out, $r_in, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- maddld $lo,$a,$w,$c +- maddhdu $c,$a,$w,$c +- addc $r_out,$r_in,$lo +- addze $c,$c +- +-___ +-} +- +-# Like mul_add() but $c is ignored as an input - an optimisation to save a +-# preliminary instruction that would set input $c to 0 +-sub mul_add_c_0($$$$$$) +-{ +- my ($self, $r_out, $r_in, $a, $w, $c) = @_; +- +- $self->add_code(<<___); +- maddld $lo,$a,$w,$r_in +- maddhdu $c,$a,$w,$r_in +-___ +- +- if ($r_out ne $lo) { +- $self->add_code(<<___); +- mr $r_out,$lo +-___ +- } +- +- $self->nl(); +-} +- +- +-package main; +- +-my $code; +- +-$code.=<<___; +-.machine "any" +-.text +-___ +- +-my $mont; +- +-$mont = new Mont::GPR(6); +-$mont->mul_mont_fixed(); +-$code .= $mont->get_code(); +- +-$mont = new Mont::GPR_300(6); +-$mont->mul_mont_fixed(); +-$code .= $mont->get_code(); +- +-$code =~ s/\`([^\`]*)\`/eval $1/gem; +- +-$code.=<<___; +-.asciz "Montgomery Multiplication for PPC by , " +-___ +- +-print $code; +-close STDOUT or die "error closing STDOUT: $!"; +diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c +index 1e9421bee213..3ee76ea96574 100644 +--- a/crypto/bn/bn_ppc.c ++++ b/crypto/bn/bn_ppc.c +@@ -19,12 +19,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, int num); + int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, int num); +- int bn_mul_mont_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap, +- const BN_ULONG *bp, const BN_ULONG *np, +- const BN_ULONG *n0, int num); +- int bn_mul_mont_300_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap, +- const BN_ULONG *bp, const BN_ULONG *np, +- const BN_ULONG *n0, int num); + + if (num < 4) + return 0; +@@ -40,14 +34,5 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + * no opportunity to figure it out... + */ + +-#if defined(_ARCH_PPC64) +- if (num == 6) { +- if (OPENSSL_ppccap_P & PPC_MADD300) +- return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num); +- else +- return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num); +- } +-#endif +- + return bn_mul_mont_int(rp, ap, bp, np, n0, num); + } +diff --git a/crypto/bn/build.info b/crypto/bn/build.info +index 987a70ae263b..4f8d0689b5ea 100644 +--- a/crypto/bn/build.info ++++ b/crypto/bn/build.info +@@ -79,7 +79,7 @@ IF[{- !$disabled{asm} -}] + + $BNASM_ppc32=bn_ppc.c bn-ppc.s ppc-mont.s + $BNDEF_ppc32=OPENSSL_BN_ASM_MONT +- $BNASM_ppc64=$BNASM_ppc32 ppc64-mont-fixed.s ++ $BNASM_ppc64=$BNASM_ppc32 + $BNDEF_ppc64=$BNDEF_ppc32 + + $BNASM_c64xplus=asm/bn-c64xplus.asm +@@ -173,7 +173,6 @@ GENERATE[parisc-mont.s]=asm/parisc-mont.pl + GENERATE[bn-ppc.s]=asm/ppc.pl + GENERATE[ppc-mont.s]=asm/ppc-mont.pl + GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl +-GENERATE[ppc64-mont-fixed.s]=asm/ppc64-mont-fixed.pl + + GENERATE[alpha-mont.S]=asm/alpha-mont.pl + +diff --git a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt +index f36982845db4..1543ed9f7534 100644 +--- a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt ++++ b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt +@@ -97,6 +97,18 @@ Key = P-256-PUBLIC + Input = "Hello World" + Output = 3046022100e7515177ec3817b77a4a94066ab3070817b7aa9d44a8a09f040da250116e8972022100ba59b0f631258e59a9026be5d84f60685f4cf22b9165a0c2736d5c21c8ec1862 + ++PublicKey=P-384-PUBLIC ++-----BEGIN PUBLIC KEY----- ++MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAES/TlL5WEJ+u1kV+4yVlVUbTTo/2rZ7rd ++nWwwk/QlukNjDfcfQvDrfOqpTZ9kSKhd0wMxWIJJ/S/cCzCex+2EgbwW8ngAwT19 ++twD8guGxyFRaoMDTtW47/nifwYqRaIfC ++-----END PUBLIC KEY----- ++ ++DigestVerify = SHA384 ++Key = P-384-PUBLIC ++Input = "123400" ++Output = 304d0218389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68b023100ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970 ++ + # Oneshot tests + OneShotDigestVerify = SHA256 + Key = P-256-PUBLIC diff --git a/openssl.spec b/openssl.spec index 7a92ccf..64ccc85 100644 --- a/openssl.spec +++ b/openssl.spec @@ -128,6 +128,8 @@ Patch65: 0065-CVE-2022-1292.patch # https://github.com/openssl/openssl/pull/18444 # https://github.com/openssl/openssl/pull/18467 Patch66: 0066-replace-expired-certs.patch +# https://github.com/openssl/openssl/pull/18512 +Patch67: 0067-fix-ppc64-montgomery.patch License: ASL 2.0 URL: http://www.openssl.org/ @@ -466,6 +468,8 @@ install -m644 %{SOURCE9} \ - Related: rhbz#2053289 - Improve diagnostics when passing unsupported groups in TLS - Related: rhbz#2070197 +- Fix PPC64 Montgomery multiplication bug +- Related: rhbz#2098199 * Wed Jun 08 2022 Clemens Lang - 1:3.0.1-35 - Add explicit indicators for signatures in FIPS mode and mark signature