Fix PPC64 Montgomery multiplication bug

Related: rhbz#2098199
2022-06-22 12:35:27 +02:00 · 2022-06-22 12:35:27 +02:00 · ea75c725ee
commit ea75c725ee
parent f4e1bded66
2 changed files with 666 additions and 0 deletions
--- a/0067-fix-ppc64-montgomery.patch
+++ b/0067-fix-ppc64-montgomery.patch
@ -0,0 +1,662 @@
 diff --git a/crypto/bn/asm/ppc64-mont-fixed.pl b/crypto/bn/asm/ppc64-mont-fixed.pl
 index 56df89dc27da..e69de29bb2d1 100755
 --- a/crypto/bn/asm/ppc64-mont-fixed.pl
 +++ b/crypto/bn/asm/ppc64-mont-fixed.pl
@@ -1,581 +0,0 @@
 -#! /usr/bin/env perl
 -# Copyright 2021 The OpenSSL Project Authors. All Rights Reserved.
 -#
 -# Licensed under the Apache License 2.0 (the "License").  You may not use
 -# this file except in compliance with the License.  You can obtain a copy
 -# in the file LICENSE in the source distribution or at
 -# https://www.openssl.org/source/license.html
 -
 -# ====================================================================
 -# Written by Amitay Isaacs <amitay@ozlabs.org>, Martin Schwenke
 -# <martin@meltin.net> & Alastair D'Silva <alastair@d-silva.org> for
 -# the OpenSSL project.
 -# ====================================================================
 -
 -#
 -# Fixed length (n=6), unrolled PPC Montgomery Multiplication
 -#
 -
 -# 2021
 -#
 -# Although this is a generic implementation for unrolling Montgomery
 -# Multiplication for arbitrary values of n, this is currently only
 -# used for n = 6 to improve the performance of ECC p384.
 -#
 -# Unrolling allows intermediate results to be stored in registers,
 -# rather than on the stack, improving performance by ~7% compared to
 -# the existing PPC assembly code.
 -#
 -# The ISA 3.0 implementation uses combination multiply/add
 -# instructions (maddld, maddhdu) to improve performance by an
 -# additional ~10% on Power 9.
 -#
 -# Finally, saving non-volatile registers into volatile vector
 -# registers instead of onto the stack saves a little more.
 -#
 -# On a Power 9 machine we see an overall improvement of ~18%.
 -#
 -
 -use strict;
 -use warnings;
 -
 -my ($flavour, $output, $dir, $xlate);
 -
 -# $output is the last argument if it looks like a file (it has an extension)
 -# $flavour is the first argument if it doesn't look like a file
 -$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
 -$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
 -
 -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 -die "can't locate ppc-xlate.pl";
 -
 -open STDOUT,"| $^X $xlate $flavour \"$output\""
 -    or die "can't call $xlate: $!";
 -
 -if ($flavour !~ /64/) {
 -	die "bad flavour ($flavour) - only ppc64 permitted";
 -}
 -
 -my $SIZE_T= 8;
 -
 -# Registers are global so the code is remotely readable
 -
 -# Parameters for Montgomery multiplication
 -my $sp	= "r1";
 -my $toc	= "r2";
 -my $rp	= "r3";
 -my $ap	= "r4";
 -my $bp	= "r5";
 -my $np	= "r6";
 -my $n0	= "r7";
 -my $num	= "r8";
 -
 -my $i	= "r9";
 -my $c0	= "r10";
 -my $bp0	= "r11";
 -my $bpi	= "r11";
 -my $bpj	= "r11";
 -my $tj	= "r12";
 -my $apj	= "r12";
 -my $npj	= "r12";
 -my $lo	= "r14";
 -my $c1	= "r14";
 -
 -# Non-volatile registers used for tp[i]
 -#
 -# 12 registers are available but the limit on unrolling is 10,
 -# since registers from $tp[0] to $tp[$n+1] are used.
 -my @tp = ("r20" .. "r31");
 -
 -# volatile VSRs for saving non-volatile GPRs - faster than stack
 -my @vsrs = ("v32" .. "v46");
 -
 -package Mont;
 -
 -sub new($$)
 -{
 -	my ($class, $n) = @_;
 -
 -	if ($n > 10) {
 -		die "Can't unroll for BN length ${n} (maximum 10)"
 -	}
 -
 -	my $self = {
 -		code => "",
 -		n => $n,
 -	};
 -	bless $self, $class;
 -
 -	return $self;
 -}
 -
 -sub add_code($$)
 -{
 -	my ($self, $c) = @_;
 -
 -	$self->{code} .= $c;
 -}
 -
 -sub get_code($)
 -{
 -	my ($self) = @_;
 -
 -	return $self->{code};
 -}
 -
 -sub get_function_name($)
 -{
 -	my ($self) = @_;
 -
 -	return "bn_mul_mont_fixed_n" . $self->{n};
 -}
 -
 -sub get_label($$)
 -{
 -	my ($self, $l) = @_;
 -
 -	return "L" . $l . "_" . $self->{n};
 -}
 -
 -sub get_labels($@)
 -{
 -	my ($self, @labels) = @_;
 -
 -	my %out = ();
 -
 -	foreach my $l (@labels) {
 -		$out{"$l"} = $self->get_label("$l");
 -	}
 -
 -	return \%out;
 -}
 -
 -sub nl($)
 -{
 -	my ($self) = @_;
 -
 -	$self->add_code("\n");
 -}
 -
 -sub copy_result($)
 -{
 -	my ($self) = @_;
 -
 -	my ($n) = $self->{n};
 -
 -	for (my $j = 0; $j < $n; $j++) {
 -		$self->add_code(<<___);
 -	std		$tp[$j],`$j*$SIZE_T`($rp)
 -___
 -	}
 -
 -}
 -
 -sub mul_mont_fixed($)
 -{
 -	my ($self) = @_;
 -
 -	my ($n) = $self->{n};
 -	my $fname = $self->get_function_name();
 -	my $label = $self->get_labels("outer", "enter", "sub", "copy", "end");
 -
 -	$self->add_code(<<___);
 -
 -.globl	.${fname}
 -.align	5
 -.${fname}:
 -
 -___
 -
 -	$self->save_registers();
 -
 -	$self->add_code(<<___);
 -	ld		$n0,0($n0)
 -
 -	ld		$bp0,0($bp)
 -
 -	ld		$apj,0($ap)
 -___
 -
 -	$self->mul_c_0($tp[0], $apj, $bp0, $c0);
 -
 -	for (my $j = 1; $j < $n - 1; $j++) {
 -		$self->add_code(<<___);
 -	ld		$apj,`$j*$SIZE_T`($ap)
 -___
 -		$self->mul($tp[$j], $apj, $bp0, $c0);
 -	}
 -
 -	$self->add_code(<<___);
 -	ld		$apj,`($n-1)*$SIZE_T`($ap)
 -___
 -
 -	$self->mul_last($tp[$n-1], $tp[$n], $apj, $bp0, $c0);
 -
 -	$self->add_code(<<___);
 -	li		$tp[$n+1],0
 -
 -___
 -
 -	$self->add_code(<<___);
 -	li		$i,0
 -	mtctr		$num
 -	b		$label->{"enter"}
 -
 -.align	4
 -$label->{"outer"}:
 -	ldx		$bpi,$bp,$i
 -
 -	ld		$apj,0($ap)
 -___
 -
 -	$self->mul_add_c_0($tp[0], $tp[0], $apj, $bpi, $c0);
 -
 -	for (my $j = 1; $j < $n; $j++) {
 -		$self->add_code(<<___);
 -	ld		$apj,`$j*$SIZE_T`($ap)
 -___
 -		$self->mul_add($tp[$j], $tp[$j], $apj, $bpi, $c0);
 -	}
 -
 -	$self->add_code(<<___);
 -	addc		$tp[$n],$tp[$n],$c0
 -	addze		$tp[$n+1],$tp[$n+1]
 -___
 -
 -	$self->add_code(<<___);
 -.align	4
 -$label->{"enter"}:
 -	mulld		$bpi,$tp[0],$n0
 -
 -	ld		$npj,0($np)
 -___
 -
 -	$self->mul_add_c_0($lo, $tp[0], $bpi, $npj, $c0);
 -
 -	for (my $j = 1; $j < $n; $j++) {
 -		$self->add_code(<<___);
 -	ld		$npj,`$j*$SIZE_T`($np)
 -___
 -		$self->mul_add($tp[$j-1], $tp[$j], $npj, $bpi, $c0);
 -	}
 -
 -	$self->add_code(<<___);
 -	addc		$tp[$n-1],$tp[$n],$c0
 -	addze		$tp[$n],$tp[$n+1]
 -
 -	addi		$i,$i,$SIZE_T
 -	bdnz		$label->{"outer"}
 -
 -	and.		$tp[$n],$tp[$n],$tp[$n]
 -	bne		$label->{"sub"}
 -
 -	cmpld	$tp[$n-1],$npj
 -	blt		$label->{"copy"}
 -
 -$label->{"sub"}:
 -___
 -
 -	#
 -	# Reduction
 -	#
 -
 -		$self->add_code(<<___);
 -	ld		$bpj,`0*$SIZE_T`($np)
 -	subfc		$c1,$bpj,$tp[0]
 -	std		$c1,`0*$SIZE_T`($rp)
 -
 -___
 -	for (my $j = 1; $j < $n - 1; $j++) {
 -		$self->add_code(<<___);
 -	ld		$bpj,`$j*$SIZE_T`($np)
 -	subfe		$c1,$bpj,$tp[$j]
 -	std		$c1,`$j*$SIZE_T`($rp)
 -
 -___
 -	}
 -
 -		$self->add_code(<<___);
 -	subfe		$c1,$npj,$tp[$n-1]
 -	std		$c1,`($n-1)*$SIZE_T`($rp)
 -
 -___
 -
 -	$self->add_code(<<___);
 -	addme.		$tp[$n],$tp[$n]
 -	beq		$label->{"end"}
 -
 -$label->{"copy"}:
 -___
 -
 -	$self->copy_result();
 -
 -	$self->add_code(<<___);
 -
 -$label->{"end"}:
 -___
 -
 -	$self->restore_registers();
 -
 -	$self->add_code(<<___);
 -	li		r3,1
 -	blr
 -.size .${fname},.-.${fname}
 -___
 -
 -}
 -
 -package Mont::GPR;
 -
 -our @ISA = ('Mont');
 -
 -sub new($$)
 -{
 -    my ($class, $n) = @_;
 -
 -    return $class->SUPER::new($n);
 -}
 -
 -sub save_registers($)
 -{
 -	my ($self) = @_;
 -
 -	my $n = $self->{n};
 -
 -	$self->add_code(<<___);
 -	std	$lo,-8($sp)
 -___
 -
 -	for (my $j = 0; $j <= $n+1; $j++) {
 -		$self->{code}.=<<___;
 -	std	$tp[$j],-`($j+2)*8`($sp)
 -___
 -	}
 -
 -	$self->add_code(<<___);
 -
 -___
 -}
 -
 -sub restore_registers($)
 -{
 -	my ($self) = @_;
 -
 -	my $n = $self->{n};
 -
 -	$self->add_code(<<___);
 -	ld	$lo,-8($sp)
 -___
 -
 -	for (my $j = 0; $j <= $n+1; $j++) {
 -		$self->{code}.=<<___;
 -	ld	$tp[$j],-`($j+2)*8`($sp)
 -___
 -	}
 -
 -	$self->{code} .=<<___;
 -
 -___
 -}
 -
 -# Direct translation of C mul()
 -sub mul($$$$$)
 -{
 -	my ($self, $r, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld		$lo,$a,$w
 -	addc		$r,$lo,$c
 -	mulhdu		$c,$a,$w
 -	addze		$c,$c
 -
 -___
 -}
 -
 -# Like mul() but $c is ignored as an input - an optimisation to save a
 -# preliminary instruction that would set input $c to 0
 -sub mul_c_0($$$$$)
 -{
 -	my ($self, $r, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld		$r,$a,$w
 -	mulhdu		$c,$a,$w
 -
 -___
 -}
 -
 -# Like mul() but does not to the final addition of CA into $c - an
 -# optimisation to save an instruction
 -sub mul_last($$$$$$)
 -{
 -	my ($self, $r1, $r2, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld		$lo,$a,$w
 -	addc		$r1,$lo,$c
 -	mulhdu		$c,$a,$w
 -
 -	addze		$r2,$c
 -___
 -}
 -
 -# Like C mul_add() but allow $r_out and $r_in to be different
 -sub mul_add($$$$$$)
 -{
 -	my ($self, $r_out, $r_in, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld		$lo,$a,$w
 -	addc		$lo,$lo,$c
 -	mulhdu		$c,$a,$w
 -	addze		$c,$c
 -	addc		$r_out,$r_in,$lo
 -	addze		$c,$c
 -
 -___
 -}
 -
 -# Like mul_add() but $c is ignored as an input - an optimisation to save a
 -# preliminary instruction that would set input $c to 0
 -sub mul_add_c_0($$$$$$)
 -{
 -	my ($self, $r_out, $r_in, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld		$lo,$a,$w
 -	addc		$r_out,$r_in,$lo
 -	mulhdu		$c,$a,$w
 -	addze		$c,$c
 -
 -___
 -}
 -
 -package Mont::GPR_300;
 -
 -our @ISA = ('Mont::GPR');
 -
 -sub new($$)
 -{
 -	my ($class, $n) = @_;
 -
 -	my $mont = $class->SUPER::new($n);
 -
 -	return $mont;
 -}
 -
 -sub get_function_name($)
 -{
 -	my ($self) = @_;
 -
 -	return "bn_mul_mont_300_fixed_n" . $self->{n};
 -}
 -
 -sub get_label($$)
 -{
 -	my ($self, $l) = @_;
 -
 -	return "L" . $l . "_300_" . $self->{n};
 -}
 -
 -# Direct translation of C mul()
 -sub mul($$$$$)
 -{
 -	my ($self, $r, $a, $w, $c, $last) = @_;
 -
 -	$self->add_code(<<___);
 -	maddld		$r,$a,$w,$c
 -	maddhdu		$c,$a,$w,$c
 -
 -___
 -}
 -
 -# Save the last carry as the final entry
 -sub mul_last($$$$$)
 -{
 -	my ($self, $r1, $r2, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	maddld		$r1,$a,$w,$c
 -	maddhdu		$r2,$a,$w,$c
 -
 -___
 -}
 -
 -# Like mul() but $c is ignored as an input - an optimisation to save a
 -# preliminary instruction that would set input $c to 0
 -sub mul_c_0($$$$$)
 -{
 -	my ($self, $r, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	mulld          $r,$a,$w
 -	mulhdu          $c,$a,$w
 -
 -___
 -}
 -
 -# Like C mul_add() but allow $r_out and $r_in to be different
 -sub mul_add($$$$$$)
 -{
 -	my ($self, $r_out, $r_in, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	maddld		$lo,$a,$w,$c
 -	maddhdu		$c,$a,$w,$c
 -	addc		$r_out,$r_in,$lo
 -	addze		$c,$c
 -
 -___
 -}
 -
 -# Like mul_add() but $c is ignored as an input - an optimisation to save a
 -# preliminary instruction that would set input $c to 0
 -sub mul_add_c_0($$$$$$)
 -{
 -	my ($self, $r_out, $r_in, $a, $w, $c) = @_;
 -
 -	$self->add_code(<<___);
 -	maddld		$lo,$a,$w,$r_in
 -	maddhdu		$c,$a,$w,$r_in
 -___
 -
 -	if ($r_out ne $lo) {
 -		$self->add_code(<<___);
 -	mr			$r_out,$lo
 -___
 -	}
 -
 -	$self->nl();
 -}
 -
 -
 -package main;
 -
 -my $code;
 -
 -$code.=<<___;
 -.machine "any"
 -.text
 -___
 -
 -my $mont;
 -
 -$mont = new Mont::GPR(6);
 -$mont->mul_mont_fixed();
 -$code .= $mont->get_code();
 -
 -$mont = new Mont::GPR_300(6);
 -$mont->mul_mont_fixed();
 -$code .= $mont->get_code();
 -
 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
 -
 -$code.=<<___;
 -.asciz  "Montgomery Multiplication for PPC by <amitay\@ozlabs.org>, <alastair\@d-silva.org>"
 -___
 -
 -print $code;
 -close STDOUT or die "error closing STDOUT: $!";
 diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c
 index 1e9421bee213..3ee76ea96574 100644
 --- a/crypto/bn/bn_ppc.c
 +++ b/crypto/bn/bn_ppc.c
@@ -19,12 +19,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
     int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                           const BN_ULONG *np, const BN_ULONG *n0, int num);
 -    int bn_mul_mont_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
 -                             const BN_ULONG *bp, const BN_ULONG *np,
 -                             const BN_ULONG *n0, int num);
 -    int bn_mul_mont_300_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
 -                                 const BN_ULONG *bp, const BN_ULONG *np,
 -                                 const BN_ULONG *n0, int num);
     if (num < 4)
         return 0;
@@ -40,14 +34,5 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
      * no opportunity to figure it out...
      */
 -#if defined(_ARCH_PPC64)
 -    if (num == 6) {
 -        if (OPENSSL_ppccap_P & PPC_MADD300)
 -            return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num);
 -        else
 -            return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num);
 -    }
 -#endif
 -
     return bn_mul_mont_int(rp, ap, bp, np, n0, num);
 }
 diff --git a/crypto/bn/build.info b/crypto/bn/build.info
 index 987a70ae263b..4f8d0689b5ea 100644
 --- a/crypto/bn/build.info
 +++ b/crypto/bn/build.info
@@ -79,7 +79,7 @@ IF[{- !$disabled{asm} -}]
   $BNASM_ppc32=bn_ppc.c bn-ppc.s ppc-mont.s
   $BNDEF_ppc32=OPENSSL_BN_ASM_MONT
 -  $BNASM_ppc64=$BNASM_ppc32 ppc64-mont-fixed.s
 +  $BNASM_ppc64=$BNASM_ppc32
   $BNDEF_ppc64=$BNDEF_ppc32
   $BNASM_c64xplus=asm/bn-c64xplus.asm
@@ -173,7 +173,6 @@ GENERATE[parisc-mont.s]=asm/parisc-mont.pl
 GENERATE[bn-ppc.s]=asm/ppc.pl
 GENERATE[ppc-mont.s]=asm/ppc-mont.pl
 GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl
 -GENERATE[ppc64-mont-fixed.s]=asm/ppc64-mont-fixed.pl
 GENERATE[alpha-mont.S]=asm/alpha-mont.pl
 diff --git a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt
 index f36982845db4..1543ed9f7534 100644
 --- a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt
 +++ b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt
@@ -97,6 +97,18 @@ Key = P-256-PUBLIC
 Input = "Hello World"
 Output = 3046022100e7515177ec3817b77a4a94066ab3070817b7aa9d44a8a09f040da250116e8972022100ba59b0f631258e59a9026be5d84f60685f4cf22b9165a0c2736d5c21c8ec1862
 +PublicKey=P-384-PUBLIC
 +-----BEGIN PUBLIC KEY-----
 +MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAES/TlL5WEJ+u1kV+4yVlVUbTTo/2rZ7rd
 +nWwwk/QlukNjDfcfQvDrfOqpTZ9kSKhd0wMxWIJJ/S/cCzCex+2EgbwW8ngAwT19
 +twD8guGxyFRaoMDTtW47/nifwYqRaIfC
 +-----END PUBLIC KEY-----
 +
 +DigestVerify = SHA384
 +Key = P-384-PUBLIC
 +Input = "123400"
 +Output = 304d0218389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68b023100ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
 +
 # Oneshot tests
 OneShotDigestVerify = SHA256
 Key = P-256-PUBLIC
--- a/openssl.spec
+++ b/openssl.spec
@ -128,6 +128,8 @@ Patch65: 0065-CVE-2022-1292.patch
 # https://github.com/openssl/openssl/pull/18444
 # https://github.com/openssl/openssl/pull/18467
 Patch66: 0066-replace-expired-certs.patch
 # https://github.com/openssl/openssl/pull/18512
 Patch67: 0067-fix-ppc64-montgomery.patch
 License: ASL 2.0
 URL: http://www.openssl.org/
@ -466,6 +468,8 @@ install -m644 %{SOURCE9} \
 - Related: rhbz#2053289
 - Improve diagnostics when passing unsupported groups in TLS
 - Related: rhbz#2070197
 - Fix PPC64 Montgomery multiplication bug
 - Related: rhbz#2098199
 * Wed Jun 08 2022 Clemens Lang <cllang@redhat.com> - 1:3.0.1-35
 - Add explicit indicators for signatures in FIPS mode and mark signature