import libgcrypt-1.8.5-6.el8

2021-11-09 04:54:21 -05:00 · 2021-11-09 04:54:21 -05:00 · a5f66ce153
commit a5f66ce153
parent b38ce0f2c6
8 changed files with 8203 additions and 1 deletions
--- a/SOURCES/libgcrypt-1.8.5-fips-hwfeatures.patch
+++ b/SOURCES/libgcrypt-1.8.5-fips-hwfeatures.patch
@ -0,0 +1,13 @@
 diff -up libgcrypt-1.8.5/src/hwfeatures.c.hw-fips libgcrypt-1.8.5/src/hwfeatures.c
 --- libgcrypt-1.8.5/src/hwfeatures.c.hw-fips	2021-06-25 11:55:55.843819137 +0200
 +++ libgcrypt-1.8.5/src/hwfeatures.c	2021-06-25 11:56:00.925895390 +0200
@@ -205,9 +205,6 @@ _gcry_detect_hw_features (void)
 {
   hw_features = 0;
 -  if (fips_mode ())
 -    return; /* Hardware support is not to be evaluated.  */
 -
   parse_hwf_deny_file ();
 #if defined (HAVE_CPU_ARCH_X86)
--- a/SOURCES/libgcrypt-1.8.5-ppc-aes-gcm.patch
+++ b/SOURCES/libgcrypt-1.8.5-ppc-aes-gcm.patch
--- a/SOURCES/libgcrypt-1.8.5-ppc-bugfix.patch
+++ b/SOURCES/libgcrypt-1.8.5-ppc-bugfix.patch
@ -0,0 +1,274 @@
 diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
 index 4d7f0add..b9a40130 100644
 --- a/cipher/crc-ppc.c
 +++ b/cipher/crc-ppc.c
@@ -154,26 +154,63 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
 #ifdef WORDS_BIGENDIAN
 # define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 # define CRC_VEC_U64_LOAD(offs, ptr) \
 -          asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
 +	  asm_swap_u64(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) \
 -	  CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
 +	  CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) \
 -         vec_vsx_ld((offs), (const unsigned long long *)(ptr))
 +	  asm_vec_u64_load(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 # define CRC_VEC_SWAP_TO_BE(v) (v)
 # define VEC_U64_LO 1
 # define VEC_U64_HI 0
 +
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +asm_vec_u64_load(unsigned long offset, const void *ptr)
 +{
 +  vector2x_u64 vecu64; /* receives the 16 bytes loaded from PTR + OFFSET by lxvd2x */
 +#if __GNUC__ >= 4
 +  if (__builtin_constant_p (offset) && offset == 0) /* compile-time zero offset: no offset register needed */
 +    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
 +		      : "=wa" (vecu64)
 +		      : "r" ((uintptr_t)ptr)
 +		      : "memory");
 +  else
 +#endif /* __GNUC__ >= 4 */
 +    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
 +		      : "=wa" (vecu64)
 +		      : "r" (offset), "r" ((uintptr_t)ptr)
 +		      : "memory", "r0"); /* NOTE(review): r0 clobber presumably keeps OFFSET out of r0, where RA=0 would read as literal 0 -- confirm against the Power ISA */
 +  return vecu64;
 +}
 #else
 # define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
 -# define CRC_VEC_U64_LOAD(offs, ptr) \
 -	  vec_vsx_ld((offs), (const unsigned long long *)(ptr))
 -# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
 +# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 +# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) (v)
 # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 # define VEC_U64_LO 0
 # define VEC_U64_HI 1
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +asm_vec_u64_load_le(unsigned long offset, const void *ptr)
 +{
 +  vector2x_u64 vecu64; /* raw lxvd2x result, before the doubleword swap below */
 +#if __GNUC__ >= 4
 +  if (__builtin_constant_p (offset) && offset == 0) /* compile-time zero offset: no offset register needed */
 +    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
 +		      : "=wa" (vecu64)
 +		      : "r" ((uintptr_t)ptr)
 +		      : "memory");
 +  else
 +#endif /* __GNUC__ >= 4 */
 +    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
 +		      : "=wa" (vecu64)
 +		      : "r" (offset), "r" ((uintptr_t)ptr)
 +		      : "memory", "r0"); /* NOTE(review): r0 clobber presumably keeps OFFSET out of r0, where RA=0 would read as literal 0 -- confirm against the Power ISA */
 +  return asm_swap_u64(vecu64); /* swap the two 64-bit halves (xxswapd) before returning */
 +}
 +
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_be(unsigned int offset, const void *ptr)
 {
 diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
 index a758e1ea..31ea25bf 100644
 --- a/cipher/sha512-ppc.c
 +++ b/cipher/sha512-ppc.c
@@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
 {
 -  asm ("vshasigmad %0,%1,%2,%3"
 -       : "=v" (v)
 -       : "v" (v), "g" (a), "g" (b)
 -       : "memory");
 +  __asm__ ("vshasigmad %0,%1,%2,%3"
 +	   : "=v" (v)
 +	   : "v" (v), "g" (a), "g" (b)
 +	   : "memory");
   return v;
 }
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +vec_u64_load(unsigned long offset, const void *ptr)
 +{
 +  vector2x_u64 vecu64; /* 16 bytes loaded from PTR + OFFSET; halves swapped on little-endian builds */
 +#if __GNUC__ >= 4
 +  if (__builtin_constant_p (offset) && offset == 0) /* compile-time zero offset: no offset register needed */
 +    __asm__ ("lxvd2x %x0,0,%1\n\t"
 +	     : "=wa" (vecu64)
 +	     : "r" ((uintptr_t)ptr)
 +	     : "memory");
 +  else
 +#endif /* __GNUC__ >= 4 */
 +    __asm__ ("lxvd2x %x0,%1,%2\n\t"
 +	     : "=wa" (vecu64)
 +	     : "r" (offset), "r" ((uintptr_t)ptr)
 +	     : "memory", "r0");
 +#ifndef WORDS_BIGENDIAN /* little-endian build: swap the two doublewords after the load */
 +  __asm__ ("xxswapd %x0, %x1"
 +	   : "=wa" (vecu64)
 +	   : "wa" (vecu64));
 +#endif /* !WORDS_BIGENDIAN */
 +  return vecu64;
 +}
 +
 +/* Store VECU64 to PTR + OFFSET with stxvd2x; little-endian builds swap the two doublewords first. */
 +static ASM_FUNC_ATTR_INLINE void
 +vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
 +{
 +#ifndef WORDS_BIGENDIAN /* little-endian build: swap the doublewords of the local copy before storing */
 +  __asm__ ("xxswapd %x0, %x1"
 +	   : "=wa" (vecu64)
 +	   : "wa" (vecu64));
 +#endif /* !WORDS_BIGENDIAN */
 +#if __GNUC__ >= 4
 +  if (__builtin_constant_p (offset) && offset == 0) /* compile-time zero offset: no offset register needed */
 +    __asm__ ("stxvd2x %x0,0,%1\n\t"
 +	     :
 +	     : "wa" (vecu64), "r" ((uintptr_t)ptr)
 +	     : "memory");
 +  else
 +#endif /* __GNUC__ >= 4 */
 +    __asm__ ("stxvd2x %x0,%1,%2\n\t"
 +	     :
 +	     : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
 +	     : "memory", "r0");
 +}
 +
 +
 /* SHA2 round in vector registers */
 #define R(a,b,c,d,e,f,g,h,k,w) do                             \
     {                                                         \
@@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
   u64 w[16];
 -  h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state);
 +  h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
   h1 = vec_rol_elems (h0, 1);
 -  h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state);
 +  h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
   h3 = vec_rol_elems (h2, 1);
 -  h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state);
 +  h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
   h5 = vec_rol_elems (h4, 1);
 -  h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state);
 +  h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
   h7 = vec_rol_elems (h6, 1);
   while (nblks >= 2)
@@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   h2 = vec_merge_idx0_elems (h2, h3);
   h4 = vec_merge_idx0_elems (h4, h5);
   h6 = vec_merge_idx0_elems (h6, h7);
 -  vec_vsx_st (h0, 8 * 0, (unsigned long long *)state);
 -  vec_vsx_st (h2, 8 * 2, (unsigned long long *)state);
 -  vec_vsx_st (h4, 8 * 4, (unsigned long long *)state);
 -  vec_vsx_st (h6, 8 * 6, (unsigned long long *)state);
 +  vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
 +  vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
 +  vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
 +  vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
   return sizeof(w);
 }
 diff --git a/configure.ac b/configure.ac
 index b6b6455a..be35ce42 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -1745,10 +1745,12 @@ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
 	AC_COMPILE_IFELSE([AC_LANG_SOURCE(
 	[[#include <altivec.h>
 	  typedef vector unsigned char block;
 +	  typedef vector unsigned int vecu32;
 	  block fn(block in)
 	  {
 	    block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
 -	    return vec_cipher_be (t, in);
 +	    vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
 +	    return vec_cipher_be (t, in) ^ (block)y;
 	  }
 	  ]])],
 	[gcry_cv_cc_ppc_altivec=yes])
@@ -1769,10 +1771,12 @@ if test "$gcry_cv_cc_ppc_altivec" = "no" &&
     AC_COMPILE_IFELSE([AC_LANG_SOURCE(
       [[#include <altivec.h>
 	typedef vector unsigned char block;
 +	typedef vector unsigned int vecu32;
 	block fn(block in)
 	{
 	  block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
 -	  return vec_cipher_be (t, in);
 +	  vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
 +	  return vec_cipher_be (t, in) ^ (block)y;
 	}]])],
       [gcry_cv_cc_ppc_altivec_cflags=yes])])
   if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
 diff --git a/configure.ac b/configure.ac
 index 202ac888..fd447906 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -2562,13 +2562,13 @@ if test "$found" = "1" ; then
          GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
       ;;
       powerpc64le-*-*)
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc64-*-*)
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc-*-*)
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
    esac
 fi
@@ -2635,17 +2635,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
    esac
 fi
@@ -2667,17 +2667,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
 -         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
 +         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
    esac
    if test x"$neonsupport" = xyes ; then
--- a/SOURCES/libgcrypt-1.8.5-ppc-chacha20-poly1305.patch
+++ b/SOURCES/libgcrypt-1.8.5-ppc-chacha20-poly1305.patch
--- a/SOURCES/libgcrypt-1.8.5-ppc-crc32.patch
+++ b/SOURCES/libgcrypt-1.8.5-ppc-crc32.patch
@ -0,0 +1,794 @@
 diff --git a/cipher/Makefile.am b/cipher/Makefile.am
 index cb41c251..1728e9f9 100644
 --- a/cipher/Makefile.am
 +++ b/cipher/Makefile.am
@@ -67,7 +67,7 @@ cast5.c cast5-amd64.S cast5-arm.S \
 chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \
   chacha20-armv7-neon.S \
 crc.c \
 -  crc-intel-pclmul.c \
 +  crc-intel-pclmul.c crc-ppc.c \
 des.c des-amd64.S \
 dsa.c \
 elgamal.c \
@@ -159,3 +159,9 @@ sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile
 sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile
 	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
 +
 +crc-ppc.o: $(srcdir)/crc-ppc.c Makefile
 +	`echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< `
 +
 +crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile
 +	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
 diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
 new file mode 100644
 index 00000000..4d7f0add
 --- /dev/null
 +++ b/cipher/crc-ppc.c
@@ -0,0 +1,619 @@
 +/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation
 + * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 + *
 + * This file is part of Libgcrypt.
 + *
 + * Libgcrypt is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU Lesser General Public License as
 + * published by the Free Software Foundation; either version 2.1 of
 + * the License, or (at your option) any later version.
 + *
 + * Libgcrypt is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with this program; if not, write to the Free Software
 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 + *
 + */
 +
 +#include <config.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +
 +#include "g10lib.h"
 +
 +#include "bithelp.h"
 +#include "bufhelp.h"
 +
 +
 +#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
 +    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
 +    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
 +    __GNUC__ >= 4
 +
 +#include <altivec.h>
 +#include "bufhelp.h"
 +
 +
 +#define ALWAYS_INLINE inline __attribute__((always_inline))
 +#define NO_INLINE __attribute__((noinline))
 +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
 +
 +#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
 +#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
 +#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
 +
 +#define ALIGNED_64 __attribute__ ((aligned (64)))
 +
 +
 +typedef vector unsigned char vector16x_u8;
 +typedef vector unsigned int vector4x_u32;
 +typedef vector unsigned long long vector2x_u64;
 +
 +
 +/* Constants structure for generic reflected/non-reflected CRC32 PMULL
 + * functions. */
 +struct crc32_consts_s
 +{
 +  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
 +  unsigned long long k[6];
 +  /* my_p: { floor(x^64 / P(x)), P(x) } */
 +  unsigned long long my_p[2];
 +};
 +
 +/* PMULL constants for CRC32 and CRC32RFC1510. */
 +static const struct crc32_consts_s crc32_consts ALIGNED_64 =
 +{
 +  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
 +    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
 +    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
 +    U64_C(0x163cd6124), 0                   /* y = 2 */
 +  },
 +  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
 +    U64_C(0x1f7011641), U64_C(0x1db710641)
 +  }
 +};
 +
 +/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
 +static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 =
 +{
 +  { /* k[6] = x^(32*y) mod P(x) << 32*/
 +    U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
 +    U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
 +    U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
 +  },
 +  { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
 +    U64_C(0x1f845fe24), U64_C(0x1864cfb00)
 +  }
 +};
 +
 +/* Thin wrapper around the vpmsumd instruction: returns the result of "vpmsumd A, B". */
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +asm_vpmsumd(vector2x_u64 a, vector2x_u64 b)
 +{
 +  __asm__("vpmsumd %0, %1, %2"
 +	  : "=v" (a)
 +	  : "v" (a), "v" (b));
 +  return a;
 +}
 +
 +/* Thin wrapper around xxswapd: returns A with its two 64-bit elements exchanged. */
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +asm_swap_u64(vector2x_u64 a)
 +{
 +  __asm__("xxswapd %x0, %x1"
 +	  : "=wa" (a)
 +	  : "wa" (a));
 +  return a;
 +}
 +
 +/* vec_sld of A and B by IDX 32-bit words; the byte count 4 * IDX is masked to the range 0..15. */
 +static ASM_FUNC_ATTR_INLINE vector4x_u32
 +vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx)
 +{
 +  return vec_sld (a, b, (4 * idx) & 15);
 +}
 +
 +
 +static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 =
 +  {
 +    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 +    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 +    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 +    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 +  };
 +static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 =
 +  {
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
 +    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +  };
 +static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 =
 +  {
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 +    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
 +  };
 +static const vector16x_u8 bswap_const ALIGNED_64 =
 +  { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
 +
 +
 +#define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \
 +                           vec_perm(__vecu64, __vecu64, bswap_const); })
 +
 +#ifdef WORDS_BIGENDIAN
 +# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 +# define CRC_VEC_U64_LOAD(offs, ptr) \
 +          asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
 +# define CRC_VEC_U64_LOAD_LE(offs, ptr) \
 +	  CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
 +# define CRC_VEC_U64_LOAD_BE(offs, ptr) \
 +         vec_vsx_ld((offs), (const unsigned long long *)(ptr))
 +# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 +# define CRC_VEC_SWAP_TO_BE(v) (v)
 +# define VEC_U64_LO 1
 +# define VEC_U64_HI 0
 +#else
 +# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
 +# define CRC_VEC_U64_LOAD(offs, ptr) \
 +	  vec_vsx_ld((offs), (const unsigned long long *)(ptr))
 +# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
 +# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 +# define CRC_VEC_SWAP_TO_LE(v) (v)
 +# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 +# define VEC_U64_LO 0
 +# define VEC_U64_HI 1
 +
 +static ASM_FUNC_ATTR_INLINE vector2x_u64
 +asm_vec_u64_load_be(unsigned int offset, const void *ptr)
 +{
 +  static const vector16x_u8 vec_load_le_const =
 +    { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
 +  vector2x_u64 vecu64;
 +  /* Loads 16 bytes from PTR + OFFSET into v0 (vs32), then byte-permutes them with vec_load_le_const.  NOTE(review): OFFSET is unsigned int and the callers visible here pass small constants -- confirm before passing computed offsets. */
 +#if __GNUC__ >= 4
 +  if (__builtin_constant_p (offset) && offset == 0)
 +    __asm__ ("lxvd2x %%vs32,0,%1\n\t"
 +	     "vperm %0,%%v0,%%v0,%2\n\t"
 +	     : "=v" (vecu64)
 +	     : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)
 +	     : "memory", "v0");
 +  else
 +#endif /* __GNUC__ >= 4 -- the guard now wraps the whole "if ... else" head, so the fallback below stands alone when it is false */
 +    __asm__ ("lxvd2x %%vs32,%1,%2\n\t"
 +	     "vperm %0,%%v0,%%v0,%3\n\t"
 +	     : "=v" (vecu64)
 +	     : "r" (offset), "r" ((uintptr_t)(ptr)),
 +	       "v" (vec_load_le_const)
 +	     : "memory", "r0", "v0");
 +
 +  return vecu64;
 +}
 +#endif
 +
 +/* Bulk path of the crc32r (reflected) variant: folds INBUF into *PCRC with vpmsumd; the caller must pass INLEN >= 16. */
 +static ASM_FUNC_ATTR_INLINE void
 +crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 +		     const struct crc32_consts_s *consts)
 +{
 +  vector4x_u32 zero = { 0, 0, 0, 0 };
 +  vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0);
 +  vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0);
 +  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
 +  vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]);
 +  vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]);
 +  vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0);
 +  vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]);
 +  vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0);
 +  vector2x_u64 crc0, crc1, crc2, crc3;
 +  vector2x_u64 v0;
 +
 +  if (inlen >= 8 * 16)
 +    {
 +      crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
 +      crc0 ^= crc;
 +      crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
 +      crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
 +      crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
 +
 +      inbuf += 4 * 16;
 +      inlen -= 4 * 16;
 +
 +      /* Fold by 4. */
 +      while (inlen >= 4 * 16)
 +	{
 +	  v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
 +	  crc0 = asm_vpmsumd(crc0, k1k2) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
 +	  crc1 = asm_vpmsumd(crc1, k1k2) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
 +	  crc2 = asm_vpmsumd(crc2, k1k2) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
 +	  crc3 = asm_vpmsumd(crc3, k1k2) ^ v0;
 +
 +	  inbuf += 4 * 16;
 +	  inlen -= 4 * 16;
 +	}
 +
 +      /* Fold 4 to 1. */
 +      crc1 ^= asm_vpmsumd(crc0, k3k4);
 +      crc2 ^= asm_vpmsumd(crc1, k3k4);
 +      crc3 ^= asm_vpmsumd(crc2, k3k4);
 +      crc = crc3;
 +    }
 +  else
 +    {
 +      v0 = CRC_VEC_U64_LOAD_LE(0, inbuf); /* consumes 16 bytes unconditionally, hence the INLEN >= 16 requirement */
 +      crc ^= v0;
 +
 +      inbuf += 16;
 +      inlen -= 16;
 +    }
 +
 +  /* Fold by 1. */
 +  while (inlen >= 16)
 +    {
 +      v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
 +      crc = asm_vpmsumd(k3k4, crc);
 +      crc ^= v0;
 +
 +      inbuf += 16;
 +      inlen -= 16;
 +    }
 +
 +  /* Partial fold. */
 +  if (inlen)
 +    {
 +      /* Load last input and add padding zeros. */
 +      vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
 +      vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, crc32_refl_shuf_shift);
 +      vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_refl_shuf_shift);
 +
 +      v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf); /* offset wraps below zero: reads the 16 bytes that end at the end of the input */
 +      v0 &= mask;
 +
 +      crc = CRC_VEC_SWAP_TO_LE(crc);
 +      v0 |= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 +				   (vector16x_u8)shr_shuf);
 +      crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 +				   (vector16x_u8)shl_shuf);
 +      crc = asm_vpmsumd(k3k4, crc);
 +      crc ^= v0;
 +
 +      inbuf += inlen;
 +      inlen -= inlen;
 +    }
 +
 +  /* Final fold. */
 +
 +  /* reduce 128-bits to 96-bits */
 +  v0 = asm_swap_u64(crc);
 +  v0 &= low_64bit_mask;
 +  crc = asm_vpmsumd(k4lo, crc);
 +  crc ^= v0;
 +
 +  /* reduce 96-bits to 64-bits */
 +  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 +				 (vector4x_u32)crc, 3);  /* [x0][x3][x2][x1] */
 +  v0 &= low_64bit_mask;                                  /* [00][00][x2][x1] */
 +  crc = crc & low_32bit_mask;                            /* [00][00][00][x0] */
 +  crc = v0 ^ asm_vpmsumd(k5lo, crc);                     /* [00][00][xx][xx] */
 +
 +  /* barrett reduction */
 +  v0 = crc << 32;                                        /* [00][00][x0][00] */
 +  v0 = asm_vpmsumd(my_p, v0);
 +  v0 = asm_swap_u64(v0);
 +  v0 = asm_vpmsumd(my_p, v0);
 +  crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 +				  zero, 1);              /* [00][x1][x0][00] */
 +  crc ^= v0;
 +
 +  *pcrc = (u32)crc[VEC_U64_HI];
 +}
 +
 +/* Reduce one 32-bit word DATA with two vpmsumd multiplications by consts->my_p, then XOR the result with CRC. */
 +static ASM_FUNC_ATTR_INLINE u32
 +crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc,
 +			    const struct crc32_consts_s *consts)
 +{
 +  vector4x_u32 zero = { 0, 0, 0, 0 };
 +  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
 +  vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0);
 +  v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
 +  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
 +				 zero, 3);             /* [x0][00][00][00] */
 +  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
 +				 (vector4x_u32)v0, 3); /* [00][x0][00][00] */
 +  v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
 +  return (v0[VEC_U64_LO] >> 32) ^ crc;
 +}
 +
 +/* Word-wise path of the crc32r variant: 4 bytes per step, then the 1-3 byte tail; the caller uses it for inputs shorter than 16 bytes. */
 +static ASM_FUNC_ATTR_INLINE void
 +crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 +		     const struct crc32_consts_s *consts)
 +{
 +  u32 crc = *pcrc;
 +  u32 data;
 +
 +  while (inlen >= 4)
 +    {
 +      data = buf_get_le32(inbuf);
 +      data ^= crc;
 +
 +      inlen -= 4;
 +      inbuf += 4;
 +
 +      crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts);
 +    }
 +
 +  switch (inlen) /* 0..3 bytes remain after the loop above */
 +    {
 +    case 0:
 +      break;
 +    case 1:
 +      data = inbuf[0];
 +      data ^= crc;
 +      data <<= 24;
 +      crc >>= 8;
 +      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    case 2:
 +      data = inbuf[0] << 0;
 +      data |= inbuf[1] << 8;
 +      data ^= crc;
 +      data <<= 16;
 +      crc >>= 16;
 +      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    case 3:
 +      data = inbuf[0] << 0;
 +      data |= inbuf[1] << 8;
 +      data |= inbuf[2] << 16;
 +      data ^= crc;
 +      data <<= 8;
 +      crc >>= 24;
 +      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    }
 +
 +  *pcrc = crc;
 +}
 +
 +/* Bulk path of the non-reflected variant: *PCRC is byte-swapped on entry and on exit; the caller must pass INLEN >= 16. */
 +static ASM_FUNC_ATTR_INLINE void
 +crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 +		    const struct crc32_consts_s *consts)
 +{
 +  vector4x_u32 zero = { 0, 0, 0, 0 };
 +  vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32));
 +  vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0]));
 +  vector2x_u64 p_my_lo, p_my_hi;
 +  vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]));
 +  vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]));
 +  vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]);
 +  vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]);
 +  vector2x_u64 crc = CRC_VEC_U64_DEF(0, _gcry_bswap64(*pcrc));
 +  vector2x_u64 crc0, crc1, crc2, crc3;
 +  vector2x_u64 v0;
 +
 +  if (inlen >= 8 * 16)
 +    {
 +      crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
 +      crc0 ^= crc;
 +      crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
 +      crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
 +      crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
 +
 +      inbuf += 4 * 16;
 +      inlen -= 4 * 16;
 +
 +      /* Fold by 4. */
 +      while (inlen >= 4 * 16)
 +	{
 +	  v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
 +	  crc0 = asm_vpmsumd(crc0, k2k1) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
 +	  crc1 = asm_vpmsumd(crc1, k2k1) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
 +	  crc2 = asm_vpmsumd(crc2, k2k1) ^ v0;
 +
 +	  v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
 +	  crc3 = asm_vpmsumd(crc3, k2k1) ^ v0;
 +
 +	  inbuf += 4 * 16;
 +	  inlen -= 4 * 16;
 +	}
 +
 +      /* Fold 4 to 1. */
 +      crc1 ^= asm_vpmsumd(crc0, k4k3);
 +      crc2 ^= asm_vpmsumd(crc1, k4k3);
 +      crc3 ^= asm_vpmsumd(crc2, k4k3);
 +      crc = crc3;
 +    }
 +  else
 +    {
 +      v0 = CRC_VEC_U64_LOAD_BE(0, inbuf); /* consumes 16 bytes unconditionally, hence the INLEN >= 16 requirement */
 +      crc ^= v0;
 +
 +      inbuf += 16;
 +      inlen -= 16;
 +    }
 +
 +  /* Fold by 1. */
 +  while (inlen >= 16)
 +    {
 +      v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
 +      crc = asm_vpmsumd(k4k3, crc);
 +      crc ^= v0;
 +
 +      inbuf += 16;
 +      inlen -= 16;
 +    }
 +
 +  /* Partial fold. */
 +  if (inlen)
 +    {
 +      /* Load last input and add padding zeros. */
 +      vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
 +      vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, crc32_refl_shuf_shift);
 +      vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_shuf_shift);
 +
 +      v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf); /* offset wraps below zero: reads the 16 bytes that end at the end of the input */
 +      v0 &= mask;
 +
 +      crc = CRC_VEC_SWAP_TO_LE(crc);
 +      crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 +				    (vector16x_u8)shr_shuf);
 +      v0 |= crc2;
 +      v0 = CRC_VEC_SWAP(v0);
 +      crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 +				   (vector16x_u8)shl_shuf);
 +      crc = asm_vpmsumd(k4k3, crc);
 +      crc ^= v0;
 +
 +      inbuf += inlen;
 +      inlen -= inlen;
 +    }
 +
 +  /* Final fold. */
 +
 +  /* reduce 128-bits to 96-bits */
 +  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 +				 (vector4x_u32)zero, 2);
 +  crc = asm_vpmsumd(k4hi, crc);
 +  crc ^= v0; /* bottom 32-bit are zero */
 +
 +  /* reduce 96-bits to 64-bits */
 +  v0 = crc & low_96bit_mask;    /* [00][x2][x1][00] */
 +  crc >>= 32;                   /* [00][x3][00][x0] */
 +  crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */
 +  crc ^= v0;                    /* top and bottom 32-bit are zero */
 +
 +  /* barrett reduction */
 +  p_my_hi = p_my;
 +  p_my_lo = p_my;
 +  p_my_hi[VEC_U64_LO] = 0;
 +  p_my_lo[VEC_U64_HI] = 0;
 +  v0 = crc >> 32;                                        /* [00][00][00][x1] */
 +  crc = asm_vpmsumd(p_my_hi, crc);                       /* [00][xx][xx][xx] */
 +  crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 +				  (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */
 +  crc = asm_vpmsumd(p_my_lo, crc);                       /* [00][xx][xx][xx] */
 +  crc ^= v0;
 +
 +  *pcrc = _gcry_bswap32(crc[VEC_U64_LO]);
 +}
 +
 +/* Non-reflected 4-byte reduction: two vpmsumd multiplications by consts->my_p; the low element is byte-swapped and XORed with CRC. */
 +static ASM_FUNC_ATTR_INLINE u32
 +crc32_ppc8_ce_reduction_4 (u32 data, u32 crc,
 +			   const struct crc32_consts_s *consts)
 +{
 +  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
 +  vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0);
 +  v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */
 +  v0[VEC_U64_LO] = 0;         /* [00][x1][00][00] */
 +  v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
 +  return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc;
 +}
 +
 +/* Word-wise path of the non-reflected variant: 4 bytes per step, then the 1-3 byte tail; the caller uses it for inputs shorter than 16 bytes. */
 +static ASM_FUNC_ATTR_INLINE void
 +crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 +		    const struct crc32_consts_s *consts)
 +{
 +  u32 crc = *pcrc;
 +  u32 data;
 +
 +  while (inlen >= 4)
 +    {
 +      data = buf_get_le32(inbuf);
 +      data ^= crc;
 +      data = _gcry_bswap32(data);
 +
 +      inlen -= 4;
 +      inbuf += 4;
 +
 +      crc = crc32_ppc8_ce_reduction_4 (data, 0, consts);
 +    }
 +
 +  switch (inlen) /* 0..3 bytes remain after the loop above */
 +    {
 +    case 0:
 +      break;
 +    case 1:
 +      data = inbuf[0];
 +      data ^= crc;
 +      data = data & 0xffU;
 +      crc = crc >> 8;
 +      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    case 2:
 +      data = inbuf[0] << 0;
 +      data |= inbuf[1] << 8;
 +      data ^= crc;
 +      data = _gcry_bswap32(data << 16);
 +      crc = crc >> 16;
 +      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    case 3:
 +      data = inbuf[0] << 0;
 +      data |= inbuf[1] << 8;
 +      data |= inbuf[2] << 16;
 +      data ^= crc;
 +      data = _gcry_bswap32(data << 8);
 +      crc = crc >> 24;
 +      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
 +      break;
 +    }
 +
 +  *pcrc = crc;
 +}
 +
 +void ASM_FUNC_ATTR
 +_gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
 +{
 +  const struct crc32_consts_s *consts = &crc32_consts;
 +  /* Updates *PCRC in place over INLEN bytes of INBUF using the crc32_consts table; empty input is a no-op.  */
 +  if (!inlen)
 +    return;
 +  /* Inputs of 16 bytes or more take the vector bulk path; shorter inputs take the word-wise path.  */
 +  if (inlen >= 16)
 +    crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
 +  else
 +    crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
 +}
 +
 +void ASM_FUNC_ATTR
 +_gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
 +{
 +  const struct crc32_consts_s *consts = &crc24rfc2440_consts;
 +  /* Updates *PCRC in place over INLEN bytes of INBUF using the crc24rfc2440_consts table; empty input is a no-op.  */
 +  if (!inlen)
 +    return;
 +
 +  /* Note: *pcrc in input endian. */
 +  /* Inputs of 16 bytes or more take the vector bulk path; shorter inputs take the word-wise path.  */
 +  if (inlen >= 16)
 +    crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
 +  else
 +    crc32_less_than_16 (pcrc, inbuf, inlen, consts);
 +}
 +
 +#endif
 diff --git a/cipher/crc.c b/cipher/crc.c
 index a1ce50b6..bbb159ce 100644
 --- a/cipher/crc.c
 +++ b/cipher/crc.c
@@ -43,11 +43,27 @@
 #endif /* USE_INTEL_PCLMUL */
 +/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
 + * accelerated code. */
 +#undef USE_PPC_VPMSUM
 +#ifdef ENABLE_PPC_CRYPTO_SUPPORT
 +# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
 +     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
 +#  if __GNUC__ >= 4
 +#   define USE_PPC_VPMSUM 1
 +#  endif /* __GNUC__ >= 4 */
 +# endif /* HAVE_COMPATIBLE_CC_PPC_ALTIVEC && HAVE_GCC_INLINE_ASM_PPC_ALTIVEC */
 +#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
 +
 +
 typedef struct
 {
   u32 CRC;
 #ifdef USE_INTEL_PCLMUL
   unsigned int use_pclmul:1;           /* Intel PCLMUL shall be used.  */
 +#endif
 +#ifdef USE_PPC_VPMSUM
 +  unsigned int use_vpmsum:1;           /* POWER vpmsum shall be used. */
 #endif
   byte buf[4];
 }
@@ -61,6 +77,20 @@ void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,
 				      size_t inlen);
 #endif
 +#ifdef USE_ARM_PMULL
 +/*-- crc-armv8-ce.c --*/
 +void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen);
 +void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,
 +					size_t inlen);
 +#endif
 +
 +#ifdef USE_PPC_VPMSUM
 +/*-- crc-ppc.c --*/
 +void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);
 +void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,
 +				     size_t inlen);
 +#endif
 +
 /*
  * Code generated by universal_crc by Danjel McGougan
@@ -361,11 +391,13 @@ static void
 crc32_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
 -#ifdef USE_INTEL_PCLMUL
   u32 hwf = _gcry_get_hw_features ();
 -
 +#ifdef USE_INTEL_PCLMUL
   ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
 #endif
 +#ifdef USE_PPC_VPMSUM
 +  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
 +#endif
   (void)flags;
@@ -386,6 +418,13 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen)
       return;
     }
 #endif
 +#ifdef USE_PPC_VPMSUM
 +  if (ctx->use_vpmsum)
 +    {
 +      _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
 +      return;
 +    }
 +#endif
   if (!inbuf || !inlen)
     return;
@@ -444,6 +483,10 @@ crc32rfc1510_init (void *context, unsigned int flags)
   ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
 #endif
 +#ifdef USE_PPC_VPMSUM
 +  u32 hwf = _gcry_get_hw_features ();
 +  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
 +#endif
   (void)flags;
@@ -774,6 +817,10 @@ crc24rfc2440_init (void *context, unsigned int flags)
   ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
 #endif
 +#ifdef USE_PPC_VPMSUM
 +  u32 hwf = _gcry_get_hw_features ();
 +  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
 +#endif
   (void)flags;
@@ -794,6 +841,13 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
       return;
     }
 #endif
 +#ifdef USE_PPC_VPMSUM
 +  if (ctx->use_vpmsum)
 +    {
 +      _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
 +      return;
 +    }
 +#endif
   if (!inbuf || !inlen)
     return;
 diff --git a/configure.ac b/configure.ac
 index 953a20e9..b6b6455a 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -1916,6 +1916,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto
 		    "vadduwm %v0, %v1, %v22;\n"
 		    "vshasigmaw %v0, %v1, 0, 15;\n"
 		    "vshasigmad %v0, %v1, 0, 15;\n"
 +		    "vpmsumd %v11, %v11, %v11;\n"
 		  );
             ]])],
           [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
@@ -2556,6 +2557,15 @@ if test "$found" = "1" ; then
          # Build with the assembly implementation
          GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
       ;;
 +      powerpc64le-*-*)
 +         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +      ;;
 +      powerpc64-*-*)
 +         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +      ;;
 +      powerpc-*-*)
 +         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
 +      ;;
    esac
 fi
--- a/SOURCES/libgcrypt-1.8.5-ppc-sha2.patch
+++ b/SOURCES/libgcrypt-1.8.5-ppc-sha2.patch
--- a/SOURCES/libgcrypt-1.9.3-CVE-2021-33560.patch
+++ b/SOURCES/libgcrypt-1.9.3-CVE-2021-33560.patch
@ -0,0 +1,100 @@
 commit 3462280f2e23e16adf3ed5176e0f2413d8861320
 Author: NIIBE Yutaka <gniibe@fsij.org>
 Date:   Fri May 21 11:15:07 2021 +0900
    cipher: Fix ElGamal encryption for other implementations.
    * cipher/elgamal.c (gen_k): Remove support of smaller K.
    (do_encrypt): Never use smaller K.
    (sign): Follow the change of gen_k.
    --
    Cherry-pick master commit of:
            632d80ef30e13de6926d503aa697f92b5dbfbc5e
    This change basically reverts encryption changes in two commits:
            74386120dad6b3da62db37f7044267c8ef34689b
            78531373a342aeb847950f404343a05e36022065
    Use of smaller K for ephemeral key in ElGamal encryption is only good,
    when we can guarantee that recipient's key is generated by our
    implementation (or compatible).
    For detail, please see:
        Luca De Feo, Bertram Poettering, Alessandro Sorniotti,
        "On the (in)security of ElGamal in OpenPGP";
        in the proceedings of  CCS'2021.
    CVE-id: CVE-2021-33560
    GnuPG-bug-id: 5328
    Suggested-by: Luca De Feo, Bertram Poettering, Alessandro Sorniotti
    Signed-off-by: NIIBE Yutaka <gniibe@fsij.org>
 diff --git a/cipher/elgamal.c b/cipher/elgamal.c
 index 9835122f..eead4502 100644
 --- a/cipher/elgamal.c
 +++ b/cipher/elgamal.c
@@ -66,7 +66,7 @@ static const char *elg_names[] =
 static int test_keys (ELG_secret_key *sk, unsigned int nbits, int nodie);
 -static gcry_mpi_t gen_k (gcry_mpi_t p, int small_k);
 +static gcry_mpi_t gen_k (gcry_mpi_t p);
 static gcry_err_code_t generate (ELG_secret_key *sk, unsigned nbits,
                                  gcry_mpi_t **factors);
 static int  check_secret_key (ELG_secret_key *sk);
@@ -189,11 +189,10 @@ test_keys ( ELG_secret_key *sk, unsigned int nbits, int nodie )
 /****************
  * Generate a random secret exponent k from prime p, so that k is
 - * relatively prime to p-1.  With SMALL_K set, k will be selected for
 - * better encryption performance - this must never be used signing!
 + * relatively prime to p-1.
  */
 static gcry_mpi_t
 -gen_k( gcry_mpi_t p, int small_k )
 +gen_k( gcry_mpi_t p )
 {
   gcry_mpi_t k = mpi_alloc_secure( 0 );
   gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(p) );
@@ -202,18 +201,7 @@ gen_k( gcry_mpi_t p, int small_k )
   unsigned int nbits, nbytes;
   char *rndbuf = NULL;
 -  if (small_k)
 -    {
 -      /* Using a k much lesser than p is sufficient for encryption and
 -       * it greatly improves the encryption performance.  We use
 -       * Wiener's table and add a large safety margin. */
 -      nbits = wiener_map( orig_nbits ) * 3 / 2;
 -      if( nbits >= orig_nbits )
 -        BUG();
 -    }
 -  else
 -    nbits = orig_nbits;
 -
 +  nbits = orig_nbits;
   nbytes = (nbits+7)/8;
   if( DBG_CIPHER )
@@ -492,7 +480,7 @@ do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey )
    * error code.
    */
 -  k = gen_k( pkey->p, 1 );
 +  k = gen_k( pkey->p );
   mpi_powm (a, pkey->g, k, pkey->p);
   /* b = (y^k * input) mod p
@@ -608,7 +596,7 @@ sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_secret_key *skey )
     *
     */
     mpi_sub_ui(p_1, p_1, 1);
 -    k = gen_k( skey->p, 0 /* no small K ! */ );
 +    k = gen_k( skey->p );
     mpi_powm( a, skey->g, k, skey->p );
     mpi_mul(t, skey->x, a );
     mpi_subm(t, input, t, p_1 );
--- a/SPECS/libgcrypt.spec
+++ b/SPECS/libgcrypt.spec
@ -1,6 +1,6 @@
 Name: libgcrypt
 Version: 1.8.5
-Release: 4%{?dist}
+Release: 6%{?dist}
 URL: http://www.gnupg.org/
 Source0: libgcrypt-%{version}-hobbled.tar.xz
 # The original libgcrypt sources now contain potentially patented ECC
@ -53,6 +53,20 @@ Patch30: libgcrypt-1.8.5-fips-module.patch
 Patch31: libgcrypt-1.8.5-aes-perf.patch
 # FIPS selftest for PBKDF2
 Patch32: libgcrypt-1.8.5-kdf-selftest.patch
 # ppc64 performance for SHA2 (#1855231)
 Patch33: libgcrypt-1.8.5-ppc-sha2.patch
 # ppc64 performance for CRC32 (#1855231)
 Patch34: libgcrypt-1.8.5-ppc-crc32.patch
 # ppc64 bugfixes (#1855231)
 Patch35: libgcrypt-1.8.5-ppc-bugfix.patch
 # ppc64 performance AES-GCM (#1855231)
 Patch36: libgcrypt-1.8.5-ppc-aes-gcm.patch
 # Fix for CVE-2021-33560 (#1971421)
 Patch37: libgcrypt-1.9.3-CVE-2021-33560.patch
 # We can use HW optimizations in FIPS (#1976137)
 Patch38: libgcrypt-1.8.5-fips-hwfeatures.patch
 # ppc64 performance chacha20 and poly1305 (#1855231)
 Patch39: libgcrypt-1.8.5-ppc-chacha20-poly1305.patch
 %define gcrylibdir %{_libdir}
@ -106,6 +120,13 @@ applications using libgcrypt.
 %patch30 -p1 -b .fips-module
 %patch31 -p1 -b .aes-perf
 %patch32 -p1 -b .kdf-selftest
 %patch33 -p1 -b .ppc-sha2
 %patch34 -p1 -b .ppc-crc32
 %patch35 -p1 -b .ppc-bugfix
 %patch36 -p1 -b .ppc-aes-gcm
 %patch37 -p1 -b .CVE-2021-33560
 %patch38 -p1 -b .hw-fips
 %patch39 -p1 -b .ppc-chacha
 cp %{SOURCE4} cipher/
 cp %{SOURCE5} %{SOURCE6} tests/
@ -221,6 +242,14 @@ exit 0
 %license COPYING
 %changelog
 * Mon Jun 28 2021 Jakub Jelen <jjelen@redhat.com> - 1.8.5-6
 - Fix for CVE-2021-33560 (#1971421)
 - Enable HW optimizations in FIPS (#1976137)
 - Performance enhancements for ChaCha20 and Poly1305 (#1855231)
 * Thu May 13 2021 Jakub Jelen <jjelen@redhat.com> - 1.8.5-5
 - Performance enhancements for AES-GCM, CRC32 and SHA2 (#1855231)
 * Mon Jun 15 2020 Tomáš Mráz <tmraz@redhat.com> 1.8.5-4
 - add PBKDF2 selftest for FIPS POST