275 lines
9.0 KiB
Diff
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
index 4d7f0add..b9a40130 100644
--- a/cipher/crc-ppc.c
+++ b/cipher/crc-ppc.c
@@ -154,26 +154,63 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
 #ifdef WORDS_BIGENDIAN
 # define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 # define CRC_VEC_U64_LOAD(offs, ptr) \
-          asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+          asm_swap_u64(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) \
-          CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+          CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) \
-          vec_vsx_ld((offs), (const unsigned long long *)(ptr))
+          asm_vec_u64_load(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 # define CRC_VEC_SWAP_TO_BE(v) (v)
 # define VEC_U64_LO 1
 # define VEC_U64_HI 0
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                      : "=wa" (vecu64)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                      : "=wa" (vecu64)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
+  return vecu64;
+}
 #else
 # define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
-# define CRC_VEC_U64_LOAD(offs, ptr) \
-          vec_vsx_ld((offs), (const unsigned long long *)(ptr))
-# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) (v)
 # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 # define VEC_U64_LO 0
 # define VEC_U64_HI 1

+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_le(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                      : "=wa" (vecu64)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                      : "=wa" (vecu64)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
+  return asm_swap_u64(vecu64);
+}
+
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_be(unsigned int offset, const void *ptr)
 {
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
index a758e1ea..31ea25bf 100644
--- a/cipher/sha512-ppc.c
+++ b/cipher/sha512-ppc.c
@@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
 {
-  asm ("vshasigmad %0,%1,%2,%3"
-       : "=v" (v)
-       : "v" (v), "g" (a), "g" (b)
-       : "memory");
+  __asm__ ("vshasigmad %0,%1,%2,%3"
+           : "=v" (v)
+           : "v" (v), "g" (a), "g" (b)
+           : "memory");
   return v;
 }


+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %x0,0,%1\n\t"
+             : "=wa" (vecu64)
+             : "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("lxvd2x %x0,%1,%2\n\t"
+             : "=wa" (vecu64)
+             : "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+  return vecu64;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+             :
+             : "wa" (vecu64), "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+             :
+             : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+}
+
+
 /* SHA2 round in vector registers */
 #define R(a,b,c,d,e,f,g,h,k,w) do \
     { \
@@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
   u64 w[16];

-  h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state);
+  h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
   h1 = vec_rol_elems (h0, 1);
-  h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state);
+  h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
   h3 = vec_rol_elems (h2, 1);
-  h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state);
+  h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
   h5 = vec_rol_elems (h4, 1);
-  h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state);
+  h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
   h7 = vec_rol_elems (h6, 1);

   while (nblks >= 2)
@@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   h2 = vec_merge_idx0_elems (h2, h3);
   h4 = vec_merge_idx0_elems (h4, h5);
   h6 = vec_merge_idx0_elems (h6, h7);
-  vec_vsx_st (h0, 8 * 0, (unsigned long long *)state);
-  vec_vsx_st (h2, 8 * 2, (unsigned long long *)state);
-  vec_vsx_st (h4, 8 * 4, (unsigned long long *)state);
-  vec_vsx_st (h6, 8 * 6, (unsigned long long *)state);
+  vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
+  vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
+  vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
+  vec_u64_store (h6, 8 * 6, (unsigned long long *)state);

   return sizeof(w);
 }
diff --git a/configure.ac b/configure.ac
index b6b6455a..be35ce42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1745,10 +1745,12 @@ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
         AC_COMPILE_IFELSE([AC_LANG_SOURCE(
         [[#include <altivec.h>
           typedef vector unsigned char block;
+          typedef vector unsigned int vecu32;
           block fn(block in)
           {
             block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-            return vec_cipher_be (t, in);
+            vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+            return vec_cipher_be (t, in) ^ (block)y;
           }
           ]])],
         [gcry_cv_cc_ppc_altivec=yes])
@@ -1769,10 +1771,12 @@ if test "$gcry_cv_cc_ppc_altivec" = "no" &&
       AC_COMPILE_IFELSE([AC_LANG_SOURCE(
         [[#include <altivec.h>
           typedef vector unsigned char block;
+          typedef vector unsigned int vecu32;
           block fn(block in)
           {
             block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-            return vec_cipher_be (t, in);
+            vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+            return vec_cipher_be (t, in) ^ (block)y;
           }]])],
         [gcry_cv_cc_ppc_altivec_cflags=yes])])
     if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then

diff --git a/configure.ac b/configure.ac
index 202ac888..fd447906 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2562,13 +2562,13 @@ if test "$found" = "1" ; then
          GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
       ;;
       powerpc64le-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc64-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
    esac
 fi
@@ -2635,17 +2635,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
    esac
 fi

@@ -2667,17 +2667,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
    esac

    if test x"$neonsupport" = xyes ; then