diff --git a/SOURCES/nettle-3.4.1-ecdsa-verify.patch b/SOURCES/nettle-3.4.1-ecdsa-verify.patch new file mode 100644 index 0000000..c46f185 --- /dev/null +++ b/SOURCES/nettle-3.4.1-ecdsa-verify.patch @@ -0,0 +1,109 @@ +From 932ea29845da1ae350d9c056cb2cb0379a66d642 Mon Sep 17 00:00:00 2001 +From: Daiki Ueno +Date: Tue, 30 Mar 2021 09:22:47 +0200 +Subject: [PATCH] Port upstream hardening of EC scalar multiplication + +Some internal functions used in point multiplications are known to +misbehave if the scalar is out-of-range. This performs canonical +reduction on scalars, before point multiplication. + +Signed-off-by: Daiki Ueno +--- + ecc-ecdsa-sign.c | 7 +++++-- + ecc-ecdsa-verify.c | 14 ++++++++++++-- + eddsa-hash.c | 9 +++++++-- + 3 files changed, 24 insertions(+), 6 deletions(-) + +diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c +index 3b9e9cc1..45062528 100644 +--- a/ecc-ecdsa-sign.c ++++ b/ecc-ecdsa-sign.c +@@ -62,6 +62,8 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc, + mp_limb_t *rp, mp_limb_t *sp, + mp_limb_t *scratch) + { ++ mp_limb_t cy; ++ + #define P scratch + #define kinv scratch /* Needs 5*ecc->p.size for computation */ + #define hp (scratch + ecc->p.size) /* NOTE: ecc->p.size + 1 limbs! */ +@@ -91,8 +93,9 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc, + ecc_modq_mul (ecc, tp, zp, rp); + ecc_modq_add (ecc, hp, hp, tp); + ecc_modq_mul (ecc, tp, hp, kinv); +- +- mpn_copyi (sp, tp, ecc->p.size); ++ /* Ensure canonical reduction. */ ++ cy = mpn_sub_n (sp, tp, ecc->q.m, ecc->q.size); ++ cnd_copy (cy, sp, tp, ecc->q.size); + #undef P + #undef hp + #undef kinv +diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c +index d7f5b684..6b8acb07 100644 +--- a/ecc-ecdsa-verify.c ++++ b/ecc-ecdsa-verify.c +@@ -75,6 +75,8 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, + const mp_limb_t *rp, const mp_limb_t *sp, + mp_limb_t *scratch) + { ++ mp_limb_t cy; ++ + /* Procedure, according to RFC 6090, "KT-I". q denotes the group + order. 
+ +@@ -98,6 +100,7 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, + #define P1 (scratch + 4*ecc->p.size) + #define sinv (scratch) + #define hp (scratch + ecc->p.size) ++#define tp (scratch + 4*ecc->p.size) + + if (! (ecdsa_in_range (ecc, rp) + && ecdsa_in_range (ecc, sp))) +@@ -112,10 +115,16 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, + + /* u1 = h / s, P1 = u1 * G */ + ecc_hash (&ecc->q, hp, length, digest); +- ecc_modq_mul (ecc, u1, hp, sinv); ++ ecc_modq_mul (ecc, tp, hp, sinv); ++ /* Ensure canonical reduction. */ ++ cy = mpn_sub_n (u1, tp, ecc->q.m, ecc->q.size); ++ cnd_copy (cy, u1, tp, ecc->q.size); + + /* u2 = r / s, P2 = u2 * Y */ +- ecc_modq_mul (ecc, u2, rp, sinv); ++ ecc_modq_mul (ecc, hp, rp, sinv); ++ /* Ensure canonical reduction. */ ++ cy = mpn_sub_n (u2, hp, ecc->q.m, ecc->q.size); ++ cnd_copy (cy, u2, hp, ecc->q.size); + + /* Total storage: 5*ecc->p.size + ecc->mul_itch */ + ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size); +@@ -154,4 +163,5 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, + #undef u2 + #undef hp + #undef u1 ++#undef tp + } +diff --git a/eddsa-hash.c b/eddsa-hash.c +index 4fb79f1b..53c6fc49 100644 +--- a/eddsa-hash.c ++++ b/eddsa-hash.c +@@ -45,7 +45,12 @@ void + _eddsa_hash (const struct ecc_modulo *m, + mp_limb_t *rp, const uint8_t *digest) + { ++ mp_limb_t cy; ++ + size_t nbytes = 1 + m->bit_size / 8; +- mpn_set_base256_le (rp, 2*m->size, digest, 2*nbytes); +- m->mod (m, rp); ++ mpn_set_base256_le (rp + m->size, 2*m->size, digest, 2*nbytes); ++ m->mod (m, rp + m->size); ++ /* Ensure canonical reduction. 
*/ ++ cy = mpn_sub_n (rp, rp + m->size, m->m, m->size); ++ cnd_copy (cy, rp, rp + m->size, m->size); + } +-- +2.30.2 + diff --git a/SOURCES/nettle-3.4.1-powerpc64-aes-asm.patch b/SOURCES/nettle-3.4.1-powerpc64-aes-asm.patch new file mode 100644 index 0000000..8bcdbe7 --- /dev/null +++ b/SOURCES/nettle-3.4.1-powerpc64-aes-asm.patch @@ -0,0 +1,1142 @@ +diff --git a/Makefile.in b/Makefile.in +index b43e494f..ec46a9df 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -189,7 +189,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \ + ed25519-sha512-pubkey.c \ + ed25519-sha512-sign.c ed25519-sha512-verify.c + +-OPT_SOURCES = fat-x86_64.c fat-arm.c mini-gmp.c ++OPT_SOURCES = fat-arm.c fat-ppc.c fat-x86_64.c mini-gmp.c + + HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h \ + base16.h base64.h bignum.h buffer.h camellia.h cast128.h \ +@@ -573,7 +573,8 @@ distdir: $(DISTFILES) + done + set -e; for d in sparc32 sparc64 x86 \ + x86_64 x86_64/aesni x86_64/fat \ +- arm arm/neon arm/v6 arm/fat ; do \ ++ arm arm/neon arm/v6 arm/fat \ ++ powerpc64 powerpc64/p8 powerpc64/fat ; do \ + mkdir "$(distdir)/$$d" ; \ + find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' ')' \ + -exec cp '{}' "$(distdir)/$$d" ';' ; \ +diff --git a/aes-decrypt-internal.c b/aes-decrypt-internal.c +index 709c52f9..9e8cf34a 100644 +--- a/aes-decrypt-internal.c ++++ b/aes-decrypt-internal.c +@@ -40,6 +40,16 @@ + #include "aes-internal.h" + #include "macros.h" + ++/* For fat builds */ ++#if HAVE_NATIVE_aes_decrypt ++void ++_nettle_aes_decrypt_c(unsigned rounds, const uint32_t *keys, ++ const struct aes_table *T, ++ size_t length, uint8_t *dst, ++ const uint8_t *src); ++#define _nettle_aes_decrypt _nettle_aes_decrypt_c ++#endif ++ + void + _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, + const struct aes_table *T, +diff --git a/aes-encrypt-internal.c b/aes-encrypt-internal.c +index 9f61386d..ad17e6c1 100644 +--- a/aes-encrypt-internal.c ++++ b/aes-encrypt-internal.c +@@ -40,6 +40,16 @@ + 
#include "aes-internal.h" + #include "macros.h" + ++/* For fat builds */ ++#if HAVE_NATIVE_aes_encrypt ++void ++_nettle_aes_encrypt_c(unsigned rounds, const uint32_t *keys, ++ const struct aes_table *T, ++ size_t length, uint8_t *dst, ++ const uint8_t *src); ++#define _nettle_aes_encrypt _nettle_aes_encrypt_c ++#endif ++ + void + _nettle_aes_encrypt(unsigned rounds, const uint32_t *keys, + const struct aes_table *T, +diff --git a/asm.m4 b/asm.m4 +index ee377a78..59d64098 100644 +--- a/asm.m4 ++++ b/asm.m4 +@@ -51,6 +51,14 @@ define(, + <.align ifelse(ALIGN_LOG,yes,,$1) + >) + ++define(, , ++WORDS_BIGENDIAN,no,<$2>, ++,WORDS_BIGENDIAN,< ++>) ++ m4exit(1)>)>) ++define(, , <$1>)>) ++ + dnl Struct defining macros + + dnl STRUCTURE(prefix) +diff --git a/config.m4.in b/config.m4.in +index 666e34b8..e480334d 100644 +--- a/config.m4.in ++++ b/config.m4.in +@@ -9,6 +9,7 @@ define(, <@W64_ABI@>)dnl + define(, <@ASM_RODATA@>)dnl + define(,<@ASM_X86_ENDBR@>)dnl + define(,<@ASM_X86_MARK_CET_ALIGN@>)dnl ++define(, <@ASM_WORDS_BIGENDIAN@>)dnl + divert(1) + @ASM_X86_MARK_CET@ + @ASM_MARK_NOEXEC_STACK@ +diff --git a/configure.ac b/configure.ac +index 090e43a4..788e6842 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -85,6 +85,10 @@ AC_ARG_ENABLE(x86-aesni, + AC_HELP_STRING([--enable-x86-aesni], [Enable x86_64 aes instructions. (default=no)]),, + [enable_x86_aesni=no]) + ++AC_ARG_ENABLE(power-crypto-ext, ++ AC_HELP_STRING([--enable-power-crypto-ext], [Enable POWER crypto extensions. 
(default=no)]),, ++ [enable_power_crypto_ext=no]) ++ + AC_ARG_ENABLE(mini-gmp, + AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),, + [enable_mini_gmp=no]) +@@ -201,7 +205,11 @@ LSH_FUNC_STRERROR + # getenv_secure is used for fat overrides, + # getline is used in the testsuite + AC_CHECK_FUNCS(secure_getenv getline) +-AC_C_BIGENDIAN ++ ++ASM_WORDS_BIGENDIAN=unknown ++AC_C_BIGENDIAN([AC_DEFINE([WORDS_BIGENDIAN], 1) ++ ASM_WORDS_BIGENDIAN=yes], ++ [ASM_WORDS_BIGENDIAN=no]) + + LSH_GCC_ATTRIBUTES + +@@ -310,6 +318,17 @@ case "$host_cpu" in + AC_TRY_COMPILE([ + #if defined(__sgi) && defined(__LP64__) + #error 64-bit mips ++#endif ++ ], [], [ ++ ABI=32 ++ ], [ ++ ABI=64 ++ ]) ++ ;; ++ *powerpc64*) ++ AC_TRY_COMPILE([ ++#if defined(__PPC64__) ++#error 64-bit powerpc + #endif + ], [], [ + ABI=32 +@@ -422,6 +441,18 @@ if test "x$enable_assembler" = xyes ; then + esac + fi + ;; ++ *powerpc64*) ++ if test "$ABI" = 64 ; then ++ asm_path="powerpc64" ++ if test "x$enable_fat" = xyes ; then ++ asm_path="powerpc64/fat $asm_path" ++ OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES" ++ elif test "x$enable_power_crypto_ext" = xyes ; then ++ asm_path="powerpc64/p8 $asm_path" ++ fi ++ fi ++ ;; ++ + *) + enable_assembler=no + ;; +@@ -544,6 +575,8 @@ AC_SUBST([IF_ASM]) + AH_VERBATIM([HAVE_NATIVE], + [/* Define to 1 each of the following for which a native (ie. CPU specific) + implementation of the corresponding routine exists. 
*/ ++#undef HAVE_NATIVE_aes_decrypt ++#undef HAVE_NATIVE_aes_encrypt + #undef HAVE_NATIVE_ecc_192_modp + #undef HAVE_NATIVE_ecc_192_redc + #undef HAVE_NATIVE_ecc_224_modp +@@ -857,6 +890,7 @@ AC_SUBST(ASM_TYPE_PROGBITS) + AC_SUBST(ASM_MARK_NOEXEC_STACK) + AC_SUBST(ASM_ALIGN_LOG) + AC_SUBST(W64_ABI) ++AC_SUBST(ASM_WORDS_BIGENDIAN) + AC_SUBST(EMULATOR) + AC_SUBST(ASM_X86_ENDBR) + AC_SUBST(ASM_X86_MARK_CET) +diff --git a/fat-ppc.c b/fat-ppc.c +new file mode 100644 +index 00000000..7198e2dd +--- /dev/null ++++ b/fat-ppc.c +@@ -0,0 +1,129 @@ ++/* fat-ppc.c ++ ++ Copyright (C) 2020 Mamone Tarsha ++ ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. 
++*/ ++ ++#define _GNU_SOURCE ++ ++#if HAVE_CONFIG_H ++# include "config.h" ++#endif ++ ++#include ++#include ++#include ++#include ++#if defined(__FreeBSD__) && __FreeBSD__ < 12 ++#include ++#else ++#include ++#endif ++ ++#include "nettle-types.h" ++ ++#include "aes-internal.h" ++#include "gcm.h" ++#include "fat-setup.h" ++ ++/* Define from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */ ++#ifndef PPC_FEATURE2_VEC_CRYPTO ++#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 ++#endif ++ ++struct ppc_features ++{ ++ int have_crypto_ext; ++}; ++ ++static void ++get_ppc_features (struct ppc_features *features) ++{ ++ unsigned long hwcap2 = 0; ++#if defined(__FreeBSD__) ++#if __FreeBSD__ < 12 ++ size_t len = sizeof(hwcap2); ++ sysctlbyname("hw.cpu_features2", &hwcap2, &len, NULL, 0); ++#else ++ elf_aux_info(AT_HWCAP2, &hwcap2, sizeof(hwcap2)); ++#endif ++#else ++ hwcap2 = getauxval(AT_HWCAP2); ++#endif ++ features->have_crypto_ext = ++ (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO ? 1 : 0; ++} ++ ++DECLARE_FAT_FUNC(_nettle_aes_encrypt, aes_crypt_internal_func) ++DECLARE_FAT_FUNC_VAR(aes_encrypt, aes_crypt_internal_func, c) ++DECLARE_FAT_FUNC_VAR(aes_encrypt, aes_crypt_internal_func, ppc64) ++ ++DECLARE_FAT_FUNC(_nettle_aes_decrypt, aes_crypt_internal_func) ++DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, c) ++DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, ppc64) ++ ++static void CONSTRUCTOR ++fat_init (void) ++{ ++ struct ppc_features features; ++ int verbose; ++ ++ get_ppc_features (&features); ++ ++ verbose = getenv (ENV_VERBOSE) != NULL; ++ if (verbose) ++ fprintf (stderr, "libnettle: cpu features: %s\n", ++ features.have_crypto_ext ? 
"crypto extensions" : ""); ++ ++ if (features.have_crypto_ext) ++ { ++ if (verbose) ++ fprintf (stderr, "libnettle: enabling arch 2.07 code.\n"); ++ _nettle_aes_encrypt_vec = _nettle_aes_encrypt_ppc64; ++ _nettle_aes_decrypt_vec = _nettle_aes_decrypt_ppc64; ++ } ++ else ++ { ++ _nettle_aes_encrypt_vec = _nettle_aes_encrypt_c; ++ _nettle_aes_decrypt_vec = _nettle_aes_decrypt_c; ++ } ++} ++ ++DEFINE_FAT_FUNC(_nettle_aes_encrypt, void, ++ (unsigned rounds, const uint32_t *keys, ++ const struct aes_table *T, ++ size_t length, uint8_t *dst, ++ const uint8_t *src), ++ (rounds, keys, T, length, dst, src)) ++ ++DEFINE_FAT_FUNC(_nettle_aes_decrypt, void, ++ (unsigned rounds, const uint32_t *keys, ++ const struct aes_table *T, ++ size_t length, uint8_t *dst, ++ const uint8_t *src), ++ (rounds, keys, T, length, dst, src)) +diff --git a/powerpc64/fat/aes-decrypt-internal-2.asm b/powerpc64/fat/aes-decrypt-internal-2.asm +new file mode 100644 +index 00000000..3a4e08c2 +--- /dev/null ++++ b/powerpc64/fat/aes-decrypt-internal-2.asm +@@ -0,0 +1,37 @@ ++C powerpc64/fat/aes-decrypt-internal-2.asm ++ ++ ++ifelse(< ++ Copyright (C) 2020 Mamone Tarsha ++ ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. 
++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++>) ++ ++dnl PROLOGUE(_nettle_aes_decrypt) picked up by configure ++ ++define(, <$1_ppc64>) ++include_src() +diff --git a/powerpc64/fat/aes-encrypt-internal-2.asm b/powerpc64/fat/aes-encrypt-internal-2.asm +new file mode 100644 +index 00000000..42126e4f +--- /dev/null ++++ b/powerpc64/fat/aes-encrypt-internal-2.asm +@@ -0,0 +1,37 @@ ++C powerpc64/fat/aes-encrypt-internal-2.asm ++ ++ ++ifelse(< ++ Copyright (C) 2020 Mamone Tarsha ++ ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. 
++>) ++ ++dnl PROLOGUE(_nettle_aes_encrypt) picked up by configure ++ ++define(, <$1_ppc64>) ++include_src() +diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4 +new file mode 100644 +index 00000000..b76bb8b1 +--- /dev/null ++++ b/powerpc64/machine.m4 +@@ -0,0 +1,36 @@ ++define(, ++<.globl C_NAME($1) ++DECLARE_FUNC(C_NAME($1)) ++ifelse(WORDS_BIGENDIAN,no, ++,<.align FUNC_ALIGN>) ++C_NAME($1): ++addis 2,12,(.TOC.-C_NAME($1))@ha ++addi 2,2,(.TOC.-C_NAME($1))@l ++.localentry C_NAME($1), .-C_NAME($1)>, ++<.section ".opd","aw" ++.align 3 ++C_NAME($1): ++.quad .C_NAME($1),.TOC.@tocbase,0 ++.previous ++ifdef(,<.align FUNC_ALIGN>) ++.C_NAME($1):>) ++undefine()>) ++ ++define(, ++, ++<.size .C_NAME($1), . - .C_NAME($1) ++.size C_NAME($1), . - .C_NAME($1)>)>) ++ ++C Get vector-scalar register from vector register ++C VSR(VR) ++define(,<32+$1>) ++ ++C Load the quadword in DATA_SRC storage into ++C VEC_DST. GPR is general-purpose register ++C used to obtain the effective address of ++C DATA_SRC storage. ++C DATA_LOAD_VEC(VEC_DST, DATA_SRC, GPR) ++define(, ++) +diff --git a/powerpc64/p8/aes-decrypt-internal.asm b/powerpc64/p8/aes-decrypt-internal.asm +new file mode 100644 +index 00000000..bfedb32b +--- /dev/null ++++ b/powerpc64/p8/aes-decrypt-internal.asm +@@ -0,0 +1,356 @@ ++C powerpc64/p8/aes-decrypt-internal.asm ++ ++ifelse(< ++ Copyright (C) 2020 Mamone Tarsha ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. 
++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++>) ++ ++C Register usage: ++ ++define(, <1>) ++define(, <2>) ++ ++define(, <3>) ++define(, <4>) ++define(, <6>) ++define(, <7>) ++define(, <8>) ++ ++define(, <0>) ++ ++define(, <1>) ++define(, <2>) ++define(, <3>) ++define(, <4>) ++define(, <5>) ++define(, <6>) ++define(, <7>) ++define(, <8>) ++define(, <9>) ++ ++C ZERO vector register is used in place of RoundKey ++C for vncipher instruction because the order of InvMixColumns ++C and Xor processes are flipped in that instruction. ++C The Xor process with RoundKey is executed afterward. ++define(, <10>) ++ ++.file "aes-decrypt-internal.asm" ++ ++.text ++ ++ C _aes_decrypt(unsigned rounds, const uint32_t *keys, ++ C const struct aes_table *T, ++ C size_t length, uint8_t *dst, ++ C uint8_t *src) ++ ++define(, <5>) ++PROLOGUE(_nettle_aes_decrypt) ++ vxor ZERO,ZERO,ZERO ++ ++ DATA_LOAD_VEC(swap_mask,.swap_mask,5) ++ ++ subi ROUNDS,ROUNDS,1 ++ srdi LENGTH,LENGTH,4 ++ ++ srdi 5,LENGTH,3 #8x loop count ++ cmpldi 5,0 ++ beq L4x ++ ++ std 25,-56(SP); ++ std 26,-48(SP); ++ std 27,-40(SP); ++ std 28,-32(SP); ++ std 29,-24(SP); ++ std 30,-16(SP); ++ std 31,-8(SP); ++ ++ li 25,0x10 ++ li 26,0x20 ++ li 27,0x30 ++ li 28,0x40 ++ li 29,0x50 ++ li 30,0x60 ++ li 31,0x70 ++ ++.align 5 ++Lx8_loop: ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ lxvd2x VSR(S1),25,SRC ++ lxvd2x VSR(S2),26,SRC ++ lxvd2x VSR(S3),27,SRC ++ lxvd2x VSR(S4),28,SRC ++ lxvd2x VSR(S5),29,SRC ++ lxvd2x VSR(S6),30,SRC ++ lxvd2x VSR(S7),31,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ 
vxor S3,S3,K ++ vxor S4,S4,K ++ vxor S5,S5,K ++ vxor S6,S6,K ++ vxor S7,S7,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L8x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipher S0,S0,ZERO ++ vncipher S1,S1,ZERO ++ vncipher S2,S2,ZERO ++ vncipher S3,S3,ZERO ++ vncipher S4,S4,ZERO ++ vncipher S5,S5,ZERO ++ vncipher S6,S6,ZERO ++ vncipher S7,S7,ZERO ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ vxor S3,S3,K ++ vxor S4,S4,K ++ vxor S5,S5,K ++ vxor S6,S6,K ++ vxor S7,S7,K ++ addi 10,10,0x10 ++ bdnz L8x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipherlast S0,S0,K ++ vncipherlast S1,S1,K ++ vncipherlast S2,S2,K ++ vncipherlast S3,S3,K ++ vncipherlast S4,S4,K ++ vncipherlast S5,S5,K ++ vncipherlast S6,S6,K ++ vncipherlast S7,S7,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ stxvd2x VSR(S1),25,DST ++ stxvd2x VSR(S2),26,DST ++ stxvd2x VSR(S3),27,DST ++ stxvd2x VSR(S4),28,DST ++ stxvd2x VSR(S5),29,DST ++ stxvd2x VSR(S6),30,DST ++ stxvd2x VSR(S7),31,DST ++ ++ addi SRC,SRC,0x80 ++ addi DST,DST,0x80 ++ subic. 
5,5,1 ++ bne Lx8_loop ++ ++ ld 25,-56(SP); ++ ld 26,-48(SP); ++ ld 27,-40(SP); ++ ld 28,-32(SP); ++ ld 29,-24(SP); ++ ld 30,-16(SP); ++ ld 31,-8(SP); ++ ++ clrldi LENGTH,LENGTH,61 ++ ++L4x: ++ srdi 5,LENGTH,2 ++ cmpldi 5,0 ++ beq L2x ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ li 9,0x10 ++ lxvd2x VSR(S1),9,SRC ++ addi 9,9,0x10 ++ lxvd2x VSR(S2),9,SRC ++ addi 9,9,0x10 ++ lxvd2x VSR(S3),9,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ vxor S3,S3,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L4x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipher S0,S0,ZERO ++ vncipher S1,S1,ZERO ++ vncipher S2,S2,ZERO ++ vncipher S3,S3,ZERO ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ vxor S3,S3,K ++ addi 10,10,0x10 ++ bdnz L4x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipherlast S0,S0,K ++ vncipherlast S1,S1,K ++ vncipherlast S2,S2,K ++ vncipherlast S3,S3,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ li 9,0x10 ++ stxvd2x VSR(S1),9,DST ++ addi 9,9,0x10 ++ stxvd2x VSR(S2),9,DST ++ addi 9,9,0x10 ++ stxvd2x VSR(S3),9,DST ++ ++ addi SRC,SRC,0x40 ++ addi DST,DST,0x40 ++ ++ clrldi LENGTH,LENGTH,62 ++ ++L2x: ++ srdi 5,LENGTH,1 ++ cmpldi 5,0 ++ beq L1x ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ li 9,0x10 ++ lxvd2x VSR(S1),9,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L2x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipher S0,S0,ZERO ++ vncipher S1,S1,ZERO ++ vxor S0,S0,K ++ vxor S1,S1,K ++ addi 10,10,0x10 ++ bdnz L2x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipherlast S0,S0,K ++ vncipherlast S1,S1,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ li 9,0x10 ++ stxvd2x VSR(S1),9,DST ++ ++ addi SRC,SRC,0x20 ++ addi DST,DST,0x20 ++ ++ clrldi LENGTH,LENGTH,63 ++ ++L1x: ++ cmpldi LENGTH,0 ++ beq Ldone ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm 
K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L1x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipher S0,S0,ZERO ++ vxor S0,S0,K ++ addi 10,10,0x10 ++ bdnz L1x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vncipherlast S0,S0,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ ++Ldone: ++ blr ++EPILOGUE(_nettle_aes_decrypt) ++ ++ .data ++ .align 4 ++.swap_mask: ++IF_LE(<.byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7>) ++IF_BE(<.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>) +diff --git a/powerpc64/p8/aes-encrypt-internal.asm b/powerpc64/p8/aes-encrypt-internal.asm +new file mode 100644 +index 00000000..67c7e597 +--- /dev/null ++++ b/powerpc64/p8/aes-encrypt-internal.asm +@@ -0,0 +1,333 @@ ++C powerpc64/p8/aes-encrypt-internal.asm ++ ++ifelse(< ++ Copyright (C) 2020 Mamone Tarsha ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. 
++>) ++ ++C Register usage: ++ ++define(, <1>) ++define(, <2>) ++ ++define(, <3>) ++define(, <4>) ++define(, <6>) ++define(, <7>) ++define(, <8>) ++ ++define(, <0>) ++ ++define(, <1>) ++define(, <2>) ++define(, <3>) ++define(, <4>) ++define(, <5>) ++define(, <6>) ++define(, <7>) ++define(, <8>) ++define(, <9>) ++ ++.file "aes-encrypt-internal.asm" ++ ++.text ++ ++ C _aes_encrypt(unsigned rounds, const uint32_t *keys, ++ C const struct aes_table *T, ++ C size_t length, uint8_t *dst, ++ C uint8_t *src) ++ ++define(, <5>) ++PROLOGUE(_nettle_aes_encrypt) ++ DATA_LOAD_VEC(swap_mask,.swap_mask,5) ++ ++ subi ROUNDS,ROUNDS,1 ++ srdi LENGTH,LENGTH,4 ++ ++ srdi 5,LENGTH,3 #8x loop count ++ cmpldi 5,0 ++ beq L4x ++ ++ std 25,-56(SP); ++ std 26,-48(SP); ++ std 27,-40(SP); ++ std 28,-32(SP); ++ std 29,-24(SP); ++ std 30,-16(SP); ++ std 31,-8(SP); ++ ++ li 25,0x10 ++ li 26,0x20 ++ li 27,0x30 ++ li 28,0x40 ++ li 29,0x50 ++ li 30,0x60 ++ li 31,0x70 ++ ++.align 5 ++Lx8_loop: ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ lxvd2x VSR(S1),25,SRC ++ lxvd2x VSR(S2),26,SRC ++ lxvd2x VSR(S3),27,SRC ++ lxvd2x VSR(S4),28,SRC ++ lxvd2x VSR(S5),29,SRC ++ lxvd2x VSR(S6),30,SRC ++ lxvd2x VSR(S7),31,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ vxor S3,S3,K ++ vxor S4,S4,K ++ vxor S5,S5,K ++ vxor S6,S6,K ++ vxor S7,S7,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L8x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipher S0,S0,K ++ vcipher S1,S1,K ++ vcipher S2,S2,K ++ vcipher S3,S3,K ++ vcipher S4,S4,K ++ vcipher S5,S5,K ++ vcipher S6,S6,K ++ vcipher S7,S7,K ++ addi 10,10,0x10 ++ bdnz L8x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipherlast S0,S0,K ++ vcipherlast S1,S1,K ++ vcipherlast S2,S2,K ++ vcipherlast S3,S3,K ++ vcipherlast S4,S4,K ++ vcipherlast S5,S5,K ++ vcipherlast S6,S6,K ++ vcipherlast S7,S7,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ stxvd2x VSR(S1),25,DST ++ stxvd2x 
VSR(S2),26,DST ++ stxvd2x VSR(S3),27,DST ++ stxvd2x VSR(S4),28,DST ++ stxvd2x VSR(S5),29,DST ++ stxvd2x VSR(S6),30,DST ++ stxvd2x VSR(S7),31,DST ++ ++ addi SRC,SRC,0x80 ++ addi DST,DST,0x80 ++ subic. 5,5,1 ++ bne Lx8_loop ++ ++ ld 25,-56(SP); ++ ld 26,-48(SP); ++ ld 27,-40(SP); ++ ld 28,-32(SP); ++ ld 29,-24(SP); ++ ld 30,-16(SP); ++ ld 31,-8(SP); ++ ++ clrldi LENGTH,LENGTH,61 ++ ++L4x: ++ srdi 5,LENGTH,2 ++ cmpldi 5,0 ++ beq L2x ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ li 9,0x10 ++ lxvd2x VSR(S1),9,SRC ++ addi 9,9,0x10 ++ lxvd2x VSR(S2),9,SRC ++ addi 9,9,0x10 ++ lxvd2x VSR(S3),9,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ vxor S2,S2,K ++ vxor S3,S3,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L4x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipher S0,S0,K ++ vcipher S1,S1,K ++ vcipher S2,S2,K ++ vcipher S3,S3,K ++ addi 10,10,0x10 ++ bdnz L4x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipherlast S0,S0,K ++ vcipherlast S1,S1,K ++ vcipherlast S2,S2,K ++ vcipherlast S3,S3,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ li 9,0x10 ++ stxvd2x VSR(S1),9,DST ++ addi 9,9,0x10 ++ stxvd2x VSR(S2),9,DST ++ addi 9,9,0x10 ++ stxvd2x VSR(S3),9,DST ++ ++ addi SRC,SRC,0x40 ++ addi DST,DST,0x40 ++ ++ clrldi LENGTH,LENGTH,62 ++ ++L2x: ++ srdi 5,LENGTH,1 ++ cmpldi 5,0 ++ beq L1x ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ li 9,0x10 ++ lxvd2x VSR(S1),9,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ vxor S1,S1,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L2x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipher S0,S0,K ++ vcipher S1,S1,K ++ addi 10,10,0x10 ++ bdnz L2x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipherlast S0,S0,K ++ vcipherlast S1,S1,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ li 9,0x10 ++ stxvd2x VSR(S1),9,DST ++ ++ addi SRC,SRC,0x20 ++ addi DST,DST,0x20 ++ ++ clrldi LENGTH,LENGTH,63 ++ ++L1x: 
++ cmpldi LENGTH,0 ++ beq Ldone ++ ++ lxvd2x VSR(K),0,KEYS ++ vperm K,K,K,swap_mask ++ ++ lxvd2x VSR(S0),0,SRC ++ ++IF_LE() ++ ++ vxor S0,S0,K ++ ++ mtctr ROUNDS ++ li 10,0x10 ++.align 5 ++L1x_round_loop: ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipher S0,S0,K ++ addi 10,10,0x10 ++ bdnz L1x_round_loop ++ ++ lxvd2x VSR(K),10,KEYS ++ vperm K,K,K,swap_mask ++ vcipherlast S0,S0,K ++ ++IF_LE() ++ ++ stxvd2x VSR(S0),0,DST ++ ++Ldone: ++ blr ++EPILOGUE(_nettle_aes_encrypt) ++ ++ .data ++ .align 4 ++.swap_mask: ++IF_LE(<.byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7>) ++IF_BE(<.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>) diff --git a/SOURCES/nettle-3.4.1-powerpc64-ghash-asm.patch b/SOURCES/nettle-3.4.1-powerpc64-ghash-asm.patch new file mode 100644 index 0000000..255adbd --- /dev/null +++ b/SOURCES/nettle-3.4.1-powerpc64-ghash-asm.patch @@ -0,0 +1,1519 @@ +diff -up ./configure.ac.ghash ./configure.ac +--- ./configure.ac.ghash 2021-07-14 14:11:58.126891572 +0200 ++++ ./configure.ac 2021-07-14 14:11:58.130891552 +0200 +@@ -211,6 +211,22 @@ AC_C_BIGENDIAN([AC_DEFINE([WORDS_BIGENDI + ASM_WORDS_BIGENDIAN=yes], + [ASM_WORDS_BIGENDIAN=no]) + ++AC_CACHE_CHECK([for __builtin_bswap64], ++ nettle_cv_c_builtin_bswap64, ++[AC_TRY_LINK([ ++#include ++],[ ++uint64_t x = 17; ++uint64_t y = __builtin_bswap64(x); ++], ++nettle_cv_c_builtin_bswap64=yes, ++nettle_cv_c_builtin_bswap64=no)]) ++ ++AH_TEMPLATE([HAVE_BUILTIN_BSWAP64], [Define if __builtin_bswap64 is available]) ++if test "x$nettle_cv_c_builtin_bswap64" = "xyes" ; then ++ AC_DEFINE(HAVE_BUILTIN_BSWAP64) ++fi ++ + LSH_GCC_ATTRIBUTES + + # According to Simon Josefsson, looking for uint32_t and friends in +@@ -472,7 +488,7 @@ asm_replace_list="aes-encrypt-internal.a + sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4" + + # Assembler files which generate additional object files if they are used. 
+-asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \ ++asm_nettle_optional_list="gcm-hash.asm gcm-hash8.asm cpuid.asm \ + aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \ + salsa20-core-internal-2.asm sha1-compress-2.asm sha256-compress-2.asm \ + sha3-permute-2.asm sha512-compress-2.asm \ +@@ -588,6 +604,10 @@ AH_VERBATIM([HAVE_NATIVE], + #undef HAVE_NATIVE_ecc_384_redc + #undef HAVE_NATIVE_ecc_521_modp + #undef HAVE_NATIVE_ecc_521_redc ++#undef HAVE_NATIVE_gcm_init_key ++#undef HAVE_NATIVE_fat_gcm_init_key ++#undef HAVE_NATIVE_gcm_hash ++#undef HAVE_NATIVE_fat_gcm_hash + #undef HAVE_NATIVE_gcm_hash8 + #undef HAVE_NATIVE_salsa20_core + #undef HAVE_NATIVE_sha1_compress +diff -up ./ctr16.c.ghash ./ctr16.c +--- ./ctr16.c.ghash 2021-07-14 14:11:58.130891552 +0200 ++++ ./ctr16.c 2021-07-14 14:11:58.130891552 +0200 +@@ -0,0 +1,106 @@ ++/* ctr16.c ++ ++ Cipher counter mode, optimized for 16-byte blocks. ++ ++ Copyright (C) 2005-2018 Niels Möller ++ Copyright (C) 2018 Red Hat, Inc. ++ ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. 
++*/ ++ ++#if HAVE_CONFIG_H ++# include "config.h" ++#endif ++ ++#include ++ ++#include "ctr.h" ++ ++#include "ctr-internal.h" ++#include "memxor.h" ++#include "nettle-internal.h" ++ ++#define MIN(a,b) (((a) < (b)) ? (a) : (b)) ++ ++void ++_ctr_crypt16(const void *ctx, nettle_cipher_func *f, ++ nettle_fill16_func *fill, uint8_t *ctr, ++ size_t length, uint8_t *dst, ++ const uint8_t *src) ++{ ++ if (dst != src && !((uintptr_t) dst % sizeof(uint64_t))) ++ { ++ size_t blocks = length / 16u; ++ size_t done; ++ fill (ctr, blocks, (union nettle_block16 *) dst); ++ ++ done = blocks * 16; ++ f(ctx, done, dst, dst); ++ memxor (dst, src, done); ++ ++ length -= done; ++ if (length > 0) ++ { /* Left-over partial block */ ++ union nettle_block16 block; ++ dst += done; ++ src += done; ++ assert (length < 16); ++ /* Use fill, to update ctr value in the same way in all cases. */ ++ fill (ctr, 1, &block); ++ f (ctx, 16, block.b, block.b); ++ memxor3 (dst, src, block.b, length); ++ } ++ } ++ else ++ { ++ /* Construct an aligned buffer of consecutive counter values, of ++ size at most CTR_BUFFER_LIMIT. 
*/ ++ TMP_DECL(buffer, union nettle_block16, CTR_BUFFER_LIMIT / 16); ++ size_t blocks = (length + 15) / 16u; ++ size_t i; ++ TMP_ALLOC(buffer, MIN(blocks, CTR_BUFFER_LIMIT / 16)); ++ ++ for (i = 0; blocks >= CTR_BUFFER_LIMIT / 16; ++ i += CTR_BUFFER_LIMIT, blocks -= CTR_BUFFER_LIMIT / 16) ++ { ++ fill (ctr, CTR_BUFFER_LIMIT / 16, buffer); ++ f(ctx, CTR_BUFFER_LIMIT, buffer->b, buffer->b); ++ if (length - i < CTR_BUFFER_LIMIT) ++ goto done; ++ memxor3 (dst + i, src + i, buffer->b, CTR_BUFFER_LIMIT); ++ } ++ ++ if (blocks > 0) ++ { ++ assert (length - i < CTR_BUFFER_LIMIT); ++ fill (ctr, blocks, buffer); ++ f(ctx, blocks * 16, buffer->b, buffer->b); ++ done: ++ memxor3 (dst + i, src + i, buffer->b, length - i); ++ } ++ } ++} +diff -up ./ctr.c.ghash ./ctr.c +--- ./ctr.c.ghash 2018-12-04 21:56:05.000000000 +0100 ++++ ./ctr.c 2021-07-14 14:13:07.714539484 +0200 +@@ -41,11 +41,83 @@ + + #include "ctr.h" + ++#include "ctr-internal.h" + #include "macros.h" + #include "memxor.h" + #include "nettle-internal.h" + +-#define NBLOCKS 4 ++#define MIN(a,b) (((a) < (b)) ? (a) : (b)) ++ ++/* The 'u64' member has been added in the public header ++ (nettle-types.h). Check that the alignment is not affected with ++ it using _Static_assert. 
*/ ++union nettle_block16_ ++{ ++ uint8_t b[16]; ++ unsigned long w[16 / sizeof(unsigned long)]; ++}; ++_Static_assert(__alignof(union nettle_block16_) == __alignof(union nettle_block16), ++ "nettle_block16 alignment should be preserved"); ++ ++static size_t ++ctr_fill (size_t block_size, uint8_t *ctr, size_t length, uint8_t *buffer) ++{ ++ size_t i; ++ for (i = 0; i + block_size <= length; i += block_size) ++ { ++ memcpy (buffer + i, ctr, block_size); ++ INCREMENT(block_size, ctr); ++ } ++ return i; ++} ++ ++#if WORDS_BIGENDIAN ++# define USE_CTR_CRYPT16 1 ++static nettle_fill16_func ctr_fill16; ++static void ++ctr_fill16(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) ++{ ++ uint64_t hi, lo; ++ size_t i; ++ hi = READ_UINT64(ctr); ++ lo = READ_UINT64(ctr + 8); ++ ++ for (i = 0; i < blocks; i++) ++ { ++ buffer[i].u64[0] = hi; ++ buffer[i].u64[1] = lo; ++ hi += !(++lo); ++ } ++ WRITE_UINT64(ctr, hi); ++ WRITE_UINT64(ctr + 8, lo); ++} ++#else /* !WORDS_BIGENDIAN */ ++# if HAVE_BUILTIN_BSWAP64 ++# define USE_CTR_CRYPT16 1 ++static nettle_fill16_func ctr_fill16; ++static void ++ctr_fill16(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) ++{ ++ uint64_t hi, lo; ++ size_t i; ++ /* Read hi in native endianness */ ++ hi = LE_READ_UINT64(ctr); ++ lo = READ_UINT64(ctr + 8); ++ ++ for (i = 0; i < blocks; i++) ++ { ++ buffer[i].u64[0] = hi; ++ buffer[i].u64[1] = __builtin_bswap64(lo); ++ if (!++lo) ++ hi = __builtin_bswap64(__builtin_bswap64(hi) + 1); ++ } ++ LE_WRITE_UINT64(ctr, hi); ++ WRITE_UINT64(ctr + 8, lo); ++} ++# else /* ! 
HAVE_BUILTIN_BSWAP64 */ ++# define USE_CTR_CRYPT16 0 ++# endif ++#endif /* !WORDS_BIGENDIAN */ + + void + ctr_crypt(const void *ctx, nettle_cipher_func *f, +@@ -53,84 +125,64 @@ ctr_crypt(const void *ctx, nettle_cipher + size_t length, uint8_t *dst, + const uint8_t *src) + { +- if (src != dst) ++#if USE_CTR_CRYPT16 ++ if (block_size == 16) + { +- if (length == block_size) +- { +- f(ctx, block_size, dst, ctr); +- INCREMENT(block_size, ctr); +- memxor(dst, src, block_size); +- } +- else ++ _ctr_crypt16(ctx, f, ctr_fill16, ctr, length, dst, src); ++ return; ++ } ++#endif ++ ++ if(src != dst) ++ { ++ size_t filled = ctr_fill (block_size, ctr, length, dst); ++ ++ f(ctx, filled, dst, dst); ++ memxor(dst, src, filled); ++ ++ if (filled < length) + { +- size_t left; +- uint8_t *p; ++ TMP_DECL(block, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE); ++ TMP_ALLOC(block, block_size); + +- for (p = dst, left = length; +- left >= block_size; +- left -= block_size, p += block_size) +- { +- memcpy (p, ctr, block_size); +- INCREMENT(block_size, ctr); +- } +- +- f(ctx, length - left, dst, dst); +- memxor(dst, src, length - left); +- +- if (left) +- { +- TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE); +- TMP_ALLOC(buffer, block_size); +- +- f(ctx, block_size, buffer, ctr); +- INCREMENT(block_size, ctr); +- memxor3(dst + length - left, src + length - left, buffer, left); +- } ++ f(ctx, block_size, block, ctr); ++ INCREMENT(block_size, ctr); ++ memxor3(dst + filled, src + filled, block, length - filled); + } + } + else + { +- if (length > block_size) +- { +- TMP_DECL(buffer, uint8_t, NBLOCKS * NETTLE_MAX_CIPHER_BLOCK_SIZE); +- size_t chunk = NBLOCKS * block_size; ++ /* For in-place CTR, construct a buffer of consecutive counter ++ values, of size at most CTR_BUFFER_LIMIT. 
*/ ++ TMP_DECL(buffer, uint8_t, CTR_BUFFER_LIMIT); ++ ++ size_t buffer_size; ++ if (length < block_size) ++ buffer_size = block_size; ++ else if (length <= CTR_BUFFER_LIMIT) ++ buffer_size = length; ++ else ++ buffer_size = CTR_BUFFER_LIMIT; + +- TMP_ALLOC(buffer, chunk); ++ TMP_ALLOC(buffer, buffer_size); + +- for (; length >= chunk; +- length -= chunk, src += chunk, dst += chunk) +- { +- unsigned n; +- uint8_t *p; +- for (n = 0, p = buffer; n < NBLOCKS; n++, p += block_size) +- { +- memcpy (p, ctr, block_size); +- INCREMENT(block_size, ctr); +- } +- f(ctx, chunk, buffer, buffer); +- memxor(dst, buffer, chunk); +- } +- +- if (length > 0) +- { +- /* Final, possibly partial, blocks */ +- for (chunk = 0; chunk < length; chunk += block_size) +- { +- memcpy (buffer + chunk, ctr, block_size); +- INCREMENT(block_size, ctr); +- } +- f(ctx, chunk, buffer, buffer); +- memxor3(dst, src, buffer, length); +- } ++ while (length >= block_size) ++ { ++ size_t filled ++ = ctr_fill (block_size, ctr, MIN(buffer_size, length), buffer); ++ assert (filled > 0); ++ f(ctx, filled, buffer, buffer); ++ memxor(dst, buffer, filled); ++ length -= filled; ++ dst += filled; + } +- else if (length > 0) +- { +- TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE); +- TMP_ALLOC(buffer, block_size); + ++ /* Final, possibly partial, block. */ ++ if (length > 0) ++ { + f(ctx, block_size, buffer, ctr); + INCREMENT(block_size, ctr); +- memxor3(dst, src, buffer, length); ++ memxor(dst, buffer, length); + } + } + } +diff -up ./ctr-internal.h.ghash ./ctr-internal.h +--- ./ctr-internal.h.ghash 2021-07-14 14:11:58.130891552 +0200 ++++ ./ctr-internal.h 2021-07-14 14:11:58.130891552 +0200 +@@ -0,0 +1,56 @@ ++/* ctr-internal.h ++ ++ Copyright (C) 2018 Niels Möller ++ ++ This file is part of GNU Nettle. 
++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++*/ ++ ++#ifndef NETTLE_CTR_INTERNAL_H_INCLUDED ++#define NETTLE_CTR_INTERNAL_H_INCLUDED ++ ++#include "nettle-types.h" ++ ++/* Name mangling */ ++#define _ctr_crypt16 _nettle_ctr_crypt16 ++ ++/* Size limit for temporary stack buffers. */ ++#define CTR_BUFFER_LIMIT 512 ++ ++/* Fill BUFFER (n blocks) with incrementing CTR values. It would be ++ nice if CTR was always 64-bit aligned, but it isn't when called ++ from ctr_crypt. 
*/ ++typedef void ++nettle_fill16_func(uint8_t *ctr, size_t n, union nettle_block16 *buffer); ++ ++void ++_ctr_crypt16(const void *ctx, nettle_cipher_func *f, ++ nettle_fill16_func *fill, uint8_t *ctr, ++ size_t length, uint8_t *dst, ++ const uint8_t *src); ++ ++ ++#endif /* NETTLE_CTR_INTERNAL_H_INCLUDED */ +diff -up ./fat-ppc.c.ghash ./fat-ppc.c +--- ./fat-ppc.c.ghash 2021-07-14 14:11:58.126891572 +0200 ++++ ./fat-ppc.c 2021-07-14 14:11:58.130891552 +0200 +@@ -49,6 +49,7 @@ + + #include "aes-internal.h" + #include "gcm.h" ++#include "gcm-internal.h" + #include "fat-setup.h" + + /* Define from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */ +@@ -87,6 +88,16 @@ DECLARE_FAT_FUNC(_nettle_aes_decrypt, ae + DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, c) + DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, ppc64) + ++#if GCM_TABLE_BITS == 8 ++DECLARE_FAT_FUNC(_nettle_gcm_init_key, gcm_init_key_func) ++DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, c) ++DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, ppc64) ++ ++DECLARE_FAT_FUNC(_nettle_gcm_hash, gcm_hash_func) ++DECLARE_FAT_FUNC_VAR(gcm_hash, gcm_hash_func, c) ++DECLARE_FAT_FUNC_VAR(gcm_hash, gcm_hash_func, ppc64) ++#endif /* GCM_TABLE_BITS == 8 */ ++ + static void CONSTRUCTOR + fat_init (void) + { +@@ -101,17 +112,29 @@ fat_init (void) + features.have_crypto_ext ? "crypto extensions" : ""); + + if (features.have_crypto_ext) +- { +- if (verbose) +- fprintf (stderr, "libnettle: enabling arch 2.07 code.\n"); +- _nettle_aes_encrypt_vec = _nettle_aes_encrypt_ppc64; +- _nettle_aes_decrypt_vec = _nettle_aes_decrypt_ppc64; +- } ++ { ++ if (verbose) ++ fprintf (stderr, "libnettle: enabling arch 2.07 code.\n"); ++ _nettle_aes_encrypt_vec = _nettle_aes_encrypt_ppc64; ++ _nettle_aes_decrypt_vec = _nettle_aes_decrypt_ppc64; ++#if GCM_TABLE_BITS == 8 ++ /* Make sure _nettle_gcm_init_key_vec function is compatible ++ with _nettle_gcm_hash_vec function e.g. 
_nettle_gcm_init_key_c() ++ fills gcm_key table with values that are incompatible with ++ _nettle_gcm_hash_ppc64() */ ++ _nettle_gcm_init_key_vec = _nettle_gcm_init_key_ppc64; ++ _nettle_gcm_hash_vec = _nettle_gcm_hash_ppc64; ++#endif /* GCM_TABLE_BITS == 8 */ ++ } + else +- { +- _nettle_aes_encrypt_vec = _nettle_aes_encrypt_c; +- _nettle_aes_decrypt_vec = _nettle_aes_decrypt_c; +- } ++ { ++ _nettle_aes_encrypt_vec = _nettle_aes_encrypt_c; ++ _nettle_aes_decrypt_vec = _nettle_aes_decrypt_c; ++#if GCM_TABLE_BITS == 8 ++ _nettle_gcm_init_key_vec = _nettle_gcm_init_key_c; ++ _nettle_gcm_hash_vec = _nettle_gcm_hash_c; ++#endif /* GCM_TABLE_BITS == 8 */ ++ } + } + + DEFINE_FAT_FUNC(_nettle_aes_encrypt, void, +@@ -127,3 +150,14 @@ DEFINE_FAT_FUNC(_nettle_aes_decrypt, voi + size_t length, uint8_t *dst, + const uint8_t *src), + (rounds, keys, T, length, dst, src)) ++ ++#if GCM_TABLE_BITS == 8 ++DEFINE_FAT_FUNC(_nettle_gcm_init_key, void, ++ (union nettle_block16 *table), ++ (table)) ++ ++DEFINE_FAT_FUNC(_nettle_gcm_hash, void, ++ (const struct gcm_key *key, union nettle_block16 *x, ++ size_t length, const uint8_t *data), ++ (key, x, length, data)) ++#endif /* GCM_TABLE_BITS == 8 */ +diff -up ./fat-setup.h.ghash ./fat-setup.h +--- ./fat-setup.h.ghash 2018-12-04 21:56:06.000000000 +0100 ++++ ./fat-setup.h 2021-07-14 14:11:58.130891552 +0200 +@@ -159,6 +159,11 @@ typedef void aes_crypt_internal_func (un + size_t length, uint8_t *dst, + const uint8_t *src); + ++typedef void gcm_init_key_func (union nettle_block16 *table); ++ ++typedef void gcm_hash_func (const struct gcm_key *key, union nettle_block16 *x, ++ size_t length, const uint8_t *data); ++ + typedef void *(memxor_func)(void *dst, const void *src, size_t n); + + typedef void salsa20_core_func (uint32_t *dst, const uint32_t *src, unsigned rounds); +diff -up ./gcm.c.ghash ./gcm.c +--- ./gcm.c.ghash 2018-12-04 21:56:05.000000000 +0100 ++++ ./gcm.c 2021-07-14 14:11:58.131891547 +0200 +@@ -6,8 +6,9 @@ + See also the gcm 
paper at + http://www.cryptobarn.com/papers/gcm-spec.pdf. + +- Copyright (C) 2011, 2013 Niels Möller + Copyright (C) 2011 Katholieke Universiteit Leuven ++ Copyright (C) 2011, 2013, 2018 Niels Möller ++ Copyright (C) 2018 Red Hat, Inc. + + Contributed by Nikos Mavrogiannopoulos + +@@ -48,9 +49,11 @@ + + #include "gcm.h" + ++#include "gcm-internal.h" + #include "memxor.h" + #include "nettle-internal.h" + #include "macros.h" ++#include "ctr-internal.h" + + #define GHASH_POLYNOMIAL 0xE1UL + +@@ -112,7 +115,17 @@ gcm_gf_shift (union nettle_block16 *r, c + #endif /* ! WORDS_BIGENDIAN */ + } + +-#if GCM_TABLE_BITS == 0 ++#if GCM_TABLE_BITS != 8 ++/* The native implementations (currently ppc64 only) depend on the ++ GCM_TABLE_BITS == 8 layout */ ++#undef HAVE_NATIVE_gcm_hash ++#undef HAVE_NATIVE_gcm_init_key ++#undef HAVE_NATIVE_fat_gcm_hash ++#undef HAVE_NATIVE_fat_gcm_init_key ++#endif ++ ++#if !HAVE_NATIVE_gcm_hash ++# if GCM_TABLE_BITS == 0 + /* Sets x <- x * y mod r, using the plain bitwise algorithm from the + specification. y may be shorter than a full block, missing bytes + are assumed zero. */ +@@ -140,15 +153,15 @@ gcm_gf_mul (union nettle_block16 *x, con + } + memcpy (x->b, Z.b, sizeof(Z)); + } +-#else /* GCM_TABLE_BITS != 0 */ ++# else /* GCM_TABLE_BITS != 0 */ + +-# if WORDS_BIGENDIAN +-# define W(left,right) (0x##left##right) +-# else +-# define W(left,right) (0x##right##left) +-# endif ++# if WORDS_BIGENDIAN ++# define W(left,right) (0x##left##right) ++# else ++# define W(left,right) (0x##right##left) ++# endif + +-# if GCM_TABLE_BITS == 4 ++# if GCM_TABLE_BITS == 4 + static const uint16_t + shift_table[0x10] = { + W(00,00),W(1c,20),W(38,40),W(24,60),W(70,80),W(6c,a0),W(48,c0),W(54,e0), +@@ -177,26 +190,13 @@ gcm_gf_shift_4(union nettle_block16 *x) + # error Unsupported word size. */ + #endif + #else /* ! 
WORDS_BIGENDIAN */ +-# if SIZEOF_LONG == 4 +-#define RSHIFT_WORD(x) \ +- ((((x) & 0xf0f0f0f0UL) >> 4) \ +- | (((x) & 0x000f0f0f) << 12)) +- reduce = shift_table[(w[3] >> 24) & 0xf]; +- w[3] = RSHIFT_WORD(w[3]) | ((w[2] >> 20) & 0xf0); +- w[2] = RSHIFT_WORD(w[2]) | ((w[1] >> 20) & 0xf0); +- w[1] = RSHIFT_WORD(w[1]) | ((w[0] >> 20) & 0xf0); +- w[0] = RSHIFT_WORD(w[0]) ^ reduce; +-# elif SIZEOF_LONG == 8 +-#define RSHIFT_WORD(x) \ +- ((((x) & 0xf0f0f0f0f0f0f0f0UL) >> 4) \ +- | (((x) & 0x000f0f0f0f0f0f0fUL) << 12)) +- reduce = shift_table[(w[1] >> 56) & 0xf]; +- w[1] = RSHIFT_WORD(w[1]) | ((w[0] >> 52) & 0xf0); +- w[0] = RSHIFT_WORD(w[0]) ^ reduce; +-# else +-# error Unsupported word size. */ +-# endif +-# undef RSHIFT_WORD ++# define RSHIFT_WORD_4(x) \ ++ ((((x) & UINT64_C(0xf0f0f0f0f0f0f0f0)) >> 4) \ ++ | (((x) & UINT64_C(0x000f0f0f0f0f0f0f)) << 12)) ++ reduce = shift_table[(u64[1] >> 56) & 0xf]; ++ u64[1] = RSHIFT_WORD_4(u64[1]) | ((u64[0] >> 52) & 0xf0); ++ u64[0] = RSHIFT_WORD_4(u64[0]) ^ reduce; ++# undef RSHIFT_WORD_4 + #endif /* ! WORDS_BIGENDIAN */ + } + +@@ -219,10 +219,10 @@ gcm_gf_mul (union nettle_block16 *x, con + } + memcpy (x->b, Z.b, sizeof(Z)); + } +-# elif GCM_TABLE_BITS == 8 +-# if HAVE_NATIVE_gcm_hash8 ++# elif GCM_TABLE_BITS == 8 ++# if HAVE_NATIVE_gcm_hash8 + +-#define gcm_hash _nettle_gcm_hash8 ++#define _nettle_gcm_hash _nettle_gcm_hash8 + void + _nettle_gcm_hash8 (const struct gcm_key *key, union nettle_block16 *x, + size_t length, const uint8_t *data); +@@ -317,18 +317,46 @@ gcm_gf_mul (union nettle_block16 *x, con + gcm_gf_shift_8(&Z); + gcm_gf_add(x, &Z, &table[x->b[0]]); + } +-# endif /* ! HAVE_NATIVE_gcm_hash8 */ +-# else /* GCM_TABLE_BITS != 8 */ +-# error Unsupported table size. +-# endif /* GCM_TABLE_BITS != 8 */ ++# endif /* ! HAVE_NATIVE_gcm_hash8 */ ++# else /* GCM_TABLE_BITS != 8 */ ++# error Unsupported table size. 
++# endif /* GCM_TABLE_BITS != 8 */ ++ ++# undef W ++# endif /* GCM_TABLE_BITS != 0 */ ++#endif /* !HAVE_NATIVE_gcm_hash */ + +-#undef W +- +-#endif /* GCM_TABLE_BITS */ + + /* Increment the rightmost 32 bits. */ + #define INC32(block) INCREMENT(4, (block.b) + GCM_BLOCK_SIZE - 4) + ++#if !HAVE_NATIVE_gcm_init_key ++# if !HAVE_NATIVE_fat_gcm_hash ++# define _nettle_gcm_init_key _nettle_gcm_init_key_c ++static ++# endif ++void ++_nettle_gcm_init_key_c(union nettle_block16 *table) ++{ ++#if GCM_TABLE_BITS ++ /* Middle element if GCM_TABLE_BITS > 0, otherwise the first ++ element */ ++ unsigned i = (1<h[0].b, 0, GCM_BLOCK_SIZE); + f (cipher, GCM_BLOCK_SIZE, key->h[i].b, key->h[0].b); +- +-#if GCM_TABLE_BITS +- /* Algorithm 3 from the gcm paper. First do powers of two, then do +- the rest by adding. */ +- while (i /= 2) +- gcm_gf_shift(&key->h[i], &key->h[2*i]); +- for (i = 2; i < 1<h[i+j], &key->h[i],&key->h[j]); +- } +-#endif ++ ++ _nettle_gcm_init_key(key->h); + } + +-#ifndef gcm_hash +-static void +-gcm_hash(const struct gcm_key *key, union nettle_block16 *x, +- size_t length, const uint8_t *data) ++#if !(HAVE_NATIVE_gcm_hash || HAVE_NATIVE_gcm_hash8) ++# if !HAVE_NATIVE_fat_gcm_hash ++# define _nettle_gcm_hash _nettle_gcm_hash_c ++static ++# endif ++void ++_nettle_gcm_hash_c(const struct gcm_key *key, union nettle_block16 *x, ++ size_t length, const uint8_t *data) + { + for (; length >= GCM_BLOCK_SIZE; + length -= GCM_BLOCK_SIZE, data += GCM_BLOCK_SIZE) +@@ -377,7 +398,7 @@ gcm_hash(const struct gcm_key *key, unio + gcm_gf_mul (x, key->h); + } + } +-#endif /* !gcm_hash */ ++#endif /* !(HAVE_NATIVE_gcm_hash || HAVE_NATIVE_gcm_hash8) */ + + static void + gcm_hash_sizes(const struct gcm_key *key, union nettle_block16 *x, +@@ -391,7 +412,7 @@ gcm_hash_sizes(const struct gcm_key *key + WRITE_UINT64 (buffer, auth_size); + WRITE_UINT64 (buffer + 8, data_size); + +- gcm_hash(key, x, GCM_BLOCK_SIZE, buffer); ++ _nettle_gcm_hash(key, x, GCM_BLOCK_SIZE, buffer); + } + + /* 
NOTE: The key is needed only if length != GCM_IV_SIZE */ +@@ -410,7 +431,7 @@ gcm_set_iv(struct gcm_ctx *ctx, const st + else + { + memset(ctx->iv.b, 0, GCM_BLOCK_SIZE); +- gcm_hash(key, &ctx->iv, length, iv); ++ _nettle_gcm_hash(key, &ctx->iv, length, iv); + gcm_hash_sizes(key, &ctx->iv, 0, length); + } + +@@ -429,47 +450,68 @@ gcm_update(struct gcm_ctx *ctx, const st + assert(ctx->auth_size % GCM_BLOCK_SIZE == 0); + assert(ctx->data_size == 0); + +- gcm_hash(key, &ctx->x, length, data); ++ _nettle_gcm_hash(key, &ctx->x, length, data); + + ctx->auth_size += length; + } + ++static nettle_fill16_func gcm_fill; ++#if WORDS_BIGENDIAN + static void +-gcm_crypt(struct gcm_ctx *ctx, const void *cipher, nettle_cipher_func *f, +- size_t length, uint8_t *dst, const uint8_t *src) ++gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) + { +- uint8_t buffer[GCM_BLOCK_SIZE]; ++ uint64_t hi, mid; ++ uint32_t lo; ++ size_t i; ++ hi = READ_UINT64(ctr); ++ mid = (uint64_t) READ_UINT32(ctr + 8) << 32; ++ lo = READ_UINT32(ctr + 12); + +- if (src != dst) ++ for (i = 0; i < blocks; i++) + { +- for (; length >= GCM_BLOCK_SIZE; +- (length -= GCM_BLOCK_SIZE, +- src += GCM_BLOCK_SIZE, dst += GCM_BLOCK_SIZE)) +- { +- f (cipher, GCM_BLOCK_SIZE, dst, ctx->ctr.b); +- memxor (dst, src, GCM_BLOCK_SIZE); +- INC32 (ctx->ctr); +- } ++ buffer[i].u64[0] = hi; ++ buffer[i].u64[1] = mid + lo++; + } +- else ++ WRITE_UINT32(ctr + 12, lo); ++ ++} ++#elif HAVE_BUILTIN_BSWAP64 ++/* Assume __builtin_bswap32 is also available */ ++static void ++gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) ++{ ++ uint64_t hi, mid; ++ uint32_t lo; ++ size_t i; ++ hi = LE_READ_UINT64(ctr); ++ mid = LE_READ_UINT32(ctr + 8); ++ lo = READ_UINT32(ctr + 12); ++ ++ for (i = 0; i < blocks; i++) + { +- for (; length >= GCM_BLOCK_SIZE; +- (length -= GCM_BLOCK_SIZE, +- src += GCM_BLOCK_SIZE, dst += GCM_BLOCK_SIZE)) +- { +- f (cipher, GCM_BLOCK_SIZE, buffer, ctx->ctr.b); +- memxor3 (dst, src, buffer, 
GCM_BLOCK_SIZE); +- INC32 (ctx->ctr); +- } ++ buffer[i].u64[0] = hi; ++ buffer[i].u64[1] = mid + ((uint64_t)__builtin_bswap32(lo) << 32); ++ lo++; + } +- if (length > 0) ++ WRITE_UINT32(ctr + 12, lo); ++} ++#else ++static void ++gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) ++{ ++ uint32_t c; ++ ++ c = READ_UINT32(ctr + GCM_BLOCK_SIZE - 4); ++ ++ for (; blocks-- > 0; buffer++, c++) + { +- /* A final partial block */ +- f (cipher, GCM_BLOCK_SIZE, buffer, ctx->ctr.b); +- memxor3 (dst, src, buffer, length); +- INC32 (ctx->ctr); ++ memcpy(buffer->b, ctr, GCM_BLOCK_SIZE - 4); ++ WRITE_UINT32(buffer->b + GCM_BLOCK_SIZE - 4, c); + } ++ ++ WRITE_UINT32(ctr + GCM_BLOCK_SIZE - 4, c); + } ++#endif + + void + gcm_encrypt (struct gcm_ctx *ctx, const struct gcm_key *key, +@@ -478,8 +520,8 @@ gcm_encrypt (struct gcm_ctx *ctx, const + { + assert(ctx->data_size % GCM_BLOCK_SIZE == 0); + +- gcm_crypt(ctx, cipher, f, length, dst, src); +- gcm_hash(key, &ctx->x, length, dst); ++ _ctr_crypt16(cipher, f, gcm_fill, ctx->ctr.b, length, dst, src); ++ _nettle_gcm_hash(key, &ctx->x, length, dst); + + ctx->data_size += length; + } +@@ -491,8 +533,8 @@ gcm_decrypt(struct gcm_ctx *ctx, const s + { + assert(ctx->data_size % GCM_BLOCK_SIZE == 0); + +- gcm_hash(key, &ctx->x, length, src); +- gcm_crypt(ctx, cipher, f, length, dst, src); ++ _nettle_gcm_hash(key, &ctx->x, length, src); ++ _ctr_crypt16(cipher, f, gcm_fill, ctx->ctr.b, length, dst, src); + + ctx->data_size += length; + } +diff -up ./gcm-internal.h.ghash ./gcm-internal.h +--- ./gcm-internal.h.ghash 2021-07-14 14:11:58.131891547 +0200 ++++ ./gcm-internal.h 2021-07-14 14:11:58.131891547 +0200 +@@ -0,0 +1,54 @@ ++/* gcm-internal.h ++ ++ Copyright (C) 2020 Niels Möller ++ ++ This file is part of GNU Nettle. 
++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++*/ ++ ++#ifndef NETTLE_GCM_INTERNAL_H_INCLUDED ++#define NETTLE_GCM_INTERNAL_H_INCLUDED ++ ++/* Functions available only in some configurations */ ++void ++_nettle_gcm_init_key (union nettle_block16 *table); ++ ++void ++_nettle_gcm_hash(const struct gcm_key *key, union nettle_block16 *x, ++ size_t length, const uint8_t *data); ++ ++#if HAVE_NATIVE_fat_gcm_init_key ++void ++_nettle_gcm_init_key_c (union nettle_block16 *table); ++#endif ++ ++#if HAVE_NATIVE_fat_gcm_hash ++void ++_nettle_gcm_hash_c (const struct gcm_key *key, union nettle_block16 *x, ++ size_t length, const uint8_t *data); ++#endif ++ ++#endif /* NETTLE_GCM_INTERNAL_H_INCLUDED */ +diff -up ./Makefile.in.ghash ./Makefile.in +--- ./Makefile.in.ghash 2021-07-14 14:11:58.124891582 +0200 ++++ ./Makefile.in 2021-07-14 14:11:58.131891547 +0200 +@@ -96,7 +96,7 @@ nettle_SOURCES = aes-decrypt-internal.c + chacha-crypt.c chacha-core-internal.c \ + chacha-poly1305.c chacha-poly1305-meta.c \ + chacha-set-key.c chacha-set-nonce.c \ +- ctr.c des.c des3.c des-compat.c \ ++ ctr.c ctr16.c des.c des3.c des-compat.c \ 
+ eax.c eax-aes128.c eax-aes128-meta.c \ + gcm.c gcm-aes.c \ + gcm-aes128.c gcm-aes128-meta.c \ +@@ -233,6 +233,8 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt + cast128_sboxes.h desinfo.h desCode.h \ + memxor-internal.h nettle-internal.h nettle-write.h \ + rsa-internal.h \ ++ ctr-internal.h \ ++ gcm-internal.h \ + gmp-glue.h ecc-internal.h fat-setup.h \ + mini-gmp.h asm.m4 \ + nettle.texinfo nettle.info nettle.html nettle.pdf sha-example.c +diff -up ./nettle-types.h.ghash ./nettle-types.h +--- ./nettle-types.h.ghash 2018-12-04 21:56:06.000000000 +0100 ++++ ./nettle-types.h 2021-07-14 14:11:58.131891547 +0200 +@@ -48,6 +48,7 @@ union nettle_block16 + { + uint8_t b[16]; + unsigned long w[16 / sizeof(unsigned long)]; ++ uint64_t u64[2]; + }; + + /* Randomness. Used by key generation and dsa signature creation. */ +diff -up ./powerpc64/fat/gcm-hash.asm.ghash ./powerpc64/fat/gcm-hash.asm +--- ./powerpc64/fat/gcm-hash.asm.ghash 2021-07-14 14:11:58.131891547 +0200 ++++ ./powerpc64/fat/gcm-hash.asm 2021-07-14 14:11:58.131891547 +0200 +@@ -0,0 +1,39 @@ ++C powerpc64/fat/gcm-hash.asm ++ ++ ++ifelse(< ++ Copyright (C) 2020 Mamone Tarsha ++ ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. 
++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++>) ++ ++dnl picked up by configure ++dnl PROLOGUE(_nettle_fat_gcm_init_key) ++dnl PROLOGUE(_nettle_fat_gcm_hash) ++ ++define(<fat_transform>, <$1_ppc64>) ++include_src(<powerpc64/p8/gcm-hash.asm>) +diff -up ./powerpc64/p8/gcm-hash.asm.ghash ./powerpc64/p8/gcm-hash.asm +--- ./powerpc64/p8/gcm-hash.asm.ghash 2021-07-14 14:11:58.131891547 +0200 ++++ ./powerpc64/p8/gcm-hash.asm 2021-07-14 14:11:58.131891547 +0200 +@@ -0,0 +1,499 @@ ++C powerpc64/p8/gcm-hash.asm ++ ++ifelse(< ++ Copyright (C) 2020 Niels Möller and Mamone Tarsha ++ This file is part of GNU Nettle. ++ ++ GNU Nettle is free software: you can redistribute it and/or ++ modify it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++ or ++ ++ * the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your ++ option) any later version. ++ ++ or both in parallel, as here. ++ ++ GNU Nettle is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received copies of the GNU General Public License and ++ the GNU Lesser General Public License along with this program. If ++ not, see http://www.gnu.org/licenses/. ++>) ++ ++C gcm_set_key() assigns H value in the middle element of the table ++define(<H_Idx>, <128>) ++ ++C Register usage: ++ ++define(<SP>, <1>) ++define(<TOCP>, <2>) ++ ++define(<TABLE>, <3>) ++ ++define(<ZERO>, <0>) ++define(<B1>, <1>) ++define(<EMSB>, <16>) ++define(<POLY>, <17>) ++define(<POLY_L>, <1>) ++ ++define(<H>, <2>) ++define(<H2>, <3>) ++define(<H3>, <4>) ++define(<H4>, <5>)
++define(<H1M>, <6>) ++define(<H1L>, <7>) ++define(<H2M>, <8>) ++define(<H2L>, <9>) ++define(<Hl>, <10>) ++define(<Hm>, <11>) ++define(<Hp>, <12>) ++define(<Hl2>, <13>) ++define(<Hm2>, <14>) ++define(<Hp2>, <15>) ++define(<R>, <13>) ++define(<F>, <14>) ++define(<T>, <15>) ++define(<R2>, <16>) ++define(<F2>, <17>) ++define(<T2>, <18>) ++ ++define(<LE_TEMP>, <18>) ++define(<LE_MASK>, <19>) ++ ++.file "gcm-hash.asm" ++ ++.text ++ ++ C void gcm_init_key (union gcm_block *table) ++ ++C This function populates the gcm table as the following layout ++C ******************************************************************************* ++C | H1M = (H1 div x⁶⁴)||((H1 mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷)) div x⁶⁴ | ++C | H1L = (H1 mod x⁶⁴)||(((H1 mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷)) mod x⁶⁴) + (H1 div x⁶⁴) | ++C | | ++C | H2M = (H2 div x⁶⁴)||((H2 mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷)) div x⁶⁴ | ++C | H2L = (H2 mod x⁶⁴)||(((H2 mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷)) mod x⁶⁴) + (H2 div x⁶⁴) | ++C | | ++C | H3M = (H3 div x⁶⁴)||((H3 mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷)) div x⁶⁴ | ++C | H3L = (H3 mod x⁶⁴)||(((H3 mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷)) mod x⁶⁴) + (H3 div x⁶⁴) | ++C | | ++C | H4M = (H4 div x⁶⁴)||((H4 mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷)) div x⁶⁴ | ++C | H4L = (H4 mod x⁶⁴)||(((H4 mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷)) mod x⁶⁴) + (H4 div x⁶⁴) | ++C ******************************************************************************* ++ ++define(<FUNC_ALIGN>, <5>) ++PROLOGUE(_nettle_gcm_init_key) ++ DATA_LOAD_VEC(POLY,.polynomial,7) C 0xC2000000000000000000000000000001 ++IF_LE(< ++ li 8,0 ++ lvsl LE_MASK,0,8 C 0x000102030405060708090A0B0C0D0E0F ++ vspltisb LE_TEMP,0x07 C 0x07070707070707070707070707070707 ++ vxor LE_MASK,LE_MASK,LE_TEMP C 0x07060504030201000F0E0D0C0B0A0908 ++>) ++ ++ C 'H' is assigned by gcm_set_key() to the middle element of the table ++ li 10,H_Idx*16 ++ lxvd2x VSR(H),10,TABLE C load 'H' ++ C byte-reverse of each doubleword permuting on little-endian mode ++IF_LE(< ++ vperm H,H,H,LE_MASK ++>) ++ ++ C --- calculate H = H << 1 mod P(X), P(X) = (x¹²⁸+x¹²⁷+x¹²⁶+x¹²¹+1) --- ++ ++ vupkhsb EMSB,H C extend most significant bit to first 
byte ++ vspltisb B1,1 C 0x01010101010101010101010101010101 ++ vspltb EMSB,EMSB,0 C first byte quadword-extend ++ vsl H,H,B1 C H = H << 1 ++ vand EMSB,EMSB,POLY C EMSB &= 0xC2000000000000000000000000000001 ++ vxor ZERO,ZERO,ZERO C 0x00000000000000000000000000000000 ++ vxor H,H,EMSB C H ^= EMSB ++ ++ C --- calculate H^2 = H*H --- ++ ++ xxmrghd VSR(POLY_L),VSR(ZERO),VSR(POLY) C 0x0000000000000000C200000000000000 ++ ++ C --- Hp = (H mod x⁶⁴) / x⁶⁴ mod P(X) --- ++ C --- Hp = (H mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷) mod P(X), deg(Hp) ≤ 127 --- ++ C --- Hp = (H mod x⁶⁴) × (x⁶⁴+x⁶³+x⁶²+x⁵⁷) --- ++ vpmsumd Hp,H,POLY_L C Hp = (H mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷) ++ xxswapd VSR(Hm),VSR(H) ++ xxmrgld VSR(Hl),VSR(H),VSR(ZERO) C Hl = (H mod x⁶⁴) × x⁶⁴ ++ vxor Hm,Hm,Hp C Hm = Hm + Hp ++ vxor Hl,Hl,Hp C Hl = Hl + Hp ++ xxmrgld VSR(H1L),VSR(H),VSR(Hm) C H1L = (H mod x⁶⁴)||(Hl mod x⁶⁴) ++ xxmrghd VSR(H1M),VSR(H),VSR(Hl) C H1M = (H div x⁶⁴)||(Hl div x⁶⁴) ++ ++ vpmsumd F,H1L,H C F = (H1Lh × Hh) + (H1Ll × Hl) ++ vpmsumd R,H1M,H C R = (H1Mh × Hh) + (H1Ml × Hl) ++ ++ C --- reduction --- ++ vpmsumd T,F,POLY_L C T = (F mod x⁶⁴) × (x⁶³+x⁶²+x⁵⁷) ++ xxswapd VSR(H2),VSR(F) ++ vxor R,R,T C R = R + T ++ vxor H2,R,H2 ++ ++ xxmrgld VSR(Hl),VSR(H2),VSR(ZERO) ++ xxswapd VSR(Hm),VSR(H2) ++ vpmsumd Hp,H2,POLY_L ++ vxor Hl,Hl,Hp ++ vxor Hm,Hm,Hp ++ xxmrghd VSR(H2M),VSR(H2),VSR(Hl) ++ xxmrgld VSR(H2L),VSR(H2),VSR(Hm) ++ ++ C store H1M, H1L, H2M, H2L ++ li 8,1*16 ++ li 9,2*16 ++ li 10,3*16 ++ stxvd2x VSR(H1M),0,TABLE ++ stxvd2x VSR(H1L),8,TABLE ++ stxvd2x VSR(H2M),9,TABLE ++ stxvd2x VSR(H2L),10,TABLE ++ ++ C --- calculate H^3 = H^1*H^2, H^4 = H^2*H^2 --- ++ ++ vpmsumd F,H1L,H2 ++ vpmsumd F2,H2L,H2 ++ vpmsumd R,H1M,H2 ++ vpmsumd R2,H2M,H2 ++ ++ vpmsumd T,F,POLY_L ++ vpmsumd T2,F2,POLY_L ++ xxswapd VSR(H3),VSR(F) ++ xxswapd VSR(H4),VSR(F2) ++ vxor R,R,T ++ vxor R2,R2,T2 ++ vxor H3,R,H3 ++ vxor H4,R2,H4 ++ ++ xxmrgld VSR(Hl),VSR(H3),VSR(ZERO) ++ xxmrgld VSR(Hl2),VSR(H4),VSR(ZERO) ++ xxswapd VSR(Hm),VSR(H3) ++ xxswapd 
VSR(Hm2),VSR(H4) ++ vpmsumd Hp,H3,POLY_L ++ vpmsumd Hp2,H4,POLY_L ++ vxor Hl,Hl,Hp ++ vxor Hl2,Hl2,Hp2 ++ vxor Hm,Hm,Hp ++ vxor Hm2,Hm2,Hp2 ++ xxmrghd VSR(H1M),VSR(H3),VSR(Hl) ++ xxmrghd VSR(H2M),VSR(H4),VSR(Hl2) ++ xxmrgld VSR(H1L),VSR(H3),VSR(Hm) ++ xxmrgld VSR(H2L),VSR(H4),VSR(Hm2) ++ ++ C store H3M, H3L, H4M, H4L ++ li 7,4*16 ++ li 8,5*16 ++ li 9,6*16 ++ li 10,7*16 ++ stxvd2x VSR(H1M),7,TABLE ++ stxvd2x VSR(H1L),8,TABLE ++ stxvd2x VSR(H2M),9,TABLE ++ stxvd2x VSR(H2L),10,TABLE ++ ++ blr ++EPILOGUE(_nettle_gcm_init_key) ++ ++define(<TABLE>, <3>)
++define(<X>, <4>) ++define(<LENGTH>, <5>) ++define(<DATA>, <6>) ++ ++define(<ZERO>, <16>) ++define(<POLY>, <17>) ++define(<POLY_L>, <0>) ++ ++define(<D>, <1>) ++define(<C0>, <2>) ++define(<C1>, <3>) ++define(<C2>, <4>) ++define(<C3>, <5>) ++define(<H1M>, <6>) ++define(<H1L>, <7>) ++define(<H2M>, <8>) ++define(<H2L>, <9>) ++define(<H3M>, <10>) ++define(<H3L>, <11>) ++define(<H4M>, <12>) ++define(<H4L>, <13>) ++define(<R>, <14>) ++define(<F>, <15>) ++define(<R2>, <16>) ++define(<F2>, <17>) ++define(<T>, <18>) ++define(<R3>, <20>) ++define(<F3>, <21>) ++define(<R4>, <22>) ++define(<F4>, <23>) ++ ++define(<LE_TEMP>, <18>) ++define(<LE_MASK>, <19>) ++ ++ C void gcm_hash (const struct gcm_key *key, union gcm_block *x, ++ C size_t length, const uint8_t *data) ++ ++define(<FUNC_ALIGN>, <5>) ++PROLOGUE(_nettle_gcm_hash) ++ vxor ZERO,ZERO,ZERO ++ DATA_LOAD_VEC(POLY,.polynomial,7) ++IF_LE(< ++ li 8,0 ++ lvsl LE_MASK,0,8 ++ vspltisb LE_TEMP,0x07 ++ vxor LE_MASK,LE_MASK,LE_TEMP ++>) ++ xxmrghd VSR(POLY_L),VSR(ZERO),VSR(POLY) ++ ++ lxvd2x VSR(D),0,X C load 'X' pointer ++ C byte-reverse of each doubleword permuting on little-endian mode ++IF_LE(< ++ vperm D,D,D,LE_MASK ++>) ++ ++ C --- process 4 blocks '128-bit each' per one loop --- ++ ++ srdi. 
7,LENGTH,6 C 4-blocks loop count 'LENGTH / (4 * 16)' ++ beq L2x ++ ++ mtctr 7 C assign counter register to loop count ++ ++ C store non-volatile vector registers ++ addi 8,SP,-64 ++ stvx 20,0,8 ++ addi 8,8,16 ++ stvx 21,0,8 ++ addi 8,8,16 ++ stvx 22,0,8 ++ addi 8,8,16 ++ stvx 23,0,8 ++ ++ C load table elements ++ li 8,1*16 ++ li 9,2*16 ++ li 10,3*16 ++ lxvd2x VSR(H1M),0,TABLE ++ lxvd2x VSR(H1L),8,TABLE ++ lxvd2x VSR(H2M),9,TABLE ++ lxvd2x VSR(H2L),10,TABLE ++ li 7,4*16 ++ li 8,5*16 ++ li 9,6*16 ++ li 10,7*16 ++ lxvd2x VSR(H3M),7,TABLE ++ lxvd2x VSR(H3L),8,TABLE ++ lxvd2x VSR(H4M),9,TABLE ++ lxvd2x VSR(H4L),10,TABLE ++ ++ li 8,0x10 ++ li 9,0x20 ++ li 10,0x30 ++.align 5 ++L4x_loop: ++ C input loading ++ lxvd2x VSR(C0),0,DATA C load C0 ++ lxvd2x VSR(C1),8,DATA C load C1 ++ lxvd2x VSR(C2),9,DATA C load C2 ++ lxvd2x VSR(C3),10,DATA C load C3 ++ ++IF_LE(< ++ vperm C0,C0,C0,LE_MASK ++ vperm C1,C1,C1,LE_MASK ++ vperm C2,C2,C2,LE_MASK ++ vperm C3,C3,C3,LE_MASK ++>) ++ ++ C previous digest combining ++ vxor C0,C0,D ++ ++ C polynomial multiplication ++ vpmsumd F2,H3L,C1 ++ vpmsumd R2,H3M,C1 ++ vpmsumd F3,H2L,C2 ++ vpmsumd R3,H2M,C2 ++ vpmsumd F4,H1L,C3 ++ vpmsumd R4,H1M,C3 ++ vpmsumd F,H4L,C0 ++ vpmsumd R,H4M,C0 ++ ++ C deferred recombination of partial products ++ vxor F3,F3,F4 ++ vxor R3,R3,R4 ++ vxor F,F,F2 ++ vxor R,R,R2 ++ vxor F,F,F3 ++ vxor R,R,R3 ++ ++ C reduction ++ vpmsumd T,F,POLY_L ++ xxswapd VSR(D),VSR(F) ++ vxor R,R,T ++ vxor D,R,D ++ ++ addi DATA,DATA,0x40 ++ bdnz L4x_loop ++ ++ C restore non-volatile vector registers ++ addi 8,SP,-64 ++ lvx 20,0,8 ++ addi 8,8,16 ++ lvx 21,0,8 ++ addi 8,8,16 ++ lvx 22,0,8 ++ addi 8,8,16 ++ lvx 23,0,8 ++ ++ clrldi LENGTH,LENGTH,58 C 'set the high-order 58 bits to zeros' ++L2x: ++ C --- process 2 blocks --- ++ ++ srdi. 
7,LENGTH,5 C 'LENGTH / (2 * 16)' ++ beq L1x ++ ++ C load table elements ++ li 8,1*16 ++ li 9,2*16 ++ li 10,3*16 ++ lxvd2x VSR(H1M),0,TABLE ++ lxvd2x VSR(H1L),8,TABLE ++ lxvd2x VSR(H2M),9,TABLE ++ lxvd2x VSR(H2L),10,TABLE ++ ++ C input loading ++ li 10,0x10 ++ lxvd2x VSR(C0),0,DATA C load C0 ++ lxvd2x VSR(C1),10,DATA C load C1 ++ ++IF_LE(< ++ vperm C0,C0,C0,LE_MASK ++ vperm C1,C1,C1,LE_MASK ++>) ++ ++ C previous digest combining ++ vxor C0,C0,D ++ ++ C polynomial multiplication ++ vpmsumd F2,H1L,C1 ++ vpmsumd R2,H1M,C1 ++ vpmsumd F,H2L,C0 ++ vpmsumd R,H2M,C0 ++ ++ C deferred recombination of partial products ++ vxor F,F,F2 ++ vxor R,R,R2 ++ ++ C reduction ++ vpmsumd T,F,POLY_L ++ xxswapd VSR(D),VSR(F) ++ vxor R,R,T ++ vxor D,R,D ++ ++ addi DATA,DATA,0x20 ++ clrldi LENGTH,LENGTH,59 C 'set the high-order 59 bits to zeros' ++L1x: ++ C --- process 1 block --- ++ ++ srdi. 7,LENGTH,4 C 'LENGTH / (1 * 16)' ++ beq Lmod ++ ++ C load table elements ++ li 8,1*16 ++ lxvd2x VSR(H1M),0,TABLE ++ lxvd2x VSR(H1L),8,TABLE ++ ++ C input loading ++ lxvd2x VSR(C0),0,DATA C load C0 ++ ++IF_LE(< ++ vperm C0,C0,C0,LE_MASK ++>) ++ ++ C previous digest combining ++ vxor C0,C0,D ++ ++ C polynomial multiplication ++ vpmsumd F,H1L,C0 ++ vpmsumd R,H1M,C0 ++ ++ C reduction ++ vpmsumd T,F,POLY_L ++ xxswapd VSR(D),VSR(F) ++ vxor R,R,T ++ vxor D,R,D ++ ++ addi DATA,DATA,0x10 ++ clrldi LENGTH,LENGTH,60 C 'set the high-order 60 bits to zeros' ++Lmod: ++ C --- process the modulo bytes, padding the low-order bytes with zeros --- ++ ++ cmpldi LENGTH,0 ++ beq Ldone ++ ++ C load table elements ++ li 8,1*16 ++ lxvd2x VSR(H1M),0,TABLE ++ lxvd2x VSR(H1L),8,TABLE ++ ++ C push every modulo byte to the stack and load them with padding into vector register ++ vxor ZERO,ZERO,ZERO ++ addi 8,SP,-16 ++ stvx ZERO,0,8 ++Lstb_loop: ++ subic. 
LENGTH,LENGTH,1 ++ lbzx 7,LENGTH,DATA ++ stbx 7,LENGTH,8 ++ bne Lstb_loop ++ lxvd2x VSR(C0),0,8 ++ ++IF_LE(< ++ vperm C0,C0,C0,LE_MASK ++>) ++ ++ C previous digest combining ++ vxor C0,C0,D ++ ++ C polynomial multiplication ++ vpmsumd F,H1L,C0 ++ vpmsumd R,H1M,C0 ++ ++ C reduction ++ vpmsumd T,F,POLY_L ++ xxswapd VSR(D),VSR(F) ++ vxor R,R,T ++ vxor D,R,D ++ ++Ldone: ++ C byte-reverse of each doubleword permuting on little-endian mode ++IF_LE(< ++ vperm D,D,D,LE_MASK ++>) ++ stxvd2x VSR(D),0,X C store digest 'D' ++ ++ blr ++EPILOGUE(_nettle_gcm_hash) ++ ++.data ++ C 0xC2000000000000000000000000000001 ++.polynomial: ++.align 4 ++IF_BE(< ++.byte 0xC2 ++.rept 14 ++.byte 0x00 ++.endr ++.byte 0x01 ++>,< ++.byte 0x01 ++.rept 14 ++.byte 0x00 ++.endr ++.byte 0xC2 ++>) diff --git a/SOURCES/nettle-3.4.1-rsa-decrypt.patch b/SOURCES/nettle-3.4.1-rsa-decrypt.patch new file mode 100644 index 0000000..ecfba91 --- /dev/null +++ b/SOURCES/nettle-3.4.1-rsa-decrypt.patch @@ -0,0 +1,609 @@ +From 5646ca77ee92de0ae33e7d2e0a3383c61a4091ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Niels=20M=C3=B6ller?= +Date: Thu, 6 May 2021 21:30:23 +0200 +Subject: [PATCH 1/4] Add check that message length to _pkcs1_sec_decrypt is + valid. + +* pkcs1-sec-decrypt.c (_pkcs1_sec_decrypt): Check that message +length is valid, for given key size. +* testsuite/rsa-sec-decrypt-test.c (test_main): Add test cases for +calls to rsa_sec_decrypt specifying a too large message length. + +(cherry picked from commit 7616541e6eff73353bf682c62e3a68e4fe696707) +--- + ChangeLog | 8 ++++++++ + pkcs1-sec-decrypt.c | 4 +++- + testsuite/rsa-sec-decrypt-test.c | 17 ++++++++++++++++- + 3 files changed, 27 insertions(+), 2 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 4c7338a1..7cd0455e 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,11 @@ ++2021-05-06 Niels Möller ++ ++ Bug fixes merged from from 3.7.3 release (starting from 2021-05-06). 
++ * pkcs1-sec-decrypt.c (_pkcs1_sec_decrypt): Check that message ++ length is valid, for given key size. ++ * testsuite/rsa-sec-decrypt-test.c (test_main): Add test cases for ++ calls to rsa_sec_decrypt specifying a too large message length. ++ + 2018-12-04 Niels Möller + + * Released nettle-3.4.1. +diff --git a/pkcs1-sec-decrypt.c b/pkcs1-sec-decrypt.c +index 722044b0..02fd07e1 100644 +--- a/pkcs1-sec-decrypt.c ++++ b/pkcs1-sec-decrypt.c +@@ -64,7 +64,9 @@ _pkcs1_sec_decrypt (size_t length, uint8_t *message, + volatile int ok; + size_t i, t; + +- assert (padded_message_length >= length); ++ /* Message independent branch */ ++ if (length + 11 > padded_message_length) ++ return 0; + + t = padded_message_length - length - 1; + +diff --git a/testsuite/rsa-sec-decrypt-test.c b/testsuite/rsa-sec-decrypt-test.c +index 64f0b13c..4a9f301b 100644 +--- a/testsuite/rsa-sec-decrypt-test.c ++++ b/testsuite/rsa-sec-decrypt-test.c +@@ -55,6 +55,7 @@ rsa_decrypt_for_test(const struct rsa_public_key *pub, + #endif + + #define PAYLOAD_SIZE 50 ++#define DECRYPTED_SIZE 256 + void + test_main(void) + { +@@ -63,7 +64,7 @@ test_main(void) + struct knuth_lfib_ctx random_ctx; + + uint8_t plaintext[PAYLOAD_SIZE]; +- uint8_t decrypted[PAYLOAD_SIZE]; ++ uint8_t decrypted[DECRYPTED_SIZE]; + uint8_t verifybad[PAYLOAD_SIZE]; + unsigned n_size = 1024; + mpz_t gibberish; +@@ -98,6 +99,20 @@ test_main(void) + PAYLOAD_SIZE, decrypted, gibberish) == 1); + ASSERT (MEMEQ (PAYLOAD_SIZE, plaintext, decrypted)); + ++ ASSERT (pub.size > 10); ++ ASSERT (pub.size <= DECRYPTED_SIZE); ++ ++ /* Check that too large message length is rejected, largest ++ valid size is pub.size - 11. */ ++ ASSERT (!rsa_decrypt_for_test (&pub, &key, &random_ctx, ++ (nettle_random_func *) knuth_lfib_random, ++ pub.size - 10, decrypted, gibberish)); ++ ++ /* This case used to result in arithmetic underflow and a crash. 
*/ ++ ASSERT (!rsa_decrypt_for_test (&pub, &key, &random_ctx, ++ (nettle_random_func *) knuth_lfib_random, ++ pub.size, decrypted, gibberish)); ++ + /* bad one */ + memcpy(decrypted, verifybad, PAYLOAD_SIZE); + nettle_mpz_random_size(garbage, &random_ctx, +-- +2.31.1 + + +From 743cdf38353f6dd5d3d91eadc769106cfc116301 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Niels=20M=C3=B6ller?= +Date: Tue, 8 Jun 2021 21:30:48 +0200 +Subject: [PATCH 2/4] Fix comment typos. + +(cherry picked from commit 0a714543136de97c7fd34f1c6ac1592dc5036879) +--- + pkcs1-sec-decrypt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/pkcs1-sec-decrypt.c b/pkcs1-sec-decrypt.c +index 02fd07e1..a7f85c2e 100644 +--- a/pkcs1-sec-decrypt.c ++++ b/pkcs1-sec-decrypt.c +@@ -102,8 +102,8 @@ _pkcs1_sec_decrypt_variable(size_t *length, uint8_t *message, + + /* length is discovered in a side-channel silent way. + * not_found goes to 0 when the terminator is found. +- * offset strts at 3 as it includes the terminator and +- * the fomat bytes already */ ++ * offset starts at 3 as it includes the terminator and ++ * the format bytes already */ + offset = 3; + for (i = 2; i < padded_message_length; i++) + { +-- +2.31.1 + + +From dfce46c4540d2abf040073070cff15f9d1708050 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Niels=20M=C3=B6ller?= +Date: Tue, 8 Jun 2021 21:31:39 +0200 +Subject: [PATCH 3/4] Change _rsa_sec_compute_root_tr to take a fix input size. + +Improves consistency with _rsa_sec_compute_root, and fixes zero-input bug. 
+ +(cherry picked from commit 485b5e2820a057e873b1ba812fdb39cae4adf98c) +--- + ChangeLog | 17 +++++++++- + rsa-decrypt-tr.c | 7 ++--- + rsa-internal.h | 4 +-- + rsa-sec-decrypt.c | 9 ++++-- + rsa-sign-tr.c | 61 +++++++++++++++++------------------- + testsuite/rsa-encrypt-test.c | 14 ++++++++- + 6 files changed, 69 insertions(+), 43 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 7cd0455e..ae660fc0 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,6 +1,21 @@ +-2021-05-06 Niels Möller ++2021-05-14 Niels Möller + + Bug fixes merged from from 3.7.3 release (starting from 2021-05-06). ++ * rsa-sign-tr.c (rsa_sec_blind): Delete mn argument. ++ (_rsa_sec_compute_root_tr): Delete mn argument, instead require ++ that input size matches key size. Rearrange use of temporary ++ storage, to support in-place operation, x == m. Update all ++ callers. ++ ++ * rsa-decrypt-tr.c (rsa_decrypt_tr): Make zero-padded copy of ++ input, for calling _rsa_sec_compute_root_tr. ++ * rsa-sec-decrypt.c (rsa_sec_decrypt): Likewise. ++ ++ * testsuite/rsa-encrypt-test.c (test_main): Test calling all of ++ rsa_decrypt, rsa_decrypt_tr, and rsa_sec_decrypt with zero input. ++ ++2021-05-06 Niels Möller ++ + * pkcs1-sec-decrypt.c (_pkcs1_sec_decrypt): Check that message + length is valid, for given key size. 
+ * testsuite/rsa-sec-decrypt-test.c (test_main): Add test cases for +diff --git a/rsa-decrypt-tr.c b/rsa-decrypt-tr.c +index 5dfb91b1..c118e852 100644 +--- a/rsa-decrypt-tr.c ++++ b/rsa-decrypt-tr.c +@@ -52,14 +52,13 @@ rsa_decrypt_tr(const struct rsa_public_key *pub, + mp_size_t key_limb_size; + int res; + +- key_limb_size = NETTLE_OCTET_SIZE_TO_LIMB_SIZE(key->size); ++ key_limb_size = mpz_size(pub->n); + + TMP_GMP_ALLOC (m, key_limb_size); + TMP_GMP_ALLOC (em, key->size); ++ mpz_limbs_copy(m, gibberish, key_limb_size); + +- res = _rsa_sec_compute_root_tr (pub, key, random_ctx, random, m, +- mpz_limbs_read(gibberish), +- mpz_size(gibberish)); ++ res = _rsa_sec_compute_root_tr (pub, key, random_ctx, random, m, m); + + mpn_get_base256 (em, key->size, m, key_limb_size); + +diff --git a/rsa-internal.h b/rsa-internal.h +index bd667bc2..64a7edf6 100644 +--- a/rsa-internal.h ++++ b/rsa-internal.h +@@ -53,12 +53,12 @@ _rsa_sec_compute_root(const struct rsa_private_key *key, + mp_limb_t *scratch); + + /* Safe side-channel silent variant, using RSA blinding, and checking the +- * result after CRT. */ ++ * result after CRT. In-place calls, with x == m, is allowed. */ + int + _rsa_sec_compute_root_tr(const struct rsa_public_key *pub, + const struct rsa_private_key *key, + void *random_ctx, nettle_random_func *random, +- mp_limb_t *x, const mp_limb_t *m, size_t mn); ++ mp_limb_t *x, const mp_limb_t *m); + + /* additional resistance to memory access side-channel attacks. 
+ * Note: message buffer is returned unchanged on error */ +diff --git a/rsa-sec-decrypt.c b/rsa-sec-decrypt.c +index e6a4b267..633a6852 100644 +--- a/rsa-sec-decrypt.c ++++ b/rsa-sec-decrypt.c +@@ -57,9 +57,12 @@ rsa_sec_decrypt(const struct rsa_public_key *pub, + TMP_GMP_ALLOC (m, mpz_size(pub->n)); + TMP_GMP_ALLOC (em, key->size); + +- res = _rsa_sec_compute_root_tr (pub, key, random_ctx, random, m, +- mpz_limbs_read(gibberish), +- mpz_size(gibberish)); ++ /* We need a copy because m can be shorter than key_size, ++ * but _rsa_sec_compute_root_tr expect all inputs to be ++ * normalized to a key_size long buffer length */ ++ mpz_limbs_copy(m, gibberish, mpz_size(pub->n)); ++ ++ res = _rsa_sec_compute_root_tr (pub, key, random_ctx, random, m, m); + + mpn_get_base256 (em, key->size, m, mpz_size(pub->n)); + +diff --git a/rsa-sign-tr.c b/rsa-sign-tr.c +index 59c9bd07..141a52c7 100644 +--- a/rsa-sign-tr.c ++++ b/rsa-sign-tr.c +@@ -131,35 +131,34 @@ int + _rsa_sec_compute_root_tr(const struct rsa_public_key *pub, + const struct rsa_private_key *key, + void *random_ctx, nettle_random_func *random, +- mp_limb_t *x, const mp_limb_t *m, size_t mn) ++ mp_limb_t *x, const mp_limb_t *m) + { ++ mp_size_t nn; + mpz_t mz; + mpz_t xz; + int res; + +- mpz_init(mz); + mpz_init(xz); + +- mpn_copyi(mpz_limbs_write(mz, mn), m, mn); +- mpz_limbs_finish(mz, mn); ++ nn = mpz_size (pub->n); + +- res = rsa_compute_root_tr(pub, key, random_ctx, random, xz, mz); ++ res = rsa_compute_root_tr(pub, key, random_ctx, random, xz, ++ mpz_roinit_n(mz, m, nn)); + + if (res) +- mpz_limbs_copy(x, xz, mpz_size(pub->n)); ++ mpz_limbs_copy(x, xz, nn); + +- mpz_clear(mz); + mpz_clear(xz); + return res; + } + #else + /* Blinds m, by computing c = m r^e (mod n), for a random r. Also +- returns the inverse (ri), for use by rsa_unblind. */ ++ returns the inverse (ri), for use by rsa_unblind. 
Must have c != m, ++ no in-place operation.*/ + static void + rsa_sec_blind (const struct rsa_public_key *pub, + void *random_ctx, nettle_random_func *random, +- mp_limb_t *c, mp_limb_t *ri, const mp_limb_t *m, +- mp_size_t mn) ++ mp_limb_t *c, mp_limb_t *ri, const mp_limb_t *m) + { + const mp_limb_t *ep = mpz_limbs_read (pub->e); + const mp_limb_t *np = mpz_limbs_read (pub->n); +@@ -177,15 +176,15 @@ rsa_sec_blind (const struct rsa_public_key *pub, + + /* c = m*(r^e) mod n */ + itch = mpn_sec_powm_itch(nn, ebn, nn); +- i2 = mpn_sec_mul_itch(nn, mn); ++ i2 = mpn_sec_mul_itch(nn, nn); + itch = MAX(itch, i2); +- i2 = mpn_sec_div_r_itch(nn + mn, nn); ++ i2 = mpn_sec_div_r_itch(2*nn, nn); + itch = MAX(itch, i2); + i2 = mpn_sec_invert_itch(nn); + itch = MAX(itch, i2); + +- TMP_GMP_ALLOC (tp, nn + mn + itch); +- scratch = tp + nn + mn; ++ TMP_GMP_ALLOC (tp, 2*nn + itch); ++ scratch = tp + 2*nn; + + /* ri = r^(-1) */ + do +@@ -198,9 +197,8 @@ rsa_sec_blind (const struct rsa_public_key *pub, + while (!mpn_sec_invert (ri, tp, np, nn, 2 * nn * GMP_NUMB_BITS, scratch)); + + mpn_sec_powm (c, rp, nn, ep, ebn, np, nn, scratch); +- /* normally mn == nn, but m can be smaller in some cases */ +- mpn_sec_mul (tp, c, nn, m, mn, scratch); +- mpn_sec_div_r (tp, nn + mn, np, nn, scratch); ++ mpn_sec_mul (tp, c, nn, m, nn, scratch); ++ mpn_sec_div_r (tp, 2*nn, np, nn, scratch); + mpn_copyi(c, tp, nn); + + TMP_GMP_FREE (r); +@@ -208,7 +206,7 @@ rsa_sec_blind (const struct rsa_public_key *pub, + TMP_GMP_FREE (tp); + } + +-/* m = c ri mod n */ ++/* m = c ri mod n. Allows x == c. 
*/ + static void + rsa_sec_unblind (const struct rsa_public_key *pub, + mp_limb_t *x, mp_limb_t *ri, const mp_limb_t *c) +@@ -298,7 +296,7 @@ int + _rsa_sec_compute_root_tr(const struct rsa_public_key *pub, + const struct rsa_private_key *key, + void *random_ctx, nettle_random_func *random, +- mp_limb_t *x, const mp_limb_t *m, size_t mn) ++ mp_limb_t *x, const mp_limb_t *m) + { + TMP_GMP_DECL (c, mp_limb_t); + TMP_GMP_DECL (ri, mp_limb_t); +@@ -306,7 +304,7 @@ _rsa_sec_compute_root_tr(const struct rsa_public_key *pub, + size_t key_limb_size; + int ret; + +- key_limb_size = NETTLE_OCTET_SIZE_TO_LIMB_SIZE(key->size); ++ key_limb_size = mpz_size(pub->n); + + /* mpz_powm_sec handles only odd moduli. If p, q or n is even, the + key is invalid and rejected by rsa_private_key_prepare. However, +@@ -320,19 +318,18 @@ _rsa_sec_compute_root_tr(const struct rsa_public_key *pub, + } + + assert(mpz_size(pub->n) == key_limb_size); +- assert(mn <= key_limb_size); + + TMP_GMP_ALLOC (c, key_limb_size); + TMP_GMP_ALLOC (ri, key_limb_size); + TMP_GMP_ALLOC (scratch, _rsa_sec_compute_root_itch(key)); + +- rsa_sec_blind (pub, random_ctx, random, x, ri, m, mn); ++ rsa_sec_blind (pub, random_ctx, random, c, ri, m); + +- _rsa_sec_compute_root(key, c, x, scratch); ++ _rsa_sec_compute_root(key, x, c, scratch); + +- ret = rsa_sec_check_root(pub, c, x); ++ ret = rsa_sec_check_root(pub, x, c); + +- rsa_sec_unblind(pub, x, ri, c); ++ rsa_sec_unblind(pub, x, ri, x); + + cnd_mpn_zero(1 - ret, x, key_limb_size); + +@@ -356,17 +353,17 @@ rsa_compute_root_tr(const struct rsa_public_key *pub, + mpz_t x, const mpz_t m) + { + TMP_GMP_DECL (l, mp_limb_t); ++ mp_size_t nn = mpz_size(pub->n); + int res; + +- mp_size_t l_size = NETTLE_OCTET_SIZE_TO_LIMB_SIZE(key->size); +- TMP_GMP_ALLOC (l, l_size); ++ TMP_GMP_ALLOC (l, nn); ++ mpz_limbs_copy(l, m, nn); + +- res = _rsa_sec_compute_root_tr (pub, key, random_ctx, random, l, +- mpz_limbs_read(m), mpz_size(m)); ++ res = _rsa_sec_compute_root_tr (pub, key, 
random_ctx, random, l, l); + if (res) { +- mp_limb_t *xp = mpz_limbs_write (x, l_size); +- mpn_copyi (xp, l, l_size); +- mpz_limbs_finish (x, l_size); ++ mp_limb_t *xp = mpz_limbs_write (x, nn); ++ mpn_copyi (xp, l, nn); ++ mpz_limbs_finish (x, nn); + } + + TMP_GMP_FREE (l); +diff --git a/testsuite/rsa-encrypt-test.c b/testsuite/rsa-encrypt-test.c +index 87525f78..d3bc374b 100644 +--- a/testsuite/rsa-encrypt-test.c ++++ b/testsuite/rsa-encrypt-test.c +@@ -19,6 +19,7 @@ test_main(void) + uint8_t after; + + mpz_t gibberish; ++ mpz_t zero; + + rsa_private_key_init(&key); + rsa_public_key_init(&pub); +@@ -101,6 +102,17 @@ test_main(void) + ASSERT(decrypted[decrypted_length] == after); + ASSERT(decrypted[0] == 'A'); + ++ /* Test zero input. */ ++ mpz_init_set_ui (zero, 0); ++ decrypted_length = msg_length; ++ ASSERT(!rsa_decrypt(&key, &decrypted_length, decrypted, zero)); ++ ASSERT(!rsa_decrypt_tr(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ &decrypted_length, decrypted, zero)); ++ ASSERT(!rsa_sec_decrypt(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ decrypted_length, decrypted, zero)); ++ ASSERT(decrypted_length == msg_length); + + /* Test invalid key. */ + mpz_add_ui (key.q, key.q, 2); +@@ -112,6 +124,6 @@ test_main(void) + rsa_private_key_clear(&key); + rsa_public_key_clear(&pub); + mpz_clear(gibberish); ++ mpz_clear(zero); + free(decrypted); + } +- +-- +2.31.1 + + +From f601611b3c315aba373c0ab2ddf24772e88c1b3e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Niels=20M=C3=B6ller?= +Date: Tue, 8 Jun 2021 21:32:38 +0200 +Subject: [PATCH 4/4] Add input check to rsa_decrypt family of functions. 
+ +(cherry picked from commit 0ad0b5df315665250dfdaa4a1e087f4799edaefe) +--- + ChangeLog | 10 +++++++++- + rsa-decrypt-tr.c | 4 ++++ + rsa-decrypt.c | 10 ++++++++++ + rsa-sec-decrypt.c | 4 ++++ + rsa.h | 5 +++-- + testsuite/rsa-encrypt-test.c | 38 ++++++++++++++++++++++++++++++------ + 6 files changed, 62 insertions(+), 9 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index ae660fc0..27f022db 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,6 +1,14 @@ +-2021-05-14 Niels Möller ++2021-05-17 Niels Möller + + Bug fixes merged from from 3.7.3 release (starting from 2021-05-06). ++ * rsa-decrypt-tr.c (rsa_decrypt_tr): Check up-front that input is ++ in range. ++ * rsa-sec-decrypt.c (rsa_sec_decrypt): Likewise. ++ * rsa-decrypt.c (rsa_decrypt): Likewise. ++ * testsuite/rsa-encrypt-test.c (test_main): Add tests with input > n. ++ ++2021-05-14 Niels Möller ++ + * rsa-sign-tr.c (rsa_sec_blind): Delete mn argument. + (_rsa_sec_compute_root_tr): Delete mn argument, instead require + that input size matches key size. Rearrange use of temporary +diff --git a/rsa-decrypt-tr.c b/rsa-decrypt-tr.c +index c118e852..1ba3d286 100644 +--- a/rsa-decrypt-tr.c ++++ b/rsa-decrypt-tr.c +@@ -52,6 +52,10 @@ rsa_decrypt_tr(const struct rsa_public_key *pub, + mp_size_t key_limb_size; + int res; + ++ /* First check that input is in range. */ ++ if (mpz_sgn (gibberish) < 0 || mpz_cmp (gibberish, pub->n) >= 0) ++ return 0; ++ + key_limb_size = mpz_size(pub->n); + + TMP_GMP_ALLOC (m, key_limb_size); +diff --git a/rsa-decrypt.c b/rsa-decrypt.c +index 7681439d..540d8baa 100644 +--- a/rsa-decrypt.c ++++ b/rsa-decrypt.c +@@ -48,6 +48,16 @@ rsa_decrypt(const struct rsa_private_key *key, + int res; + + mpz_init(m); ++ ++ /* First check that input is in range. Since we don't have the ++ public key available here, we need to reconstruct n. 
*/ ++ mpz_mul (m, key->p, key->q); ++ if (mpz_sgn (gibberish) < 0 || mpz_cmp (gibberish, m) >= 0) ++ { ++ mpz_clear (m); ++ return 0; ++ } ++ + rsa_compute_root(key, m, gibberish); + + res = pkcs1_decrypt (key->size, m, length, message); +diff --git a/rsa-sec-decrypt.c b/rsa-sec-decrypt.c +index 633a6852..53113c69 100644 +--- a/rsa-sec-decrypt.c ++++ b/rsa-sec-decrypt.c +@@ -54,6 +54,10 @@ rsa_sec_decrypt(const struct rsa_public_key *pub, + TMP_GMP_DECL (em, uint8_t); + int res; + ++ /* First check that input is in range. */ ++ if (mpz_sgn (gibberish) < 0 || mpz_cmp (gibberish, pub->n) >= 0) ++ return 0; ++ + TMP_GMP_ALLOC (m, mpz_size(pub->n)); + TMP_GMP_ALLOC (em, key->size); + +diff --git a/rsa.h b/rsa.h +index 0aac6a26..54c35688 100644 +--- a/rsa.h ++++ b/rsa.h +@@ -433,13 +433,14 @@ rsa_sec_decrypt(const struct rsa_public_key *pub, + size_t length, uint8_t *message, + const mpz_t gibberish); + +-/* Compute x, the e:th root of m. Calling it with x == m is allowed. */ ++/* Compute x, the e:th root of m. Calling it with x == m is allowed. ++ It is required that 0 <= m < n. */ + void + rsa_compute_root(const struct rsa_private_key *key, + mpz_t x, const mpz_t m); + + /* Safer variant, using RSA blinding, and checking the result after +- CRT. */ ++ CRT. It is required that 0 <= m < n. */ + int + rsa_compute_root_tr(const struct rsa_public_key *pub, + const struct rsa_private_key *key, +diff --git a/testsuite/rsa-encrypt-test.c b/testsuite/rsa-encrypt-test.c +index d3bc374b..d1a440f6 100644 +--- a/testsuite/rsa-encrypt-test.c ++++ b/testsuite/rsa-encrypt-test.c +@@ -19,11 +19,12 @@ test_main(void) + uint8_t after; + + mpz_t gibberish; +- mpz_t zero; ++ mpz_t bad_input; + + rsa_private_key_init(&key); + rsa_public_key_init(&pub); + mpz_init(gibberish); ++ mpz_init(bad_input); + + knuth_lfib_init(&lfib, 17); + +@@ -103,15 +104,40 @@ test_main(void) + ASSERT(decrypted[0] == 'A'); + + /* Test zero input. 
*/ +- mpz_init_set_ui (zero, 0); ++ mpz_set_ui (bad_input, 0); + decrypted_length = msg_length; +- ASSERT(!rsa_decrypt(&key, &decrypted_length, decrypted, zero)); ++ ASSERT(!rsa_decrypt(&key, &decrypted_length, decrypted, bad_input)); + ASSERT(!rsa_decrypt_tr(&pub, &key, + &lfib, (nettle_random_func *) knuth_lfib_random, +- &decrypted_length, decrypted, zero)); ++ &decrypted_length, decrypted, bad_input)); + ASSERT(!rsa_sec_decrypt(&pub, &key, + &lfib, (nettle_random_func *) knuth_lfib_random, +- decrypted_length, decrypted, zero)); ++ decrypted_length, decrypted, bad_input)); ++ ASSERT(decrypted_length == msg_length); ++ ++ /* Test input that is slightly larger than n */ ++ mpz_add(bad_input, gibberish, pub.n); ++ decrypted_length = msg_length; ++ ASSERT(!rsa_decrypt(&key, &decrypted_length, decrypted, bad_input)); ++ ASSERT(!rsa_decrypt_tr(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ &decrypted_length, decrypted, bad_input)); ++ ASSERT(!rsa_sec_decrypt(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ decrypted_length, decrypted, bad_input)); ++ ASSERT(decrypted_length == msg_length); ++ ++ /* Test input that is considerably larger than n */ ++ mpz_mul_2exp (bad_input, pub.n, 100); ++ mpz_add (bad_input, bad_input, gibberish); ++ decrypted_length = msg_length; ++ ASSERT(!rsa_decrypt(&key, &decrypted_length, decrypted, bad_input)); ++ ASSERT(!rsa_decrypt_tr(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ &decrypted_length, decrypted, bad_input)); ++ ASSERT(!rsa_sec_decrypt(&pub, &key, ++ &lfib, (nettle_random_func *) knuth_lfib_random, ++ decrypted_length, decrypted, bad_input)); + ASSERT(decrypted_length == msg_length); + + /* Test invalid key. 
*/ +@@ -124,6 +150,6 @@ test_main(void) + rsa_private_key_clear(&key); + rsa_public_key_clear(&pub); + mpz_clear(gibberish); +- mpz_clear(zero); ++ mpz_clear(bad_input); + free(decrypted); + } +-- +2.31.1 + diff --git a/SPECS/nettle.spec b/SPECS/nettle.spec index 814ab18..5bde11d 100644 --- a/SPECS/nettle.spec +++ b/SPECS/nettle.spec @@ -2,7 +2,7 @@ Name: nettle Version: 3.4.1 -Release: 2%{?dist} +Release: 7%{?dist} Summary: A low-level cryptographic library Group: Development/Libraries @@ -13,6 +13,11 @@ Source0: %{name}-%{version}-hobbled.tar.xz Patch0: nettle-3.3-remove-ecc-testsuite.patch Patch1: nettle-3.4-annocheck.patch Patch2: nettle-3.4.1-enable-intel-cet.patch +# https://lists.lysator.liu.se/pipermail/nettle-bugs/2021/009458.html +Patch3: nettle-3.4.1-ecdsa-verify.patch +Patch4: nettle-3.4.1-powerpc64-aes-asm.patch +Patch5: nettle-3.4.1-powerpc64-ghash-asm.patch +Patch6: nettle-3.4.1-rsa-decrypt.patch BuildRequires: gcc BuildRequires: gmp-devel, m4 @@ -54,6 +59,10 @@ sed 's/ecc-224.c//g' -i Makefile.in %patch0 -p1 %patch1 -p1 %patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 %build autoreconf -ifv @@ -126,6 +135,22 @@ fi %changelog +* Wed Jul 14 2021 Daiki Ueno - 3.4.1-7 +- Backport CVE-2021-3580 from upstream 3.7.3 release (#1967990) + +* Wed Jul 14 2021 Daiki Ueno - 3.4.1-6 +- Enable CTR mode optimization when the block size is 16 + +* Wed Jun 30 2021 Daiki Ueno - 3.4.1-5 +- Backport powerpc64 optimization patches from upstream (#1855228) + Patch from Christopher M. Riedl. + +* Wed Apr 7 2021 Daiki Ueno - 3.4.1-4 +- Fix patch application + +* Tue Mar 30 2021 Daiki Ueno - 3.4.1-3 +- Port fixes for potential miscalculation in ecdsa_verify (#1942925) + * Fri May 15 2020 Anderson Sasaki - 3.4.1-2 - Enable Intel CET support (#1737542)